<a href="https://colab.research.google.com/github/Saswata020/LLM_CHAT/blob/main/Finance_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os  # Importing the 'os' module for operating system-related functionality
import streamlit as st  # Importing the 'streamlit' library and naming it as 'st' for creating web applications
import pickle  # Importing 'pickle' for serializing and deserializing Python objects
import time  # Importing the 'time' module for time-related functions
from langchain import OpenAI  # Importing 'OpenAI' from the 'langchain' package
from langchain.chains import RetrievalQAWithSourcesChain  # Importing 'RetrievalQAWithSourcesChain' from 'langchain.chains'
from langchain.text_splitter import RecursiveCharacterTextSplitter  # Importing 'RecursiveCharacterTextSplitter' for text splitting
from langchain.document_loaders import UnstructuredURLLoader  # Importing 'UnstructuredURLLoader' for loading data from URLs
from langchain.embeddings import OpenAIEmbeddings  # Importing 'OpenAIEmbeddings' for embeddings
from langchain.vectorstores import FAISS  # Importing 'FAISS' for vector storage

from dotenv import load_dotenv  # Importing 'load_dotenv' from 'dotenv' to load environment variables
load_dotenv()  # Loading environment variables from a .env file (e.g., an OpenAI API key)

# Page chrome: main title plus a sidebar section for the article links.
st.title("RockyBot: News Research Tool 📈")
st.sidebar.title("News Article URLs")

# One sidebar text box per article, labelled "URL 1" .. "URL 3".
urls = [st.sidebar.text_input(f"URL {slot}") for slot in range(1, 4)]

process_url_clicked = st.sidebar.button("Process URLs")

# Pickle file shared by the indexing step (writer) and the Q&A step (reader).
file_path = "faiss_store_openai.pkl"

# Single slot in the main area, reused for status text and the question box.
main_placeholder = st.empty()
llm = OpenAI(temperature=0.9, max_tokens=100)

# Indexing step: fetch the submitted articles, split them into chunks, embed
# the chunks into a FAISS index and persist that index to `file_path`.
if process_url_clicked:
    # Sidebar fields the user left blank come through as empty strings; drop
    # them so the loader is only asked to fetch real entries.
    submitted_urls = [u.strip() for u in urls if u and u.strip()]

    if not submitted_urls:
        st.sidebar.error("Please enter at least one URL before processing.")
    else:
        main_placeholder.text("Data Loading...Started...✅✅✅")
        loader = UnstructuredURLLoader(urls=submitted_urls)
        data = loader.load()

        main_placeholder.text("Text Splitter...Started...✅✅✅")
        text_splitter = RecursiveCharacterTextSplitter(
            separators=['\n\n', '\n', '.', ','],
            chunk_size=1000,
        )
        docs = text_splitter.split_documents(data)

        if not docs:
            # Nothing was extracted (unreachable or empty pages); building an
            # index from zero documents is not meaningful, so report it
            # instead of continuing.
            st.sidebar.error("No text could be extracted from the given URLs.")
        else:
            # Announce the embedding phase *before* the slow call so the
            # status reflects what is in progress; this also removes the need
            # for an artificial sleep to keep the message on screen.
            main_placeholder.text("Embedding Vector Started Building...✅✅✅")
            embeddings = OpenAIEmbeddings()
            vectorstore_openai = FAISS.from_documents(docs, embeddings)

            # Persist the index so later reruns of the script can answer
            # questions without re-fetching and re-embedding the articles.
            with open(file_path, "wb") as f:
                pickle.dump(vectorstore_openai, f)

# Q&A step: read a question, answer it from the persisted FAISS index and
# list the sources the chain reports.
query = main_placeholder.text_input("Question: ")
if query:
    if not os.path.exists(file_path):
        # Without this the app silently shows nothing when a question is
        # asked before any URLs have been processed.
        st.warning(
            "No processed articles found. Enter URLs in the sidebar and "
            "click 'Process URLs' first."
        )
    else:
        # SECURITY NOTE: pickle.load can execute arbitrary code from the file.
        # This is acceptable only because the file is produced by this app;
        # never point `file_path` at a file from an untrusted origin.
        with open(file_path, "rb") as f:
            vectorstore = pickle.load(f)

        # The file handle is closed before the (slow) LLM round-trip.
        chain = RetrievalQAWithSourcesChain.from_llm(
            llm=llm,
            retriever=vectorstore.as_retriever(),
        )
        result = chain({"question": query}, return_only_outputs=True)

        st.header("Answer")
        st.write(result["answer"])

        # "sources" is a newline-separated string; skip blank lines so empty
        # entries are not rendered.
        sources = result.get("sources", "")
        source_lines = [line for line in sources.split("\n") if line.strip()]
        if source_lines:
            st.subheader("Sources:")
            for source in source_lines:
                st.write(source)


In [None]:
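# How the app was launched locally (machine-specific paths, kept for reference).
# Step 1 - change into the project directory:
#cd C:\Users\SASWATA\PycharmProjects\LLM


# Step 2 - start Streamlit on the script (should be roughly equivalent to
# `python -m streamlit run main.py`):
#python C:\Users\SASWATA\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\streamlit run main.py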