In [2]:
import streamlit as st
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings,ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough,RunnableParallel
from langchain_core.prompts import ChatPromptTemplate,PromptTemplate,MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain,create_history_aware_retriever
from langchain_core.messages import HumanMessage
from PyPDF2 import PdfReader
from tempfile import NamedTemporaryFile

from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory


from langchain_core.runnables.history import RunnableWithMessageHistory


import dotenv
dotenv.load_dotenv()

True

In [9]:
llm=ChatOpenAI(openai_api_key=os.getenv('OPENAI_API_KEY'),model="gpt-3.5-turbo")
embeddings=OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
store={}
def get_and_convert_pdf(input_pdf):
    bytes_data = input_pdf.read()
    with NamedTemporaryFile(delete=False) as tmp: 
        tmp.write(bytes_data)                      
        data = PyPDFLoader(tmp.name).load_and_split()
    os.remove(tmp.name)
    return data 

In [10]:
def get_chunks(docs):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    split= text_splitter.split_documents(docs)
    return split
    #for page in pages:
    #    text+=page.extract_text(page)
    #return text

def create_Vectorstore(chunks):
    
    vectorstore=FAISS.from_documents(chunks,embeddings)
    vectorstore.save_local('faiss_index')


def get_rag_chain(vectorstore):

    contextualize_q_system_prompt = """Given a chat history and the latest user question \
    which might reference context in the chat history, formulate a standalone question \
    which can be understood without the chat history. Do NOT answer the question, \
    just reformulate it if needed and otherwise return it as is."""
    retriever=vectorstore.as_retriever()
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )
    history_aware_retriever = create_history_aware_retriever(
        llm, retriever, contextualize_q_prompt
    )

    qa_system_prompt="""
    you are the assistant for question and answering tasks. use the following given retrieved context to answer the question.if you dont know the 
    answer just say that you don't know. give answer simply in three lines.
    {context}
    """
    qa_prompt=ChatPromptTemplate.from_messages(
        [
            ('system',qa_system_prompt),
            MessagesPlaceholder('chat_history'),
            ('human',"{input}"),
        ]
    )
    question_answer_chain=create_stuff_documents_chain(llm,qa_prompt)

    rag_chain=create_retrieval_chain(history_aware_retriever,question_answer_chain)

    return rag_chain

In [14]:
def create_conversation_rag_chain(vectorstore):
    rag_chain=get_rag_chain(vectorstore)

    
    

    def get_session_history(session_id: str) -> BaseChatMessageHistory:
        if session_id not in store:
            print('0')
            store[session_id] = ChatMessageHistory()
        print('1')
        return store[session_id]

    conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
    )

    return conversational_rag_chain




In [15]:
database = FAISS.load_local("faiss_index",embeddings,allow_dangerous_deserialization=True)
conversational_rag_chain=create_conversation_rag_chain(database)
session_id="abc123"
user_question="what is hyperparameter tuning"
get_response(user_question,conversational_rag_chain,session_id)

1


"Hyperparameter tuning is the process of selecting the best set of hyperparameters for a machine learning algorithm. Hyperparameters are parameters that are not learned during training and must be set before the learning process begins. Tuning these hyperparameters involves finding the optimal values to improve the model's performance."

In [16]:
database = FAISS.load_local("faiss_index",embeddings,allow_dangerous_deserialization=True)
conversational_rag_chain=create_conversation_rag_chain(database)
session_id="abc123"
user_question="what are common ways of doing it"
get_response(user_question,conversational_rag_chain,session_id)

1


'Common ways of hyperparameter tuning include techniques like Grid Search, Random Search, Bayesian Optimization, and more recently, techniques like Genetic Algorithms and Neural Architecture Search. These methods help in systematically exploring the hyperparameter space to find the best configuration for a machine learning model.'

In [19]:
store['abc123']

ChatMessageHistory(messages=[HumanMessage(content='what is hyperparameter tuning'), AIMessage(content='Hyperparameter tuning is the process of selecting the best set of hyperparameters for a machine learning algorithm. Hyperparameters are parameters that are set before the learning process begins, and tuning them involves finding the optimal values to improve the performance of the model. Techniques like grid search, random search, and Bayesian optimization are commonly used for hyperparameter tuning.'), HumanMessage(content='what are common ways of doing it'), AIMessage(content='Common ways of hyperparameter tuning include:\n1. Grid Search: Exhaustively searching through a manually specified subset of hyperparameters to find the best combination.\n2. Random Search: Randomly sampling hyperparameter combinations for evaluation.\n3. Bayesian Optimization: Using probabilistic models to predict the performance of different hyperparameter configurations and selecting the most promising ones

In [6]:
def main():
    st.set_page_config("Chat with multiple PDFS")
    st.header("Chat with PDF using OpenAI")
    preprocess()
    database = FAISS.load_local("faiss_index",embeddings,allow_dangerous_deserialization=True)
    conversational_rag_chain=create_conversation_rag_chain(database)
    session_id="abc123"
    user_question = st.text_input("Ask question related to the given context")
    if user_question:
        submit=st.button('submit')
        if submit:
            with st.spinner("processing..."):
                st.write(get_response(user_question,conversational_rag_chain,session_id))
                st.success("done")
            

if __name__=='__main__':
    main()

2024-04-24 17:54:02.948 
  command:

    streamlit run c:\Users\user\Desktop\langchain\RAG ChatBot\venv\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [4]:
pip install -U langchain-community streamlit

Note: you may need to restart the kernel to use updated packages.


In [4]:
import streamlit as st
from langchain_community.chat_message_histories import (
    StreamlitChatMessageHistory,
)
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []
history = StreamlitChatMessageHistory(key='chat_history')

history.add_user_message("hi!")
history.add_ai_message("whats up?")

KeyError: 'st.session_state has no key "chat_history". Did you forget to initialize it? More info: https://docs.streamlit.io/library/advanced-features/session-state#initialization'

In [7]:
import streamlit as st
# Now import and use StreamlitChatMessageHistory
from langchain_community.chat_message_histories import StreamlitChatMessageHistory

history = StreamlitChatMessageHistory(key='chat_history')

history.add_user_message("hi!")
history.add_ai_message("whats up?")


KeyError: 'st.session_state has no key "chat_history". Did you forget to initialize it? More info: https://docs.streamlit.io/library/advanced-features/session-state#initialization'