In [1]:
!pip install langchain langchain-google-genai langchain-community faiss-cpu gpt4all pypdf -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.4/40.4 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m47.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.6/121.6 MB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.5/294.5 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m160.8/160.8 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# RAG application that stores the history of each prompt so that later prompts can refer to it

In [3]:
# Importing all the required libraries
from pypdf import PdfReader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.docstore import InMemoryDocstore
import faiss
from langchain_google_genai import GoogleGenerativeAI
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# from langchain.memory import ConversationBufferMemory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
import os


# Defining the class RAG_PDF
class RAG_PDF:
    """
    Conversational retrieval-augmented generation (RAG) over the text of a pdf file.

    The class reads the pdf file, chunks its text into smaller overlapping pieces,
    converts the chunks into embeddings and stores them in a FAISS vector store that
    is saved on disk. It then loads the LLM model and creates a conversational RAG
    chain whose chat history is kept per session id in ``store``.

    Note: the saved vector store is not tied to ``pdf_file``. If ``vector_store_dir``
    already holds a saved index it is reused as is, so use a different directory for
    each document.

    :param pdf_file: Path of the pdf file to be read. Only needed when no saved
        vector store exists in ``vector_store_dir`` yet, defaults to ""
    :type pdf_file: str

    :param vector_store_dir: Directory the FAISS vector store is saved to and
        loaded from, defaults to "vector_store"
    :type vector_store_dir: str

    :ivar store: Dictionary mapping a session id to its chat message history
    :ivar text: The text extracted from the pdf file
    :ivar user_prompt: The most recent user prompt
    :ivar system_prompt: The system prompt of the question-answering step
    :ivar embeddings: The embedding model (GPT4AllEmbeddings)
    :ivar prompt: History-free prompt template (system prompt + user input)
    :ivar splits: The text chunked into smaller pieces (set by ``get_text_chunk``)
    :ivar vector_store: The FAISS vector store holding the embeddings
    :ivar retriever: The retriever created from ``vector_store``
    :ivar llm: The loaded LLM model (GoogleGenerativeAI)
    :ivar contextualize_q_system_prompt: System prompt used to rewrite a follow-up
        question into a standalone question
    :ivar contextualize_q_prompt: Prompt template built from the prompt above
    :ivar history_aware_retriever: Retriever that takes the chat history into account
    :ivar qa_prompt: Prompt template of the question-answering step
    :ivar question_answer_chain: The chain that answers from the retrieved documents
    :ivar rag_chain: Retrieval chain combining retriever and answer chain
    :ivar conversational_rag_chain: ``rag_chain`` wrapped with message history
    """

    def __init__(self, pdf_file="", vector_store_dir="vector_store"):
        self.store = {}
        self.pdf_file = pdf_file
        self.vector_store_dir = vector_store_dir
        self.text = ""
        self.user_prompt = ""
        # Single source of truth for the QA system prompt; context_aware_retrieval
        # builds its prompt template from this attribute.
        self.system_prompt = (
            "You are an assistant for question-answering tasks. "
            "Use the following pieces of retrieved context to answer "
            "the question. If you don't know the answer, say that you "
            "don't know. Use ten sentences maximum and keep the "
            "answer detailed."
            "\n\n"
            "{context}"
        )
        self.embeddings = GPT4AllEmbeddings()

        # History-free prompt; the conversational chain uses qa_prompt instead.
        self.prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                ("human", "{input}"),
            ]
        )

    def get_pdf_text(self):
        """
        This function reads the pdf file and extracts the text from it.

        The extracted text replaces ``self.text``, so calling the function more
        than once does not duplicate the document.

        :return: The text extracted from the pdf file
        :rtype: str

        :raises ValueError: If no pdf file was provided
        """
        if not self.pdf_file:
            raise ValueError("Please provide a pdf file")

        pdf_reader = PdfReader(self.pdf_file)
        # `or ""` guards against pages for which no text could be extracted.
        self.text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
        return self.text

    def get_text_chunk(self):
        """
        This function chunks the text into smaller pieces.

        The chunks (at most 1000 characters, 200 characters overlap) are stored
        in ``self.splits``.
        """
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.splits = text_splitter.split_text(self.text)

    def get_text_embeddings(self):
        """
        This function converts the text chunks into embeddings, stores them in a
        vector store saved to ``vector_store_dir`` and also creates the retriever.

        :raises ValueError: If there are no text chunks to embed
        """
        # Checked first so that an empty index is never built or saved to disk.
        if not getattr(self, "splits", None):
            raise ValueError(
                "No text chunks to embed; make sure the pdf file contains extractable text"
            )

        # The index dimension is taken from the length of a sample embedding.
        index = faiss.IndexFlatL2(len(self.embeddings.embed_query("Hello World")))

        self.vector_store = FAISS(
            embedding_function=self.embeddings,
            index=index,
            docstore=InMemoryDocstore(),
            index_to_docstore_id={},
        )
        self.vector_store.add_texts(texts=self.splits)
        self.vector_store.save_local(self.vector_store_dir)
        self.retriever = self.vector_store.as_retriever()

    def load_llm(self):
        """
        This function loads the LLM model.

        The API key is read from the ``GOOGLE_API_KEY`` environment variable so
        that the credential is never stored in the notebook.

        :raises ValueError: If ``GOOGLE_API_KEY`` is not set
        """
        api_key = os.environ.get("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("Please set the GOOGLE_API_KEY environment variable")
        self.llm = GoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=api_key)

    def context_aware_retrieval(self):
        """
        This function creates a history aware retriever and the question answer chain.

        Requires ``self.llm`` and ``self.retriever`` to be set beforehand.
        """
        self.contextualize_q_system_prompt = (
            "Given a chat history and the latest user question "
            "which might reference context in the chat history, "
            "formulate a standalone question which can be understood "
            "without the chat history. Do NOT answer the question, "
            "just reformulate it if needed and otherwise return it as is."
        )

        self.contextualize_q_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.contextualize_q_system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )

        self.history_aware_retriever = create_history_aware_retriever(
            self.llm, self.retriever, self.contextualize_q_prompt
        )

        self.qa_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        self.question_answer_chain = create_stuff_documents_chain(self.llm, self.qa_prompt)

        self.rag_chain = create_retrieval_chain(
            self.history_aware_retriever, self.question_answer_chain
        )

    def get_session_history(self, session_id: str = "") -> "BaseChatMessageHistory":
        """
        This function gets the session history, creating an empty one on first use.

        :param session_id: Session id generated by user, defaults to ""
        :type session_id: str
        :return: The session history
        :rtype: BaseChatMessageHistory
        """
        if session_id not in self.store:
            self.store[session_id] = ChatMessageHistory()
        return self.store[session_id]

    def chat_history(self):
        """
        This function creates a conversational RAG chain.

        The chain wraps ``self.rag_chain`` and reads/writes the history returned
        by ``get_session_history``.
        """
        self.conversational_rag_chain = RunnableWithMessageHistory(
            self.rag_chain,
            self.get_session_history,
            input_messages_key="input",
            history_messages_key="chat_history",
            output_messages_key="answer",
        )

    def _build_pipeline(self):
        """
        Build the vector store (or load the saved one), the LLM and all chains.
        """
        index_file = os.path.join(self.vector_store_dir, "index.faiss")
        docstore_file = os.path.join(self.vector_store_dir, "index.pkl")

        if os.path.exists(index_file) and os.path.exists(docstore_file):
            # NOTE(review): load_local unpickles index.pkl, which is why
            # allow_dangerous_deserialization is required. Only point
            # vector_store_dir at directories written by this code.
            self.vector_store = FAISS.load_local(
                self.vector_store_dir,
                self.embeddings,
                allow_dangerous_deserialization=True,
            )
            self.retriever = self.vector_store.as_retriever()
        else:
            self.get_pdf_text()
            self.get_text_chunk()
            self.get_text_embeddings()

        self.load_llm()
        self.context_aware_retrieval()
        self.chat_history()

    def prompt_template_text_response(self, user_prompt, session_id="abc123"):
        """
        This function takes the user prompt and returns the response.

        The retrieval pipeline is built on the first call and reused afterwards;
        the chat history of ``session_id`` is kept in ``self.store`` between calls.

        :param user_prompt: The user prompt to ask the question
        :type user_prompt: str
        :param session_id: Key under which the chat history is stored, defaults to "abc123"
        :type session_id: str
        :return: Response to the user prompt (the chain output, with the generated
            text under the "answer" key)
        :rtype: dict

        :raises ValueError: If the vector store has to be built and no pdf file was
            provided, or if ``GOOGLE_API_KEY`` is not set
        """
        # Build once: reloading the index and recreating the chains per question is wasted work.
        if getattr(self, "conversational_rag_chain", None) is None:
            self._build_pipeline()

        self.user_prompt = user_prompt
        results = self.conversational_rag_chain.invoke(
            {"input": self.user_prompt},
            config={"configurable": {"session_id": session_id}},
        )
        return results


def main():
    """
    Entry point of the demo: build a RAG_PDF for the airport rules pdf, ask one
    question through prompt_template_text_response and print the full response.
    """
    question = "What is the use of baggage carts?"
    assistant = RAG_PDF(pdf_file="Airport_Rules_Regs_7_27_22.pdf")
    print(assistant.prompt_template_text_response(user_prompt=question))

# Main block of the code: run the demo only when this module is executed directly.
if __name__ == "__main__":
    main()

{'input': 'What is the use of baggage carts?', 'chat_history': [], 'context': [Document(metadata={}, page_content='B. USE OF  BAGGAGE  CARTS  \n \n1. Use of baggage  carts is restricted  to use  by or for  ticketed  airport passengers only. \n2. No person shall  tamper with  any baggage cart rental  device.  \n3. Using baggage carts to assist passengers for a fee or gratuity, or to solicit a fee or  \ngratuity, for services  similar  to the  service  provided by airport  skycaps  or baggage  \nhandlers, is strictly  prohibited. \n4. Baggage  carts  are not  allowed on escalators.  \n5. Baggage  carts are  not allowed to be used by children under the  age of 13. \n6. Baggage  carts  are not allowed  to be  taken  out of an  Airport  or Airport  Rail  \nTransportation System.  \n7. Airport  employees  and tenants are not allowed to  keep  baggage  carts  for personal  use \nor for any  use in connection with a business, such as to transport materials.  \n8. No person other  than  duly au

# Examples of the project created. 

**Note: this code only works with PDF files.**

In [8]:
# Pass the pdf explicitly: without it the call raises ValueError whenever no saved
# vector store exists yet (e.g. on a fresh checkout).
rag = RAG_PDF(pdf_file="Airport_Rules_Regs_7_27_22.pdf")
response = rag.prompt_template_text_response(user_prompt="What is the use of baggage carts?")


In [9]:
# Full result dict: the question, the chat history so far and the retrieved context documents.
response

{'input': 'What is the use of baggage carts?',
 'chat_history': [],
 'context': [Document(metadata={}, page_content='B. USE OF  BAGGAGE  CARTS  \n \n1. Use of baggage  carts is restricted  to use  by or for  ticketed  airport passengers only. \n2. No person shall  tamper with  any baggage cart rental  device.  \n3. Using baggage carts to assist passengers for a fee or gratuity, or to solicit a fee or  \ngratuity, for services  similar  to the  service  provided by airport  skycaps  or baggage  \nhandlers, is strictly  prohibited. \n4. Baggage  carts  are not  allowed on escalators.  \n5. Baggage  carts are  not allowed to be used by children under the  age of 13. \n6. Baggage  carts  are not allowed  to be  taken  out of an  Airport  or Airport  Rail  \nTransportation System.  \n7. Airport  employees  and tenants are not allowed to  keep  baggage  carts  for personal  use \nor for any  use in connection with a business, such as to transport materials.  \n8. No person other  than  duly 

In [10]:
# Follow-up question on the same instance, so the first exchange is available as chat history.
response_1 = rag.prompt_template_text_response(user_prompt="What about identification cards?")

In [14]:
# 'chat_history' now contains the first question and its answer.
response_1

{'input': 'What about identification cards?',
 'chat_history': [HumanMessage(content='What is the use of baggage carts?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='According to the provided text, the use of baggage carts is restricted to ticketed airport passengers only. This means that only individuals who have purchased a ticket for a flight are permitted to use these carts. The document emphasizes that the carts are intended to assist passengers with their luggage and should not be used for any other purposes. \n', additional_kwargs={}, response_metadata={})],
 'context': [Document(metadata={}, page_content='than those stated herein, or if it is impossible for a flight crewmember to follow  \nthe regulations as stated  herein, an escort is required. \n \n2.5 DISPLAYING  VALID IDENTIFICATION  \n \nWhile in the AOA or SIDA, individuals who are not under escort must display the  \napproved ID in full view, above waist level, on their outermost garment. Such  \na

In [15]:
# Third question on the same instance; both earlier exchanges are passed along as chat history.
response_2 = rag.prompt_template_text_response(user_prompt="What are the emergency rules on the airport?")



In [16]:
# NOTE(review): in the recorded output the AIMessage for the previous question is empty;
# check why that answer came back empty.
response_2

{'input': 'What are the emergency rules on the airport?',
 'chat_history': [HumanMessage(content='What is the use of baggage carts?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='According to the provided text, the use of baggage carts is restricted to ticketed airport passengers only. This means that only individuals who have purchased a ticket for a flight are permitted to use these carts. The document emphasizes that the carts are intended to assist passengers with their luggage and should not be used for any other purposes. \n', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='What about identification cards?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='', additional_kwargs={}, response_metadata={})],
 'context': [Document(metadata={}, page_content='grant  of any permission or permit,  or suspend any  permission or  permit already  granted, \nin the event of an emergency. An emergency is any condition which would creat

In [17]:
# Only the generated answer text, without the retrieved context and chat history.
response_2['answer']

'In the event of an emergency at the airport, specific rules and procedures are activated. An emergency is defined as any situation that could create a dangerous condition or significantly disrupt airport operations. This includes events like snowstorms, hurricanes, flooding, accidents, power failures, or strikes. \n\nThe text highlights a few key emergency rules. Firstly, all aircraft must clear active runways and hold their positions when an emergency is declared by the FAA, Port Authority, or other relevant parties, unless instructed otherwise by Air Traffic Control.  \n\nSecondly, the Airport Manager has the authority to restrict access to certain areas during emergencies. This means individuals must comply with any restrictions or instructions given by the Airport Manager in such situations. \n'