In [1]:
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI credentials used by the
# embedding and chat-model cells come from — confirm.
load_dotenv()

True

In [2]:
from langchain_community.document_loaders.pdf import PyPDFLoader

# Parse the PDF that sits next to this notebook. `documents` is the loader's
# output and is the input of the splitting step further down.
loader = PyPDFLoader("letter.pdf")
documents = loader.load()

In [None]:
pip install faiss-cpu

In [9]:
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS

# Slice the loaded pages into small overlapping windows before embedding.
# NOTE(review): with chunks this short, fields can be cut in the middle of a
# token — confirm this granularity is intended.
text_splitter = CharacterTextSplitter(
    separator="",
    chunk_size=100,
    chunk_overlap=10,
)
chunks = text_splitter.split_documents(documents)

# Embed every chunk with the OpenAI embedding model, index the vectors in
# FAISS, and write the index to the "faiss_store" folder.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents=chunks, embedding=embeddings)
vectorstore.save_local(folder_path="faiss_store")

In [10]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains.combine_documents.stuff import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_openai import ChatOpenAI

# Prompt for the answer-generation step: {input} receives the user's question
# and {context} receives the retrieved chunks.
message = """
            Answer this question using the provided context only.
            If the information is not available in the context, just reply with "I don't know"
            {input}
            Context: {context}
          """

# Earlier conversation turns are injected ahead of the current question.
prompt = ChatPromptTemplate.from_messages([
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", message),
])
llm = ChatOpenAI(model="gpt-4o-mini")

# Plain retrieval chain: the vector store's default retriever feeds the
# stuff-documents QA chain.
qa_chain = create_stuff_documents_chain(llm, prompt)
retriever_chain = create_retrieval_chain(vectorstore.as_retriever(), qa_chain)

# First turn of the conversation; the history starts out empty.
chat_history = []
query = "what is the person number of leela"
response = retriever_chain.invoke({"input": query, "chat_history": chat_history})

In [11]:
# Display only the generated answer text from the chain's result.
response["answer"]

'The person number of Leela is 27353082.'

In [12]:
from langchain_core.messages import HumanMessage, AIMessage

# Record the first exchange so the follow-up question can refer back to it.
# The history is rebuilt from scratch rather than extended in place, so running
# this cell again does not append the same two messages a second time.
# NOTE(review): `query`, `response` and `retriever_chain` are rebound by the
# final cell of this notebook; re-running this cell after that one would pick
# up those later values instead.
chat_history = [
    HumanMessage(content=query),
    AIMessage(content=response["answer"]),
]

# Follow-up that only makes sense together with the previous turn ("his").
query2 = "what is his title"
response2 = retriever_chain.invoke({"input": query2, "chat_history": chat_history})

In [13]:
# Display the answer to the follow-up question.
# NOTE(review): the chain used here retrieves through vectorstore.as_retriever()
# directly, so the pronoun in "what is his title" is presumably not resolved at
# retrieval time — which would explain an "I don't know" reply. Confirm.
response2["answer"]

"I don't know."

In [None]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_history_aware_retriever

# Instruction asking the model to rewrite the latest question as a standalone
# one, using the chat history that precedes it in the prompt.
# Despite the variable name, this text is sent as a "human" message below.
contextualize_q_system_prompt = """Given above chat history and below latest user question
    which might reference context in the chat history,
    formulate a standalone question which can be understood
    without the chat history. Do NOT answer the question,
    just reformulate it if needed and otherwise return it as is.
    Below is the latest question:
    
    {input}
"""

# History first, then the reformulation instruction carrying the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", contextualize_q_system_prompt),
    ]
)

# Wrap the plain vector-store retriever with the history-aware one.
retriever = vectorstore.as_retriever()
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)


In [16]:
# Ask the same follow-up again, this time through the history-aware retriever,
# and show what it returns.
query3 = "what is his title"
response3 = history_aware_retriever.invoke(
    {
        "input": query3,
        "chat_history": chat_history,
    }
)

response3

[Document(id='ebd354d3-ba83-48e6-956a-b3c40f42b5cc', metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2023-08-25T12:28:08+05:30', 'author': 'Suresh Babu', 'moddate': '2023-08-25T12:28:08+05:30', 'source': 'letter.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='1751752/A22 \n \n   \n25-Aug-23 \n \n \nFull Name  : Leela Prasad Jagu \n \nPerson No.  : 27353082 \n \n \nTO W'),
 Document(id='bf708970-1632-42c5-8a4c-05366ca9601c', metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Microsoft 365', 'creationdate': '2023-08-25T12:28:08+05:30', 'author': 'Suresh Babu', 'moddate': '2023-08-25T12:28:08+05:30', 'source': 'letter.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='Ref: # 1751752/A2'),
 Document(id='da3dc87b-fe0c-4117-b1e2-4e55a697c299', metadata={'producer': 'Microsoft® Word for Microsoft 365', 'creator': 'Microsoft® Word for Micros

In [19]:
from langchain.chains.combine_documents.stuff import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

# System instructions for the answering step; {context} receives the chunks
# returned by the retriever.
system_prompt = """You are an assistant for question-answering tasks.
    Answer this question using the provided context only. If you don't know the answer, just say I don't know
    context: {context}
"""

# System instructions, then the prior turns, then the current question.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Same QA chain shape as the earlier one, but retrieval now goes through the
# history-aware retriever.
# NOTE(review): this cell rebinds `prompt`, `qa_chain`, `retriever_chain`,
# `query` and `response` from earlier cells; re-running those earlier cells out
# of order afterwards will see these values instead.
qa_chain = create_stuff_documents_chain(llm, prompt)
retriever_chain = create_retrieval_chain(history_aware_retriever, qa_chain)

query = "when is his last day"
response = retriever_chain.invoke({"input": query, "chat_history": chat_history})

response["answer"]

"Leela Prasad Jagu's last day is 21-Aug-23, close of business hours."