In [68]:
# Install the notebook's third-party dependencies into the running kernel's environment.
# NOTE(review): versions are unpinned, so a fresh install may resolve different releases — consider pinning.
# NOTE(review): `langchain` (imported below for the text splitter) is not listed here and is
# presumably only available as a transitive dependency — verify on a clean environment.
%pip install langchain-openai langchain-community python-dotenv faiss-cpu pypdf

Note: you may need to restart the kernel to use updated packages.


In [69]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.tools import tool

In [70]:
# Load variables from a local .env file into the process environment
# (presumably this is where the OpenAI API key is kept — confirm).
load_dotenv()

True

In [71]:
# Chat model that produces the final answer from the assembled prompt.
llm = ChatOpenAI(model="gpt-4o-mini")

In [72]:
# Read the thesis PDF (path is relative to the notebook's working directory)
# and load it into a list of Document objects.
loader = PyPDFLoader("my_thesis.pdf")
docs = loader.load()

In [73]:
# Sanity check: how many Document objects the loader produced.
len(docs)

145

In [74]:
# Break the loaded documents into overlapping pieces before embedding:
# chunk_size caps each piece at 1000, and chunk_overlap repeats 200 across
# consecutive pieces so text cut at a boundary still appears whole in one of them.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_documents(docs)

In [75]:
# Sanity check: how many chunks the splitter produced from the loaded documents.
len(chunks)

305

In [76]:
# Embedding model used to turn chunk text into vectors.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Embed every chunk and build a FAISS index from the resulting vectors.
# NOTE(review): nothing here persists the index, so each run presumably re-embeds
# all chunks via the API — consider saving/loading the index to avoid repeat cost.
vector_store = FAISS.from_documents(chunks, embeddings)

In [77]:
# Display the store's repr to confirm the index object was created.
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x20f6aabdd90>

In [78]:
# Wrap the FAISS store as a retriever: plain similarity search, returning the 3 best hits.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs=dict(k=3))

In [79]:
# Smoke-test the retriever on its own with a sample question and inspect the returned Documents.
retriever.invoke("Who is the author of this thesis?")

[Document(id='982cfa19-e35a-4e32-a330-2b0296806dd5', metadata={'producer': 'Microsoft® Word 2016', 'creator': 'Microsoft® Word 2016', 'creationdate': '2026-02-23T13:14:07+05:30', 'author': 'Windows User', 'moddate': '2026-02-23T13:14:07+05:30', 'source': 'my_thesis.pdf', 'total_pages': 145, 'page': 1, 'page_label': '2'}, page_content='Acknowledgement  \n \nIt’s the solemn benediction of God who has given me the opportunity to \nprepare this manuscript. All my sincere gratitude goes to him for his blessings. \nI am filled with everlasting enthusiasm, pride, and pleasure as I express my \ndeep sense of gratitude to my Advisor and Chairman of the Advisory Committee Dr. \nVinay Kumar Singh, Professor, Department of Mechanical Engineering for his inspiring \nguidance, encouragement, and unwavering efforts throughout the course of this work. \nHis timely assistance, constructive criticism, and diligent efforts made it possible for \nme to present the work contained in this thesis. \nEspecial

In [80]:
@tool
def rag_tool(query: str):
    """
    Retrieve relevant information from the PDF document.
    Use this tool when the user asks factual / conceptual questions
    that might be answered from the stored documents.
    """
    # The docstring above is kept verbatim: `@tool` exposes it as the tool's description.

    # Look up the chunks most relevant to the query via the notebook-level retriever.
    hits = retriever.invoke(query)

    # Build the response in one pass: the query echoed back, plus the text and
    # the metadata of each hit as two lists in matching order.
    payload = {"query": query, "context": [], "metadata": []}
    for hit in hits:
        payload["context"].append(hit.page_content)
        payload["metadata"].append(hit.metadata)

    return payload

In [81]:
# Question passed to the RAG tool and answered by the LLM in the following cell.
query = "Who is the author of this thesis ?"

In [82]:
# Run retrieval for the question. The tool returns a dict holding the query,
# the text of each matching chunk ("context"), and each chunk's metadata.
rag = rag_tool.invoke({"query": query})

# Join the retrieved pieces *outside* the f-string: a backslash inside an
# f-string replacement field (e.g. {"\n".join(...)}) is a SyntaxError on
# Python versions before 3.12, so hoisting keeps this cell portable.
context_text = "\n\n".join(rag["context"])
metadata_text = "\n".join(str(m) for m in rag["metadata"])

# Prompt layout: question first, then the chunk texts, then their metadata.
prompt = f"""Answer the question using the context and metadata.

Question:
{rag['query']}

Context:
{context_text}

Metadata:
{metadata_text}

"""

# Single-shot call to the chat model; the reply text is on `.content`.
answer = llm.invoke(prompt)

print("Answer:\n", answer.content)

Answer:
 The author of the thesis is Mr. Pawan Kumar Agrawal.
