In [1]:
from dotenv import load_dotenv
import os

# Read key/value pairs from the local .env file into the process environment.
load_dotenv()

# Google API key taken from the environment; None when the variable is unset.
api_key = os.getenv("GOOGLE_API_KEY")

In [4]:
from langchain_google_genai import ChatGoogleGenerativeAI,GoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader,PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain.memory import SimpleMemory,ConversationBufferMemory
from langchain.agents import initialize_agent, Tool
from langgraph.prebuilt import create_react_agent
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainFilter, LLMChainExtractor
from langchain.chains import RetrievalQA,ConversationalRetrievalChain
from langgraph.prebuilt import create_react_agent


# Chat model reused further down by the compression filter, the QA chain
# and the agent.
llm = ChatGoogleGenerativeAI(
    temperature=0.8,
    model="gemini-2.0-flash",
)

# Read the source PDF into LangChain documents.
file_path = r"C:\Users\Ganesh Jaiswar\Desktop\ODINSCHOOL\2025 ML\LLM\903020011.pdf"
loader = PyMuPDFLoader(file_path)
documents = loader.load()

# Re-cut the loaded documents into chunks of up to 1000 characters with a
# 250-character overlap; the separators are listed from paragraph breaks
# down to single spaces.
split = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", "?", "!", ",", " "],
    chunk_overlap=250,
    chunk_size=1000,
)
# `documents` is rebound: from here on it holds the chunks, not the raw load.
documents = split.split_documents(documents)

# Sentence-transformers embedding model served through Hugging Face.
embedding = HuggingFaceEmbeddings(
    model="sentence-transformers/all-mpnet-base-v2",
)

# Embed every chunk and index the vectors in a FAISS store.
vector_store = FAISS.from_documents(documents=documents, embedding=embedding)

# Similarity retriever; k=5 so each query fetches five passages.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

# Conversation buffer handed to the agent further down; its contents are
# exposed under the "history" key as message objects.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key='history',
)

# Wrap the base retriever with an LLM-driven filter.
# NOTE(review): presumably this drops retrieved chunks the LLM judges
# irrelevant to the query -- confirm against the LLMChainFilter docs.
compressor = LLMChainFilter.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

# Prompt for the retriever-backed Q&A chain. The template text (including its
# leading indentation) is sent to the model verbatim, so it is kept as-is.
_QA_TEMPLATE = """
    You are a geography expert. Use ONLY the provided context to answer the question clearly and factually.

    Context:
    {context}

    Question:
    {question}

    Rules:
    - Use full sentences.
    - If answer not found, say: "The PDF does not contain this information."
    """

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=_QA_TEMPLATE,
)

# RetrievalQA chain: answers a query from the passages returned by the
# compression retriever, using the prompt above. Source documents are
# requested alongside the answer text.
pdf_qa_chain = RetrievalQA.from_chain_type(
    retriever=compression_retriever,
    llm=llm,
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

# Tool definitions exposed to the agent.


def _ask_pdf(q):
    """Run the PDF QA chain on question *q* and return only the answer text."""
    response = pdf_qa_chain.invoke({"query": q})
    return response["result"]


_PDF_QA_DESCRIPTION = (
    "A specialized tool for finding facts and information ONLY within the loaded PDF document. "
    "Use this for questions about specific geography topics, chapters, or explanations found in the PDF. "
    "Always use this tool if the question is likely covered in the PDF."
)

# Single-tool toolbox: every question is routed through the PDF QA chain.
tools = [
    Tool(
        name="PDF QA",
        func=_ask_pdf,
        description=_PDF_QA_DESCRIPTION,
    ),
]

# Agent initialization
#
# Builds the agent executor from the tool list, the shared chat model and the
# conversation memory object created earlier in this script.
#
# NOTE(review): the zero-shot ReAct prompt presumably has no slot for the
# "history" memory key, in which case the buffer is filled after each turn but
# never shown to the model -- confirm, and switch to a conversational agent
# type if follow-up questions are meant to see earlier turns.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    # The selector keyword of initialize_agent is `agent`; the earlier
    # `agent_type=` spelling is not one of its parameters, so the agent kind
    # was only being picked up by default. Name it explicitly.
    agent="zero-shot-react-description",
    verbose=False,
    memory=memory,
    # Let the executor recover from malformed LLM output instead of raising.
    handle_parsing_errors=True,
)

# Interactive question loop: read a question from the console, pass it to the
# agent and print the answer, until the user types 'exit' or 'stop'.
while True:
    query = input("Ask a question (Type 'exit' or 'stop'):")

    # Normalise before matching so that " Exit " or "stop\t" also end the
    # session instead of being forwarded to the agent as a question.
    command = query.strip().lower()
    if command in ("exit", "stop"):
        break

    # Nothing was typed: prompt again rather than spending an LLM call.
    if not command:
        continue

    try:
        result = agent.invoke(query)
        print("\nAnswer:\n", result["output"])
    except Exception as e:
        # Top-level boundary of the session: report the failure and keep the
        # loop alive so one bad query does not end the conversation.
        print("\nError:", str(e))

Ask a question (Type 'exit' or 'stop'): what is Cyclonic rainfall?



Answer:
 Cyclonic rainfall is the specific air formation when the pressure at an area is less than the surrounding regions.


Ask a question (Type 'exit' or 'stop'): what is Snow?



Answer:
 Precipitation in the form of solid particles of snow is known as snowfall. When the temperature in the atmosphere falls below the freezing point, water vapor directly turns into snowflakes.


Ask a question (Type 'exit' or 'stop'): earthquakes and valcanoes distinguish between?



Answer:
 Earthquakes are caused by movements occurring in the interior of the earth, creating tension in the earth's crust, and releasing energy in the form of waves. Volcanoes occur due to instability in the interior of the earth and involve the eruption of molten magma. Earthquakes result in trembling.


Ask a question (Type 'exit' or 'stop'): what are main types of chemical weathering?



Answer:
 The main types of chemical weathering are carbonation, oxidation, and solution.


Ask a question (Type 'exit' or 'stop'): what is carbonation, oxidation  and solution?



Answer:
 Carbonation is when rainwater travels through the atmosphere and carbon dioxide in the air gets mixed in the water, forming dilute carbonic acid. Oxidation is a type of chemical weathering that occurs when oxygen reacts with iron in the rock, forming rust, and it is the reaction of any element with oxygen. The definition of solution is not found in this document.


Ask a question (Type 'exit' or 'stop'): a map is prepared using statistical data?



Answer:
 A dot map is prepared using statistical data.


Ask a question (Type 'exit' or 'stop'): Economic is an important ___ subject?



Answer:
 I am unable to answer the question due to its incompleteness and my inability to reliably search the PDF.


Ask a question (Type 'exit' or 'stop'): Economic is an important subject?



Answer:
 Yes, economics is an important subject.


Ask a question (Type 'exit' or 'stop'): is know as the father of economics?



Answer:
 Adam Smith


Ask a question (Type 'exit' or 'stop'): the term economics is derived from the greek word?



Answer:
 The term "economics" is derived from the Greek word "oikonomia".


Ask a question (Type 'exit' or 'stop'): exit
