In [None]:
!pip install langchain langchain-community langchain-core langchain-groq langchain-huggingface pypdf faiss-cpu sentence-transformers requests

Collecting langchain-community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-groq
  Downloading langchain_groq-1.0.0-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain-huggingface
  Downloading langchain_huggingface-1.0.1-py3-none-any.whl.metadata (2.1 kB)
Collecting pypdf
  Downloading pypdf-6.2.0-py3-none-any.whl.metadata (7.1 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
INFO: pip is looking at multiple versions of langchain-community to determine which version is compatible with other requirements. This could take a while.
Collecting langchain-community
  Downloading langchain_community-0.4-py3-none-any.whl.metadata (3.0 kB)
  Downloading langchain_community-0.3.31-py3-none-any.whl.metadata (3.0 kB)
Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-c

In [None]:
import os
from getpass import getpass

# Credentials must never be stored in the notebook itself.
# Use GROQ_API_KEY from the environment when it is already set; otherwise prompt
# for it without echoing, so the value ends up in neither the source nor the output.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your GROQ_API_KEY: ")

In [None]:
import os
from dotenv import load_dotenv
import requests
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.agents import Tool, initialize_agent
from langchain.memory import ChatMessageHistory
from langchain_core.prompts import MessagesPlaceholder



In [None]:
# Download the paper once and cache it on disk; later cells read it via `pdf_file`.
pdf_url = "https://arxiv.org/pdf/2207.13050.pdf"
pdf_file = "my.pdf"
if not os.path.exists(pdf_file):
    # A timeout keeps the cell from hanging forever on a stalled connection.
    pdf = requests.get(pdf_url, timeout=60)
    # Fail loudly on HTTP errors: otherwise an error page would be written to
    # `pdf_file` and, because of the exists() guard above, reused on every later run.
    pdf.raise_for_status()
    with open(pdf_file, "wb") as f:
        f.write(pdf.content)


In [None]:
# Read the cached PDF from `pdf_file` into LangChain documents.
loader = PyPDFLoader(pdf_file)
documents = loader.load()
# Split the documents into overlapping chunks before embedding.
# NOTE(review): all-MiniLM-L6-v2 reportedly truncates inputs beyond roughly 256 word
# pieces, so chunks of size 3000 may be only partially embedded — confirm against the
# model card and consider a smaller chunk_size.
splitter = RecursiveCharacterTextSplitter(chunk_size = 3000, chunk_overlap = 450)
chunks = splitter.split_documents(documents)

# Embed the chunks and build a FAISS vector store from them; `retriever` is what the
# ConversationalRetrievalChain in the next cell is constructed with.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vector_store = FAISS.from_documents(chunks, embeddings)
retriever = vector_store.as_retriever()


In [None]:
# Groq-hosted chat model shared by the retrieval chain and the agent.
llm = ChatGroq(model="openai/gpt-oss-20b", temperature=0)

# Retrieval chain built from the model and the PDF retriever.
qa_chain = ConversationalRetrievalChain.from_llm(llm, retriever)


def _ask_pdf(q):
    """Send one question through `qa_chain` with an empty chat history and return the "answer" field."""
    result = qa_chain.invoke({"question": q, "chat_history": []})
    return result["answer"]


# The only tool handed to the agent.
tools = [
    Tool(
        name="PDF_QA",
        func=_ask_pdf,
        description="Use this to answer questions based on the loaded PDF.",
    ),
]


In [None]:

# Message store that backs the agent's conversation memory.
chat_history = ChatMessageHistory()

# Conversation memory passed to initialize_agent below. It wraps the message store
# above and is registered under the key "chat_history".
# NOTE(review): return_messages=True presumably makes the memory yield message objects
# rather than one concatenated string — confirm against the LangChain docs.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    chat_memory=chat_history,
    return_messages=True
)

In [None]:
# Build the conversational agent around the PDF_QA tool and the shared memory.
#
# The custom system prompt must be supplied through `agent_kwargs`: initialize_agent
# forwards `agent_kwargs` to the agent class, where the prompt is assembled, while any
# other extra keyword argument is forwarded to the AgentExecutor, which has no
# `system_message` setting. Passed at the top level, the prompt never reaches the model.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent="chat-conversational-react-description",
    memory=memory,
    verbose=False,
    agent_kwargs={
        "system_message": (
            "You are an AI assistant that MUST answer questions using the tool 'PDF_QA' "
            "which retrieves information from the research paper. "
            "You are NOT allowed to answer from your general knowledge. "
            "Every answer must clearly say: 'Tool used: PDF_QA'. "
            "If the tool does not provide an answer, say 'The PDF does not contain this information'."
        )
    },
)

In [None]:
from IPython.display import display, Markdown, clear_output

# Display-only transcript for the chat loop: a list of (role, message) tuples.
# NOTE(review): this rebinds the name `chat_history`, which an earlier cell used for the
# ChatMessageHistory object given to `memory`. Rebinding the name does not alter that
# object, but re-running cells out of order will mix the two up — consider renaming.
chat_history = []


In [None]:
def _render_transcript(transcript):
    """Display each (role, message) pair as a Markdown line with a bold speaker label."""
    for role, msg in transcript:
        speaker = "You" if role == "User" else "Bot"
        # No space after the opening `**`: with a space, Markdown does not treat the
        # label as bold and prints the asterisks literally.
        display(Markdown(f"**{speaker}:** {msg}"))


# Interactive chat loop: read a question, ask the agent, redraw the whole conversation.
# Typing 'exit' (any case) stops the loop; empty input is ignored.
while True:
    try:
        question = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted kernel ends the chat the same way 'exit' does,
        # instead of leaving a traceback in the output.
        print("Goodbye ")
        break
    if question.lower() == "exit":
        print("Goodbye ")
        break
    if not question:
        continue

    # Query the agent before touching the transcript, so a failed call does not
    # leave a user message without a matching reply.
    response = agent.invoke({"input": question})
    answer = response["output"]

    chat_history.append(("User", question))
    chat_history.append(("Bot", answer))

    # Clear previous output to simulate chat flow, then redraw the full conversation.
    clear_output(wait=True)
    _render_transcript(chat_history)

    print("\n(Type 'exit' to stop)")








You: exit
Goodbye 
