In [None]:
from utils import get_groq_api_key
groq_api_key = get_groq_api_key()

In [2]:
from langchain.document_loaders import PyPDFLoader
# Loading PDF
Loader=PyPDFLoader("Christopher_Nolan_Context.pdf")
data=Loader.load()
len(data)

2

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Splitting long text into smaller chunks

text_splitter=RecursiveCharacterTextSplitter(chunk_size=300 , chunk_overlap=50)
text= text_splitter.split_documents(data)
print(len(text))

21


In [4]:
from langchain_community.embeddings import HuggingFaceEmbeddings
# Loading the Embeddinigs

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [5]:
from langchain.vectorstores import FAISS
# Store Chunks in vector DB

db=FAISS.from_documents(text,embeddings)

In [None]:
# Loading the moodel
from langchain_groq import ChatGroq
llm = ChatGroq(
    model="llama-3.1-8b-instant",  # choose the model you prefer
    temperature=0.1,
    max_tokens=512,                # adjust as per your context size
    api_key=groq_api_key
)

In [7]:
from langchain.chains import RetrievalQA
# Creating RetrievaQA Chain

chain = RetrievalQA.from_llm(
    llm=llm,
    retriever=db.as_retriever(),
    return_source_documents=True  
)

In [8]:
while True:
    query=input("Ask something about the PDf(or type 'exit'): \n\n")
    if query.lower()=="exit":
        break
    result=chain.invoke(query)
    print(result)

Ask something about the PDf(or type 'exit'): 

 who is Christoper Nolan?


{'query': 'who is Christoper Nolan?', 'result': 'Christopher Nolan is a British-American film director, producer, and screenwriter. He is widely regarded as one of the most influential and important filmmakers of the 21st century.', 'source_documents': [Document(id='12a80112-690f-4b9e-bc10-a27cc47be22c', metadata={'producer': 'ReportLab PDF Library - www.reportlab.com', 'creator': '(unspecified)', 'creationdate': '2025-10-09T08:01:05+00:00', 'author': '(anonymous)', 'keywords': '', 'moddate': '2025-10-09T08:01:05+00:00', 'subject': '(unspecified)', 'title': '(anonymous)', 'trapped': '/False', 'source': 'Christopher_Nolan_Context.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='Christopher Nolan - Detailed Context\nChristopher Nolan (born July 30, 1970) is a British-American film director, producer, and\nscreenwriter widely regarded as one of the most influential and important filmmakers of the 21st'), Document(id='9ec2d4f0-b18e-4787-b14f-ac658e7ba908', metadata={'pr

Ask something about the PDf(or type 'exit'): 

 his famous movie?


{'query': 'his famous movie?', 'result': 'Based on the provided context, one of Christopher Nolan\'s most famous movies is "Memento" (2000). It\'s a breakthrough film known for its reverse chronological storytelling about a man with short-term memory loss trying to find his wife\'s killer.', 'source_documents': [Document(id='710d968a-4e05-4409-a7b0-0e00e272b228', metadata={'producer': 'ReportLab PDF Library - www.reportlab.com', 'creator': '(unspecified)', 'creationdate': '2025-10-09T08:01:05+00:00', 'author': '(anonymous)', 'keywords': '', 'moddate': '2025-10-09T08:01:05+00:00', 'subject': '(unspecified)', 'title': '(anonymous)', 'trapped': '/False', 'source': 'Christopher_Nolan_Context.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='Recurring themes in his films include: - Time and temporality: films like "Memento", "Interstellar",\nand "Tenet" play with the structure and perception of time. - Memory and identity: "Memento" and\n"Insomnia" explore how memory shap

Ask something about the PDf(or type 'exit'): 

 exit


In [None]:
import os
import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain.chains import ConversationalRetrievalChain

# 🔑 Set your Groq API key
os.environ["GROQ_API_KEY"] = "GROQ_API_KEY"

chat_chain = None
chat_history = []

# 📘 Function to process uploaded PDF(s)
def process_pdfs(pdf_files):
    global chat_chain, chat_history

    if not pdf_files:
        return "⚠️ Please upload at least one PDF file."

    all_docs = []
    for pdf in pdf_files:
        pdf_path = pdf.name if hasattr(pdf, "name") else pdf  # Handle Gradio file object safely
        loader = PyPDFLoader(pdf_path)
        docs = loader.load()
        all_docs.extend(docs)

    # Split PDF text into chunks for better embedding
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=80)
    chunks = splitter.split_documents(all_docs)

    # Create vector database with embeddings
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = FAISS.from_documents(chunks, embeddings)

    # Initialize Groq LLM
    llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0.2)

    # Build conversational retrieval chain
    chat_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=db.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True
    )

    chat_history = []
    return "✅ PDF processed successfully! You can now ask questions."

# 💬 Function to chat with the processed PDFs
def chat_with_pdfs(message, history):
    global chat_chain, chat_history

    if chat_chain is None:
        return history + [[message, "⚠️ Please upload and process a PDF first!"]]

    result = chat_chain({"question": message, "chat_history": chat_history})
    answer = result["answer"]

    chat_history.append((message, answer))
    history.append([message, answer])
    return history

# 🖥️ Build Gradio App
with gr.Blocks(title="Chat with PDF") as demo:
    gr.Markdown("# 🤖 Chat with Your PDF")
    gr.Markdown("Upload one or more PDF files, process them, and ask questions interactively.")

    with gr.Row():
        pdf_files = gr.File(label="📂 Upload PDF(s)", file_count="multiple", file_types=[".pdf"])
        process_button = gr.Button("⚙️ Process PDFs")

    status_box = gr.Textbox(label="Status", interactive=False)

    chatbot = gr.Chatbot(label="Chat with your PDFs", height=400)
    msg = gr.Textbox(label="💬 Ask a question", placeholder="Type here and press Enter...")
    clear = gr.Button("🧹 Clear Chat")

    process_button.click(process_pdfs, inputs=[pdf_files], outputs=[status_box])
    msg.submit(chat_with_pdfs, [msg, chatbot], [chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()


  chatbot = gr.Chatbot(label="Chat with your PDFs", height=400)


* Running on local URL:  http://127.0.0.1:7864
* To create a public link, set `share=True` in `launch()`.




ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "C:\Users\Arbaz Aslam\AppData\Roaming\Python\Python313\site-packages\uvicorn\protocols\http\h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        self.scope, self.receive, self.send
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    )
    ^
  File "C:\Users\Arbaz Aslam\AppData\Roaming\Python\Python313\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Arbaz Aslam\AppData\Roaming\Python\Python313\site-packages\fastapi\applications.py", line 1082, in __call__
    await super().__call__(scope, receive, send)
  File "C:\Users\Arbaz Aslam\AppData\Roaming\Python\Python313\site-packages\starlette\applications.py", line 113, in __call__
    await self.middleware_stack(scope, 