In [None]:
!pip install langchain langchain-community langchain-core langchain-experimental langchain-huggingface
!pip install faiss-cpu pdfplumber sentence-transformers openai gradio


Collecting langchain-community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-experimental
  Downloading langchain_experimental-0.4.0-py3-none-any.whl.metadata (1.3 kB)
Collecting langchain-huggingface
  Downloading langchain_huggingface-1.0.1-py3-none-any.whl.metadata (2.1 kB)
INFO: pip is looking at multiple versions of langchain-community to determine which version is compatible with other requirements. This could take a while.
Collecting langchain-community
  Downloading langchain_community-0.4-py3-none-any.whl.metadata (3.0 kB)
  Downloading langchain_community-0.3.31-py3-none-any.whl.metadata (3.0 kB)
Collecting requests<3,>=2 (from langchain)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
INFO: pip is looking at multiple versions of langchain-experimental to determine which v

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting pdfplumber
  Downloading pdfplumber-0.11.8-py3-none-any.whl.metadata (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting pdfminer.six==20251107 (from pdfplumber)
  Downloading pdfminer_six-20251107-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-5.0.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.9/67.9 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m

In [9]:
import os
import gradio as gr
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# OpenRouter configuration.
# SECURITY: an API key must never be written into the notebook itself. Supply
# OPENROUTER_API_KEY through the runtime environment (or assign it to
# os.environ from a secrets store / getpass prompt in a private, uncommitted
# cell) before processing a PDF. Any key that was ever hardcoded here should be
# treated as leaked and revoked.
if not os.environ.get("OPENROUTER_API_KEY"):
    print(
        "WARNING: OPENROUTER_API_KEY is not set; "
        "set it in the environment before processing a PDF."
    )
# Point OpenAI-compatible clients at the OpenRouter endpoint.
os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"

# ========== Functions ==========
# Module-level question-answering chain; stays None until process_pdf() builds it.
qa = None

def process_pdf(pdf_file):
    """Index an uploaded PDF and build the module-level RetrievalQA chain.

    Args:
        pdf_file: The value delivered by the ``gr.File`` input: either a
            filesystem path string or an object exposing the path as
            ``.name``. ``None`` when nothing has been uploaded.

    Returns:
        str: Newline-joined progress messages. If a step fails, the messages
        collected so far followed by an ``Error: ...`` line, so the UI shows
        which stage broke.

    Side effects:
        On success, rebinds the global ``qa`` to the freshly built chain.
        On failure, ``qa`` is left untouched.
    """
    global qa

    # Fail fast on missing inputs before doing any expensive embedding work.
    if pdf_file is None:
        return "Please upload a PDF first."

    api_key = os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        return "Error: OPENROUTER_API_KEY is not set."

    status_msgs = []

    try:
        # 1. Load PDF
        # Accept both a plain path string and a file-like wrapper with `.name`.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        loader = PDFPlumberLoader(pdf_path)
        docs = loader.load()
        status_msgs.append(f"Loaded {len(docs)} pages from PDF.")

        # 2. Chunking
        status_msgs.append("Splitting text into chunks...")
        embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        text_splitter = SemanticChunker(embedder)
        # Drop whitespace-only chunks: they add nothing to retrieval.
        documents = [
            chunk
            for chunk in text_splitter.split_documents(docs)
            if chunk.page_content.strip()
        ]
        if not documents:
            # e.g. a scanned/image-only PDF; an empty index cannot be built.
            status_msgs.append("Error: no extractable text found in the PDF.")
            return "\n".join(status_msgs)
        status_msgs.append(f"Created {len(documents)} chunks.")

        # 3. Vector DB
        status_msgs.append("Building FAISS vector database...")
        vector = FAISS.from_documents(documents, embedder)
        retriever = vector.as_retriever(search_type="similarity", search_kwargs={"k": 2})
        status_msgs.append(" Vector database ready.")

        # 4. LLM
        status_msgs.append("Initializing LLM...")
        llm = ChatOpenAI(
              model="mistralai/mistral-7b-instruct",
              temperature=0.7,
              openai_api_base="https://openrouter.ai/api/v1",
              openai_api_key=api_key,
              request_timeout=180,
        )

        # 5. Prompt
        # NOTE: the template text is sent to the model verbatim; it names a
        # specific document even though any PDF can be uploaded.
        prompt_template = """
        You are an expert Python tutor.
Use ONLY the information provided in the context extracted from the PDF
"Python Programming – Wikibooks".

Rules:
- Do NOT use any outside knowledge.
- If the answer is not found in the context, say:
  "I could not find this information in the provided document."
- Keep answers short, clear, and accurate.
- Use examples ONLY if they appear in the context.
- Do not create your own examples.

Context:
{context}

Question:
{question}

Answer:

        """
        QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt_template)

        llm_chain = LLMChain(llm=llm, prompt=QA_CHAIN_PROMPT, verbose=False)
        # How each retrieved chunk is rendered before being stuffed into {context}.
        document_prompt = PromptTemplate(
            input_variables=["page_content", "source"],
            template="Context:\ncontent:{page_content}\nsource:{source}",
        )

        combine_documents_chain = StuffDocumentsChain(
            llm_chain=llm_chain,
            document_variable_name="context",
            document_prompt=document_prompt,
        )

        # 6. Retrieval QA chain (published globally only once fully built)
        qa = RetrievalQA(
            combine_documents_chain=combine_documents_chain,
            retriever=retriever,
            return_source_documents=True,
            verbose=False,
        )

        status_msgs.append("Ready for questions!")

    except Exception as e:
        # UI boundary: report the failure instead of raising, and keep the
        # progress log so the user can see which step failed.
        status_msgs.append(f"Error: {e}")

    return "\n".join(status_msgs)

def ask_question(question):
    """Answer a question using the chain built by ``process_pdf``.

    Args:
        question: The text typed into the question box; may be ``None`` or
            blank.

    Returns:
        str: The model's answer, or a human-readable message when the
        question is blank, no PDF has been processed yet, or the chain fails.
    """
    # Don't spend an LLM call on an empty query.
    if question is None or not question.strip():
        return "Please enter a question."

    # `qa` is only read here, so no `global` declaration is needed.
    if qa is None:
        return "Please upload a PDF first."

    try:
        # `.invoke` replaces the deprecated direct call `qa({...})`.
        result = qa.invoke({"query": question})

        # Handle different output formats safely
        if "result" in result:
            return result["result"]
        elif "answer" in result:
            return result["answer"]
        else:
            return f"⚠ Unexpected response: {result}"
    except Exception as e:
        # UI boundary: surface the failure as text rather than raising.
        return f"Error while answering: {str(e)}"


# ========== Gradio UI ==========
# Layout: components appear in the order they are created inside the Blocks
# context, so statement order below is the on-screen order.
with gr.Blocks() as demo:
    gr.Markdown("## Q&A Bot")
    # Row 1: PDF upload next to the processing status text.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        status = gr.Textbox(label="Status")
    upload_btn = gr.Button("Process PDF")
    # Row 2: question input next to the answer output.
    with gr.Row():
        q_in = gr.Textbox(label="Ask a Question")
        q_out = gr.Textbox(label="Answer")
    ask_btn = gr.Button("Submit")

    # Wire buttons to handlers: each handler's returned string fills the
    # listed output textbox.
    upload_btn.click(process_pdf, inputs=[pdf_input], outputs=[status])
    ask_btn.click(ask_question, inputs=[q_in], outputs=[q_out])

# NOTE(review): share=True requests a publicly reachable link; all visitors
# would hit the same module-level `qa` chain and the notebook's API key.
# Confirm that this exposure is intended.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://12b64f9bbb6f3dcf11.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


