# PDF Question Answering System with LangChain and Gemini

## 1. Install Required Packages

In [None]:
!pip install -q langchain-google-genai langchain pypdf faiss-cpu python-dotenv

## 2. Import Libraries

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from IPython.display import display, Markdown
import os
import tempfile
import time
from dotenv import load_dotenv

## 3. Load Environment Variables

In [None]:
# Load settings via python-dotenv, then look up the Gemini API key in the
# process environment.
load_dotenv()
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')

# Fail fast when the key is absent or empty: the model clients created later
# in this notebook are constructed with it.
if GOOGLE_API_KEY is None or GOOGLE_API_KEY == "":
    raise ValueError("Please set GOOGLE_API_KEY in your .env file")

## 4. Initialize Models

In [None]:
# Shared, module-level objects used by the processing functions.
# NOTE(review): the model identifiers are hard-coded; confirm they are still
# served by the Gemini API before relying on this notebook.

# Splitter applied to the loaded PDF documents (chunk size 1000, overlap 200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)

# Chat model that writes the answers; temperature is pinned to 0.
llm = ChatGoogleGenerativeAI(
    google_api_key=GOOGLE_API_KEY,
    temperature=0,
    model="gemini-pro",
)

# Embedding model used when building and loading the FAISS index.
embeddings = GoogleGenerativeAIEmbeddings(
    google_api_key=GOOGLE_API_KEY,
    model="models/embedding-001",
)

## 5. Define Processing Functions

In [None]:
def process_pdf(file_path):
    """Build a FAISS vector store from the text of a PDF file.

    The PDF is loaded, split into chunks with the module-level
    ``text_splitter``, embedded with the module-level ``embeddings`` model
    and indexed with FAISS.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The populated FAISS vector store, or ``None`` when the PDF could not
        be processed (the reason is printed).
    """
    try:
        # Use load() rather than load_and_split(): the latter already runs
        # its own default splitter, so the text would be chunked twice under
        # two different configurations before being embedded.
        loader = PyPDFLoader(file_path)
        pages = loader.load()

        # Split the loaded documents with the notebook's configured splitter.
        chunks = text_splitter.split_documents(pages)

        # A PDF without extractable text (e.g. scanned images) yields no
        # chunks; report that clearly instead of letting index construction
        # fail on an empty list.
        if not chunks:
            print("Error processing PDF: no extractable text found")
            return None

        # Embed the chunks and build the index.
        vectorstore = FAISS.from_documents(chunks, embeddings)
        return vectorstore

    except Exception as e:
        # Deliberately broad: callers in this notebook test the result for
        # None instead of handling exceptions.
        print(f"Error processing PDF: {e}")
        return None

def ask_question(vectorstore, question):
    """Answer ``question`` using context retrieved from ``vectorstore``.

    Args:
        vectorstore: Vector store whose ``as_retriever()`` feeds the chain.
        question: The question text, passed to the chain as ``query``.

    Returns:
        The ``result`` entry of the chain's response, or a string starting
        with ``"Error answering question:"`` if any step raised.
    """
    try:
        # Prompt telling the model to answer from the supplied context and to
        # admit when it does not know.
        qa_prompt = PromptTemplate(
            input_variables=["context", "question"],
            template="""
        Answer the question based on the context below. If you don't know the answer, 
        just say that you don't know, don't try to make up an answer.

        Context: {context}

        Question: {question}
        Answer:""",
        )

        # Retrieval chain wired to the module-level ``llm`` and the custom
        # prompt above.
        chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(),
            chain_type_kwargs={"prompt": qa_prompt},
        )

        response = chain.invoke({"query": question})
        return response['result']

    except Exception as err:
        # Failures are reported as text so callers can display them directly.
        return f"Error answering question: {err}"

## 6. Upload and Process PDF

In [None]:
from ipywidgets import FileUpload, Output
from IPython.display import clear_output

# File picker plus a dedicated output area. Messages printed from a widget
# callback are not shown on every front-end unless they are captured by an
# Output widget.
upload = FileUpload(accept='.pdf', multiple=False)
upload_output = Output()
display(upload, upload_output)


def _first_uploaded_file(value):
    """Return ``(file_name, content)`` for the first uploaded file, or ``None``.

    Supports both layouts of ``FileUpload.value``: a dict keyed by file name
    (ipywidgets 7) and a sequence of per-file dicts carrying a ``name`` key
    (ipywidgets 8).
    """
    if not value:
        return None
    if isinstance(value, dict):
        file_name, file_info = next(iter(value.items()))
    else:
        file_info = value[0]
        file_name = file_info['name']
    return file_name, file_info['content']


def _on_upload_change(change):
    """Process a newly uploaded PDF and store the result in ``vectorstore``."""
    global vectorstore

    uploaded = _first_uploaded_file(change['new'])
    if uploaded is None:
        return
    file_name, content = uploaded

    with upload_output:
        clear_output()
        print(f"Processing {file_name}...")

        tmp_path = None
        try:
            # delete=False so the file still exists after the handle is
            # closed and can be reopened by path inside process_pdf.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
                tmp_path = tmp.name
                tmp.write(content)

            # Process PDF
            vectorstore = process_pdf(tmp_path)
        finally:
            # Clean up the temp file whether or not processing succeeded.
            if tmp_path is not None:
                os.unlink(tmp_path)

        if vectorstore:
            print(f"Successfully processed {file_name}")
        else:
            print(f"Failed to process {file_name}")


# The widget is always empty while this cell is executing, so react to the
# upload when it actually happens instead of checking the value once.
upload.observe(_on_upload_change, names='value')

## 7. Interactive Question Answering

In [None]:
from ipywidgets import widgets

# Build the question UI only once a vector store exists in this session.
if 'vectorstore' in locals() and vectorstore:
    question_input = widgets.Text(
        placeholder='Enter your question about the PDF',
        description='Question:',
        layout=widgets.Layout(width='80%')
    )
    ask_button = widgets.Button(description="Ask")

    # Output produced inside a button callback is not shown on every
    # front-end unless it is captured by an Output widget.
    answer_output = widgets.Output()

    def on_ask_clicked(b):
        """Answer the question currently in the text box and render the result.

        Args:
            b: The clicked button (unused; supplied by ``on_click``).
        """
        # Ignore empty and whitespace-only input so no API call is wasted.
        question = question_input.value.strip()
        if not question:
            return

        with answer_output:
            print("Thinking...")
            # ``vectorstore`` is looked up at click time, so the most
            # recently assigned store is the one queried.
            answer = ask_question(vectorstore, question)
            display(Markdown(f"**Question:** {question}"))
            display(Markdown(f"**Answer:** {answer}"))

    ask_button.on_click(on_ask_clicked)
    display(question_input, ask_button, answer_output)
else:
    print("Please upload and process a PDF first")

## 8. Save/Load VectorStore (Optional)

In [None]:
# Folder used for both saving and loading the FAISS index.
VECTORSTORE_DIR = "pdf_vectorstore"

# Save
if 'vectorstore' in locals() and vectorstore:
    try:
        vectorstore.save_local(VECTORSTORE_DIR)
        print(f"VectorStore saved to '{VECTORSTORE_DIR}' folder")
    except Exception as e:
        # Mirror the load branch: report the failure instead of aborting the cell.
        print(f"Error saving VectorStore: {e}")

# Load
if os.path.isdir(VECTORSTORE_DIR):
    try:
        # SECURITY: loading a saved FAISS store unpickles data from disk,
        # which is why the flag below must be set explicitly. Only load
        # folders that this notebook (or another trusted source) wrote;
        # never an index obtained from an untrusted party.
        loaded_vectorstore = FAISS.load_local(
            VECTORSTORE_DIR,
            embeddings,
            allow_dangerous_deserialization=True
        )
        vectorstore = loaded_vectorstore
        print("VectorStore loaded successfully")
    except Exception as e:
        print(f"Error loading VectorStore: {e}")
else:
    # Nothing has been saved yet; say so rather than reporting a load error.
    print(f"No saved VectorStore found in '{VECTORSTORE_DIR}' folder")