# Chat with PDF using Gemini

## 1. Install Required Packages

In [None]:
!pip install -q streamlit PyPDF2 langchain google-generativeai faiss-cpu python-dotenv

## 2. Import Libraries

In [None]:
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
import google.generativeai as genai
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from io import BytesIO
import requests
from IPython.display import display, Markdown

## 3. Load Environment Variables

In [None]:
# Merge variables from a .env file into the process environment.
load_dotenv()

# The Gemini client cannot be configured without a key, so fail immediately
# when it is missing or empty.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("GOOGLE_API_KEY not found in environment variables")

genai.configure(api_key=GOOGLE_API_KEY)

## 4. PDF Processing Functions

In [None]:
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page in the given PDFs.

    Args:
        pdf_docs: Iterable of PDF sources that ``PdfReader`` can open
            (this notebook passes in-memory ``BytesIO`` objects).

    Returns:
        One string with the text of all pages, in document order and then
        page order. Empty when ``pdf_docs`` is empty.
    """
    page_texts = []
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        # Defensive: a page without a text layer (e.g. a scanned image) can
        # give no text; `or ""` keeps a None result from breaking the join.
        page_texts.extend(page.extract_text() or "" for page in pdf_reader.pages)
    # Join once at the end instead of growing a string page by page.
    return "".join(page_texts)

def get_text_chunks(text, *, chunk_size=10000, chunk_overlap=1000):
    """Split text into overlapping chunks for embedding.

    Args:
        text: The full text to split.
        chunk_size: Maximum size of each chunk, passed straight to
            ``RecursiveCharacterTextSplitter``. Defaults to 10000.
        chunk_overlap: Amount of text shared between consecutive chunks,
            passed straight to the splitter. Defaults to 1000.

    Returns:
        The chunks produced by the splitter's ``split_text``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)

def get_vector_store(text_chunks, index_path="faiss_index"):
    """Embed the text chunks, build a FAISS index and save it to disk.

    Args:
        text_chunks: Non-empty list of text chunks to embed.
        index_path: Folder the index is written to. The default,
            ``"faiss_index"``, is the folder ``user_input`` loads from.

    Returns:
        The in-memory FAISS vector store that was just saved.

    Raises:
        ValueError: If ``text_chunks`` is empty, e.g. because the PDFs
            contained no extractable text.
    """
    # Fail early with a readable message rather than letting the index
    # builder choke on an empty embedding list.
    if not text_chunks:
        raise ValueError(
            "No text chunks to index; the PDFs may contain no extractable text"
        )

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local(index_path)
    return vector_store

## 5. Question Answering Setup

In [None]:
def get_conversational_chain():
    """Build the "stuff" question-answering chain used to answer from context.

    Returns:
        The chain returned by ``load_qa_chain``, wired to a Gemini chat model
        and a prompt that takes ``context`` and ``question``.
    """
    # Runtime prompt text: the model is told to answer only from the supplied
    # context and to say so when the answer is not there.
    template_text = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """

    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(
        template=template_text,
        input_variables=["context", "question"],
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)

def user_input(user_question):
    """Answer a question from the saved FAISS index.

    Loads the index from the ``faiss_index`` folder, retrieves the documents
    most similar to the question and runs them through the QA chain.

    Args:
        user_question: The question text.

    Returns:
        The chain's ``output_text``, or a string starting with
        ``"Error processing your question:"`` if loading, retrieval or the
        chain call raised.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    try:
        # The saved index includes a pickle file, and current LangChain
        # refuses to load it unless deserialization is explicitly allowed.
        # SECURITY: only acceptable because this notebook wrote the index
        # itself; never point this at an index from an untrusted source.
        new_db = FAISS.load_local(
            "faiss_index",
            embeddings,
            allow_dangerous_deserialization=True,
        )
        docs = new_db.similarity_search(user_question)

        chain = get_conversational_chain()
        response = chain(
            {"input_documents": docs, "question": user_question},
            return_only_outputs=True
        )

        return response["output_text"]
    except Exception as e:
        # Notebook UI boundary: report the failure as text instead of
        # raising inside the widget callback.
        return f"Error processing your question: {str(e)}"

## 6. Process Sample PDFs

In [None]:
# Download sample PDFs
def download_pdf(url, timeout=30):
    """Download a PDF and return its bytes as an in-memory file object.

    Args:
        url: Address of the PDF to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook. Defaults to 30.

    Returns:
        A ``BytesIO`` holding the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Without this check an HTML error page would be passed on to the PDF
    # parser and fail there with a misleading message.
    response.raise_for_status()
    return BytesIO(response.content)

# PDFs used for the demo
sample_pdf_urls = [
    "https://arxiv.org/pdf/1706.03762.pdf",  # "Attention is All You Need"
    "https://arxiv.org/pdf/1810.04805.pdf",  # BERT
]

# Fetch every PDF into memory first, then run the pipeline step by step:
# text extraction -> chunking -> embedding and index creation.
pdf_docs = list(map(download_pdf, sample_pdf_urls))

print("Processing PDFs...")
raw_text = get_pdf_text(pdf_docs)
text_chunks = get_text_chunks(raw_text)
vector_store = get_vector_store(text_chunks)
print("PDF processing complete! Vector store created.")

## 7. Interactive Question Answering

In [None]:
from ipywidgets import widgets

# Widgets for the interactive demo: a question box, a submit button and an
# output area that receives the answer.
question_input = widgets.Textarea(
    description='Question:',
    placeholder='Ask a question about the PDF content...',
    value='',
    layout=dict(width='80%'),
)
submit_button = widgets.Button(description="Submit")
output = widgets.Output()

def on_submit_clicked(b):
    """Handle a click on the submit button.

    Clears the output area, then either shows the question with its answer
    or asks the user to enter a question when the box is blank.

    Args:
        b: The clicked button, supplied by the widget callback; unused.
    """
    with output:
        output.clear_output()

        # Nothing but whitespace: prompt the user and stop.
        if not question_input.value.strip():
            print("Please enter a question.")
            return

        answer = user_input(question_input.value)
        question_md = Markdown(f"**Question:** {question_input.value}")
        display(question_md)
        answer_md = Markdown(f"**Answer:** {answer}")
        display(answer_md)

# Register on_submit_clicked as the click handler of the submit button.
submit_button.on_click(on_submit_clicked)

# Show the question box, the button and the output area in the cell output.
display(question_input, submit_button, output)

## 8. Save/Load VectorStore (Optional)

In [None]:
# Save the vector store (skipped when an earlier cell has not created one)
if 'vector_store' in locals() and vector_store:
    vector_store.save_local("faiss_index")
    print("VectorStore saved to 'faiss_index' folder")

# Load the vector store
try:
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # The saved index includes a pickle file, and current LangChain refuses to
    # load it unless deserialization is explicitly allowed.
    # SECURITY: only acceptable because this notebook wrote the index itself;
    # never load an index obtained from an untrusted source this way.
    loaded_vectorstore = FAISS.load_local(
        "faiss_index",
        embeddings,
        allow_dangerous_deserialization=True,
    )
    vector_store = loaded_vectorstore
    print("VectorStore loaded successfully")
except Exception as e:
    # Best-effort optional cell: report the problem instead of raising.
    print(f"Error loading VectorStore: {e}")