In [11]:
# Import Libraries
import os
from google import genai
import gradio as gr
import PyPDF2
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import numpy as np

In [12]:
# Load the Gemini API key from the environment rather than embedding it in the
# notebook, so the secret is never saved, shared, or committed with the file.
# NOTE(review): the key that was previously hard-coded here should be treated
# as leaked and revoked in the Google AI Studio / Cloud console.
_gemini_api_key = os.environ.get("GEMINI_API_KEY")
if not _gemini_api_key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set. Export it in the environment before "
        "starting the notebook kernel."
    )
client = genai.Client(api_key=_gemini_api_key)

In [13]:
# Load the sentence-embedding model once at module level; embed_chunks and
# search_chunks both encode text through this shared instance.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

In [14]:
# Function to extract and chunk text from PDF
def extract_text_chunks(pdf_file, chunk_size=500):
    """Read a PDF and split its text into fixed-size character chunks.

    Args:
        pdf_file: Passed unchanged to ``PyPDF2.PdfReader``.
        chunk_size: Maximum number of characters per chunk. Must be positive.

    Returns:
        A list of consecutive slices of the concatenated page text, each at
        most ``chunk_size`` characters long. The list is empty when no text
        could be extracted.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    pdf_reader = PyPDF2.PdfReader(pdf_file)

    # Fall back to "" in case extract_text() yields None for a page (e.g. one
    # without a text layer), so concatenation cannot raise TypeError. Joining
    # once also avoids repeated string re-allocation in the loop.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # Split into chunks
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

In [15]:
# Turn every text chunk into its embedding vector
def embed_chunks(chunks):
    """Encode ``chunks`` with the shared ``embedding_model``.

    Args:
        chunks: The text chunks to embed, passed straight to ``encode``.

    Returns:
        Whatever ``embedding_model.encode`` returns for ``chunks``.
    """
    encode = embedding_model.encode
    vectors = encode(chunks)
    return vectors

In [16]:
# Search for relevant chunks based on query
def search_chunks(query, chunks, chunks_embeddings, top_k=3):
    """Return the chunks whose embeddings are most similar to ``query``.

    Args:
        query: The question text; embedded with the shared ``embedding_model``.
        chunks: The text chunks, index-aligned with ``chunks_embeddings``.
        chunks_embeddings: One embedding per chunk, compared against the query
            embedding with cosine similarity.
        top_k: Maximum number of chunks to return (default 3, the previously
            hard-coded value).

    Returns:
        Up to ``top_k`` chunks ordered from most to least similar. An empty
        list is returned when there are no chunks or ``top_k`` is not positive.
    """
    # Nothing to rank: return early instead of calling cosine_similarity with
    # no chunk embeddings.
    if top_k <= 0 or len(chunks) == 0:
        return []

    query_embedding = embedding_model.encode([query])[0]
    similarities = cosine_similarity([query_embedding], chunks_embeddings)[0]

    # argsort is ascending, so reverse it and keep the first top_k indices.
    top_indices = np.argsort(similarities)[::-1][:top_k]
    return [chunks[i] for i in top_indices]

In [17]:
# Generate an answer with Gemini using the most relevant PDF chunks
def ask_pdf_question(pdf_file, query):
    """Answer ``query`` from the content of ``pdf_file``.

    The PDF is split into chunks, the chunks are embedded, the chunks most
    similar to the query are joined into a context, and that context plus the
    question are sent to the ``gemini-2.5-flash`` model.

    Args:
        pdf_file: The uploaded PDF, forwarded to ``extract_text_chunks``.
            ``None`` means nothing was uploaded.
        query: The user's question.

    Returns:
        The model's answer with surrounding whitespace stripped, or a string
        starting with ``"Error: "`` describing what went wrong. Failures are
        returned as text rather than raised so the UI can display them.
    """
    # Validate inputs up front so the user gets a clear message instead of an
    # exception from the PDF reader or the embedding model.
    if pdf_file is None:
        return "Error: please upload a PDF file first."
    if not query or not query.strip():
        return "Error: please enter a question."

    # NOTE(review): the PDF is re-read and re-embedded on every question;
    # caching per file would avoid repeated work for follow-up questions.
    try:
        chunks = extract_text_chunks(pdf_file)
        if not chunks:
            # No text was extracted, so there is nothing to rank or send.
            return "Error: no extractable text was found in this PDF."
        chunks_embeddings = embed_chunks(chunks)
        top_chunks = search_chunks(query, chunks, chunks_embeddings)
    except Exception as e:
        return f"Error: could not process the PDF: {e}"

    context = "\n\n".join(top_chunks)

    prompt = (
        f"You are an assisstant who answers questions based on the following PDF content:\n\n"
        f"{context}\n\n"
        f"Question: {query}\nAnswer:"
    )

    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt,
        )
    except Exception as e:
        return f"Error: {str(e)}"

    # Guard against a response with no text (presumably possible when the
    # model returns no text part — confirm against the SDK docs) so the user
    # does not see an AttributeError message from calling .strip() on None.
    answer = response.text
    if not answer:
        return "Error: the model returned no text for this question."
    return answer.strip()

In [18]:
# Gradio Interface: a PDF upload and a question box go in, the answer text
# produced by ask_pdf_question comes out.
iface = gr.Interface(
    fn=ask_pdf_question,
    inputs=[
        gr.File(file_types=[".pdf"], label="Upload PDF"),
        gr.Textbox(label="Ask a question about the PDF"),
    ],
    outputs="text",
    title="Chat with PDF",
    description="Upload a PDF file and ask questions about its content. The AI will find relevant sections and answer based on them.",
)

In [20]:
# Launch the app.
# NOTE(review): share=True publishes the interface on a public URL (see the
# cell output), so anyone holding the link can submit PDFs and questions that
# are answered with this notebook's Gemini API key. Confirm that is intended
# before sharing; drop share=True to keep the app local.
iface.launch(share=True)

Rerunning server... use `close()` to stop if you need to change `launch()` parameters.
----
* Running on public URL: https://7640b83eb0cc1fcc6f.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


