# Import Packages

In [None]:
#  Import libraries
import os
import fitz  # PyMuPDF
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import FAISS
import google.generativeai as genai


# Set Up API Key

In [None]:
#  Register the Google Gemini API key with the SDK
# NOTE: "your-api-key" is only a placeholder; put a real key here before
# running, and avoid committing the real value to version control.
os.environ["GOOGLE_API_KEY"] = "your-api-key"  # Replace with your key
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


# Load PDF File

In [None]:
#  Upload or define PDF path
# Optional alternative for Google Colab: uncomment the three lines below to
# upload a PDF interactively and use the uploaded file's name instead of the
# hard-coded path.
# from google.colab import files
# uploaded = files.upload()
# pdf_filename = list(uploaded.keys())[0]

# Path of the PDF that the following cells read text from.
pdf_filename = "/content/Research paper final project.pdf"  # Update if needed


# Extract Text from PDF

In [None]:
#  Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page in the PDF at ``pdf_path``.

    Each page's text is followed by a newline, and the pages are
    concatenated in document order. The document is closed again before
    the function returns.
    """
    with fitz.open(pdf_path) as pdf:
        page_texts = [page.get_text("text") + "\n" for page in pdf]
    return "".join(page_texts)

#  Extract
pdf_text = extract_text_from_pdf(pdf_filename)
# Fail fast when no text came back (e.g. a PDF that contains only scanned
# images): otherwise the success message below would be misleading and the
# later chunking/embedding steps would have nothing to work with.
if not pdf_text.strip():
    raise ValueError(f"No extractable text found in PDF: {pdf_filename}")
print("PDF Text Extracted")


# Split Text into Chunks

In [None]:
#  Split text into overlapping chunks
# Each chunk is capped at a size of 300 with an overlap of 50 between
# neighbouring chunks (units are presumably characters — confirm against the
# splitter's length function). The overlap keeps sentences that straddle a
# chunk boundary retrievable from either side.
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
# text_chunks is the list of chunk strings that gets embedded and indexed.
text_chunks = splitter.split_text(pdf_text)


# Embed Text and Store in FAISS

In [None]:
#  Create embeddings and store in FAISS
# Embedding model used for the chunks here; the same object is handed to the
# vector store below.
embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# Build the FAISS vector store from the chunk texts; vector_db is what the
# retrieval function queries.
vector_db = FAISS.from_texts(text_chunks, embedding_model)
print("Vector Database Created")


# Search Function

In [None]:
#  Retrieve relevant chunks from vector DB
def retrieve_relevant_text(query: str, k: int = 10) -> str:
    """Return the text of the ``k`` chunks most similar to ``query``.

    Args:
        query: Question or search phrase to look up in ``vector_db``.
        k: Number of chunks to retrieve. Defaults to 10, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The ``page_content`` of each retrieved chunk, separated by blank
        lines, in the order returned by the similarity search.
    """
    matches = vector_db.similarity_search(query, k=k)
    return "\n\n".join(doc.page_content for doc in matches)


# Ask Gemini API

In [None]:
#  Use Gemini API to answer based on retrieved chunks
def ask_gemini(query, model_name="gemini-pro"):
    """Answer ``query`` with Gemini, grounded in chunks retrieved from the PDF.

    The retrieved context is printed before the model is called so the
    reader can see what the answer was based on.

    Args:
        query: The user's question.
        model_name: Identifier passed to ``genai.GenerativeModel``. Defaults
            to "gemini-pro", the value that was previously hard-coded.
            NOTE(review): confirm this default is still served by the API;
            pass a different identifier here if it is not.

    Returns:
        The ``text`` attribute of the model's response.
    """
    retrieved = retrieve_relevant_text(query)
    print("\n Relevant Context:\n", retrieved)

    model = genai.GenerativeModel(model_name)
    # The prompt instructs the model to rely only on the retrieved context.
    prompt = f"""Please answer the following question using only the provided document content.

Document Content:
{retrieved}

User Question:
{query}

Answer:"""

    response = model.generate_content(prompt)
    return response.text


# Final Prompt

In [None]:
#  Get input and respond
# Read one question from the user, answer it via ask_gemini (which also
# prints the retrieved context), then print the answer.
query = input("Enter your question: ")
response = ask_gemini(query)

print("\n Gemini's Answer:\n", response)
