In [None]:
pip install ipywidgets PyMuPDF google-generativeai scikit-learn

In [1]:
import ipywidgets as widgets
from IPython.display import display

# File picker limited to PDFs; more than one file may be selected at once.
# The selection is read back from `uploader.value`.
uploader = widgets.FileUpload(
    accept='.pdf',
    multiple=True,
)
display(uploader)


FileUpload(value=(), accept='.pdf', description='Upload', multiple=True)

In [2]:
import tempfile
import fitz  # PyMuPDF

# Concatenated text of every page of every uploaded PDF; stays "" when
# nothing has been uploaded.
combined_pdf_text = ""

# `uploader.value` may be a dict of per-file records or a sequence of them
# depending on the ipywidgets version; both shapes are normalised below.
uploaded_files = uploader.value

if uploaded_files:
    file_list = list(uploaded_files.values()) if isinstance(uploaded_files, dict) else list(uploaded_files)

    # Collect page texts in a list and join once at the end instead of
    # growing a string with repeated `+=`.
    page_texts = []

    for uploaded_file in file_list:
        # bytes() makes a plain bytes copy, so a memoryview payload is
        # accepted as well as bytes.
        file_data = bytes(uploaded_file['content'])

        # Open the PDF straight from memory. This avoids writing a temporary
        # file per upload that was never deleted afterwards.
        with fitz.open(stream=file_data, filetype="pdf") as doc:
            page_texts.extend(page.get_text() for page in doc)

    combined_pdf_text = "".join(page_texts)

    print(f"✅ Extracted text from {len(file_list)} PDF(s).")
else:
    print("⚠️ No files uploaded.")


✅ Extracted text from 1 PDF(s).


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def chunk_text(text, chunk_size=200):
    """Split ``text`` into consecutive chunks of at most ``chunk_size`` words.

    Words are whatever ``str.split()`` yields, so any run of whitespace
    (including newlines) is treated as a single separator and each chunk is
    re-joined with single spaces.

    Args:
        text: The string to split.
        chunk_size: Maximum number of words per chunk.

    Returns:
        A list of chunk strings in document order; empty when ``text``
        contains no words. The final chunk may be shorter than ``chunk_size``.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), chunk_size):
        window = tokens[start:start + chunk_size]
        pieces.append(' '.join(window))
    return pieces

def retrieve_context(query, chunks, top_k=3):
    """Return the chunks most similar to ``query`` under TF-IDF cosine similarity.

    The vectorizer is fitted on the chunks together with the query, so both
    share one vocabulary.

    Args:
        query: The user's question.
        chunks: List of text chunks to search.
        top_k: Maximum number of chunks to return.

    Returns:
        The selected chunks joined with newlines, most similar first.
        Returns "" when ``chunks`` is empty or ``top_k`` is not positive.
    """
    # Without chunks the similarity call would receive a zero-row matrix and
    # raise; with top_k == 0 the slice `[-0:]` would select *every* chunk.
    if not chunks or top_k <= 0:
        return ""

    # fit_transform does the fit and the transform in a single pass.
    vectors = TfidfVectorizer().fit_transform(chunks + [query])

    # The query was appended last, so it is the final row.
    query_vector = vectors[-1]
    chunk_vectors = vectors[:-1]

    similarities = cosine_similarity(query_vector, chunk_vectors).flatten()

    # argsort is ascending: take the last top_k indices and reverse them so
    # the best match comes first.
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    return "\n".join(chunks[i] for i in top_indices)


In [4]:
import google.generativeai as genai

# Never store the API key in the notebook: it ends up in version control and
# in every shared copy. Read it from the GEMINI_API_KEY environment variable,
# falling back to a hidden prompt.
# NOTE: the key that was previously hard-coded on this line must be treated
# as compromised and revoked.
import getpass
import os

GEMINI_API_KEY = (os.environ.get("GEMINI_API_KEY") or getpass.getpass("Gemini API key: ")).strip()
if not GEMINI_API_KEY:
    raise RuntimeError("No Gemini API key provided; set GEMINI_API_KEY or enter it at the prompt.")
genai.configure(api_key=GEMINI_API_KEY)

# Model handle used by ask_gemini().
model = genai.GenerativeModel("gemini-2.0-flash")

def ask_gemini(query, context):
    """Ask the module-level Gemini ``model`` to answer ``query`` from ``context``.

    Args:
        query: The user's question.
        context: Document text the answer should be based on.

    Returns:
        The model's response text. A warning string is returned instead when
        ``query`` or ``context`` is blank or when the response carries no
        text, and an error string when the API call raises.
    """
    # Guard clauses: nothing is sent to the API for blank input.
    if query.strip() == "":
        return "⚠️ No query provided."
    if context.strip() == "":
        return "⚠️ No context retrieved from PDF."

    prompt = f"""
You are a helpful assistant.

Given the context from documents:
---------------------
{context}
---------------------

Answer this query:
"{query}"

Be concise and use only the given context.
"""
    try:
        reply = model.generate_content(prompt)
        # A missing `text` attribute and an empty text are handled alike.
        reply_text = getattr(reply, 'text', None)
        return reply_text if reply_text else "⚠️ Gemini returned no response text."
    except Exception as e:
        # Failures are reported as a string so the caller can simply print it.
        return f"❌ Gemini API error: {str(e)}"


In [5]:
# Split the extracted PDF text into retrieval-sized chunks.
chunks = chunk_text(combined_pdf_text)

if not chunks:
    # Without any text, retrieval has nothing to compare the query against
    # and would raise on the first question.
    print("⚠️ No PDF text available. Upload at least one PDF and re-run the extraction cell.")
else:
    print("✅ You can now ask questions about the uploaded PDFs.")
    print("💬 Type 'exit' to stop.\n")

    while True:
        try:
            user_query = input("🔎 Your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Interrupting the kernel or closing stdin ends the session
            # cleanly instead of leaving a traceback in the output.
            print("\n👋 Exiting. Done!")
            break

        if user_query.lower() == "exit":
            print("👋 Exiting. Done!")
            break

        if not user_query:
            # A blank line would otherwise run retrieval and print unrelated
            # chunks before being rejected.
            print("⚠️ No query provided.")
            continue

        context = retrieve_context(user_query, chunks)

        print("\n📚 Relevant Context:\n")
        print(context)

        print("\n🤖 Gemini's Answer:\n")
        answer = ask_gemini(user_query, context)
        print(answer)
        print("-" * 80)


✅ You can now ask questions about the uploaded PDFs.
💬 Type 'exit' to stop.


📚 Relevant Context:

removes unnecessary files, clears browsing history, and can help improve system performance. CCleaner also offers features to uninstall programs and manage startup programs. Key features of CCleaner:  Cleans up unnecessary files: Removes temporary files, cache, and other junk that can slow down your computer.  Clears browser history and cookies: Protects your privacy by deleting browsing data.  Removes unused programs: Helps you uninstall unwanted software.  Manages startup programs: Controls which programs launch when your computer starts, potentially improving boot times.  Registry cleaner: Can help remove invalid or broken registry entries, which can also impact system performance.  Performance optimization: CCleaner can help optimize your computer's performance by removing unnecessary files and managing background processes. CCleaner is a widely used tool and is available for fr