# Step 1: Install Libraries & Setup
We will use google-genai (Google's current Gen AI SDK) for model and embedding calls, pypdf for reading PDFs, numpy for vector math, and requests for downloading files.

In [None]:
# Install the packages this notebook imports below: google-genai, pypdf, numpy and requests.
!pip install google-genai pypdf numpy requests

In [None]:
import os
from google.colab import userdata
from google import genai
import os
import requests
import numpy as np
import pypdf
from google import genai
from google.genai import types

# --- CONFIGURATION ---

# Get the API key from Colab secrets
API_KEY = userdata.get('GOOGLE_API_KEY')

try:
    # 1. Initialize the Google GenAI Client
    client = genai.Client(api_key=API_KEY)

    # 2. Make a simple test call so a bad key or model name shows up here
    #    rather than in a later step.
    print("Testing API connection...")
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents="Say 'Hello, World! The API is working perfectly.'"
    )

    # 3. Print Result
    print("\nSUCCESS! 🎉")
    print(f"Model Response: {response.text}")

except Exception as e:
    # Broad catch is deliberate: this cell is only a smoke test and reports
    # whatever went wrong instead of stopping with a traceback.
    print("\nERROR ❌")
    print(f"Something went wrong: {e}")

# Step 2: Download PDF Files
We will download a classic AI paper ("Attention Is All You Need") to serve as our knowledge base.

In [None]:
def download_pdf(url, filename, timeout=30):
    """
    Downloads the file at `url` and saves it to `filename`.

    Prints a confirmation when the server answers with HTTP 200 and a failure
    notice for any other status code; nothing is written in the failure case.

    Args:
        url: Address of the file to fetch.
        filename: Local path the response body is written to (binary mode).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests.get can block indefinitely on a stalled
            connection.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        with open(filename, 'wb') as f:
            f.write(response.content)
        print(f"Downloaded: {filename}")
    else:
        print(f"Failed to download {url}")

# Source and local destination for "Attention Is All You Need" (The Transformer paper)
pdf_url = "https://arxiv.org/pdf/1706.03762.pdf"
pdf_filename = "data/attention_paper.pdf"

# Make sure the destination folder exists before anything is written into it
os.makedirs(os.path.dirname(pdf_filename), exist_ok=True)

download_pdf(pdf_url, pdf_filename)

# Step 3: PDF Parsing and Chunking
RAG requires breaking large documents into smaller, manageable "chunks" of text, so that each piece can be embedded and retrieved on its own.

In [None]:
def parse_and_chunk_pdf(filepath, chunk_size=1000, overlap=100):
    """
    Reads a PDF and splits its text into overlapping chunks.

    Args:
        filepath: Path to the PDF file.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by consecutive chunks; must be
            smaller than chunk_size so that every chunk moves forward.

    Returns:
        A list of non-empty, whitespace-stripped text chunks.

    Raises:
        ValueError: If chunk_size is not positive or overlap >= chunk_size
            (either would keep the chunking loop from ever finishing).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    # 1. Parse PDF to Text (pages are read while the file is still open)
    with open(filepath, 'rb') as f:
        reader = pypdf.PdfReader(f)
        page_texts = [page.extract_text() for page in reader.pages]
    # Pages without extractable text are skipped; join avoids repeated
    # string concatenation.
    text_content = "".join(text + "\n" for text in page_texts if text)

    # 2. Simple Chunking (Character based for simplicity)
    chunks = _chunk_text(text_content, chunk_size, overlap)

    print(f"Total Text Length: {len(text_content)} characters")
    print(f"Total Chunks Created: {len(chunks)}")
    return chunks


def _chunk_text(text, chunk_size, overlap):
    """Splits text into stripped chunks of at most chunk_size characters that overlap by `overlap`."""
    chunks = []
    text_length = len(text)
    start = 0

    while start < text_length:
        end = start + chunk_size

        if end >= text_length:
            # Final chunk: take the rest and stop. Stepping back by `overlap`
            # here would only re-emit a suffix of this same chunk.
            tail = text[start:].strip()
            if tail:
                chunks.append(tail)
            break

        # Prefer to break on a space to avoid cutting words, but only accept
        # a space far enough into the chunk that the next start (end - overlap)
        # still lands after the current start. Otherwise cut at chunk_size.
        earliest_break = start + max(overlap, 0) + 1
        last_space = text.rfind(' ', earliest_break, end)
        if last_space != -1:
            end = last_space

        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)

        # Move forward, subtracting overlap to keep context
        start = end - overlap

    return chunks

# Execute
chunks = parse_and_chunk_pdf(pdf_filename)

# Preview a chunk
print("\n--- Sample Chunk ---")
if chunks:
    print(chunks[0][:500] + "...")
else:
    # A PDF with no extractable text yields no chunks; indexing chunks[0]
    # would raise IndexError.
    print("(no text could be extracted from the PDF)")

# Step 4: Embedding & Vector Store Creation
We transform text chunks into "vectors" (lists of numbers). Similar meanings have mathematically similar vectors. We will use a simple Python list as our "Vector Store" to show exactly how it works.

In [None]:
class SimpleVectorStore:
    """
    Minimal in-memory vector store.

    Texts are embedded through the notebook-level `client` and retrieved by
    cosine similarity between the stored embeddings and the embedded query.
    """

    def __init__(self, embedding_model="gemini-embedding-001"):
        """
        Creates an empty store.

        Args:
            embedding_model: Name of the embedding model used for both
                documents and queries.
        """
        # One embedding per entry of `texts`, in the same order. Starts as an
        # empty list and is a NumPy array after add_documents has run.
        self.vectors = []
        self.texts = []
        self.embedding_model = embedding_model

    def add_documents(self, texts, batch_size=10):
        """
        Embeds texts using Google's embedding model and stores them.

        May be called more than once; new embeddings are appended to the ones
        already stored. Nothing is stored unless every batch succeeds, so
        `texts` and `vectors` always stay aligned.

        Args:
            texts: List of strings to embed.
            batch_size: Number of texts sent per embedding request; kept
                small to avoid hitting limits.
        """
        print("Generating embeddings... this may take a moment.")

        new_vectors = []
        new_texts = []
        for i in range(0, len(texts), batch_size):
            batch_texts = texts[i:i+batch_size]

            # Call Google GenAI Embedding API
            response = client.models.embed_content(
                model=self.embedding_model,
                contents=batch_texts
            )

            # The response contains a list of embeddings
            new_vectors.extend(e.values for e in response.embeddings)
            new_texts.extend(batch_texts)
            print(f"Processed batch {i} to {i+len(batch_texts)}")

        # Keep the vectors as a NumPy array for fast calculation. Stacking
        # onto the existing array (rather than calling .extend on it) is what
        # makes repeated add_documents calls work.
        if new_vectors:
            combined = np.array(new_vectors)
            if len(self.vectors) > 0:
                combined = np.vstack([self.vectors, combined])
            self.vectors = combined
        else:
            self.vectors = np.asarray(self.vectors)
        self.texts.extend(new_texts)
        print(f"Vector Store Created with {len(self.vectors)} documents.")

    def search(self, query, k=3):
        """
        Retrieves the top-k most relevant chunks for a query.

        Args:
            query: Question or search text.
            k: Maximum number of results to return.

        Returns:
            A list of dicts with keys "text" and "score" (cosine similarity),
            ordered from most to least similar. Empty when the store holds no
            documents or k is not positive.
        """
        # Nothing to rank: skip the embedding request entirely. (A plain
        # [-k:] slice with k == 0 would otherwise return every document.)
        if k <= 0 or len(self.texts) == 0:
            return []

        # 1. Embed the query
        response = client.models.embed_content(
            model=self.embedding_model,
            contents=query
        )
        query_vector = np.array(response.embeddings[0].values)

        # 2. Calculate Cosine Similarity
        # (Dot product of normalized vectors)
        vectors = np.asarray(self.vectors)
        norm_vectors = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
        norm_query = query_vector / np.linalg.norm(query_vector)
        similarities = np.dot(norm_vectors, norm_query)

        # 3. Get Top-K Indices (highest similarity first)
        top_k_indices = np.argsort(similarities)[::-1][:k]

        return [
            {"text": self.texts[idx], "score": similarities[idx]}
            for idx in top_k_indices
        ]

# Initialize and Populate Vector Store: embed every chunk returned by
# parse_and_chunk_pdf so the chunks can be searched by similarity.
vector_store = SimpleVectorStore()
vector_store.add_documents(chunks)

# Step 5: Query Retrieval
Now we test the system. We ask a question, and the system finds the specific paragraphs in the PDF that contain the answer.

In [None]:
# The question we want answered from the paper
user_query = "How does the Transformer model use self-attention?"

print(f"Query: {user_query}", "-" * 30, sep="\n")

# Fetch the three chunks most similar to the question
retrieved_docs = vector_store.search(user_query, k=3)

print("Retrieved Contexts:")
for rank, hit in enumerate(retrieved_docs, start=1):
    preview = hit['text'][:300]  # only the first 300 chars of the chunk
    print(f"\n[Result {rank}] (Score: {hit['score']:.4f})")
    print(f"{preview}...")

# Step 6: Answer Generation (The "G" in RAG)
Finally, we combine the query with the retrieved text and send both to Gemini. This reduces hallucinations by instructing the model to answer from the source material rather than from memory.

In [None]:
def generate_rag_answer(query, retrieved_docs):
    """
    Answers `query` with Gemini, using the retrieved chunks as context.

    Args:
        query: The user's question.
        retrieved_docs: List of dicts that each carry the chunk under "text".

    Returns:
        The text of the model's response.
    """
    # Label each retrieved chunk ("Context 1: ...") and separate them with blank lines
    labelled_chunks = []
    for position, doc in enumerate(retrieved_docs, start=1):
        labelled_chunks.append(f"Context {position}: {doc['text']}")
    context_block = "\n\n".join(labelled_chunks)

    # Instructions, question and context go to the model as a single prompt
    prompt = f"""
    You are a helpful AI assistant. Use the following context to answer the user's question.
    If the answer is not in the context, say "I cannot find the answer in the provided document."

    User Question: {query}

    ---
    {context_block}
    ---

    Answer:
    """

    # Ask Gemini and hand back only the text of its reply
    return client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt,
    ).text

# End-to-end run: show the question, generate the grounded answer, print it framed by rules
print(f"User Question: {user_query}\n")
final_answer = generate_rag_answer(user_query, retrieved_docs)

print("Gemini's RAG Answer:", "=" * 50, final_answer, "=" * 50, sep="\n")