<img src="https://drive.google.com/uc?export=view&id=1wYSMgJtARFdvTt5g7E20mE4NmwUFUuog" width="200">

[![Gen AI Experiments](https://img.shields.io/badge/Gen%20AI%20Experiments-GenAI%20Bootcamp-blue?style=for-the-badge&logo=artificial-intelligence)](https://github.com/buildfastwithai/gen-ai-experiments)
[![Gen AI Experiments GitHub](https://img.shields.io/github/stars/buildfastwithai/gen-ai-experiments?style=for-the-badge&logo=github&color=gold)](https://github.com/buildfastwithai/gen-ai-experiments)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/[NOTEBOOK_ID])

## Master Generative AI in 8 Weeks
**What You'll Learn:**
- Master cutting-edge AI tools & frameworks
- 6 weeks of hands-on, project-based learning
- Weekly live mentorship sessions
- No coding experience required
- Join Innovation Community

Transform your AI ideas into reality through hands-on projects and expert mentorship.

[Start Your Journey](https://www.buildfastwithai.com/genai-course)

---

# Gemini 3 Pro - Advanced RAG Techniques

**Created by:** @BuildFastWithAI  
**Model:** Google Gemini 3 Pro  
**Last Updated:** November 2025

Advanced RAG implementations with hybrid search, reranking, and optimization.

In [None]:
!pip install -q google-generativeai langchain langchain-google-genai faiss-cpu rank-bm25 sentence-transformers

In [None]:
import os
import re

import google.generativeai as genai
from google.colab import userdata
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from rank_bm25 import BM25Okapi
import numpy as np

# Read the Gemini API key from the Colab secret store.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

# Configure the google-generativeai SDK itself.
genai.configure(api_key=GOOGLE_API_KEY)

# The LangChain Google wrappers do not read genai.configure(); when no
# google_api_key argument is passed they fall back to this environment variable.
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

## 1. Hybrid Search (BM25 + Vector)

In [None]:
class HybridRetriever:
    """Rank documents by a weighted mix of vector similarity and BM25 scores.

    ``alpha`` is the weight given to the vector score; ``1 - alpha`` is the
    weight given to the normalised BM25 score.
    """

    # Metadata key stored alongside each indexed text so that a vector-store
    # hit can be mapped back to its position in ``self.documents``.
    _INDEX_KEY = "_hybrid_doc_index"

    # Word-character runs; used for both documents and queries so that case
    # and trailing punctuation ("window." vs "window") do not block a match.
    _TOKEN_RE = re.compile(r"\w+")

    def __init__(self, embeddings, alpha=0.5):
        """Store the embedding model and the vector/BM25 mixing weight."""
        self.embeddings = embeddings
        self.alpha = alpha  # Weight for vector search
        self.vectorstore = None
        self.bm25 = None
        self.documents = []

    @classmethod
    def _tokenize(cls, text):
        """Return the lower-cased word tokens of ``text``."""
        return cls._TOKEN_RE.findall(text.lower())

    def add_documents(self, docs):
        """Index ``docs`` (objects with ``page_content``) in both back ends.

        Replaces any previously indexed documents. An empty ``docs`` clears
        the retriever instead of building indexes over an empty corpus.
        """
        self.documents = list(docs)
        if not self.documents:
            self.vectorstore = None
            self.bm25 = None
            return

        texts = [doc.page_content for doc in self.documents]

        # Vector store: tag every entry with its list position. The tag goes
        # into a copy of the metadata, so the caller's documents are untouched.
        metadatas = [
            {**(getattr(doc, "metadata", None) or {}), self._INDEX_KEY: index}
            for index, doc in enumerate(self.documents)
        ]
        self.vectorstore = FAISS.from_texts(
            texts, self.embeddings, metadatas=metadatas
        )

        # BM25 index
        self.bm25 = BM25Okapi([self._tokenize(text) for text in texts])

    def retrieve(self, query, k=5):
        """Return up to ``k`` documents ordered by combined score.

        Returns an empty list when nothing has been indexed or ``k <= 0``.
        """
        if not self.documents or self.vectorstore is None or self.bm25 is None:
            return []
        if k <= 0:
            return []

        n_docs = len(self.documents)

        # Vector search: over-fetch (2k) so the BM25 side has candidates to
        # re-order, but never ask for more hits than there are documents.
        hits = self.vectorstore.similarity_search_with_score(
            query, k=min(k * 2, n_docs)
        )

        # Map each hit back to its document index. The score is treated as a
        # distance (lower is better) and squashed into (0, 1].
        # NOTE(review): assumes the store's default distance metric - confirm
        # if the FAISS distance strategy is ever changed.
        vector_scores = {}
        for hit, distance in hits:
            index = hit.metadata.get(self._INDEX_KEY)
            if index is not None:
                vector_scores[index] = 1 / (1 + distance)

        # BM25 search, scaled so the best match scores 1. When no score is
        # positive the scores are zeroed: dividing by a non-positive maximum
        # would invert the ranking.
        bm25_scores = np.asarray(
            self.bm25.get_scores(self._tokenize(query)), dtype=float
        )
        best = bm25_scores.max()
        if best > 0:
            bm25_norm = bm25_scores / best
        else:
            bm25_norm = np.zeros(n_docs)

        # Combine scores
        combined = [
            self.alpha * vector_scores.get(index, 0.0)
            + (1 - self.alpha) * bm25_norm[index]
            for index in range(n_docs)
        ]

        # Get top k (stable sort: ties keep insertion order)
        ranked = sorted(range(n_docs), key=combined.__getitem__, reverse=True)
        return [self.documents[index] for index in ranked[:k]]

# Test hybrid search
# Document is imported from langchain_core, the package that defines it,
# rather than through the legacy langchain.schema re-export.
from langchain_core.documents import Document

docs = [
    Document(page_content="Gemini 3 Pro has a 1 million token context window."),
    Document(page_content="The model excels at mathematical reasoning and code generation."),
    Document(page_content="Multimodal capabilities include text, image, and audio processing.")
]

# Pass the key explicitly so the embeddings client does not depend on the
# GOOGLE_API_KEY environment variable being set.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)
hybrid = HybridRetriever(embeddings)
hybrid.add_documents(docs)

results = hybrid.retrieve("context window size", k=2)
for doc in results:
    print(doc.page_content)

## 2. Query Transformation

In [None]:
# Multi-query expansion
class QueryExpander:
    """Ask an LLM for alternative phrasings of a query (multi-query expansion)."""

    # Leading list markers ("1.", "2)", "-", "*", "•") that models often add
    # even when asked to return one plain variant per line.
    _LIST_MARKER = re.compile(r"^\s*(?:\d+[.)]|[-*•])\s+")

    def __init__(self, llm):
        """Store ``llm``, an object exposing ``predict(prompt) -> str``."""
        self.llm = llm

    def expand_query(self, query: str, num_variants: int = 3) -> list:
        """Return ``[query]`` followed by up to ``num_variants`` rephrasings.

        List markers are stripped from each returned line, blank lines are
        skipped, and variants that repeat the original query or an earlier
        variant (ignoring case) are dropped.
        """
        prompt = f"""
Generate {num_variants} different ways to ask this question:

Original: {query}

Return only the variants, one per line.
"""
        response = self.llm.predict(prompt)

        seen = {query.strip().casefold()}
        variants = []
        for line in response.splitlines():
            candidate = self._LIST_MARKER.sub("", line).strip()
            fingerprint = candidate.casefold()
            if candidate and fingerprint not in seen:
                seen.add(fingerprint)
                variants.append(candidate)

        # max(..., 0) keeps a negative request from slicing from the end.
        return [query] + variants[:max(num_variants, 0)]

# Pass the key explicitly so the chat client does not depend on the
# GOOGLE_API_KEY environment variable being set.
llm = ChatGoogleGenerativeAI(
    model="gemini-3-pro",
    temperature=0.7,
    google_api_key=GOOGLE_API_KEY,
)
expander = QueryExpander(llm)

# Print the original question followed by its generated variants.
queries = expander.expand_query("What are the features of Gemini 3 Pro?")
for i, q in enumerate(queries, 1):
    print(f"{i}. {q}")

## 3. Advanced Chunking Strategies

In [None]:
# Semantic chunking
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

class SemanticChunker:
    """Group consecutive sentences into chunks by embedding similarity.

    A sentence joins the current chunk when its cosine similarity to the
    previous sentence is above ``similarity_threshold``; otherwise it starts
    a new chunk.
    """

    # Split after sentence-ending punctuation followed by any whitespace
    # (spaces or newlines), keeping the punctuation with its sentence.
    _SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")

    def __init__(self, similarity_threshold=0.5):
        """Load the sentence-embedding model and store the threshold."""
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.threshold = similarity_threshold

    def chunk_text(self, text: str) -> list:
        """Split ``text`` into a list of chunk strings.

        Returns an empty list for empty or whitespace-only input. Sentences
        inside a chunk are joined with a single space.
        """
        sentences = [
            part.strip()
            for part in self._SENTENCE_BOUNDARY.split(text)
            if part.strip()
        ]
        if not sentences:
            return []
        if len(sentences) == 1:
            # Nothing to compare against; skip the embedding call.
            return sentences

        # assumes encode() returns one vector per sentence (2-D array) -
        # TODO confirm if a different embedding model is plugged in
        vectors = np.asarray(self.model.encode(sentences), dtype=float)

        # Normalise once so adjacent cosine similarities are row-wise dot
        # products; zero vectors keep norm 1 to avoid division by zero.
        norms = np.linalg.norm(vectors, axis=1, keepdims=True)
        norms[norms == 0] = 1.0
        unit = vectors / norms
        similarities = np.sum(unit[:-1] * unit[1:], axis=1)

        chunks = []
        current_chunk = [sentences[0]]
        for sentence, similarity in zip(sentences[1:], similarities):
            if similarity > self.threshold:
                current_chunk.append(sentence)
            else:
                chunks.append(' '.join(current_chunk))
                current_chunk = [sentence]
        chunks.append(' '.join(current_chunk))

        return chunks

# Demo: chunk a short three-topic paragraph and print each chunk.
text = (
    "Gemini 3 Pro is a powerful model. It has many features. \n"
    "The context window is very large. It can handle 1 million tokens. \n"
    "The model is multimodal. It processes text and images."
)

chunker = SemanticChunker()
chunks = chunker.chunk_text(text=text)
for i, chunk in enumerate(chunks, start=1):
    print(f"Chunk {i}: {chunk}\n")

## 4. Reranking

In [None]:
# LLM-based reranking
class LLMReranker:
    """Re-order documents by an LLM-assigned relevance score."""

    # First unsigned number in the reply, so "8", "8.5", "Score: 9/10" and
    # "7 out of 10" all yield a score; text without digits ("nan") does not.
    _NUMBER = re.compile(r"\d+\.?\d*|\.\d+")

    def __init__(self, llm):
        """Store ``llm``, an object exposing ``predict(prompt) -> str``."""
        self.llm = llm

    def _score(self, query: str, doc) -> float:
        """Return the LLM's relevance score for ``doc``, or 0.0 on failure."""
        prompt = f"""
Rate how relevant this document is to the query on a scale of 0-10.
Return only the number.

Query: {query}
Document: {doc.page_content}

Relevance score:
"""
        try:
            reply = self.llm.predict(prompt)
        except Exception:
            # Best effort: one failed LLM call should not abort the whole
            # rerank. Unlike a bare ``except``, KeyboardInterrupt and
            # SystemExit still propagate.
            return 0.0

        match = self._NUMBER.search(str(reply))
        return float(match.group()) if match else 0.0

    def rerank(self, query: str, documents: list, top_k: int = 3) -> list:
        """Return the ``top_k`` documents with the highest relevance scores.

        Each document is scored with one LLM call. Documents whose score
        cannot be obtained or parsed count as 0. Ties keep their input order.
        """
        scored_docs = [(doc, self._score(query, doc)) for doc in documents]

        # Sort by score (stable, highest first)
        ranked = sorted(scored_docs, key=lambda pair: pair[1], reverse=True)
        return [doc for doc, _ in ranked[:top_k]]

# Demo: rerank the sample documents against a query and keep the best two.
reranker = LLMReranker(llm)
reranked = reranker.rerank(
    "context window",
    docs,
    top_k=2,
)

print("Reranked results:")
for doc in reranked:
    print(f"- {doc.page_content}")

## 5. Complete Advanced RAG System

In [None]:
class AdvancedRAG:
    """End-to-end pipeline: hybrid retrieval, LLM reranking, answer generation."""

    def __init__(self, api_key: str):
        """Build the embedding model, chat model, retriever and reranker.

        Both Google clients are created with ``api_key``.
        """
        embedding_model = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001",
            google_api_key=api_key,
        )
        chat_model = ChatGoogleGenerativeAI(
            model="gemini-3-pro",
            google_api_key=api_key,
        )

        self.embeddings = embedding_model
        self.llm = chat_model
        self.retriever = HybridRetriever(embedding_model)
        self.reranker = LLMReranker(chat_model)

    def add_documents(self, texts: list):
        """Wrap each text in a ``Document`` and hand the batch to the retriever."""
        wrapped = []
        for text in texts:
            wrapped.append(Document(page_content=text))
        self.retriever.add_documents(wrapped)

    def query(self, question: str, k: int = 5) -> dict:
        """Answer ``question`` from the indexed documents.

        Retrieves ``2 * k`` candidates, reranks them down to ``k``, and asks
        the LLM to answer from those passages.

        Returns a dict with ``"answer"`` (the LLM reply) and ``"sources"``
        (the page contents of the reranked documents).
        """
        # Over-fetch so the reranker has more candidates than it must return.
        candidates = self.retriever.retrieve(question, k=k * 2)
        best_docs = self.reranker.rerank(question, candidates, top_k=k)

        sources = [doc.page_content for doc in best_docs]
        context = "\n\n".join(sources)

        prompt = f"""
Answer based on the context below.

Context:
{context}

Question: {question}

Answer:
"""

        return {
            "answer": self.llm.predict(prompt),
            "sources": sources,
        }

# Demo: index three facts, ask one question, and show the answer.
sample_texts = [
    "Gemini 3 Pro has a 1 million token context window.",
    "The model supports multimodal inputs.",
    "It excels at code generation and reasoning.",
]

advanced_rag = AdvancedRAG(GOOGLE_API_KEY)
advanced_rag.add_documents(sample_texts)

result = advanced_rag.query("What is the context window?", k=2)
print(f"Answer: {result['answer']}\n")
print(f"Sources: {len(result['sources'])}")

## Key Takeaways

✅ **Advanced Techniques:**
- Hybrid search (BM25 + vector)
- Query transformation and expansion
- Semantic chunking
- LLM-based reranking

📌 **When to Use:**
- Hybrid search for better recall
- Reranking for precision
- Query expansion for complex questions

🔗 **Resources:**
- Follow [@BuildFastWithAI](https://twitter.com/BuildFastWithAI)