In [None]:
# --- COLAB SETUP START ---
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    print("üîß Running in Google Colab - Installing dependencies...")
    
    # 1. Clone the repository
    !rm -rf gemini-fullstack-langgraph-quickstart
    !git clone https://github.com/MasumRab/gemini-fullstack-langgraph-quickstart.git
    
    # 2. Navigate to the correct directory
    import os
    repo_name = "gemini-fullstack-langgraph-quickstart"
    target_dir = os.path.join(repo_name, "notebooks")
    
    if os.path.exists(target_dir):
        os.chdir(target_dir)
        print(f"  [OK] Changed directory to {os.getcwd()}")
    else:
        # Fallback to repo root if specific dir not found
        if os.path.exists(repo_name):
            os.chdir(repo_name)
            print(f"  [OK] Changed directory to {os.getcwd()} (Fallback)")
    
    # 3. Install Backend (Quietly)
    # We install from the backend directory which should be reachable
    # relative to current dir or absolute
    
    # Find backend relative to current position
    import sys
    if os.path.exists("backend"):
        !pip install -q -e backend
    elif os.path.exists("../backend"):
        !pip install -q -e ../backend
    elif os.path.exists("src"): # We might be IN backend
        !pip install -q -e .
    else:
        print("  [X] Error: Could not find backend directory to install.")
        sys.exit(1)
        
    print("  [OK] Dependencies installed!")
else:
    print("  [OK] Running locally")
# --- COLAB SETUP END ---

In [None]:
# Universal Setup for Backend Environment
import sys
import os
import subprocess
from pathlib import Path

def setup_environment():
    """Make the backend package importable from this notebook.

    Locates the backend directory (the current directory when it is named
    'backend', otherwise a 'backend' folder below the current directory or
    next to it, otherwise the current directory itself), appends its 'src'
    folder (when present) and the directory itself to ``sys.path``, and then
    tries to import ``agent``. If that import fails, the backend directory is
    installed with ``pip install -e``.

    Raises:
        subprocess.CalledProcessError: if the pip invocation exits non-zero.
    """
    here = Path.cwd()
    if here.name == 'backend':
        backend_dir = here
    else:
        # First existing candidate wins; fall back to the current directory.
        candidates = (here / 'backend', here.parent / 'backend')
        backend_dir = next((c for c in candidates if c.exists()), here)

    # 'src' goes on the path for 'from agent import ...' style imports.
    src_dir = backend_dir / 'src'
    src_entry = str(src_dir)
    if src_dir.exists() and src_entry not in sys.path:
        sys.path.append(src_entry)
        print(f"  [OK] Added {src_dir} to sys.path")

    backend_entry = str(backend_dir)
    if backend_entry not in sys.path:
        sys.path.append(backend_entry)

    # Import check; an editable install is the fallback.
    try:
        import agent  # noqa: F401
    except ImportError:
        print("  [!] Agent module not found. Installing dependencies...")
        pip_cmd = [sys.executable, "-m", "pip", "install", "-e", backend_entry]
        subprocess.check_call(pip_cmd)
        print("  [OK] Backend installed in editable mode.")
    else:
        print("  [OK] Agent module found and imported.")

# Run the setup right away so the sys.path entries it adds are in place
# before the cells below import backend modules.
setup_environment()

In [None]:
# --- MODEL CONFIGURATION ---
# @title Select Gemini Model
# @markdown Choose the Gemini model to use. Only Gemini 2.5 models are currently accessible via the API.

MODEL_STRATEGY = "Gemini 2.5 Flash (Recommended)" # @param ["Gemini 2.5 Flash (Recommended)", "Gemini 2.5 Flash-Lite (Fastest)", "Gemini 2.5 Pro (Best Quality)"]

import os

# Map each form label to the Gemini API model ID it advertises, so the three
# choices really select three different models.
# Note: Gemini 1.5 and 2.0 models are deprecated/not accessible via this API
MODEL_IDS = {
    "Gemini 2.5 Flash (Recommended)": "gemini-2.5-flash",
    "Gemini 2.5 Flash-Lite (Fastest)": "gemini-2.5-flash-lite",
    "Gemini 2.5 Pro (Best Quality)": "gemini-2.5-pro",
}

# Used when MODEL_STRATEGY holds a label that is not in the table above.
DEFAULT_MODEL = "gemini-2.5-flash"

SELECTED_MODEL = MODEL_IDS.get(MODEL_STRATEGY, DEFAULT_MODEL)

print(f"Selected Model: {SELECTED_MODEL}")
print(f"Strategy: {MODEL_STRATEGY}")

# Set Environment Variables to override defaults
MODEL_ENV_VARS = (
    "QUERY_GENERATOR_MODEL",
    "REFLECTION_MODEL",
    "ANSWER_MODEL",
    "TOOLS_MODEL",
)
for _model_env_var in MODEL_ENV_VARS:
    os.environ[_model_env_var] = SELECTED_MODEL

# Ensure GOOGLE_API_KEY is set if GEMINI_API_KEY is present (for LangChain compatibility)
if "GEMINI_API_KEY" in os.environ:
    os.environ["GOOGLE_API_KEY"] = os.environ["GEMINI_API_KEY"]
    print("  [OK] Synced GEMINI_API_KEY to GOOGLE_API_KEY for LangChain")

In [None]:
# --- MODEL VERIFICATION (Optional) ---
# @title Verify Model Configuration
# @markdown Run this cell to verify that your API key is configured correctly and the selected model is available.

import os

# Sends one short prompt to SELECTED_MODEL (assigned in the model
# configuration cell) using the key in GEMINI_API_KEY and reports the outcome.

# Check if API key is set
if "GEMINI_API_KEY" not in os.environ:
    print("⚠️  GEMINI_API_KEY not found in environment variables!")
    print("   Please set it before proceeding:")
    print("   export GEMINI_API_KEY='your-api-key-here'")
else:
    # Only the import is covered by the ImportError handler, so an import
    # problem raised later during the API call is not mistaken for a missing
    # google-genai package.
    try:
        from google import genai
    except ImportError:
        genai = None
        print("  [!] google-genai package not installed!")
        print("   Installing now...")
        import subprocess
        import sys
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "google-genai"])
        print("  [OK] Installed! Please re-run this cell.")

    if genai is not None:
        try:
            # Initialize the client
            client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

            # Test the selected model
            print(f"🧪 Testing model: {SELECTED_MODEL}")
            response = client.models.generate_content(
                model=SELECTED_MODEL,
                contents="Explain how AI works in a few words"
            )

            # response.text may be None when the reply carries no text part;
            # slicing None would turn a successful call into a reported failure.
            preview = (response.text or "")[:100]

            print("  [OK] Model verification successful!")
            print(f"   Model: {SELECTED_MODEL}")
            print(f"   Response: {preview}...")

        except Exception as e:
            # Best-effort diagnostic cell: report and carry on.
            print(f"  [X] Model verification failed: {e}")
            print("   This could mean:")
            print("   - Invalid API key")
            print(f"   - Model '{SELECTED_MODEL}' not available in your region")
            print("   - Quota/billing issues (for experimental models)")
            print("   - Network connectivity issues")

# Search Tool Comparison

This notebook compares the performance of different search providers (Google, Bing, Brave, DuckDuckGo, Tavily) based on citations, highlights, and result quality across different domains.

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display, HTML
from src.search.router import SearchRouter, SearchProviderType
from src.config.app_config import config

# Set API keys if not already in env (USER SHOULD REPLACE THESE)
# os.environ["TAVILY_API_KEY"] = "..."
# os.environ["BING_API_KEY"] = "..."
# os.environ["GEMINI_API_KEY"] = "..."
# os.environ["BRAVE_API_KEY"] = "..."

# Initialize Router
# One shared SearchRouter instance; compare_search_results() below calls
# router.search(...) on it.
router = SearchRouter()

# Print the keys of router.providers as a quick check of what got registered.
# NOTE(review): presumably providers without a configured API key are missing
# from this list -- confirm against SearchRouter.
print("Registered Providers:", list(router.providers.keys()))

In [None]:
def compare_search_results(queries, providers=None, max_results=3, search_router=None):
    """Run every query against every provider and tabulate the results.

    Args:
        queries: Iterable of dicts with the keys ``'domain'`` and ``'query'``.
        providers: Provider names to query. Defaults to the value of every
            member of ``SearchProviderType``.
        max_results: Forwarded to the router's ``search`` call.
        search_router: Object whose ``search(query, max_results=...,
            provider_name=..., attempt_fallback=False)`` method is called.
            Defaults to the notebook-level ``router``.

    Returns:
        pandas.DataFrame with the columns Domain, Query, Provider, Rank,
        Title, URL, Snippet, Has Highlight and Has Citation. Each result
        contributes one row (Rank starts at 1). A provider call that raises
        contributes one row with Rank -1, Title "ERROR" and the exception
        text as Snippet. The columns are present even when there are no rows.
    """
    columns = [
        "Domain", "Query", "Provider", "Rank", "Title", "URL",
        "Snippet", "Has Highlight", "Has Citation",
    ]

    if search_router is None:
        # Fall back to the router created in the notebook's setup cell.
        search_router = router
    if providers is None:
        providers = [p.value for p in SearchProviderType]

    results_data = []

    for query_info in queries:
        domain = query_info['domain']
        query = query_info['query']
        print(f"Processing Query: {query} ({domain})...")

        for provider_name in providers:
            try:
                # Force specific provider
                results = search_router.search(
                    query,
                    max_results=max_results,
                    provider_name=provider_name,
                    attempt_fallback=False,
                )

                for rank, res in enumerate(results, start=1):
                    # A missing body is treated as empty text so one bare
                    # result does not abort the provider's remaining results.
                    content = res.content or ""

                    # Highlight detection: bold markup inside the snippet.
                    has_highlight = "<b>" in content or "<strong>" in content

                    # Citation check: Does it have a clear source URL and title?
                    has_citation = bool(res.url and res.title)

                    snippet = content[:200] + "..." if len(content) > 200 else content

                    results_data.append({
                        "Domain": domain,
                        "Query": query,
                        "Provider": provider_name,
                        "Rank": rank,
                        "Title": res.title,
                        "URL": res.url,
                        "Snippet": snippet,
                        "Has Highlight": has_highlight,
                        "Has Citation": has_citation
                    })
            except Exception as e:
                # Deliberately broad: one failing provider must not stop the
                # comparison; the failure is recorded as its own row.
                print(f"Error with {provider_name}: {e}")
                results_data.append({
                    "Domain": domain,
                    "Query": query,
                    "Provider": provider_name,
                    "Rank": -1,
                    "Title": "ERROR",
                    "URL": "",
                    "Snippet": str(e),
                    "Has Highlight": False,
                    "Has Citation": False
                })

    # Passing the columns keeps the schema stable for an empty run.
    return pd.DataFrame(results_data, columns=columns)

In [None]:
# Define Test Queries
queries = [
    {"domain": "Healthcare", "query": "latest diabetes type 2 treatments 2024"},
    {"domain": "Tech", "query": "python 3.13 new features gil"},
    {"domain": "News", "query": "current events in space exploration October 2024"},
    {"domain": "Legal", "query": "GDPR requirements for AI companies"}
]

# Run Comparison
df = compare_search_results(queries)

# Display Results (one row per returned result or provider error)
display(HTML("<h2>Search Results Comparison</h2>"))
display(df)


def plot_provider_counts(counts, title, color):
    """Draw a bar chart of a per-provider count Series.

    Args:
        counts: pandas Series indexed by provider name.
        title: Chart title.
        color: Bar colour passed to the pandas plot call.
    """
    plt.figure(figsize=(10, 5))
    counts.plot(kind='bar', color=color)
    plt.title(title)
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()


# Visualization: number of valid results and number of highlighted results
# per provider.
# Inspired by SOTA Comparison Notebook (metrics visualization)
if not df.empty:
    # Count valid results per provider (error rows carry Rank -1)
    valid_counts = df[df['Rank'] != -1].groupby('Provider')['URL'].count()

    # df can be non-empty yet hold only error rows (e.g. no API keys set);
    # an empty Series has nothing to draw as a bar chart, so skip it the same
    # way the highlight chart below is skipped.
    if valid_counts.empty:
        print("No valid results to plot: every provider call failed.")
    else:
        plot_provider_counts(valid_counts, 'Valid Results Returned per Provider', 'skyblue')

    # Highlight presence
    highlight_counts = df[df['Has Highlight']].groupby('Provider')['URL'].count()
    if not highlight_counts.empty:
        plot_provider_counts(highlight_counts, 'Results with Highlights/Bolding', 'orange')

In [None]:
# Detailed View of Highlights (HTML Rendering)
from html import escape

print("Detailed HTML Snippets (First result for Tech Query):")
if not df.empty:
    # Top-ranked result of each provider for the Tech query.
    tech_results = df[(df['Domain'] == 'Tech') & (df['Rank'] == 1)]

    for _, row in tech_results.iterrows():
        # Titles and URLs come from external search results, so they are
        # escaped before being placed into markup (quotes included, because
        # the URL also lands inside an attribute value).
        provider = escape(str(row['Provider']))
        title = escape(str(row['Title']))
        url = escape(str(row['URL']), quote=True)

        # NOTE(review): the snippet is rendered as raw HTML on purpose so that
        # provider highlight markup (<b>/<strong>) shows up; it is untrusted
        # content and is cut at 200 characters, which can split a tag.
        snippet = row['Snippet']

        display(HTML(
            f"<h3>{provider}</h3>"
            f"<p><b>Title:</b> {title}</p>"
            f"<p><b>URL:</b> <a href='{url}'>{url}</a></p>"
            f"<div style='border:1px solid #ccc; padding:10px; background:#f9f9f9'>{snippet}</div>"
        ))