In [1]:
# ============================================================
# NOTEBOOK 3: MCP WEB SEARCH INTEGRATION
# ============================================================
# Purpose: Set up web search using Model Context Protocol (MCP)
# What YOU need to do:
#   1. Get a Tavily API key (free at https://tavily.com)
#   2. Set it as the TAVILY_API_KEY environment variable (the key is read with os.getenv below)
#   3. Run all cells
# ============================================================

# ============================================================
# STEP 1: Install Dependencies
# ============================================================
!pip install tavily-python requests beautifulsoup4

# ============================================================
# STEP 2: Import Libraries
# ============================================================
import os
import json
from tavily import TavilyClient
import requests
from typing import Dict, List, Optional

# ============================================================
# STEP 3: Configure API Keys
# ============================================================
"""
⚠️ YOUR TASK:
1. Get Tavily API key from: https://tavily.com
2. Set the TAVILY_API_KEY environment variable to your actual key (it is read with os.getenv below)
"""
# Read the key from the environment. An unset, empty, or whitespace-only
# variable is normalised to None so a missing key is detected here instead
# of surfacing later as an opaque authentication error.
TAVILY_API_KEY = (os.getenv("TAVILY_API_KEY") or "").strip() or None

if TAVILY_API_KEY is None:
    print("⚠️  Warning: TAVILY_API_KEY is not set. Please set it:")
    print("   os.environ['TAVILY_API_KEY'] = 'your-key'")

# os.environ['TAVILY_API_KEY'] = TAVILY_API_KEY

# Verify OpenAI key is still set
# OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', 'your-openai-api-key')
# if OPENAI_API_KEY == 'your-openai-api-key':
#     print("⚠️  Warning: OpenAI API key not set. Please set it:")
#     print("   OPENAI_API_KEY = 'your-key'")
#     print("   os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY")

# ============================================================
# STEP 4: Initialize Tavily Client
# ============================================================
"""
Tavily is a search API optimized for AI/LLM applications
It provides clean, relevant results perfect for RAG systems
"""
# Bind the name before attempting construction: if initialization fails the
# rest of the notebook sees `tavily_client is None` rather than hitting a
# NameError on an undefined variable.
tavily_client = None
try:
    tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
    print("✅ Tavily client initialized")
except Exception as e:
    # Best-effort setup cell: report the problem and keep the notebook running.
    print(f"❌ Error initializing Tavily: {e}")
    print("   Make sure you've set TAVILY_API_KEY correctly")

# ============================================================
# STEP 5: Create Web Search Function (MCP-Style)
# ============================================================
"""
This implements Model Context Protocol (MCP) pattern for search
MCP allows the model to call external tools/APIs as needed
"""

def web_search_mcp(query: str, max_results: int = 3) -> Dict:
    """
    Perform web search using Tavily API (MCP pattern)

    Calls ``tavily_client.search`` with ``search_depth="advanced"`` and
    ``include_answer=True``. Exceptions raised by the client are caught and
    reported through the ``'error'`` field instead of propagating.

    Args:
        query: Search query (math question). Blank queries are rejected
            without contacting the API.
        max_results: Maximum number of results to return

    Returns:
        Dictionary with the keys ``'success'``, ``'query'``, ``'answer'``,
        ``'results'``, ``'num_results'`` and ``'error'``. On failure
        ``'success'`` is False, ``'results'`` is empty and ``'error'`` holds
        the reason as a string.
    """
    def _failure(message: str) -> Dict:
        # Single place that defines the shape of a failed search.
        return {
            'success': False,
            'query': query,
            'answer': '',
            'results': [],
            'num_results': 0,
            'error': message
        }

    # A blank query cannot produce useful results; skip the API call.
    if not isinstance(query, str) or not query.strip():
        return _failure("Search query must be a non-empty string")

    # Look the client up by name so that a failed or skipped initialization
    # (name unbound, or bound to None) yields a clear message.
    client = globals().get('tavily_client')
    if client is None:
        return _failure("Tavily client is not initialized; check TAVILY_API_KEY")

    try:
        # Perform search
        response = client.search(
            query=query,
            search_depth="advanced",  # More thorough search
            max_results=max_results,
            include_answer=True  # Get AI-generated answer
        )

        # Normalise missing/None fields once so the count always matches the list.
        results = response.get('results') or []

        return {
            'success': True,
            'query': query,
            'answer': response.get('answer') or '',
            'results': results,
            'num_results': len(results),
            'error': None
        }

    except Exception as e:
        return _failure(str(e))

print("✅ Web search function created")

# ============================================================
# STEP 6: Create Context Extraction Function
# ============================================================
"""
Extract relevant context from search results
"""

def extract_search_context(search_results: Dict, max_chars: int = 300) -> str:
    """
    Extract and format context from search results

    Args:
        search_results: Dictionary in the shape returned by the web search
            functions in this notebook (keys 'success', 'error', 'answer',
            'results', 'num_results').
        max_chars: Maximum number of characters of each result's content to
            include (default: 300).

    Returns:
        A newline-joined text block: an optional "Summary:" line followed by
        a "Source N / Content / URL" group per result. Returns a short
        message instead when the search failed or found nothing.
    """
    if not search_results.get('success'):
        return f"Search failed: {search_results.get('error')}"

    results = search_results.get('results') or []

    # Trust the reported count when present; otherwise derive it from the list.
    if search_results.get('num_results', len(results)) == 0:
        return "No relevant information found on the web."

    # Start with Tavily's AI answer if available
    context_parts = []

    answer = search_results.get('answer')
    if answer:
        context_parts.append(f"Summary: {answer}")

    # Add individual results
    for i, result in enumerate(results, 1):
        title = result.get('title', 'No title')
        content = result.get('content') or ''  # tolerate an explicit None
        url = result.get('url', '')

        snippet = content[:max_chars]
        # Only mark the snippet as truncated when text was actually cut off.
        if len(content) > max_chars:
            snippet += "..."

        context_parts.append(f"\nSource {i}: {title}")
        context_parts.append(f"Content: {snippet}")
        context_parts.append(f"URL: {url}")

    return "\n".join(context_parts)

print("✅ Context extraction function created")

# ============================================================
# STEP 7: Test Web Search
# ============================================================
# Section banner for the smoke test.
print("\n" + "=" * 60, "🧪 TESTING WEB SEARCH", "=" * 60, sep="\n")

# Sample math questions used to exercise the search function.
test_queries = [
    "What is the Pythagorean theorem?",
    "How to solve differential equations?",
    "Explain Euler's formula in mathematics",
]

for query in test_queries:
    print(f"\n📝 Query: {query}")
    print("-" * 60)

    # Two results per query keeps the output short.
    results = web_search_mcp(query, max_results=2)

    # Report a failure and move on to the next query.
    if not results['success']:
        print(f"❌ Search failed: {results['error']}")
        continue

    print(f"✅ Found {results['num_results']} results")
    if results['answer']:
        # Show only the first 200 characters of the generated summary.
        print(f"\n📄 AI Summary:\n{results['answer'][:200]}...")

    print(f"\n🔗 Sources:")
    for i, result in enumerate(results['results'], start=1):
        print(f"   {i}. {result.get('title', 'No title')}")
        print(f"      {result.get('url', '')}")

# ============================================================
# STEP 8: Create MCP Tool Registry
# ============================================================
"""
This is the Model Context Protocol pattern - registering available tools
The agent can choose which tool to use based on the task
"""

# Registry of tools, keyed by tool name. Each entry carries a display name,
# a description, the callable to invoke, and a description of its parameters.
MCP_TOOLS = {
    "web_search": dict(
        name="web_search",
        description="Search the web for mathematical information not in knowledge base",
        function=web_search_mcp,
        parameters=dict(
            query="string - the search query",
            max_results="int - maximum number of results (default: 3)",
        ),
    ),
    "knowledge_base_search": dict(
        name="knowledge_base_search",
        description="Search the local knowledge base for similar questions",
        function=None,  # Will be loaded from Notebook 2
        parameters=dict(
            query="string - the question to search for",
            top_k="int - number of results (default: 3)",
        ),
    ),
}

print("✅ MCP Tool Registry created")
print(f"\n📋 Available Tools:")
# One line per registered tool: its key and its description.
for tool_name, tool_info in MCP_TOOLS.items():
    print(f"   - {tool_name}: {tool_info['description']}")

# ============================================================
# STEP 9: Create Enhanced Search with Fallback
# ============================================================
"""
Smart search that tries web search and handles failures gracefully
"""

def enhanced_web_search(query: str, max_retries: int = 2) -> Dict:
    """
    Web search with retry logic and fallback

    Calls ``web_search_mcp(query, max_results=3)`` up to ``max_retries``
    times, pausing one second between attempts, and returns the first
    successful result.

    Args:
        query: Search query
        max_retries: Total number of attempts to make. Values below 1 are
            treated as 1 so that at least one search is always performed.

    Returns:
        The successful result dictionary from ``web_search_mcp``, or a
        failure dictionary of the same shape whose ``'error'`` field includes
        the reason for the last failed attempt.
    """
    import time  # local import: this cell does not import time at the top

    attempts = max(1, max_retries)
    last_error = None
    raised = False  # whether the last failure was an exception

    for attempt in range(attempts):
        try:
            results = web_search_mcp(query, max_results=3)
            if results['success']:
                return results
            # Keep the reason so it is not lost if every attempt fails.
            last_error = results.get('error')
            raised = False
        except Exception as e:
            last_error = str(e)
            raised = True

        # If failed, wait and retry (no pause after the final attempt)
        if attempt < attempts - 1:
            print(f"   ⚠️  Attempt {attempt + 1} failed, retrying...")
            time.sleep(1)

    if raised:
        message = f"All retry attempts failed: {last_error}"
    elif last_error:
        message = f"Search failed after all retries: {last_error}"
    else:
        message = "Search failed after all retries"

    return {
        'success': False,
        'query': query,
        'answer': '',
        'results': [],
        'num_results': 0,
        'error': message
    }

print("✅ Enhanced search with fallback created")

# ============================================================
# STEP 10: Test Questions NOT in Knowledge Base
# ============================================================
# Section banner, followed by a note on what this run is expected to show.
print(
    "\n" + "=" * 60,
    "🧪 TESTING QUESTIONS NOT IN KNOWLEDGE BASE",
    "=" * 60,
    "These should trigger web search:\n",
    sep="\n",
)

# Questions described above as not being in the knowledge base.
non_kb_questions = [
    "What is Fermat's Last Theorem?",
    "Explain the Riemann Hypothesis",
    "How to solve partial differential equations?",
]

for question in non_kb_questions:
    print(f"\n📝 Question: {question}")
    print("-" * 60)

    # A single attempt per question keeps this demo quick.
    results = enhanced_web_search(question, max_retries=1)

    # Report the failure and continue with the next question.
    if not results['success']:
        print(f"❌ Could not find information")
        print(f"   Error: {results['error']}")
        continue

    print(f"✅ Web search successful")
    print(f"   Found: {results['num_results']} sources")
    if results['answer']:
        # Preview only the first 150 characters of the summary.
        print(f"   Summary: {results['answer'][:150]}...")

# ============================================================
# STEP 11: Save MCP Configuration
# ============================================================
# Settings describing this notebook's search setup; written to disk below.
mcp_config = dict(
    search_provider='Tavily',
    max_results_default=3,
    search_depth='advanced',
    tools_available=list(MCP_TOOLS),  # tool names, in registry order
    fallback_enabled=True,
)

import os
import json
from pathlib import Path

# The project root is taken to be the parent of the current working
# directory (assumes the notebook runs from a 'notebooks/' folder).
project_root = Path.cwd().parent

# Make sure '<project root>/data' exists before writing into it.
data_dir = project_root / "data"
data_dir.mkdir(exist_ok=True)

# Serialise the configuration as indented JSON.
config_path = data_dir / "mcp_config.json"
with open(config_path, "w") as f:
    f.write(json.dumps(mcp_config, indent=2))
print(f"\n✅ MCP configuration saved to '{config_path}'")

# ============================================================
# STEP 12: Create Unified Search Function
# ============================================================
"""
This combines both KB and Web Search in one function
This will be used by the main agent
"""

def unified_search(query: str, kb_search_func=None, confidence_threshold=0.5):
    """
    Unified search that tries KB first, then web search

    Args:
        query: User's question
        kb_search_func: Function to search knowledge base (from Notebook 2).
            It is called as ``kb_search_func(query, top_k=1,
            score_threshold=0.0)`` and is expected to return a sequence whose
            first item has a ``'score'`` entry. If it raises, the error is
            printed and the search falls back to the web.
        confidence_threshold: Minimum confidence to use KB

    Returns:
        Dict with the keys 'query', 'source' ('knowledge_base', 'web_search'
        or 'none'), 'content', 'confidence' and 'success'. When nothing is
        found, 'content' holds an 'error' message plus the web search's own
        error text under 'details'.
    """
    result = {
        'query': query,
        'source': None,
        'content': None,
        'confidence': 0.0,
        'success': False
    }

    # Try knowledge base first if available
    if kb_search_func:
        top_hit = None
        score = None
        try:
            kb_results = kb_search_func(query, top_k=1, score_threshold=0.0)
            if kb_results:
                top_hit = kb_results[0]
                score = top_hit['score']
        except Exception as e:
            # A broken knowledge base must not block the web fallback.
            print(f"   ⚠️  Knowledge base search failed, falling back to web search: {e}")
            top_hit = None
            score = None

        if score is not None and score >= confidence_threshold:
            result['source'] = 'knowledge_base'
            result['content'] = top_hit
            result['confidence'] = score
            result['success'] = True
            return result

    # Fall back to web search
    web_results = enhanced_web_search(query)

    if web_results['success']:
        result['source'] = 'web_search'
        result['content'] = web_results
        result['confidence'] = 0.7  # Assume good confidence for successful web search
        result['success'] = True
    else:
        result['source'] = 'none'
        # Keep the generic message and also surface the reason the web
        # search reported, so failures can be diagnosed.
        result['content'] = {
            'error': 'No information found',
            'details': web_results.get('error')
        }
        result['confidence'] = 0.0
        result['success'] = False

    return result

print("✅ Unified search function created")

# Closing summary for the notebook, emitted one item per line.
print(
    "\n" + "=" * 60,
    "✅ NOTEBOOK 3 COMPLETE!",
    "=" * 60,
    "\n📝 WHAT YOU DID:",
    "   - Set up Tavily web search API",
    "   - Created MCP-style tool registry",
    "   - Built fallback and retry logic",
    "   - Tested with non-KB questions",
    "\n🔜 NEXT: Move to Notebook 4 (Main Routing Agent)",
    "=" * 60,
    sep="\n",
)

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: C:\Users\brije\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


✅ Tavily client initialized
✅ Web search function created
✅ Context extraction function created

🧪 TESTING WEB SEARCH

📝 Query: What is the Pythagorean theorem?
------------------------------------------------------------
✅ Found 2 results

📄 AI Summary:
The Pythagorean theorem states that in a right triangle, the square of the hypotenuse equals the sum of the squares of the other two sides. It is expressed as a^2 + b^2 = c^2, where c is the hypotenus...

🔗 Sources:
   1. Pythagorean theorem - Wikipedia
      https://en.wikipedia.org/wiki/Pythagorean_theorem
   2. Pythagorean Theorem Calculator
      https://www.calculator.net/pythagorean-theorem-calculator.html

📝 Query: How to solve differential equations?
------------------------------------------------------------
✅ Found 2 results

📄 AI Summary:
To solve differential equations, identify the type, choose a method (e.g., separation of variables), solve using the method, and express the general solution with arbitrary constants....

