# -------------------------------MCP F1 BOT SETUP-----------------------------------

In [1]:
import os
os.chdir("/Users/naveenkumar/Desktop/formula-1-bot")
%pwd

'/Users/naveenkumar/Desktop/formula-1-bot'

In [2]:
import os
import json
import asyncio
from typing import Dict, Any, List, Optional, Union
from dataclasses import dataclass, asdict
from datetime import datetime
import pandas as pd
from sqlalchemy import create_engine, text, inspect
from dotenv import load_dotenv
import warnings
# Install a blanket "ignore" filter: no warning of any category is shown
# for the remainder of the kernel session.
warnings.filterwarnings(action="ignore")

# Load environment variables (python-dotenv's load_dotenv, imported above)
load_dotenv()

print("✅ MCP F1 Bot dependencies loaded successfully!")

✅ MCP F1 Bot dependencies loaded successfully!


In [3]:
# Database configuration
from urllib.parse import quote

# Connection settings are read from the environment; each one falls back to
# an empty string when the variable is not set.
DB_HOST = os.getenv('DB_HOST', '')
DB_PORT = os.getenv('DB_PORT', '')
DB_NAME = os.getenv('DB_NAME', '')
DB_USER = os.getenv('DB_USER', '')
DB_PASSWORD = os.getenv('DB_PASSWORD', '')

# Create SQLAlchemy engine
# The user name and password are percent-encoded so that characters such as
# '@', ':', '/' or '#' cannot be mistaken for URL delimiters. DB_USER and
# DB_PASSWORD must therefore hold the raw (not pre-encoded) values.
DATABASE_URL = (
    f"postgresql://{quote(DB_USER, safe='')}:{quote(DB_PASSWORD, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

try:
    engine = create_engine(DATABASE_URL)
    # Test connection: run a trivial query so a bad configuration is reported
    # here rather than on the first tool call.
    with engine.connect() as conn:
        result = conn.execute(text("SELECT version();"))
        print("✅ Database connection successful!")
        print(f"PostgreSQL version: {result.fetchone()[0]}")
except Exception as e:
    # Best effort: report the problem and let the notebook continue.
    print(f"❌ Database connection failed: {e}")
    print("Please check your environment variables")

# Get database schema information
def get_db_schema():
    """Reflect every table reachable through the module-level ``engine``.

    Returns:
        dict: maps each table name to a dict with three entries:
            ``columns`` - list of ``{'name', 'type'}`` dicts (type rendered
            with ``str``), ``primary_keys`` - the constrained columns of the
            primary-key constraint, ``foreign_keys`` - one list of
            constrained columns per foreign key.
        If anything raises, the error is printed and ``{}`` is returned.
    """
    def describe_table(insp, table_name):
        # Same lookup order as a plain dict literal: columns, then PK, then FKs.
        column_specs = []
        for column in insp.get_columns(table_name):
            column_specs.append({'name': column['name'], 'type': str(column['type'])})
        pk_columns = insp.get_pk_constraint(table_name)['constrained_columns']
        fk_columns = [fk['constrained_columns'] for fk in insp.get_foreign_keys(table_name)]
        return {
            'columns': column_specs,
            'primary_keys': pk_columns,
            'foreign_keys': fk_columns,
        }

    try:
        insp = inspect(engine)
        table_names = insp.get_table_names()
        collected = {name: describe_table(insp, name) for name in table_names}
        print(f"✅ Schema loaded for {len(table_names)} tables")
        return collected
    except Exception as err:
        print(f"❌ Error loading schema: {err}")
        return {}

# Reflect the schema once when this cell runs. get_db_schema() returns {} if
# reflection raised, so an empty dict here usually means the connection failed.
db_schema = get_db_schema()

✅ Database connection successful!
PostgreSQL version: PostgreSQL 14.18 (Homebrew) on aarch64-apple-darwin24.4.0, compiled by Apple clang version 17.0.0 (clang-1700.0.13.3), 64-bit
✅ Schema loaded for 18 tables


# ---- MCP TOOL DEFINITIONS ------

In [4]:
# Enhanced Specialized MCP Tools
@dataclass
class MCPTool:
    """Represents an MCP tool with its capabilities.

    Plain record type: all four fields are required and none has a default.
    """
    # Tool identifier; the definitions in this notebook reuse it as the registry key.
    name: str
    # Human-readable summary of what the tool does.
    description: str
    # JSON-Schema-style dict ("type", "properties", optionally "required")
    # describing the parameters the tool accepts.
    input_schema: Dict[str, Any]
    # Sample argument dicts; the ones defined in this notebook also carry an
    # "explanation" entry alongside the actual parameters.
    examples: List[Dict[str, Any]]

# Define comprehensive F1-specific tools
# Registry of tool definitions, keyed by tool name (each key equals the
# MCPTool.name it maps to).
#
# For array-typed parameters ("metrics", "comparison_metrics") the list of
# allowed values lives inside "items": an "enum" placed next to "items" would
# constrain the array value as a whole, so no list - not even ["all"] from the
# examples - could ever satisfy it.
ENHANCED_F1_TOOLS = {
    # === CORE IDENTIFICATION TOOLS ===
    "get_meeting_key": MCPTool(
        name="get_meeting_key",
        description="Get the meeting_key for a specific race event by name and year",
        input_schema={
            "type": "object",
            "properties": {
                "event_name": {
                    "type": "string",
                    "description": "Name of the race event (e.g., 'Australian Grand Prix', 'Monaco Grand Prix')"
                },
                "year": {
                    "type": "integer",
                    "description": "Year of the event (e.g., 2025)"
                }
            },
            "required": ["event_name", "year"]
        },
        examples=[
            {
                "event_name": "Australian Grand Prix",
                "year": 2025,
                "explanation": "Get meeting key for 2025 Australian Grand Prix"
            }
        ]
    ),

    "get_session_key": MCPTool(
        name="get_session_key",
        description="Get the session_key for a specific session type within a meeting",
        input_schema={
            "type": "object",
            "properties": {
                "meeting_key": {
                    "type": "integer",
                    "description": "Meeting key from get_meeting_key"
                },
                "session_type": {
                    "type": "string",
                    "enum": ["Race", "Qualifying", "Practice 1", "Practice 2", "Practice 3", "Sprint", "Sprint Qualifying"],
                    "description": "Type of session"
                }
            },
            "required": ["meeting_key", "session_type"]
        },
        examples=[
            {
                "meeting_key": 1254,
                "session_type": "Race",
                "explanation": "Get session key for the race session"
            }
        ]
    ),

    # === PERFORMANCE ANALYSIS TOOLS ===
    "get_fastest_lap": MCPTool(
        name="get_fastest_lap",
        description="Get the fastest lap time and details for a specific session",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {
                    "type": "integer",
                    "description": "Session key from get_session_key"
                },
                "driver_filter": {
                    "type": "string",
                    "description": "Optional driver name to filter results"
                },
                "team_filter": {
                    "type": "string",
                    "description": "Optional team name to filter results"
                }
            },
            "required": ["session_key"]
        },
        examples=[
            {
                "session_key": 9693,
                "explanation": "Get fastest lap of the session"
            },
            {
                "session_key": 9693,
                "driver_filter": "Max VERSTAPPEN",
                "explanation": "Get Verstappen's fastest lap"
            }
        ]
    ),

    "get_driver_performance": MCPTool(
        name="get_driver_performance",
        description="Get comprehensive performance data for a specific driver in a session",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {"type": "integer"},
                "driver_name": {"type": "string"},
                "metrics": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": ["lap_times", "consistency", "sector_times", "position_progression", "all"]
                    },
                    "description": "Specific metrics to retrieve"
                }
            },
            "required": ["session_key", "driver_name"]
        },
        examples=[
            {
                "session_key": 9693,
                "driver_name": "Max VERSTAPPEN",
                "metrics": ["all"],
                "explanation": "Get comprehensive performance data for Verstappen"
            }
        ]
    ),

    "get_team_performance": MCPTool(
        name="get_team_performance",
        description="Get performance data for all drivers in a team for a session",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {"type": "integer"},
                "team_name": {"type": "string"},
                "metrics": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": ["lap_times", "consistency", "positions", "all"]
                    }
                }
            },
            "required": ["session_key", "team_name"]
        },
        examples=[
            {
                "session_key": 9693,
                "team_name": "Red Bull Racing",
                "metrics": ["all"],
                "explanation": "Get Red Bull team performance"
            }
        ]
    ),

    # === COMPARISON TOOLS ===
    "compare_drivers": MCPTool(
        name="compare_drivers",
        description="Compare performance between two drivers in a session",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {"type": "integer"},
                "driver1": {"type": "string"},
                "driver2": {"type": "string"},
                "comparison_metrics": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": ["lap_times", "consistency", "sector_times", "positions", "pit_stops", "all"]
                    }
                }
            },
            "required": ["session_key", "driver1", "driver2"]
        },
        examples=[
            {
                "session_key": 9693,
                "driver1": "Max VERSTAPPEN",
                "driver2": "Lewis HAMILTON",
                "comparison_metrics": ["all"],
                "explanation": "Compare Verstappen vs Hamilton performance"
            }
        ]
    ),

    "compare_teams": MCPTool(
        name="compare_teams",
        description="Compare performance between two teams in a session",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {"type": "integer"},
                "team1": {"type": "string"},
                "team2": {"type": "string"},
                "comparison_metrics": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": ["best_lap", "avg_lap", "consistency", "positions", "all"]
                    }
                }
            },
            "required": ["session_key", "team1", "team2"]
        },
        examples=[
            {
                "session_key": 9693,
                "team1": "Red Bull Racing",
                "team2": "Mercedes",
                "comparison_metrics": ["all"],
                "explanation": "Compare Red Bull vs Mercedes performance"
            }
        ]
    ),

    # === RESULTS TOOLS ===
    "get_race_results": MCPTool(
        name="get_race_results",
        description="Get final race results with positions, lap times, and other details",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {"type": "integer"},
                "result_type": {
                    "type": "string",
                    "enum": ["full_results", "top_10", "podium", "winner_only", "dnf_list"],
                    "description": "Level of detail for results"
                },
                "include_lap_times": {
                    "type": "boolean",
                    "description": "Whether to include lap time statistics"
                }
            },
            "required": ["session_key"]
        },
        examples=[
            {
                "session_key": 9693,
                "result_type": "podium",
                "include_lap_times": True,
                "explanation": "Get podium finishers with lap time details"
            }
        ]
    ),

    "get_qualifying_results": MCPTool(
        name="get_qualifying_results",
        description="Get qualifying results with best lap times and positions",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {"type": "integer"},
                "result_type": {
                    "type": "string",
                    "enum": ["full_results", "top_10", "q3_only", "eliminated_q1", "eliminated_q2"],
                    "description": "Level of detail for results"
                }
            },
            "required": ["session_key"]
        },
        examples=[
            {
                "session_key": 9693,
                "result_type": "top_10",
                "explanation": "Get top 10 qualifying results"
            }
        ]
    ),

    # === STRATEGY TOOLS ===
    "get_pit_stop_analysis": MCPTool(
        name="get_pit_stop_analysis",
        description="Analyze pit stop strategy and performance for drivers/teams",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {"type": "integer"},
                "driver_filter": {"type": "string"},
                "team_filter": {"type": "string"},
                "analysis_type": {
                    "type": "string",
                    "enum": ["timing", "strategy", "comparison", "all"],
                    "description": "Type of pit stop analysis"
                }
            },
            "required": ["session_key"]
        },
        examples=[
            {
                "session_key": 9693,
                "analysis_type": "all",
                "explanation": "Get comprehensive pit stop analysis"
            }
        ]
    ),

    "get_tire_strategy": MCPTool(
        name="get_tire_strategy",
        description="Analyze tire strategy and stint information for drivers/teams",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {"type": "integer"},
                "driver_filter": {"type": "string"},
                "team_filter": {"type": "string"},
                "strategy_type": {
                    "type": "string",
                    "enum": ["stint_analysis", "compound_usage", "strategy_comparison", "all"],
                    "description": "Type of tire strategy analysis"
                }
            },
            "required": ["session_key"]
        },
        examples=[
            {
                "session_key": 9693,
                "strategy_type": "all",
                "explanation": "Get comprehensive tire strategy analysis"
            }
        ]
    ),

    # === INCIDENT ANALYSIS TOOLS ===
    "investigate_incident": MCPTool(
        name="investigate_incident",
        description="Investigate incidents, slow laps, or unusual performance patterns",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {"type": "integer"},
                "driver_name": {"type": "string"},
                "lap_number": {"type": "integer"},
                "investigation_type": {
                    "type": "string",
                    "enum": ["slow_lap", "incident", "unusual_pattern", "sector_analysis", "all"],
                    "description": "Type of investigation"
                },
                "context_laps": {
                    "type": "integer",
                    "description": "Number of laps before/after to include for context",
                    "default": 3
                }
            },
            "required": ["session_key", "driver_name"]
        },
        examples=[
            {
                "session_key": 9693,
                "driver_name": "Lando NORRIS",
                "lap_number": 67,
                "investigation_type": "all",
                "context_laps": 5,
                "explanation": "Investigate Norris's performance around lap 67"
            }
        ]
    ),

    # === POSITION ANALYSIS TOOLS ===
    "get_position_progression": MCPTool(
        name="get_position_progression",
        description="Track position changes throughout the session for drivers",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {"type": "integer"},
                "driver_filter": {"type": "string"},
                "team_filter": {"type": "string"},
                "progression_type": {
                    "type": "string",
                    "enum": ["lap_by_lap", "key_moments", "overtakes", "all"],
                    "description": "Type of position progression analysis"
                }
            },
            "required": ["session_key"]
        },
        examples=[
            {
                "session_key": 9693,
                "progression_type": "all",
                "explanation": "Get comprehensive position progression analysis"
            }
        ]
    ),

    # === SECTOR ANALYSIS TOOLS ===
    "get_sector_analysis": MCPTool(
        name="get_sector_analysis",
        description="Analyze sector times and identify strengths/weaknesses",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {"type": "integer"},
                "driver_filter": {"type": "string"},
                "team_filter": {"type": "string"},
                "sector_analysis_type": {
                    "type": "string",
                    "enum": ["best_sectors", "sector_comparison", "sector_consistency", "all"],
                    "description": "Type of sector analysis"
                }
            },
            "required": ["session_key"]
        },
        examples=[
            {
                "session_key": 9693,
                "sector_analysis_type": "all",
                "explanation": "Get comprehensive sector analysis"
            }
        ]
    ),

    # === UTILITY TOOLS ===
    "explore_schema": MCPTool(
        name="explore_schema",
        description="Get information about database tables, columns, and relationships",
        input_schema={
            "type": "object",
            "properties": {
                "table_name": {
                    "type": "string",
                    "description": "Name of the table to explore (optional)"
                },
                "detail_level": {
                    "type": "string",
                    "enum": ["overview", "columns", "sample_data", "relationships"],
                    "description": "Level of detail to return"
                }
            }
        },
        examples=[
            {
                "table_name": "laps_transformed",
                "detail_level": "columns",
                "explanation": "Get detailed column information for laps table"
            }
        ]
    ),

    "get_session_info": MCPTool(
        name="get_session_info",
        description="Get basic information about a session including type, date, and participants",
        input_schema={
            "type": "object",
            "properties": {
                "session_key": {"type": "integer"}
            },
            "required": ["session_key"]
        },
        examples=[
            {
                "session_key": 9693,
                "explanation": "Get basic session information"
            }
        ]
    )
}

# Report how many tools were registered, then a fixed overview of the
# categories (the overview text is static, not derived from the registry).
print(f"✅ {len(ENHANCED_F1_TOOLS)} Enhanced MCP tools defined successfully!")
print("\n".join([
    "📋 Tool Categories:",
    "  • Core Identification: get_meeting_key, get_session_key",
    "  • Performance Analysis: get_fastest_lap, get_driver_performance, get_team_performance",
    "  • Comparison: compare_drivers, compare_teams",
    "  • Results: get_race_results, get_qualifying_results",
    "  • Strategy: get_pit_stop_analysis, get_tire_strategy",
    "  • Incident Analysis: investigate_incident",
    "  • Position Analysis: get_position_progression",
    "  • Sector Analysis: get_sector_analysis",
    "  • Utility: explore_schema, get_session_info",
]))

✅ 16 Enhanced MCP tools defined successfully!
📋 Tool Categories:
  • Core Identification: get_meeting_key, get_session_key
  • Performance Analysis: get_fastest_lap, get_driver_performance, get_team_performance
  • Comparison: compare_drivers, compare_teams
  • Results: get_race_results, get_qualifying_results
  • Strategy: get_pit_stop_analysis, get_tire_strategy
  • Incident Analysis: investigate_incident
  • Position Analysis: get_position_progression
  • Sector Analysis: get_sector_analysis
  • Utility: explore_schema, get_session_info


# MCP TOOL IMPLEMENTATIONS

In [5]:
# Enhanced MCP Tool Implementations
class EnhancedMCPToolExecutor:
    """Executes enhanced MCP tools with specialized functionality"""
    
    """Corrected tool executor that matches the actual database schema"""
    
    def __init__(self, engine, db_schema):
        self.engine = engine
        self.db_schema = db_schema
    
    async def execute_tool(self, tool_name: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """Execute a specific enhanced MCP tool"""
        try:
            # Core Identification Tools
            if tool_name == "get_meeting_key":
                return await self._get_meeting_key(parameters)
            elif tool_name == "get_session_key":
                return await self._get_session_key(parameters)
            
            # Performance Analysis Tools
            elif tool_name == "get_fastest_lap":
                return await self._get_fastest_lap(parameters)
            elif tool_name == "get_driver_performance":
                return await self._get_driver_performance(parameters)
            elif tool_name == "get_team_performance":
                return await self._get_team_performance(parameters)
            
            # Comparison Tools
            elif tool_name == "compare_drivers":
                return await self._compare_drivers(parameters)
            elif tool_name == "compare_teams":
                return await self._compare_teams(parameters)
            
            # Results Tools
            elif tool_name == "get_race_results":
                return await self._get_race_results(parameters)
            elif tool_name == "get_qualifying_results":
                return await self._get_qualifying_results(parameters)
            
            # Strategy Tools
            elif tool_name == "get_pit_stop_analysis":
                return await self._get_pit_stop_analysis(parameters)
            elif tool_name == "get_tire_strategy":
                return await self._get_tire_strategy(parameters)
            
            # Incident Analysis Tools
            elif tool_name == "investigate_incident":
                return await self._investigate_incident(parameters)
            
            # Position Analysis Tools
            elif tool_name == "get_position_progression":
                return await self._get_position_progression(parameters)
            
            # Sector Analysis Tools
            elif tool_name == "get_sector_analysis":
                return await self._get_sector_analysis(parameters)
            
            # Utility Tools
            elif tool_name == "explore_schema":
                return await self._explore_schema(parameters)
            elif tool_name == "get_session_info":
                return await self._get_session_info(parameters)
            
            else:
                return {"error": f"Unknown tool: {tool_name}"}
        except Exception as e:
            return {"error": f"Tool execution failed: {str(e)}"}
    
    async def _get_meeting_key(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Get meeting key for a specific event"""
        event_name = params.get("event_name")
        year = params.get("year")
        
        query = """
        SELECT meeting_key, meeting_name, country_name, date_start, year
        FROM meetings 
        WHERE LOWER(meeting_name) LIKE LOWER(:event_name)
        AND year = :year
        ORDER BY date_start DESC
        LIMIT 1
        """
        
        try:
            with self.engine.connect() as conn:
                result = conn.execute(text(query), {"event_name": f"%{event_name}%", "year": year})
                row = result.fetchone()
                
                if row:
                    return {
                        "success": True,
                        "meeting_key": row[0],
                        "meeting_name": row[1],
                        "country_name": row[2],
                        "date": str(row[3]),
                        "year": row[4]
                    }
                else:
                    return {
                        "success": False,
                        "error": f"No meeting found for '{event_name}' in {year}"
                    }
        except Exception as e:
            return {"success": False, "error": str(e)}
    
    async def _get_session_key(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Get session key for a specific session type with enhanced logic"""
        meeting_key = params.get("meeting_key")
        session_type = params.get("session_type")
        
        try:
            with self.engine.connect() as conn:
                # First, get all available sessions for this meeting
                sessions_query = f"""
                SELECT session_name, session_type 
                FROM sessions_transformed 
                WHERE meeting_key = {meeting_key}
                ORDER BY session_name
                """
                result = conn.execute(text(sessions_query))
                available_sessions = [row[0] for row in result.fetchall()]
                
                # Check if the requested session exists - FIXED FOR QUALIFYING
                if session_type == "Qualifying" or "qualifying" in session_type.lower():
                    # Special handling for qualifying sessions
                    qualifying_query = f"""
                    SELECT DISTINCT session_name, session_type 
                    FROM sessions_transformed 
                    WHERE meeting_key = {meeting_key}
                    AND session_name LIKE '%Qualifying%'
                    ORDER BY session_name
                    """
                    result = conn.execute(text(qualifying_query))
                    qualifying_sessions = result.fetchall()
                    
                    # Check for Sprint Race sessions
                    sprint_query = f"""
                    SELECT DISTINCT session_name, session_type 
                    FROM sessions_transformed 
                    WHERE meeting_key = {meeting_key}
                    AND session_name LIKE '%Sprint%'
                    ORDER BY session_name
                    """
                    result = conn.execute(text(sprint_query))
                    sprint_sessions = result.fetchall()
                    
                    # Try to find exact match first
                    exact_match = None
                    all_sessions = qualifying_sessions + sprint_sessions
                    for session_name, session_type_db in all_sessions:
                        if session_type.lower() in session_name.lower():
                            exact_match = session_name
                            break
                    
                    if exact_match:
                        session_query = f"""
                        SELECT session_key, session_name, session_type, date_start, date_end
                        FROM sessions_transformed 
                        WHERE meeting_key = {meeting_key} 
                        AND session_name = '{exact_match}'
                        """
                        result = conn.execute(text(session_query))
                        row = result.fetchone()
                        if row:
                            return {
                                "success": True,
                                "session_key": row[0],
                                "session_name": row[1],
                                "session_type": row[2],
                                "date_start": str(row[3]),
                                "date_end": str(row[4]) if row[4] else None,
                                "meeting_key": meeting_key,
                                "sql_query": session_query,
                                "sql_params": {"meeting_key": meeting_key, "session_name": exact_match}
                            }
                else:
                    # Regular session handling (non-qualifying)
                    session_query = f"""
                    SELECT session_key, session_name, session_type, date_start, date_end
                    FROM sessions_transformed 
                    WHERE meeting_key = {meeting_key} 
                    AND UPPER(session_name) LIKE UPPER('%{session_type}%')
                    ORDER BY date_start ASC
                    LIMIT 1
                    """
                    result = conn.execute(text(session_query))
                    row = result.fetchone()
                    if row:
                        return {
                            "success": True,
                            "session_key": row[0],
                            "session_name": row[1],
                            "session_type": row[2],
                            "date_start": str(row[3]),
                            "date_end": str(row[4]) if row[4] else None,
                            "meeting_key": meeting_key,
                            "sql_query": session_query,
                            "sql_params": {"meeting_key": meeting_key, "session_type": session_type}
                        }
                
                return {
                    "success": False,
                    "error": f"No {session_type} session found for meeting {meeting_key}. Available: {', '.join(available_sessions)}",
                    "sql_query": sessions_query,
                    "sql_params": {"meeting_key": meeting_key}
                }
        except Exception as e:
            return {"success": False, "error": str(e)}
    
    async def _get_fastest_lap(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Get fastest lap time and details"""
        session_key = params.get("session_key")
        driver_filter = params.get("driver_filter")
        team_filter = params.get("team_filter")
        
        # Build WHERE clause based on filters
        where_conditions = ["l.session_key = :session_key"]
        query_params = {"session_key": session_key}
        
        if driver_filter:
            where_conditions.append("UPPER(d.full_name) = UPPER(:driver_filter)")
            query_params["driver_filter"] = driver_filter
        
        if team_filter:
            where_conditions.append("UPPER(d.team_name) = UPPER(:team_filter)")
            query_params["team_filter"] = team_filter
        
        where_clause = " AND ".join(where_conditions)

        query = f"""
            SELECT 
                d.full_name,
                d.team_name,
                l.lap_number,
                l.lap_duration,
                l.duration_sector_1,
                l.duration_sector_2,
                l.duration_sector_3,
                l.had_incident,
                l.safety_car_lap,
                l.is_outlier
            FROM laps_transformed l
            JOIN drivers_transformed d ON l.driver_number = d.driver_number 
                AND l.meeting_key = d.meeting_key
                AND l.session_key = d.session_key
            WHERE l.session_key = {session_key}
            {f"AND UPPER(d.full_name) = UPPER('{driver_filter}')" if driver_filter else ""}
            {f"AND UPPER(d.team_name) = UPPER('{team_filter}')" if team_filter else ""}
            AND l.lap_duration IS NOT NULL
            AND l.lap_duration > 0
            AND COALESCE(l.is_outlier, false) = false
            ORDER BY l.lap_duration ASC
            LIMIT 1
            """
        
        try:
            with self.engine.connect() as conn:
                result = conn.execute(text(query), query_params)
                row = result.fetchone()
                
                if row:
                    return {
                        "success": True,
                        "driver": row[0],
                        "team": row[1],
                        "lap_number": row[2],
                        "lap_duration": float(row[3]),
                        "sector_1": float(row[4]) if row[4] else None,
                        "sector_2": float(row[5]) if row[5] else None,
                        "sector_3": float(row[6]) if row[6] else None,
                        "had_incident": row[7],
                        "safety_car_lap": row[8],
                        "is_outlier": row[9]
                    }
                else:
                    return {
                        "success": False,
                        "error": "No valid lap times found"
                    }
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def _get_driver_performance(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Get comprehensive driver performance data with enhanced query"""
        session_key = params.get("session_key")
        driver_name = params.get("driver_name")
        metrics = params.get("metrics", ["all"])
        
        query = f"""
        SELECT 
            d.full_name,
            d.team_name,
            s.session_name,
            COUNT(DISTINCT l.lap_number) AS total_laps,
            ROUND(AVG(l.lap_duration)::numeric, 3) AS avg_lap,
            MIN(l.lap_duration) AS best_lap,
            MAX(l.lap_duration) AS worst_lap,
            ROUND(STDDEV(l.lap_duration)::numeric, 3) AS consistency,
            COUNT(CASE WHEN l.had_incident = true THEN 1 END) AS incidents,
            ROUND(AVG(l.duration_sector_1)::numeric, 3) AS avg_sector_1,
            ROUND(AVG(l.duration_sector_2)::numeric, 3) AS avg_sector_2,
            ROUND(AVG(l.duration_sector_3)::numeric, 3) AS avg_sector_3
        FROM laps_transformed l
        JOIN drivers_transformed d 
            ON l.driver_number = d.driver_number 
            AND l.meeting_key = d.meeting_key
            AND l.session_key = d.session_key
        JOIN sessions_transformed s 
            ON l.session_key = s.session_key 
            AND l.meeting_key = s.meeting_key
        WHERE l.session_key = {session_key}
        AND UPPER(d.full_name) = UPPER('{driver_name}')
        AND l.lap_duration IS NOT NULL
        AND l.lap_duration > 0
        AND COALESCE(l.is_outlier, false) = false
        GROUP BY d.full_name, d.team_name, s.session_name
        """
        
        try:
            with self.engine.connect() as conn:
                result = conn.execute(text(query))
                row = result.fetchone()
                
                if row:
                    return {
                        "success": True,
                        "driver": row[0],
                        "team": row[1],
                        "session_name": row[2],
                        "total_laps": row[3],
                        "avg_lap": float(row[4]) if row[4] else None,
                        "best_lap": float(row[5]) if row[5] else None,
                        "worst_lap": float(row[6]) if row[6] else None,
                        "consistency": float(row[7]) if row[7] else None,
                        "incidents": row[8],
                        "avg_sector_1": float(row[9]) if row[9] else None,
                        "avg_sector_2": float(row[10]) if row[10] else None,
                        "avg_sector_3": float(row[11]) if row[11] else None,
                        "sql_query": query,
                        "sql_params": {"session_key": session_key, "driver_name": driver_name}
                    }
                else:
                    return {
                        "success": False,
                        "error": f"No performance data found for {driver_name}",
                        "sql_query": query,
                        "sql_params": {"session_key": session_key, "driver_name": driver_name}
                    }
        except Exception as e:
            return {"success": False, "error": str(e), "sql_query": query}
        
    async def _compare_drivers(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Compare performance between two drivers with enhanced queries"""
        session_key = params.get("session_key")
        driver1 = params.get("driver1")
        driver2 = params.get("driver2")
        comparison_metrics = params.get("comparison_metrics", ["all"])
        
        query = f"""
        SELECT 
            d.full_name,
            d.team_name,
            COUNT(DISTINCT l.lap_number) AS total_laps,
            ROUND(AVG(l.lap_duration)::numeric, 3) AS avg_lap,
            MIN(l.lap_duration) AS best_lap,
            MAX(l.lap_duration) AS worst_lap,
            ROUND(STDDEV(l.lap_duration)::numeric, 3) AS consistency,
            COUNT(CASE WHEN l.had_incident = true THEN 1 END) AS incidents
        FROM laps_transformed l
        JOIN drivers_transformed d 
            ON l.driver_number = d.driver_number 
            AND l.meeting_key = d.meeting_key
            AND l.session_key = d.session_key
        WHERE 
            (UPPER(d.full_name) = UPPER('{driver1}') OR UPPER(d.full_name) = UPPER('{driver2}'))
            AND l.session_key = {session_key}
            AND l.lap_duration IS NOT NULL
            AND l.lap_duration > 0
            AND COALESCE(l.is_outlier, false) = false
        GROUP BY d.full_name, d.team_name
        ORDER BY avg_lap ASC
        """
        
        try:
            with self.engine.connect() as conn:
                result = conn.execute(text(query))
                rows = result.fetchall()
                
                if rows:
                    driver_data = []
                    for row in rows:
                        driver_data.append({
                            "driver": row[0],
                            "team": row[1],
                            "total_laps": row[2],
                            "avg_lap": float(row[3]) if row[3] else None,
                            "best_lap": float(row[4]) if row[4] else None,
                            "worst_lap": float(row[5]) if row[5] else None,
                            "consistency": float(row[6]) if row[6] else None,
                            "incidents": row[7]
                        })
                    
                    return {
                        "success": True,
                        "comparison": driver_data,
                        "driver1": driver1,
                        "driver2": driver2,
                        "sql_query": query,
                        "sql_params": {"session_key": session_key, "driver1": driver1, "driver2": driver2}
                    }
                else:
                    return {
                        "success": False,
                        "error": f"No comparison data found for {driver1} vs {driver2}",
                        "sql_query": query,
                        "sql_params": {"session_key": session_key, "driver1": driver1, "driver2": driver2}
                    }
        except Exception as e:
            return {"success": False, "error": str(e), "sql_query": query}

    async def _get_race_results(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Get race results with positions and details using positions_transformed"""
        session_key = params.get("session_key")
        result_type = params.get("result_type", "full_results")
        include_lap_times = params.get("include_lap_times", False)
        
        # Determine limit based on result type
        limit_clause = ""
        if result_type == "top_10":
            limit_clause = "LIMIT 10"
        elif result_type == "podium":
            limit_clause = "LIMIT 3"
        elif result_type == "winner_only":
            limit_clause = "LIMIT 1"
        
        # Use positions_transformed for actual race results
        if include_lap_times:
            query = f"""
            WITH final_positions AS (
                SELECT *,
                    ROW_NUMBER() OVER (PARTITION BY driver_number ORDER BY date DESC) AS rn
                FROM positions_transformed
                WHERE session_key = {session_key}
            ),
            driver_info AS (
                SELECT *,
                    ROW_NUMBER() OVER (PARTITION BY driver_number, meeting_key ORDER BY id) AS rn
                FROM drivers_transformed
                WHERE meeting_key = (SELECT meeting_key FROM sessions_transformed WHERE session_key = {session_key})
            )
            SELECT 
                d.full_name,
                d.team_name,
                p.position AS finish_position,
                MIN(l.lap_duration) AS best_lap,
                ROUND(AVG(l.lap_duration)::numeric, 3) AS avg_lap,
                COUNT(l.lap_number) AS total_laps
            FROM final_positions p
            JOIN driver_info d 
                ON p.driver_number = d.driver_number AND p.meeting_key = d.meeting_key
            LEFT JOIN laps_transformed l ON p.driver_number = l.driver_number 
                AND p.meeting_key = l.meeting_key 
                AND p.session_key = l.session_key
            WHERE p.rn = 1
            AND d.rn = 1
            AND p.position IS NOT NULL
            GROUP BY d.full_name, d.team_name, p.position
            ORDER BY p.position ASC
            {limit_clause}
            """
        else:
            query = f"""
            WITH final_positions AS (
                SELECT *,
                    ROW_NUMBER() OVER (PARTITION BY driver_number ORDER BY date DESC) AS rn
                FROM positions_transformed
                WHERE session_key = {session_key}
            ),
            driver_info AS (
                SELECT *,
                    ROW_NUMBER() OVER (PARTITION BY driver_number, meeting_key ORDER BY id) AS rn
                FROM drivers_transformed
                WHERE meeting_key = (SELECT meeting_key FROM sessions_transformed WHERE session_key = {session_key})
            )
            SELECT 
                d.full_name,
                d.team_name,
                p.position AS finish_position
            FROM final_positions p
            JOIN driver_info d 
                ON p.driver_number = d.driver_number AND p.meeting_key = d.meeting_key
            WHERE p.rn = 1
            AND d.rn = 1
            AND p.position IS NOT NULL
            ORDER BY p.position ASC
            {limit_clause}
            """
        
        try:
            with self.engine.connect() as conn:
                result = conn.execute(text(query))
                rows = result.fetchall()
                
                if rows:
                    results = []
                    for row in rows:
                        result_dict = {
                            "driver": row[0],
                            "team": row[1],
                            "position": row[2]
                        }
                        if include_lap_times and len(row) > 3:
                            result_dict.update({
                                "best_lap": float(row[3]) if row[3] else None,
                                "avg_lap": float(row[4]) if row[4] else None,
                                "total_laps": row[5]
                            })
                        results.append(result_dict)
                    
                    return {
                        "success": True,
                        "result_type": result_type,
                        "results": results,
                        "sql_query": query,
                        "sql_params": {"session_key": session_key, "result_type": result_type}
                    }
                else:
                    return {
                        "success": False,
                        "error": "No race results found",
                        "sql_query": query,
                        "sql_params": {"session_key": session_key}
                    }
        except Exception as e:
            return {"success": False, "error": str(e), "sql_query": query}

    
    async def _get_pit_stop_analysis(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Analyze pit stop strategy and performance"""
        session_key = params.get("session_key")
        driver_filter = params.get("driver_filter")
        team_filter = params.get("team_filter")
        analysis_type = params.get("analysis_type", "all")
        
        # Build WHERE clause
        where_conditions = ["ps.session_key = :session_key"]
        query_params = {"session_key": session_key}
        
        if driver_filter:
            where_conditions.append("UPPER(d.full_name) = UPPER(:driver_filter)")
            query_params["driver_filter"] = driver_filter
        
        if team_filter:
            where_conditions.append("UPPER(d.team_name) = UPPER(:team_filter)")
            query_params["team_filter"] = team_filter
        
        where_clause = " AND ".join(where_conditions)

        query = f"""
        SELECT 
            d.full_name,
            d.team_name,
            COUNT(*) AS total_stops,
            ROUND(AVG(ps.pit_duration)::numeric, 2) AS avg_pit_time,
            MIN(ps.pit_duration) AS fastest_stop,
            MAX(ps.pit_duration) AS slowest_stop,
            MIN(ps.lap_number) AS first_stop,
            MAX(ps.lap_number) AS last_stop,
            COUNT(CASE WHEN ps.long_pit_stop = true THEN 1 END) AS long_stops
        FROM pit_stops_transformed ps
        JOIN drivers_transformed d ON ps.driver_number = d.driver_number 
            AND ps.meeting_key = d.meeting_key
            AND ps.session_key = d.session_key
        WHERE ps.session_key = {session_key}
        {f"AND UPPER(d.full_name) = UPPER('{driver_filter}')" if driver_filter else ""}
        {f"AND UPPER(d.team_name) = UPPER('{team_filter}')" if team_filter else ""}
        GROUP BY d.full_name, d.team_name
        ORDER BY avg_pit_time ASC
        """
        
        try:
            with self.engine.connect() as conn:
                result = conn.execute(text(query), query_params)
                rows = result.fetchall()
                
                if rows:
                    pit_stops = []
                    for row in rows:
                        pit_stops.append({
                            "driver": row[0],
                            "team": row[1],
                            "total_stops": row[2],
                            "avg_pit_time": float(row[3]) if row[3] else None,
                            "fastest_stop": float(row[4]) if row[4] else None,
                            "slowest_stop": float(row[5]) if row[5] else None,
                            "first_stop": row[6],
                            "last_stop": row[7],
                            "long_stops": row[8]
                        })
                    
                    return {
                        "success": True,
                        "analysis_type": analysis_type,
                        "pit_stops": pit_stops
                    }
                else:
                    return {
                        "success": False,
                        "error": "No pit stop data found"
                    }
        except Exception as e:
            return {"success": False, "error": str(e)}
    
    async def _investigate_incident(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Investigate incidents or unusual performance patterns"""
        session_key = params.get("session_key")
        driver_name = params.get("driver_name")
        lap_number = params.get("lap_number")
        investigation_type = params.get("investigation_type", "all")
        context_laps = params.get("context_laps", 3)
        
        # Build lap range for context
        lap_start = lap_number - context_laps if lap_number else 1
        lap_end = lap_number + context_laps if lap_number else 999
        
        query = f"""
        SELECT 
            l.lap_number,
            l.lap_duration,
            l.duration_sector_1,
            l.duration_sector_2,
            l.duration_sector_3,
            l.had_incident,
            l.safety_car_lap,
            l.is_outlier,
            d.full_name,
            d.team_name
        FROM laps_transformed l
        JOIN drivers_transformed d ON l.driver_number = d.driver_number 
            AND l.meeting_key = d.meeting_key
            AND l.session_key = d.session_key
        WHERE l.session_key = {session_key}
        AND UPPER(d.full_name) = UPPER('{driver_name}')
        AND l.lap_number BETWEEN {lap_start} AND {lap_end}
        ORDER BY l.lap_number
        """
        
        try:
            with self.engine.connect() as conn:
                result = conn.execute(text(query), {
                    "session_key": session_key,
                    "driver_name": driver_name,
                    "lap_start": lap_start,
                    "lap_end": lap_end
                })
                rows = result.fetchall()
                
                if rows:
                    laps = []
                    for row in rows:
                        laps.append({
                            "lap_number": row[0],
                            "lap_duration": float(row[1]) if row[1] else None,
                            "sector_1": float(row[2]) if row[2] else None,
                            "sector_2": float(row[3]) if row[3] else None,
                            "sector_3": float(row[4]) if row[4] else None,
                            "had_incident": row[5],
                            "safety_car_lap": row[6],
                            "is_outlier": row[7],
                            "driver": row[8],
                            "team": row[9]
                        })
                    
                    return {
                        "success": True,
                        "investigation_type": investigation_type,
                        "target_lap": lap_number,
                        "context_laps": context_laps,
                        "laps": laps
                    }
                else:
                    return {
                        "success": False,
                        "error": f"No lap data found for {driver_name}"
                    }
        except Exception as e:
            return {"success": False, "error": str(e)}
    
    async def _explore_schema(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Explore database schema"""
        table_name = params.get("table_name")
        detail_level = params.get("detail_level", "overview")
        
        if table_name:
            if table_name not in self.db_schema:
                return {"error": f"Table '{table_name}' not found"}
            
            table_info = self.db_schema[table_name]
            
            if detail_level == "overview":
                return {
                    "table_name": table_name,
                    "columns": len(table_info['columns']),
                    "primary_keys": table_info['primary_keys'],
                    "foreign_keys": table_info['foreign_keys']
                }
            elif detail_level == "columns":
                return {
                    "table_name": table_name,
                    "columns": table_info['columns'],
                    "primary_keys": table_info['primary_keys'],
                    "foreign_keys": table_info['foreign_keys']
                }
            else:
                return {"error": f"Unsupported detail level: {detail_level}"}
        else:
            # Return overview of all tables
            tables_overview = {}
            for table, info in self.db_schema.items():
                tables_overview[table] = {
                    "columns": len(info['columns']),
                    "primary_keys": info['primary_keys']
                }
            return {
                "total_tables": len(tables_overview),
                "tables": tables_overview
            }
    
    async def _get_session_info(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Get basic session information"""
        session_key = params.get("session_key")
        
        query = f"""
        SELECT 
            s.session_name,
            s.session_type,
            s.date_start,
            s.date_end,
            m.meeting_name,
            m.country_name
        FROM sessions_transformed s
        JOIN meetings m ON s.meeting_key = m.meeting_key
        WHERE s.session_key = {session_key}
        """

        try:
            with self.engine.connect() as conn:
                result = conn.execute(text(query), {"session_key": session_key})
                row = result.fetchone()
                
                if row:
                    return {
                        "success": True,
                        "session_name": row[0],
                        "session_type": row[1],
                        "date_start": str(row[2]),
                        "date_end": str(row[3]) if row[3] else None,
                        "meeting_name": row[4],
                        "country_name": row[5]
                    }
                else:
                    return {
                        "success": False,
                        "error": f"No session found with key {session_key}"
                    }
        except Exception as e:
            return {"success": False, "error": str(e)}
    
    # Placeholder methods for remaining tools
    async def _get_team_performance(self, params: Dict[str, Any]) -> Dict[str, Any]:
        return {"success": False, "error": "Not implemented yet"}
    
    async def _compare_teams(self, params: Dict[str, Any]) -> Dict[str, Any]:
        return {"success": False, "error": "Not implemented yet"}
    
    async def _get_qualifying_results(self, params: Dict[str, Any]) -> Dict[str, Any]:
        return {"success": False, "error": "Not implemented yet"}
    
    async def _get_tire_strategy(self, params: Dict[str, Any]) -> Dict[str, Any]:
        return {"success": False, "error": "Not implemented yet"}
    
    async def _get_position_progression(self, params: Dict[str, Any]) -> Dict[str, Any]:
        return {"success": False, "error": "Not implemented yet"}
    
    async def _get_sector_analysis(self, params: Dict[str, Any]) -> Dict[str, Any]:
        return {"success": False, "error": "Not implemented yet"}

# Build a notebook-level executor instance from the SQLAlchemy engine and the
# schema dict (`db_schema` is defined in an earlier cell not shown here).
# NOTE(review): the FastAPI startup hook further down creates its own,
# separate instance named `enhanced_tool_executor`.
enhanced_executor = EnhancedMCPToolExecutor(engine, db_schema)
print("✅ Enhanced MCP Tool Executor initialized!")

✅ Enhanced MCP Tool Executor initialized!


# MCP SERVER SETUP

In [6]:
# Updated HTTP Server Implementation with Enhanced Tools
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Dict, Any, List, Optional
import uvicorn
import asyncio
import json
import threading
import time

# Create the FastAPI application object; the route decorators below attach
# handlers to it. Title, version and description are passed as app metadata.
app = FastAPI(
    title="F1 MCP Server", 
    version="2.0.0",
    description="Enhanced MCP Server for Formula 1 data analysis with specialized tools"
)

# Pydantic models for API requests/responses
class ToolCallRequest(BaseModel):
    # Payload naming a tool and the arguments to run it with.
    # NOTE(review): presumably the request body of the /call_tool endpoint,
    # which is not visible in this part of the file; confirm.
    name: str  # tool name, e.g. "get_fastest_lap"
    arguments: Dict[str, Any]  # argument name -> value for that tool

class ToolCallResponse(BaseModel):
    # Result envelope for a tool invocation.
    success: bool  # whether the tool call succeeded
    data: Dict[str, Any]  # tool result payload
    # Optional so that the None default agrees with the annotation; a plain
    # `str` annotation would make an explicit `error=None` fail validation.
    error: Optional[str] = None

class ToolInfo(BaseModel):
    # Descriptor for one tool as listed by the /tools endpoint.
    name: str  # tool name used when invoking it
    description: str  # human-readable summary of what the tool does
    inputSchema: Dict[str, Any]  # JSON-Schema-style object describing the accepted arguments
    examples: List[Dict[str, Any]] = []  # sample argument dicts; empty when none are given

# Module-level server state.
# `enhanced_tool_executor` starts as None and is assigned by the FastAPI
# startup hook below.
enhanced_tool_executor = None
# NOTE(review): `server_running` / `server_thread` are not used in the visible
# part of the file; presumably they track a background uvicorn thread started
# in a later cell. Confirm against that code.
server_running = False
server_thread = None

@app.on_event("startup")
async def startup_event():
    """Create the shared tool executor once the application starts up.

    Rebinds the module-level ``enhanced_tool_executor`` to a new
    ``EnhancedMCPToolExecutor`` built from ``engine`` and ``db_schema``.
    """
    global enhanced_tool_executor
    executor = EnhancedMCPToolExecutor(engine, db_schema)
    enhanced_tool_executor = executor
    print("✅ Enhanced F1 MCP Server started with specialized tool executor!")

@app.get("/")
async def root():
    """Describe the service: banner message, version, endpoint paths and tool groups."""
    # Paths of the endpoints this server advertises.
    endpoint_paths = {
        "tools": "/tools",
        "call_tool": "/call_tool",
        "health": "/health",
        "docs": "/docs",
        "schema": "/schema"
    }

    # Tool names grouped by the kind of analysis they provide.
    categories = {}
    categories["core_identification"] = ["get_meeting_key", "get_session_key"]
    categories["performance_analysis"] = ["get_fastest_lap", "get_driver_performance", "get_team_performance"]
    categories["comparison"] = ["compare_drivers", "compare_teams"]
    categories["results"] = ["get_race_results", "get_qualifying_results"]
    categories["strategy"] = ["get_pit_stop_analysis", "get_tire_strategy"]
    categories["incident_analysis"] = ["investigate_incident"]
    categories["position_analysis"] = ["get_position_progression"]
    categories["sector_analysis"] = ["get_sector_analysis"]
    categories["utility"] = ["explore_schema", "get_session_info"]

    return {
        "message": "Enhanced F1 MCP Server is running!",
        "version": "2.0.0",
        "endpoints": endpoint_paths,
        "tool_categories": categories
    }

@app.get("/tools")
async def list_tools():
    """List all available enhanced tools"""
    tools = [
        # === CORE IDENTIFICATION TOOLS ===
        ToolInfo(
            name="get_meeting_key",
            description="Get the meeting_key for a specific race event by name and year",
            inputSchema={
                "type": "object",
                "properties": {
                    "event_name": {
                        "type": "string",
                        "description": "Name of the race event (e.g., 'Australian Grand Prix', 'Monaco Grand Prix')"
                    },
                    "year": {
                        "type": "integer",
                        "description": "Year of the event (e.g., 2025)"
                    }
                },
                "required": ["event_name", "year"]
            },
            examples=[
                {
                    "event_name": "Australian Grand Prix",
                    "year": 2025
                }
            ]
        ),
        ToolInfo(
            name="get_session_key",
            description="Get the session_key for a specific session type within a meeting",
            inputSchema={
                "type": "object",
                "properties": {
                    "meeting_key": {
                        "type": "integer",
                        "description": "Meeting key from get_meeting_key"
                    },
                    "session_type": {
                        "type": "string",
                        "enum": ["Race", "Qualifying", "Practice 1", "Practice 2", "Practice 3", "Sprint", "Sprint Qualifying"],
                        "description": "Type of session"
                    }
                },
                "required": ["meeting_key", "session_type"]
            },
            examples=[
                {
                    "meeting_key": 1254,
                    "session_type": "Race"
                }
            ]
        ),
        
        # === PERFORMANCE ANALYSIS TOOLS ===
        ToolInfo(
            name="get_fastest_lap",
            description="Get the fastest lap time and details for a specific session",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {
                        "type": "integer",
                        "description": "Session key from get_session_key"
                    },
                    "driver_filter": {
                        "type": "string",
                        "description": "Optional driver name to filter results"
                    },
                    "team_filter": {
                        "type": "string", 
                        "description": "Optional team name to filter results"
                    }
                },
                "required": ["session_key"]
            },
            examples=[
                {
                    "session_key": 9693
                },
                {
                    "session_key": 9693,
                    "driver_filter": "Max VERSTAPPEN"
                }
            ]
        ),
        ToolInfo(
            name="get_driver_performance",
            description="Get comprehensive performance data for a specific driver in a session",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {"type": "integer"},
                    "driver_name": {"type": "string"},
                    "metrics": {
                        "type": "array",
                        "items": {"type": "string"},
                        "enum": ["lap_times", "consistency", "sector_times", "position_progression", "all"],
                        "description": "Specific metrics to retrieve"
                    }
                },
                "required": ["session_key", "driver_name"]
            },
            examples=[
                {
                    "session_key": 9693,
                    "driver_name": "Max VERSTAPPEN",
                    "metrics": ["all"]
                }
            ]
        ),
        ToolInfo(
            name="get_team_performance",
            description="Get performance data for all drivers in a team for a session",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {"type": "integer"},
                    "team_name": {"type": "string"},
                    "metrics": {
                        "type": "array",
                        "items": {"type": "string"},
                        "enum": ["lap_times", "consistency", "positions", "all"]
                    }
                },
                "required": ["session_key", "team_name"]
            },
            examples=[
                {
                    "session_key": 9693,
                    "team_name": "Red Bull Racing",
                    "metrics": ["all"]
                }
            ]
        ),
        
        # === COMPARISON TOOLS ===
        ToolInfo(
            name="compare_drivers",
            description="Compare performance between two drivers in a session",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {"type": "integer"},
                    "driver1": {"type": "string"},
                    "driver2": {"type": "string"},
                    "comparison_metrics": {
                        "type": "array",
                        "items": {"type": "string"},
                        "enum": ["lap_times", "consistency", "sector_times", "positions", "pit_stops", "all"]
                    }
                },
                "required": ["session_key", "driver1", "driver2"]
            },
            examples=[
                {
                    "session_key": 9693,
                    "driver1": "Max VERSTAPPEN",
                    "driver2": "Lewis HAMILTON",
                    "comparison_metrics": ["all"]
                }
            ]
        ),
        ToolInfo(
            name="compare_teams",
            description="Compare performance between two teams in a session",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {"type": "integer"},
                    "team1": {"type": "string"},
                    "team2": {"type": "string"},
                    "comparison_metrics": {
                        "type": "array",
                        "items": {"type": "string"},
                        "enum": ["best_lap", "avg_lap", "consistency", "positions", "all"]
                    }
                },
                "required": ["session_key", "team1", "team2"]
            },
            examples=[
                {
                    "session_key": 9693,
                    "team1": "Red Bull Racing",
                    "team2": "Mercedes",
                    "comparison_metrics": ["all"]
                }
            ]
        ),
        
        # === RESULTS TOOLS ===
        ToolInfo(
            name="get_race_results",
            description="Get final race results with positions, lap times, and other details",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {"type": "integer"},
                    "result_type": {
                        "type": "string",
                        "enum": ["full_results", "top_10", "podium", "winner_only", "dnf_list"],
                        "description": "Level of detail for results"
                    },
                    "include_lap_times": {
                        "type": "boolean",
                        "description": "Whether to include lap time statistics"
                    }
                },
                "required": ["session_key"]
            },
            examples=[
                {
                    "session_key": 9693,
                    "result_type": "podium",
                    "include_lap_times": True
                }
            ]
        ),
        ToolInfo(
            name="get_qualifying_results",
            description="Get qualifying results with best lap times and positions",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {"type": "integer"},
                    "result_type": {
                        "type": "string",
                        "enum": ["full_results", "top_10", "q3_only", "eliminated_q1", "eliminated_q2"],
                        "description": "Level of detail for results"
                    }
                },
                "required": ["session_key"]
            },
            examples=[
                {
                    "session_key": 9693,
                    "result_type": "top_10"
                }
            ]
        ),
        
        # === STRATEGY TOOLS ===
        ToolInfo(
            name="get_pit_stop_analysis",
            description="Analyze pit stop strategy and performance for drivers/teams",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {"type": "integer"},
                    "driver_filter": {"type": "string"},
                    "team_filter": {"type": "string"},
                    "analysis_type": {
                        "type": "string",
                        "enum": ["timing", "strategy", "comparison", "all"],
                        "description": "Type of pit stop analysis"
                    }
                },
                "required": ["session_key"]
            },
            examples=[
                {
                    "session_key": 9693,
                    "analysis_type": "all"
                }
            ]
        ),
        ToolInfo(
            name="get_tire_strategy",
            description="Analyze tire strategy and stint information for drivers/teams",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {"type": "integer"},
                    "driver_filter": {"type": "string"},
                    "team_filter": {"type": "string"},
                    "strategy_type": {
                        "type": "string",
                        "enum": ["stint_analysis", "compound_usage", "strategy_comparison", "all"],
                        "description": "Type of tire strategy analysis"
                    }
                },
                "required": ["session_key"]
            },
            examples=[
                {
                    "session_key": 9693,
                    "strategy_type": "all"
                }
            ]
        ),
        
        # === INCIDENT ANALYSIS TOOLS ===
        ToolInfo(
            name="investigate_incident",
            description="Investigate incidents, slow laps, or unusual performance patterns",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {"type": "integer"},
                    "driver_name": {"type": "string"},
                    "lap_number": {"type": "integer"},
                    "investigation_type": {
                        "type": "string",
                        "enum": ["slow_lap", "incident", "unusual_pattern", "sector_analysis", "all"],
                        "description": "Type of investigation"
                    },
                    "context_laps": {
                        "type": "integer",
                        "description": "Number of laps before/after to include for context",
                        "default": 3
                    }
                },
                "required": ["session_key", "driver_name"]
            },
            examples=[
                {
                    "session_key": 9693,
                    "driver_name": "Lando NORRIS",
                    "lap_number": 67,
                    "investigation_type": "all",
                    "context_laps": 5
                }
            ]
        ),
        
        # === POSITION ANALYSIS TOOLS ===
        ToolInfo(
            name="get_position_progression",
            description="Track position changes throughout the session for drivers",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {"type": "integer"},
                    "driver_filter": {"type": "string"},
                    "team_filter": {"type": "string"},
                    "progression_type": {
                        "type": "string",
                        "enum": ["lap_by_lap", "key_moments", "overtakes", "all"],
                        "description": "Type of position progression analysis"
                    }
                },
                "required": ["session_key"]
            },
            examples=[
                {
                    "session_key": 9693,
                    "progression_type": "all"
                }
            ]
        ),
        
        # === SECTOR ANALYSIS TOOLS ===
        ToolInfo(
            name="get_sector_analysis",
            description="Analyze sector times and identify strengths/weaknesses",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {"type": "integer"},
                    "driver_filter": {"type": "string"},
                    "team_filter": {"type": "string"},
                    "sector_analysis_type": {
                        "type": "string",
                        "enum": ["best_sectors", "sector_comparison", "sector_consistency", "all"],
                        "description": "Type of sector analysis"
                    }
                },
                "required": ["session_key"]
            },
            examples=[
                {
                    "session_key": 9693,
                    "sector_analysis_type": "all"
                }
            ]
        ),
        
        # === UTILITY TOOLS ===
        ToolInfo(
            name="explore_schema",
            description="Get information about database tables, columns, and relationships",
            inputSchema={
                "type": "object",
                "properties": {
                    "table_name": {
                        "type": "string",
                        "description": "Name of the table to explore (optional)"
                    },
                    "detail_level": {
                        "type": "string",
                        "enum": ["overview", "columns", "sample_data", "relationships"],
                        "description": "Level of detail to return"
                    }
                }
            },
            examples=[
                {
                    "table_name": "laps_transformed",
                    "detail_level": "columns"
                }
            ]
        ),
        ToolInfo(
            name="get_session_info",
            description="Get basic information about a session including type, date, and participants",
            inputSchema={
                "type": "object",
                "properties": {
                    "session_key": {"type": "integer"}
                },
                "required": ["session_key"]
            },
            examples=[
                {
                    "session_key": 9693
                }
            ]
        )
    ]
    return {"tools": tools}

@app.post("/call_tool")
async def call_tool(request: ToolCallRequest):
    """Run the tool named in the request and wrap the outcome in a ToolCallResponse.

    Raises an HTTP 500 when the module-level executor has not been set up.
    Any exception raised while executing the tool is reported in the response
    body (``success=False`` plus the error text) rather than as an HTTP error.
    """
    # Guard: the executor is created elsewhere; without it no tool can run.
    if not enhanced_tool_executor:
        raise HTTPException(status_code=500, detail="Enhanced tool executor not initialized")

    try:
        tool_output = await enhanced_tool_executor.execute_tool(
            request.name,
            request.arguments,
        )
        return ToolCallResponse(success=True, data=tool_output)
    except Exception as exc:
        # Failures are surfaced to the client as data, not as a transport error.
        return ToolCallResponse(success=False, error=str(exc))

@app.get("/health")
async def health_check():
    """Report server identity, executor availability and the number of registered tools."""
    # The tool registry may not exist yet if its cell was never executed.
    if 'ENHANCED_F1_TOOLS' in globals():
        registered_tools = len(ENHANCED_F1_TOOLS)
    else:
        registered_tools = 0

    # "database_connected" reflects only whether the executor object exists.
    executor_ready = enhanced_tool_executor is not None

    return {
        "status": "healthy",
        "server": "enhanced-f1-mcp-server",
        "version": "2.0.0",
        "database_connected": executor_ready,
        "tool_count": registered_tools,
    }

@app.get("/schema")
async def get_schema():
    """Return the module-level ``db_schema`` object under the ``schema`` key."""
    payload = dict(schema=db_schema)
    return payload

def run_server_in_thread(host="localhost", port=8000):
    """Start the enhanced FastAPI server on a background daemon thread.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.

    The uvicorn ``Server`` object is kept in the module-level name
    ``server_instance`` so that ``stop_server()`` can ask it to shut down.
    Does nothing (apart from a warning) when ``server_running`` is already set.
    """
    global server_thread, server_running, server_instance
    if server_running:
        print("⚠️ Enhanced server is already running!")
        return

    # Build the Server explicitly instead of calling uvicorn.run(): run() gives
    # no handle back, which made a real shutdown impossible.
    server_instance = uvicorn.Server(
        uvicorn.Config(app, host=host, port=port, log_level="info")
    )

    server_thread = threading.Thread(target=server_instance.run, daemon=True)
    server_thread.start()
    server_running = True

    # Wait a moment for server to start
    time.sleep(2)

    # If the thread has already exited, startup failed (typically the port is
    # taken); do not claim success or leave the running flag set.
    if not server_thread.is_alive():
        server_running = False
        server_instance = None
        print(f"❌ Enhanced server failed to start on {host}:{port} (is the port already in use?)")
        return

    print(f"✅ Enhanced F1 MCP HTTP Server started on {host}:{port}")
    print(f"📚 API Documentation: http://{host}:{port}/docs")
    print(f"🏥 Health Check: http://{host}:{port}/health")
    print(f"🛠️ Available Tools: http://{host}:{port}/tools")
    print("📊 Tool Categories:")
    print("   • Core Identification: get_meeting_key, get_session_key")
    print("   • Performance Analysis: get_fastest_lap, get_driver_performance, get_team_performance")
    print("   • Comparison: compare_drivers, compare_teams")
    print("   • Results: get_race_results, get_qualifying_results")
    print("   • Strategy: get_pit_stop_analysis, get_tire_strategy")
    print("   • Incident Analysis: investigate_incident")
    print("   • Position Analysis: get_position_progression")
    print("   • Sector Analysis: get_sector_analysis")
    print("   • Utility: explore_schema, get_session_info")

def stop_server():
    """Stop the enhanced server started by ``run_server_in_thread()``.

    Signals the stored uvicorn ``Server`` to exit and waits briefly for its
    thread to finish. ``server_running`` is cleared only once the thread is
    no longer alive, so the flag keeps matching reality.
    """
    global server_running, server_instance
    if not server_running:
        print("⚠️ Enhanced server is not running")
        return

    # Look the handles up defensively: they may be missing if the server was
    # started by an older definition of run_server_in_thread().
    module_state = globals()
    server = module_state.get("server_instance")
    thread = module_state.get("server_thread")

    if server is not None:
        # uvicorn's serving loop polls this flag and shuts down gracefully.
        server.should_exit = True

    if thread is not None and thread.is_alive():
        if server is not None:
            thread.join(timeout=5)
        if thread.is_alive():
            print("⚠️ Enhanced server thread is still running; it could not be shut down cleanly")
            return

    server_running = False
    server_instance = None
    print("🛑 Enhanced server stopped")

def check_server_status(host="localhost", port=8000, timeout=5):
    """Report whether the enhanced server is flagged as running and reachable.

    Args:
        host: Host the server was started on (defaults match run_server_in_thread).
        port: Port the server was started on.
        timeout: Seconds to wait for the /health endpoint before giving up, so
            an unresponsive server cannot hang the notebook cell.

    Prints its findings; returns None.
    """
    if not server_running:
        print("❌ Enhanced server is not running")
        return

    print("✅ Enhanced server is running")
    try:
        import requests
        response = requests.get(f"http://{host}:{port}/health", timeout=timeout)
        if response.status_code == 200:
            print("✅ Enhanced server is responding to requests")
            health_data = response.json()
            print(f"✅ Tool count: {health_data.get('tool_count', 'Unknown')}")
        else:
            print("⚠️ Enhanced server is running but not responding properly")
    except Exception as e:
        # Best-effort probe: connection errors, timeouts and bad JSON all land here.
        print(f"⚠️ Enhanced server is running but not accessible: {e}")

# Usage banner shown once the server cell has been defined.
print(
    "✅ Enhanced HTTP MCP Server implementation ready!",
    "To start the server: run_server_in_thread()",
    "To check server status: check_server_status()",
    "To stop the server: stop_server()",
    sep="\n",
)

✅ Enhanced HTTP MCP Server implementation ready!
To start the server: run_server_in_thread()
To check server status: check_server_status()
To stop the server: stop_server()


In [7]:
# Updated HTTP MCP Client with better error handling
import requests
import json
from typing import Dict, Any, List, Optional
import time

class HTTPMCPClient:
    """HTTP-based MCP Client to connect to the F1 MCP Server"""

    def __init__(self, base_url: str = "http://localhost:8000", timeout: float = 10):
        """Create a client.

        Args:
            base_url: Root URL of the server; a trailing slash is tolerated.
            timeout: Seconds allowed for each HTTP request.
        """
        self.base_url = base_url.rstrip("/")
        # requests ignores a ``timeout`` attribute set on a Session, so the value
        # is stored here and passed explicitly with every request instead.
        self.timeout = timeout
        self.session = requests.Session()

    def health_check(self) -> Dict[str, Any]:
        """Return the server's /health payload, or an ``error`` dict if unreachable."""
        try:
            response = self.session.get(f"{self.base_url}/health", timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except requests.RequestException as e:
            return {"error": str(e), "status": "unavailable"}

    def wait_for_server(self, max_wait=30):
        """Poll /health once per second until it succeeds or ``max_wait`` seconds pass.

        Returns:
            True if the server answered in time, False otherwise.
        """
        print("⏳ Waiting for server to start...")
        start_time = time.time()

        while time.time() - start_time < max_wait:
            health = self.health_check()
            if "error" not in health:
                print("✅ Server is ready!")
                return True
            time.sleep(1)

        print("❌ Server did not start within the timeout period")
        return False

    def list_tools(self) -> List[Dict[str, Any]]:
        """Return the ``tools`` list from the server's /tools endpoint.

        Raises:
            Exception: if the HTTP request fails (the original error is chained).
        """
        try:
            response = self.session.get(f"{self.base_url}/tools", timeout=self.timeout)
            response.raise_for_status()
            return response.json()["tools"]
        except requests.RequestException as e:
            raise Exception(f"Failed to list tools: {e}") from e

    def call_tool(self, name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Invoke a tool on the server and return the ``data`` part of its response.

        Args:
            name: Tool name.
            arguments: JSON-serialisable arguments for the tool.

        Raises:
            Exception: if the HTTP request fails, or if the server reports that
                the tool did not succeed.
        """
        try:
            payload = {
                "name": name,
                "arguments": arguments
            }

            response = self.session.post(
                f"{self.base_url}/call_tool",
                json=payload,
                headers={"Content-Type": "application/json"},
                timeout=self.timeout,
            )
            response.raise_for_status()

            result = response.json()

            # A missing "success" key is treated like an explicit failure
            # instead of surfacing as an unrelated KeyError.
            if not result.get("success"):
                raise Exception(f"Tool execution failed: {result.get('error', 'Unknown error')}")

            return result.get("data")

        except requests.RequestException as e:
            raise Exception(f"Failed to call tool: {e}") from e

    def get_schema(self) -> Dict[str, Any]:
        """Return the ``schema`` object from the server's /schema endpoint.

        Raises:
            Exception: if the HTTP request fails (the original error is chained).
        """
        try:
            response = self.session.get(f"{self.base_url}/schema", timeout=self.timeout)
            response.raise_for_status()
            return response.json()["schema"]
        except requests.RequestException as e:
            raise Exception(f"Failed to get schema: {e}") from e

# Shared client instance used by the cells below; points at the local server.
http_client = HTTPMCPClient(base_url="http://localhost:8000")
print("✅ HTTP MCP Client initialized!")

✅ HTTP MCP Client initialized!


# MCP Reasoning Engine

In [8]:
# Improved HTTP Reasoning Engine with Session Detection and Text Summaries
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage, AIMessage
import json
from datetime import datetime
import re
from typing import Dict, Any, List, Optional

class HTTPMCPReasoningEngine:
    """Improved reasoning engine with session detection and intelligent summaries"""
    
    def __init__(self, http_client, openai_api_key):
        """Wire up the tool client and the chat model, and start with empty conversation state.

        Args:
            http_client: Client object used to call tools on the MCP server.
            openai_api_key: API key handed to ``ChatOpenAI``.
        """
        # Conversation state, all empty at start-up.
        self.conversation_history = []
        self.pending_clarification = None
        self.last_meeting_context = None

        self.http_client = http_client

        # Slightly higher temperature for more natural text.
        llm_settings = {
            "model": "gpt-3.5-turbo",
            "temperature": 0.3,
            "openai_api_key": openai_api_key,
        }
        self.llm = ChatOpenAI(**llm_settings)
    
    def _get_conversation_context(self) -> str:
        """Get recent conversation context for the LLM"""
        if not self.conversation_history:
            return ""
        
        context = "Recent conversation context:\n"
        for entry in self.conversation_history[-2:]:  # Last 2 exchanges
            context += f"User: {entry['message']}\n"
            # Truncate response to avoid too much context
            response_preview = entry['response'][:200] + "..." if len(entry['response']) > 200 else entry['response']
            context += f"Assistant: {response_preview}\n\n"
        
        return context
    
    def reason_and_answer(self, user_query: str) -> str:
        """Answer a user query: analyse it, optionally ask for clarification, run tools, summarise.

        If the previous assistant turn was the Sprint-vs-main-race clarification
        question, the current message is read as the answer to it and the
        meeting is recovered from the previous user message. Every reply
        (clarification or final answer) is recorded via ``add_to_history``.
        """
        query_analysis = None

        # --- Was the last thing we said the Sprint/main-race question? ---
        if self.conversation_history:
            previous_turn = self.conversation_history[-1]
            previous_reply = previous_turn["response"]
            asked_which_race = (
                "Which one would you like to know about?" in previous_reply
                and "Sprint" in previous_reply
            )

            if asked_which_race:
                # The meeting was named in the question that triggered the clarification.
                meeting_info = self._extract_meeting_info(previous_turn["message"])

                if meeting_info["name"]:
                    reply_lower = user_query.lower()
                    main_words = ["main", "grand prix", "gp", "main race"]
                    sprint_words = ["sprint", "sprint race"]

                    if any(word in reply_lower for word in main_words):
                        session_type = "Race"
                    elif any(word in reply_lower for word in sprint_words):
                        session_type = "Sprint"
                    elif "sprint" in reply_lower:
                        session_type = "Sprint"
                    else:
                        session_type = "Race"

                    query_analysis = {
                        "meeting_info": meeting_info,
                        "session_type": session_type,
                        "query_type": "race_results",
                        "drivers": []
                    }

        # --- Otherwise (or if the meeting could not be recovered) analyse normally ---
        if query_analysis is None:
            query_analysis = self._analyze_query_intent(user_query)

        # Remember the meeting so follow-up questions can omit it.
        analysed_meeting = query_analysis["meeting_info"]
        if analysed_meeting["name"]:
            self.last_meeting_context = analysed_meeting

        # Ask the user to disambiguate when required.
        clarification = self._check_for_clarification_needed(user_query, query_analysis)
        if clarification:
            self.add_to_history(user_query, clarification)
            return clarification

        # Fetch data, turn it into prose, and log the exchange.
        tool_results = self._execute_tools_for_query(user_query, query_analysis)
        answer = self._generate_intelligent_summary(user_query, query_analysis, tool_results)
        self.add_to_history(user_query, answer)

        return answer
    
    def _analyze_query_intent(self, user_query: str) -> Dict[str, Any]:
        """Analyze what the user is asking for with enhanced conversation context"""
        query_lower = user_query.lower()
        
        # Extract meeting info
        meeting_info = self._extract_meeting_info(user_query)
        
        # Enhanced context handling
        if not meeting_info["name"]:
            # First, check if this is a follow-up that should use last meeting context
            follow_up_patterns = [
                "what about", "how about", "and", "also", "who got", "who finished",
                "compare", "versus", "vs", "4th", "5th", "6th", "7th", "8th", "9th", "10th",
                "main race", "grand prix", "sprint", "sprint race", "qualifying"
            ]
            
            is_follow_up = any(pattern in query_lower for pattern in follow_up_patterns)
            
            if is_follow_up and self.last_meeting_context:
                meeting_info = self.last_meeting_context
                print(f"🔄 Using stored meeting context: {meeting_info['name']}")
            elif self.conversation_history:
                # Look for race mentions in recent conversation history
                for entry in reversed(self.conversation_history[-3:]):
                    previous_meeting = self._extract_meeting_info(entry["message"])
                    if previous_meeting["name"]:
                        meeting_info = previous_meeting
                        print(f"📚 Using context from previous query: {meeting_info['name']}")
                        break
        
        # Determine session type with enhanced logic
        session_type = "Race"  # Default
        
        if any(word in query_lower for word in ["qualifying", "quali", "q1", "q2", "q3", "pole", "grid"]):
            session_type = "Qualifying"
        elif any(word in query_lower for word in ["sprint race", "sprint"]) and "qualifying" not in query_lower:
            session_type = "Sprint"
        elif any(word in query_lower for word in ["practice", "fp1", "fp2", "fp3", "free practice"]):
            if "1" in query_lower or "fp1" in query_lower:
                session_type = "Practice 1"
            elif "2" in query_lower or "fp2" in query_lower:
                session_type = "Practice 2"
            elif "3" in query_lower or "fp3" in query_lower:
                session_type = "Practice 3"
        
        # Handle clarification responses for session type
        if "main race" in query_lower or "grand prix" in query_lower:
            session_type = "Race"
        elif "sprint" in query_lower:
            session_type = "Sprint"
        
        # Determine query type with better pattern matching
        query_type = "race_results"  # Default
        
        if any(word in query_lower for word in ["compare", "vs", "versus", "difference", "better"]):
            query_type = "comparison"
        elif any(word in query_lower for word in ["performance", "how did", "analysis", "stats"]):
            query_type = "performance"
        elif any(word in query_lower for word in ["fastest lap", "best lap", "quickest lap"]):
            query_type = "fastest_lap"
        elif any(word in query_lower for word in ["pit stop", "strategy", "stops", "stints", "pitstop"]):
            query_type = "strategy"
        elif any(word in query_lower for word in ["qualifying", "quali", "grid", "pole"]):
            query_type = "qualifying_results"
        
        return {
            "meeting_info": meeting_info,
            "session_type": session_type,
            "query_type": query_type,
            "drivers": self._extract_driver_names(user_query)
        }
    
    def _check_for_clarification_needed(self, user_query: str, query_analysis: Dict[str, Any]) -> Optional[str]:
        """Check if we need to ask for clarification with enhanced context awareness"""
        
        # Check if meeting is identified
        if not query_analysis["meeting_info"]["name"]:
            return ("I'd be happy to help! However, I couldn't identify which race you're asking about. "
                "Could you please specify the race? For example: 'Bahrain Grand Prix', 'Miami Grand Prix', "
                "'Chinese Grand Prix', etc.")
        
        # Check for ambiguous session types in weekends with both Sprint and Race
        query_lower = user_query.lower()
        meeting_name = query_analysis["meeting_info"]["name"]
        
        # Skip clarification if we just provided one
        if self.conversation_history:
            last_response = self.conversation_history[-1]["response"]
            if "Which one would you like to know about?" in last_response:
                return None  # Don't ask for clarification again
        
        # Check if this weekend has both Sprint and Race
        weekend_has_sprint = self._check_if_weekend_has_sprint(query_analysis["meeting_info"])
        
        if weekend_has_sprint and query_analysis["session_type"] == "Race":
            # Check if query is ambiguous (mentions "race" but not specifically "sprint" or "main")
            ambiguous_terms = ["who won", "winner", "results", "race"]
            specific_terms = ["main race", "grand prix", "gp", "sprint race", "sprint"]
            
            has_ambiguous = any(term in query_lower for term in ambiguous_terms)
            has_specific = any(term in query_lower for term in specific_terms)
            
            if has_ambiguous and not has_specific:
                return (f"The {meeting_name} weekend has both a Sprint Race and the main Grand Prix. "
                    f"Which one would you like to know about?\n"
                    f"• Main race (Grand Prix)\n"
                    f"• Sprint race\n"
                    f"Please clarify which race you're interested in!")
        
        return None
    
    def _check_if_weekend_has_sprint(self, meeting_info: Dict[str, Any]) -> bool:
        """Check if a weekend has sprint sessions"""
        try:
            if not meeting_info["name"]:
                return False
                
            # Get meeting key first
            meeting_result = self.http_client.call_tool("get_meeting_key", {
                "event_name": meeting_info["name"],
                "year": meeting_info["year"]
            })
            
            if not meeting_result.get("success"):
                return False
            
            meeting_key = meeting_result.get("meeting_key")
            
            # Check available sessions
            try:
                sprint_result = self.http_client.call_tool("get_session_key", {
                    "meeting_key": meeting_key,
                    "session_type": "Sprint"
                })
                return sprint_result.get("success", False)
            except:
                return False
                
        except Exception as e:
            print(f"⚠️ Could not check sprint availability: {e}")
            return False
    
    def _execute_tools_for_query(self, user_query: str, query_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Execute tools based on query analysis with proper session detection"""
        tool_results = {}
        
        try:
            meeting_info = query_analysis["meeting_info"]
            session_type = query_analysis["session_type"]
            
            # Step 1: Get meeting key
            #print("1️⃣ Getting meeting key...")
            meeting_result = self.http_client.call_tool("get_meeting_key", {
                "event_name": meeting_info["name"],
                "year": meeting_info["year"]
            })
            
            if not meeting_result.get("success"):
                return {"error": f"Failed to get meeting key: {meeting_result.get('error')}"}
            
            meeting_key = meeting_result.get("meeting_key")
            tool_results["meeting_info"] = meeting_result
            #print(f"✅ Meeting key: {meeting_key}")
            
            # Step 2: Get session key with proper session type
            #print(f"2️⃣ Getting session key for {session_type}...")
            session_result = self.http_client.call_tool("get_session_key", {
                "meeting_key": meeting_key,
                "session_type": session_type
            })
            
            if not session_result.get("success"):
                # Try alternative session types if the requested one fails
                print(f"⚠️ {session_type} session not found, trying alternatives...")
                
                if session_type == "Sprint":
                    # Try Race instead
                    session_result = self.http_client.call_tool("get_session_key", {
                        "meeting_key": meeting_key,
                        "session_type": "Race"
                    })
                    if session_result.get("success"):
                        print("📝 Note: Using Race session as Sprint not available")
                
                if not session_result.get("success"):
                    return {"error": f"No {session_type} session found for this race weekend"}
            
            session_key = session_result.get("session_key")
            tool_results["session_info"] = session_result
            #print(f"✅ Session key: {session_key} ({session_result.get('session_type')})")
            
            # Step 3: Execute specific tools based on query type
            query_type = query_analysis["query_type"]
            
            if query_type == "comparison":
                #print("3️⃣ Executing driver comparison...")
                drivers = query_analysis["drivers"]
                if len(drivers) >= 2:
                    compare_result = self.http_client.call_tool("compare_drivers", {
                        "session_key": session_key,
                        "driver1": drivers[0],
                        "driver2": drivers[1],
                        "comparison_metrics": ["all"]
                    })
                    tool_results["driver_comparison"] = compare_result
                else:
                    tool_results["error"] = f"Need two drivers to compare. Found: {drivers}"
            
            elif query_type == "performance":
                #print("3️⃣ Getting driver performance...")
                drivers = query_analysis["drivers"]
                if len(drivers) >= 1:
                    performance_result = self.http_client.call_tool("get_driver_performance", {
                        "session_key": session_key,
                        "driver_name": drivers[0],
                        "metrics": ["all"]
                    })
                    tool_results["driver_performance"] = performance_result
                else:
                    tool_results["error"] = "Need to specify a driver for performance analysis"
            
            elif query_type == "fastest_lap":
                #print("3️⃣ Getting fastest lap...")
                fastest_result = self.http_client.call_tool("get_fastest_lap", {
                    "session_key": session_key
                })
                tool_results["fastest_lap"] = fastest_result
            
            elif query_type == "pit_stops":
                #print("3️⃣ Getting pit stop analysis...")
                pit_result = self.http_client.call_tool("get_pit_stop_analysis", {
                    "session_key": session_key,
                    "analysis_type": "all"
                })
                tool_results["pit_stop_analysis"] = pit_result
            
            else:  # race_results or qualifying_results
                #print("3️⃣ Getting race/qualifying results...")
                result_type = "top_10" if query_type == "qualifying_results" else "podium"
                race_result = self.http_client.call_tool("get_race_results", {
                    "session_key": session_key,
                    "result_type": result_type,
                    "include_lap_times": True
                })
                tool_results["race_results"] = race_result
            
        except Exception as e:
            tool_results["error"] = str(e)
            print(f"❌ Error executing tools: {e}")
        
        return tool_results
    
    def _generate_intelligent_summary(self, user_query: str, query_analysis: Dict[str, Any], tool_results: Dict[str, Any]) -> str:
        """Generate an intelligent text summary using LLM"""
        
        if "error" in tool_results:
            return f"I apologize, but I encountered an error: {tool_results['error']}"
        
         # Get conversation context
        conversation_context = self._get_conversation_context()
        
        # Prepare context for the LLM
        context_data = {
            "query": user_query,
            "race": query_analysis["meeting_info"]["name"],
            "year": query_analysis["meeting_info"]["year"],
            "session_type": query_analysis["session_type"],
            "query_type": query_analysis["query_type"],
            "tool_results": tool_results
        }
        
        # Create a prompt for generating the summary
        summary_prompt = f"""{conversation_context}You are an expert F1 analyst. Based on the following data, provide a comprehensive and engaging summary that directly answers the user's question.

    User Query: {user_query}
    Race: {context_data['race']} {context_data['year']}
    Session: {context_data['session_type']}

    Data Retrieved:
    {json.dumps(tool_results, indent=2)}

    Please provide a response that:
    1. Directly answers the user's question in 2-3 sentences maximum
    2. Includes specific numbers, times, and positions
    3. Uses clear, factual language (avoid flowery descriptions)
    4. Converts lap times from seconds to MM:SS.mmm format (e.g., 93.614 seconds becomes 01:33.614)
    5. Mentions significant gaps only if noteworthy

    IMPORTANT: 
    - Keep responses concise and to the point
    - Avoid phrases like "thrilling", "spectacular", "vibrant skies", "edge of their seats"
    - Focus on facts: who, what times, what positions
    - Maximum 3-4 sentences per response
    - When displaying lap times, use MM:SS.mmm format

    IMPORTANT: When displaying lap times, use the format MM:SS.mmm (e.g., 01:33.614, 01:30.000, 01:35.123). Do NOT mix words and numbers for times.

    Format the response as flowing text with natural paragraphs, not as raw data dumps.
    """
        
        try:
            messages = [SystemMessage(content=summary_prompt)]
            response = self.llm.invoke(messages)
            return response.content
        except Exception as e:
            print(f"❌ Error generating summary: {e}")
            # Fallback to basic summary
            return self._generate_basic_summary(user_query, query_analysis, tool_results)
        
        def _format_lap_time(self, seconds: float) -> str:
            """Convert seconds to MM:SS.mmm format"""
            if seconds is None:
                return "N/A"
            
            minutes = int(seconds // 60)
            remaining_seconds = seconds % 60
            return f"{minutes:02d}:{remaining_seconds:06.3f}"

        def _generate_basic_summary(self, user_query: str, query_analysis: Dict[str, Any], tool_results: Dict[str, Any]) -> str:
            """Generate a basic summary if LLM fails"""
            
            summary = f"Here's what I found for the {query_analysis['meeting_info']['name']} {query_analysis['meeting_info']['year']} {query_analysis['session_type']}:\n\n"
            
            if "driver_comparison" in tool_results and tool_results["driver_comparison"].get("success"):
                comparison_data = tool_results["driver_comparison"]["comparison"]
                summary += f"Driver Comparison:\n"
                for driver_data in comparison_data:
                    best_lap = self._format_lap_time(driver_data.get('best_lap'))
                    avg_lap = self._format_lap_time(driver_data.get('avg_lap'))
                    summary += f"• {driver_data['driver']} ({driver_data['team']}): {driver_data.get('total_laps', 'N/A')} laps, best: {best_lap}, avg: {avg_lap}\n"
            
            elif "race_results" in tool_results and tool_results["race_results"].get("success"):
                results = tool_results["race_results"]["results"]
                summary += "Race Results:\n"
                for i, result in enumerate(results[:3], 1):
                    summary += f"{i}. {result['driver']} ({result['team']}) - Position {result['position']}\n"
            
            return summary
    
    def _extract_meeting_info(self, query: str) -> Dict[str, Any]:
        """Extract meeting name and year from query"""
        race_keywords = {
            'australian': 'Australian Grand Prix',
            'chinese': 'Chinese Grand Prix',
            'china': 'Chinese Grand Prix',
            'japanese': 'Japanese Grand Prix',
            'japan': 'Japanese Grand Prix',
            'bahrain': 'Bahrain Grand Prix',
            'saudi': 'Saudi Arabian Grand Prix',
            'saudi arabian': 'Saudi Arabian Grand Prix',
            'miami': 'Miami Grand Prix',
            'italian emilia-romagna': 'Emilia‑Romagna Grand Prix',
            'emilia romagna': 'Emilia‑Romagna Grand Prix',
            'emilia-romagna': 'Emilia‑Romagna Grand Prix',
            'imola': 'Emilia‑Romagna Grand Prix',
            'monaco': 'Monaco Grand Prix',
            'spanish': 'Spanish Grand Prix',
            'spain': 'Spanish Grand Prix',
            'canadian': 'Canadian Grand Prix',
            'canada': 'Canadian Grand Prix', 
            'austrian': 'Austrian Grand Prix',
            'austria': 'Austrian Grand Prix',
            'british': 'British Grand Prix',
            'silverstone': 'British Grand Prix',
            'great britain': 'British Grand Prix'
        }
        
        query_lower = query.lower()
        meeting_name = None
        
        for keyword, race_name in race_keywords.items():
            if keyword in query_lower:
                meeting_name = race_name
                break
        
        # Extract year (default to 2025)
        year_match = re.search(r'20\d{2}', query)
        year = int(year_match.group()) if year_match else 2025
        
        return {
            "name": meeting_name,
            "year": year
        }
    
    def _extract_driver_names(self, query: str) -> List[str]:
        """Extract driver names from query with improved matching"""
        driver_mappings = {
            "Oscar PIASTRI": ["oscar", "piastri", "oscar piastri"],
            "Lando NORRIS": ["lando", "norris", "lando norris"],
            "George RUSSELL": ["george", "russell", "george russell"],
            "Andrea Kimi ANTONELLI": ["antonelli", "kimi", "kimi antonelli", "andrea antonelli"],
            "Max VERSTAPPEN": ["max", "verstappen", "max verstappen"],
            "Liam LAWSON": ["liam", "lawson", "liam lawson"],
            "Charles LECLERC": ["charles", "leclerc", "charles leclerc"],
            "Lewis HAMILTON": ["lewis", "hamilton", "lewis hamilton"],
            "Esteban OCON": ["esteban", "ocon", "esteban ocon"],
            "Oliver BEARMAN": ["oliver", "bearman", "oliver bearman"],
            "Alex ALBON": ["alex", "albon", "alex albon"],
            "Carlos SAINZ": ["carlos", "sainz", "carlos sainz"],
            "Pierre GASLY": ["pierre", "gasly", "pierre gasly"],
            "Jack DOOHAN": ["jack", "doohan", "jack doohan"],
            "Lance STROLL": ["lance", "stroll", "lance stroll"],
            "Fernando ALONSO": ["fernando", "alonso", "fernando alonso"],
            "Isack HADJAR": ["isack", "hadjar", "isack hadjar"],
            "Yuki TSUNODA": ["yuki", "tsunoda", "yuki tsunoda"],
            "Gabriel BORTOLETO": ["gabriel", "bortoleto", "gabriel bortoleto"],
            "Nico HULKENBERG": ["nico", "hulkenberg", "nico hulkenberg", "hulk"]
        }
        
        query_lower = query.lower()
        found_drivers = []
        
        for full_name, aliases in driver_mappings.items():
            for alias in aliases:
                if alias in query_lower:
                    if full_name not in found_drivers:
                        found_drivers.append(full_name)
                    break
        
        return found_drivers[:2]
    
    def add_to_history(self, message: str, response: str):
        """Append one user/assistant exchange to ``self.conversation_history``.

        Each entry stores the ISO-8601 timestamp from ``datetime.now()`` along
        with the user's message and the assistant's response.
        """
        entry = dict(
            timestamp=datetime.now().isoformat(),
            message=message,
            response=response,
        )
        self.conversation_history.append(entry)
    
    def get_context(self) -> str:
        """Render the most recent exchanges as a text block for prompting.

        Returns:
            An empty string when there is no history. Otherwise a
            ``"Previous conversation:"`` header followed by the last three
            exchanges, with each assistant response cut to its first 150
            characters and suffixed with ``...``.
        """
        history = self.conversation_history
        if not history:
            return ""

        pieces = ["Previous conversation:\n"]
        for turn in history[-3:]:
            pieces.append(f"User: {turn['message']}\n")
            pieces.append(f"Assistant: {turn['response'][:150]}...\n\n")

        return "".join(pieces)

# Testing Interface

In [9]:
# Fixed Test System and Initialization
import os
from typing import Dict, Any, List, Optional

# End-to-end smoke test for the HTTP MCP system (replaces the earlier test function)
def test_http_mcp_system_fixed():
    """Run an end-to-end smoke test of the HTTP MCP stack and print each result.

    Uses the notebook-level ``http_client``. Stages, in order:

    1. Server health check.
    2. Tool listing.
    3. A simple ``explore_schema`` tool call.
    4. Meeting key -> session key -> driver comparison for the 2025 Miami
       Grand Prix race.
    5. One query through ``HTTPMCPReasoningEngine``.

    A failure in stages 1-3 ends the run immediately. A failure in stage 4 is
    reported (with traceback on exceptions) and the run continues to stage 5.
    Nothing is returned.
    """
    import traceback

    def _run_miami_chain():
        """Stage 4: resolve meeting and session keys, then compare two drivers."""
        meeting_result = http_client.call_tool("get_meeting_key", {
            "event_name": "Miami Grand Prix",
            "year": 2025
        })
        print(f"   Meeting result: {meeting_result}")
        if not (isinstance(meeting_result, dict) and meeting_result.get("success")):
            print(f"   ❌ Meeting key failed: {meeting_result}")
            return
        meeting_key = meeting_result.get("meeting_key")

        print("   4b. Getting session key...")
        session_result = http_client.call_tool("get_session_key", {
            "meeting_key": meeting_key,
            "session_type": "Race"
        })
        print(f"   Session result: {session_result}")
        if not (isinstance(session_result, dict) and session_result.get("success")):
            print(f"   ❌ Session key failed: {session_result}")
            return
        session_key = session_result.get("session_key")

        print("   4c. Testing driver comparison...")
        compare_result = http_client.call_tool("compare_drivers", {
            "session_key": session_key,
            "driver1": "Lewis HAMILTON",
            "driver2": "Max VERSTAPPEN",
            "comparison_metrics": ["all"]
        })
        print(f"   Compare result: {compare_result}")
        if isinstance(compare_result, dict) and compare_result.get("success"):
            print("   ✅ Driver comparison successful!")
        else:
            print(f"   ❌ Driver comparison failed: {compare_result}")

    print("\n🧪 Testing HTTP MCP System (Fixed Version)...")

    # Stage 1: the server must be reachable before anything else is attempted.
    print("1. Checking server status...")
    try:
        health = http_client.health_check()
        if "error" not in health:
            print(f"✅ Server is healthy: {health}")
        else:
            print(f"❌ Server not available: {health['error']}")
            print("💡 Start the server first with: run_server_in_thread()")
            return
    except Exception as exc:
        print(f"❌ Server health check failed: {exc}")
        return

    # Stage 2: list the tools, previewing only the first five names.
    print("\n2. Testing tool listing...")
    try:
        tools = http_client.list_tools()
        tool_count = len(tools)
        preview_names = [tool['name'] for tool in tools[:5]]
        print(f"✅ Found {tool_count} tools: {preview_names}...")
    except Exception as exc:
        print(f"❌ Failed to list tools: {exc}")
        return

    # Stage 3: a cheap tool call to confirm the call path works.
    print("\n3. Testing simple tool call...")
    try:
        result = http_client.call_tool("explore_schema", {"detail_level": "overview"})
        print(f"✅ Tool call successful. Result type: {type(result)}")
        if not isinstance(result, dict):
            print(f"⚠️ Unexpected result format: {result}")
        else:
            print(f"✅ Result keys: {list(result.keys())}")
            if 'total_tables' in result:
                print(f"✅ Found {result['total_tables']} tables")
    except Exception as exc:
        print(f"❌ Tool call failed: {exc}")
        return

    # Stage 4: failures here are reported but do not stop the run.
    print("\n4. Testing Miami Grand Prix data retrieval...")
    try:
        _run_miami_chain()
    except Exception as exc:
        print(f"❌ Miami GP test failed: {exc}")
        traceback.print_exc()

    # Stage 5: exercise the reasoning engine with one comparison query.
    print("\n5. Testing reasoning engine...")
    try:
        if 'HTTPMCPReasoningEngine' not in globals():
            print("❌ HTTPMCPReasoningEngine not found. Please run the fixed reasoning engine code first.")
            return

        print("   5a. Initializing fixed reasoning engine...")
        openai_key = os.environ.get('OPENAI_API_KEY')
        if not openai_key:
            print("❌ OPENAI_API_KEY not found in environment variables")
            return

        fixed_reasoning_engine = HTTPMCPReasoningEngine(http_client, openai_key)
        print("   ✅ Fixed reasoning engine initialized")

        print("   5b. Testing with query...")
        test_query = "Compare hamilton and verstappen at the Miami race?"
        print(f"   Query: {test_query}")

        response = fixed_reasoning_engine.reason_and_answer(test_query)
        divider = "   " + "-" * 40
        print("\n   📊 Reasoning Engine Response:")
        print(divider)
        # Indent continuation lines so the whole response lines up.
        print("   " + response.replace('\n', '\n   '))
        print(divider)

    except Exception as exc:
        print(f"❌ Reasoning engine test failed: {exc}")
        traceback.print_exc()

# Function to initialize everything properly
def initialize_fixed_system():
    """Create the module-level ``enhanced_http_reasoning_engine``.

    Checks that ``http_client``, ``engine`` and ``db_schema`` exist as globals
    and that ``OPENAI_API_KEY`` is set, then builds an
    ``HTTPMCPReasoningEngine`` from ``http_client`` and the key.

    Returns:
        ``True`` when the engine was created. ``False`` when a prerequisite is
        missing or construction raised; the reason (and a traceback, for
        exceptions) is printed.
    """
    global enhanced_http_reasoning_engine

    try:
        module_scope = globals()
        missing = [
            name
            for name in ('http_client', 'engine', 'db_schema')
            if name not in module_scope
        ]
        if missing:
            print(f"❌ Missing required components: {missing}")
            return False

        openai_key = os.environ.get('OPENAI_API_KEY')
        if not openai_key:
            print("❌ OPENAI_API_KEY not set. Please set it in your environment.")
            return False

        enhanced_http_reasoning_engine = HTTPMCPReasoningEngine(http_client, openai_key)

    except Exception as exc:
        print(f"❌ Initialization failed: {exc}")
        import traceback
        traceback.print_exc()
        return False

    return True

# Quick debugging function
def debug_current_state():
    """Print a diagnostic report of the notebook's runtime state.

    Reports, in order: whether each expected global exists (with its type),
    whether each expected environment variable is set (values are never
    printed), and whether the server answers a health check.
    """
    print("🔍 Debugging Current System State...")

    # Expected notebook-level objects.
    module_scope = globals()
    for name in ('http_client', 'engine', 'db_schema', 'HTTPMCPReasoningEngine'):
        if name not in module_scope:
            print(f"❌ {name}: Not found")
            continue
        print(f"✅ {name}: {type(module_scope[name])}")

    # Environment variables: presence only.
    for env_name in ('OPENAI_API_KEY', 'DB_HOST', 'DB_NAME', 'DB_USER'):
        if not os.getenv(env_name):
            print(f"❌ {env_name}: Not set")
        else:
            print(f"✅ {env_name}: Set")

    # Server reachability.
    try:
        health = http_client.health_check()
        if "error" not in health:
            print(f"✅ Server: Running ({health.get('status', 'unknown')})")
        else:
            print(f"❌ Server: Not running ({health['error']})")
    except Exception as exc:
        print(f"❌ Server: Error checking ({exc})")

# Updated interactive test function
def http_interactive_test_fixed():
    """Run an interactive console session against the reasoning engine.

    Prints the command banner, initializes the system via
    ``initialize_fixed_system()`` (returning early if that fails), then loops
    reading input. Recognized commands are ``quit``/``exit``, ``help``,
    ``debug``, ``test`` and ``init``; any other non-empty input is sent to
    ``enhanced_http_reasoning_engine.reason_and_answer`` after a server health
    check.

    The session ends on ``quit``/``exit``, on Ctrl-C, or when input can no
    longer be read (stdin closed, or a front end that cannot prompt).
    Previously a failing ``input()`` was treated like any other error and
    retried, so the loop never terminated. Errors while processing a query are
    printed and the loop continues. Nothing is returned.
    """

    banner_rule = "=" * 60
    print("🏎️ HTTP MCP F1 Bot Interactive Testing Session (Fixed)")
    print(banner_rule)
    print("Commands:")
    print("  • 'quit' or 'exit' - Exit the session")
    print("  • 'help' - Show example queries")
    print("  • 'debug' - Debug system state")
    print("  • 'test' - Run system test")
    print("  • 'init' - Initialize fixed system")
    print("  • Any F1 query - Ask about F1 data")
    print(banner_rule)

    # Initialize the system
    if not initialize_fixed_system():
        print("❌ Failed to initialize system. Please check the errors above.")
        return

    conversation_count = 0

    while True:
        conversation_count += 1

        # Read input separately from query handling: if input() itself cannot
        # work, retrying would spin forever, so end the session instead.
        try:
            user_input = input(f"\n[{conversation_count}] You: ").strip()
        except KeyboardInterrupt:
            print("\n👋 Session interrupted. Goodbye!")
            break
        except (EOFError, NotImplementedError):
            # EOFError: stdin is closed/exhausted. NotImplementedError covers
            # the error Jupyter raises when the front end does not support
            # input requests (e.g. non-interactive execution).
            print("\n👋 Input is not available. Ending session.")
            break

        try:
            command = user_input.lower()

            if command in ['quit', 'exit']:
                print("👋 Thanks for testing! Goodbye!")
                break

            if command == 'help':
                print("\n📝 Example queries:")
                print("  • Who won the Miami Grand Prix?")
                print("  • Compare Hamilton and Verstappen at Miami")
                print("  • What was the fastest lap at Miami?")
                print("  • Show pit stop analysis for Miami race")
                print("  • How did Norris perform at Miami?")
                continue

            if command == 'debug':
                debug_current_state()
                continue

            if command == 'test':
                test_http_mcp_system_fixed()
                continue

            if command == 'init':
                if initialize_fixed_system():
                    print("✅ System reinitialized successfully")
                else:
                    print("❌ Failed to reinitialize system")
                continue

            if not user_input:
                continue

            # Check if server is running
            health = http_client.health_check()
            if "error" in health:
                print("❌ Server is not running!")
                print("💡 Start the server first with: run_server_in_thread()")
                continue

            # Use the fixed reasoning engine
            response = enhanced_http_reasoning_engine.reason_and_answer(user_input)

            print("\n📊 Response:")
            print("-" * 50)
            print(response)
            print("-" * 50)

        except KeyboardInterrupt:
            print("\n👋 Session interrupted. Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {e}")
            print("💡 Try 'debug' to check system state or 'init' to reinitialize")
            continue

# Announce the helpers defined in this cell once it has finished running.
print(
    "✅ Fixed test system ready!",
    "Commands:",
    "  • debug_current_state() - Check what's available",
    "  • initialize_fixed_system() - Initialize the fixed system",
    "  • test_http_mcp_system_fixed() - Run comprehensive tests",
    "  • http_interactive_test_fixed() - Start interactive session",
    sep="\n",
)

✅ Fixed test system ready!
Commands:
  • debug_current_state() - Check what's available
  • initialize_fixed_system() - Initialize the fixed system
  • test_http_mcp_system_fixed() - Run comprehensive tests
  • http_interactive_test_fixed() - Start interactive session


In [None]:
# Start the MCP HTTP server; the test helpers above tell the user to run this
# first when their health check fails.
run_server_in_thread()

INFO:     Started server process [80352]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://localhost:8000 (Press CTRL+C to quit)


✅ Enhanced F1 MCP Server started with specialized tool executor!
✅ Enhanced F1 MCP HTTP Server started on localhost:8000
📚 API Documentation: http://localhost:8000/docs
🔍 Health Check: http://localhost:8000/health
🛠️ Available Tools: http://localhost:8000/tools
📊 Tool Categories:
   • Core Identification: get_meeting_key, get_session_key
   • Performance Analysis: get_fastest_lap, get_driver_performance, get_team_performance
   • Comparison: compare_drivers, compare_teams
   • Results: get_race_results, get_qualifying_results
   • Strategy: get_pit_stop_analysis, get_tire_strategy
   • Incident Analysis: investigate_incident
   • Position Analysis: get_position_progression
   • Sector Analysis: get_sector_analysis
   • Utility: explore_schema, get_session_info


In [11]:
# Confirm the server is up and responding before sending queries.
check_server_status()

✅ Enhanced server is running
✅ Enhanced server is responding to requests
✅ Tool count: 16


INFO:     ::1:56982 - "GET /health HTTP/1.1" 200 OK
INFO:     ::1:56983 - "GET /health HTTP/1.1" 200 OK
INFO:     ::1:56983 - "POST /call_tool HTTP/1.1" 200 OK
INFO:     ::1:56983 - "POST /call_tool HTTP/1.1" 200 OK
INFO:     ::1:56983 - "GET /health HTTP/1.1" 200 OK
INFO:     ::1:56983 - "POST /call_tool HTTP/1.1" 200 OK
INFO:     ::1:56983 - "POST /call_tool HTTP/1.1" 200 OK
INFO:     ::1:56983 - "POST /call_tool HTTP/1.1" 200 OK
INFO:     ::1:56985 - "GET /health HTTP/1.1" 200 OK
INFO:     ::1:56985 - "POST /call_tool HTTP/1.1" 200 OK
INFO:     ::1:56985 - "POST /call_tool HTTP/1.1" 200 OK
INFO:     ::1:56985 - "POST /call_tool HTTP/1.1" 200 OK
INFO:     ::1:56985 - "POST /call_tool HTTP/1.1" 200 OK
INFO:     ::1:56985 - "POST /call_tool HTTP/1.1" 200 OK
INFO:     ::1:57000 - "GET /health HTTP/1.1" 200 OK
INFO:     ::1:57000 - "POST /call_tool HTTP/1.1" 200 OK
INFO:     ::1:57000 - "POST /call_tool HTTP/1.1" 200 OK
INFO:     ::1:57000 - "GET /health HTTP/1.1" 200 OK
INFO:     ::1:57

In [12]:
#test_http_mcp_system_fixed()

In [13]:
# Start the interactive session (type 'quit' or 'exit' to leave).
http_interactive_test_fixed()

🏎️ HTTP MCP F1 Bot Interactive Testing Session (Fixed)
Commands:
  • 'quit' or 'exit' - Exit the session
  • 'help' - Show example queries
  • 'debug' - Debug system state
  • 'test' - Run system test
  • 'init' - Initialize fixed system
  • Any F1 query - Ask about F1 data

📊 Response:
--------------------------------------------------
The Miami Grand Prix weekend has both a Sprint Race and the main Grand Prix. Which one would you like to know about?
• Main race (Grand Prix)
• Sprint race
Please clarify which race you're interested in!
--------------------------------------------------

📊 Response:
--------------------------------------------------
In the Sprint Race of the 2025 Miami Grand Prix, Lando Norris from McLaren secured first place with a total race time of 35:51.002. Following closely, Oscar Piastri, also from McLaren, took second place, finishing just 2.554 seconds behind Norris. Lewis Hamilton representing Ferrari claimed the third spot, crossing the finish line with an a

In [14]:
#stop_server()