In [None]:
!pip install fastf1 pandas numpy matplotlib seaborn plotly scikit-learn



In [None]:
import fastf1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import warnings
import os
warnings.filterwarnings('ignore')

# Create cache directory if it doesn't exist
cache_dir = 'fastf1_cache'
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)
    print(f"Created cache directory: {cache_dir}")

# Enable fastf1 cache
fastf1.Cache.enable_cache(cache_dir)
print("FastF1 cache enabled successfully!")

# Set plotting style
plt.style.use('default')
sns.set_palette("husl")
print("Libraries imported and setup complete!")

FastF1 cache enabled successfully!
Libraries imported and setup complete!


In [None]:
def get_recent_race_data(year=2024, num_races=3):
    """
    Collect data from recent races for analysis
    """
    try:
        schedule = fastf1.get_event_schedule(year)
        print(f"Schedule loaded for {year}")
    except Exception as e:
        print(f"Error loading schedule: {e}")
        return create_sample_data()

    recent_races = []

    # Get last few completed races
    completed_races = schedule[schedule['EventDate'] < pd.Timestamp.now()]
    if len(completed_races) < num_races:
        num_races = len(completed_races)

    if num_races == 0:
        print("No completed races found, using sample data")
        return create_sample_data()

    last_races = completed_races.tail(num_races)

    for idx, race in last_races.iterrows():
        try:
            print(f"Loading data for {race['EventName']}...")
            session = fastf1.get_session(year, race['RoundNumber'], 'R')
            session.load()

            race_results = session.results
            race_results['RaceName'] = race['EventName']
            race_results['Round'] = race['RoundNumber']

            recent_races.append(race_results)
            print(f"✓ Successfully loaded {race['EventName']}")
        except Exception as e:
            print(f"✗ Error loading {race['EventName']}: {e}")
            continue

    if recent_races:
        return pd.concat(recent_races, ignore_index=True)
    else:
        print("No race data could be loaded, using sample data")
        return create_sample_data()

def get_qualifying_data(year=2024, num_races=3):
    """
    Get qualifying performance data
    """
    try:
        schedule = fastf1.get_event_schedule(year)
    except Exception as e:
        print(f"Error loading schedule for qualifying: {e}")
        return create_sample_qualifying_data()

    qualifying_data = []

    completed_races = schedule[schedule['EventDate'] < pd.Timestamp.now()]
    if len(completed_races) < num_races:
        num_races = len(completed_races)

    if num_races == 0:
        print("No completed races found for qualifying, using sample data")
        return create_sample_qualifying_data()

    last_races = completed_races.tail(num_races)

    for idx, race in last_races.iterrows():
        try:
            print(f"Loading qualifying data for {race['EventName']}...")
            session = fastf1.get_session(year, race['RoundNumber'], 'Q')
            session.load()

            qual_results = session.results
            qual_results['RaceName'] = race['EventName']
            qual_results['Round'] = race['RoundNumber']

            qualifying_data.append(qual_results)
            print(f"✓ Successfully loaded qualifying for {race['EventName']}")
        except Exception as e:
            print(f"✗ Error loading qualifying for {race['EventName']}: {e}")
            continue

    if qualifying_data:
        return pd.concat(qualifying_data, ignore_index=True)
    else:
        print("No qualifying data could be loaded, using sample data")
        return create_sample_qualifying_data()

def create_sample_data():
    """
    Create sample race data for demonstration purposes
    """
    drivers = ['VER', 'LEC', 'NOR', 'HAM', 'RUS', 'SAI', 'PER', 'ALO', 'STR', 'GAS',
               'OCO', 'ALB', 'TSU', 'RIC', 'MAG', 'HUL', 'BOT', 'ZHO', 'SAR', 'COL']

    sample_data = []

    # Create 3 sample races
    for race_num in range(1, 4):
        positions = list(range(1, 21))
        np.random.shuffle(positions)

        for i, driver in enumerate(drivers):
            points_map = {1: 25, 2: 18, 3: 15, 4: 12, 5: 10, 6: 8, 7: 6, 8: 4, 9: 2, 10: 1}

            sample_data.append({
                'Abbreviation': driver,
                'Position': positions[i],
                'GridPosition': np.random.randint(1, 21),
                'Points': points_map.get(positions[i], 0),
                'Time': pd.Timedelta(seconds=np.random.randint(5400, 6000)) if positions[i] <= 15 else pd.NaT,
                'RaceName': f'Sample Race {race_num}',
                'Round': race_num
            })

    print("Created sample race data for demonstration")
    return pd.DataFrame(sample_data)

def create_sample_qualifying_data():
    """
    Create sample qualifying data
    """
    drivers = ['VER', 'LEC', 'NOR', 'HAM', 'RUS', 'SAI', 'PER', 'ALO', 'STR', 'GAS',
               'OCO', 'ALB', 'TSU', 'RIC', 'MAG', 'HUL', 'BOT', 'ZHO', 'SAR', 'COL']

    sample_data = []

    for race_num in range(1, 4):
        positions = list(range(1, 21))
        np.random.shuffle(positions)

        for i, driver in enumerate(drivers):
            sample_data.append({
                'Abbreviation': driver,
                'Position': positions[i],
                'Q1': pd.Timedelta(seconds=80 + np.random.random() * 3) if positions[i] <= 15 else pd.NaT,
                'Q2': pd.Timedelta(seconds=79 + np.random.random() * 2) if positions[i] <= 10 else pd.NaT,
                'Q3': pd.Timedelta(seconds=78 + np.random.random() * 1.5) if positions[i] <= 10 else pd.NaT,
                'RaceName': f'Sample Race {race_num}',
                'Round': race_num
            })

    print("Created sample qualifying data for demonstration")
    return pd.DataFrame(sample_data)

In [None]:
# Load recent race and qualifying data
print("="*50)
print("LOADING F1 DATA")
print("="*50)

print("Loading recent race data...")
try:
    race_data = get_recent_race_data(2024, 3)  # Reduced to 3 races for better reliability
    print(f"✓ Successfully loaded race data: {len(race_data)} records")
except Exception as e:
    print(f"✗ Error in race data loading: {e}")
    race_data = create_sample_data()

print("\nLoading qualifying data...")
try:
    qualifying_data = get_qualifying_data(2024, 3)  # Reduced to 3 races
    print(f"✓ Successfully loaded qualifying data: {len(qualifying_data)} records")
except Exception as e:
    print(f"✗ Error in qualifying data loading: {e}")
    qualifying_data = create_sample_qualifying_data()

# Display basic info
print(f"\n📊 DATA SUMMARY:")
print(f"Race results: {len(race_data)} entries")
print(f"Qualifying results: {len(qualifying_data)} entries")

# Display available columns
if not race_data.empty:
    print(f"\n📋 Race data columns available:")
    for col in race_data.columns:
        print(f"  • {col}")

if not qualifying_data.empty:
    print(f"\n📋 Qualifying data columns available:")
    for col in qualifying_data.columns:
        print(f"  • {col}")

# Show sample of data
if not race_data.empty:
    print(f"\n🏁 Sample Race Data:")
    display_cols = ['Abbreviation', 'Position', 'GridPosition', 'Points', 'RaceName']
    available_cols = [col for col in display_cols if col in race_data.columns]
    print(race_data[available_cols].head())

print("\n" + "="*50)
print("DATA LOADING COMPLETE")
print("="*50)

LOADING F1 DATA
Loading recent race data...
Schedule loaded for 2024
Loading data for Las Vegas Grand Prix...


core           INFO 	Loading data for Las Vegas Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Las Vegas Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core     

✓ Successfully loaded Las Vegas Grand Prix
Loading data for Qatar Grand Prix...


req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core           INFO 	Processing timing data...
INFO:fastf1.fastf1.core:Processing timing data...
req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '81', '63', '10', '55', '14', '24', '20', '4', '77', '44', '22', '30', '23', '27', '11', '18', '43', '31']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['1', '16', '81', '63', '10', '55', '14', '24', '20', '4', '77', '44', '22'

✓ Successfully loaded Qatar Grand Prix
Loading data for Abu Dhabi Grand Prix...


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '55', '16', '44', '63', '1', '10', '27', '14', '81', '23', '22', '24', '18', '61', '20', '30', '77', '43', '11']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['4', '55', '16', '44', '63', '1', '10', '27', '14', '81', '23', '22', '24', '18', '61', '20', '30', '77', '43', '11']
core           INFO 	Loading data for Las Vegas Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Las Vegas Grand Prix - Qualifying [v3.5.3]
req     

✓ Successfully loaded Abu Dhabi Grand Prix
✓ Successfully loaded race data: 60 records

Loading qualifying data...
Loading qualifying data for Las Vegas Grand Prix...


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '55', '10', '16', '1', '4', '22', '81', '27', '44', '31', '20', '24', '43', '30', '11', '14', '23', '77', '18']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['63', '55', '10', '16', '1', '4', '22', '81', '27', '44', '31', '20', '24', '43', '30', '11', '14', '23', '77', '18']
core           INFO 	Loading data for Qatar Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Qatar Grand Prix - Qualifying [v3.5.3]
req            I

✓ Successfully loaded qualifying for Las Vegas Grand Prix
Loading qualifying data for Qatar Grand Prix...


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '63', '4', '81', '16', '44', '55', '14', '11', '20', '10', '24', '77', '22', '18', '23', '30', '27', '43', '31']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['1', '63', '4', '81', '16', '44', '55', '14', '11', '20', '10', '24', '77', '22', '18', '23', '30', '27', '43', '31']
core           INFO 	Loading data for Abu Dhabi Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Abu Dhabi Grand Prix - Qualifying [v3.5.3]
req     

✓ Successfully loaded qualifying for Qatar Grand Prix
Loading qualifying data for Abu Dhabi Grand Prix...


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '81', '55', '27', '1', '10', '63', '14', '77', '11', '22', '30', '18', '16', '20', '23', '24', '44', '43', '61']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['4', '81', '55', '27', '1', '10', '63', '14', '77', '11', '22', '30', '18', '16', '20', '23', '24', '44', '43', '61']


✓ Successfully loaded qualifying for Abu Dhabi Grand Prix
✓ Successfully loaded qualifying data: 60 records

📊 DATA SUMMARY:
Race results: 60 entries
Qualifying results: 60 entries

📋 Race data columns available:
  • DriverNumber
  • BroadcastName
  • Abbreviation
  • DriverId
  • TeamName
  • TeamColor
  • TeamId
  • FirstName
  • LastName
  • FullName
  • HeadshotUrl
  • CountryCode
  • Position
  • ClassifiedPosition
  • GridPosition
  • Q1
  • Q2
  • Q3
  • Time
  • Status
  • Points
  • RaceName
  • Round

📋 Qualifying data columns available:
  • DriverNumber
  • BroadcastName
  • Abbreviation
  • DriverId
  • TeamName
  • TeamColor
  • TeamId
  • FirstName
  • LastName
  • FullName
  • HeadshotUrl
  • CountryCode
  • Position
  • ClassifiedPosition
  • GridPosition
  • Q1
  • Q2
  • Q3
  • Time
  • Status
  • Points
  • RaceName
  • Round

🏁 Sample Race Data:
  Abbreviation  Position  GridPosition  Points              RaceName
0          RUS       1.0           1.0    25.0  Las V

In [None]:
def create_driver_performance_metrics(race_data, qualifying_data):
    """
    Create comprehensive driver performance metrics
    """
    metrics = {}

    if not race_data.empty:
        # Race performance metrics
        race_metrics = race_data.groupby('Abbreviation').agg({
            'Position': ['mean', 'std', 'count'],
            'Points': ['sum', 'mean'],
            'GridPosition': 'mean',
            'Time': lambda x: x.count()  # Finished races
        }).round(2)

        race_metrics.columns = ['_'.join(col).strip() for col in race_metrics.columns]
        race_metrics = race_metrics.reset_index()

        for driver in race_metrics['Abbreviation']:
            driver_data = race_data[race_data['Abbreviation'] == driver]

            # Calculate consistency (lower std = more consistent)
            consistency_score = 1 / (1 + race_metrics[race_metrics['Abbreviation'] == driver]['Position_std'].iloc[0])

            # Calculate average finish position
            avg_position = race_metrics[race_metrics['Abbreviation'] == driver]['Position_mean'].iloc[0]

            # Points per race
            points_per_race = race_metrics[race_metrics['Abbreviation'] == driver]['Points_mean'].iloc[0]

            # Finishing rate
            total_races = race_metrics[race_metrics['Abbreviation'] == driver]['Position_count'].iloc[0]
            finishing_rate = race_metrics[race_metrics['Abbreviation'] == driver]['Time_<lambda>'].iloc[0] / total_races

            metrics[driver] = {
                'avg_finish_position': avg_position,
                'consistency_score': consistency_score,
                'points_per_race': points_per_race,
                'finishing_rate': finishing_rate,
                'total_points': race_metrics[race_metrics['Abbreviation'] == driver]['Points_sum'].iloc[0]
            }

    if not qualifying_data.empty:
        # Qualifying performance
        qual_metrics = qualifying_data.groupby('Abbreviation').agg({
            'Position': ['mean', 'std'],
            'Q1': lambda x: x.dropna().count(),
            'Q2': lambda x: x.dropna().count(),
            'Q3': lambda x: x.dropna().count()
        }).round(2)

        qual_metrics.columns = ['_'.join(col).strip() for col in qual_metrics.columns]
        qual_metrics = qual_metrics.reset_index()

        for driver in qual_metrics['Abbreviation']:
            if driver in metrics:
                metrics[driver]['avg_quali_position'] = qual_metrics[qual_metrics['Abbreviation'] == driver]['Position_mean'].iloc[0]
                metrics[driver]['quali_consistency'] = 1 / (1 + qual_metrics[qual_metrics['Abbreviation'] == driver]['Position_std'].iloc[0])
            else:
                avg_quali = qual_metrics[qual_metrics['Abbreviation'] == driver]['Position_mean'].iloc[0]
                quali_consistency = 1 / (1 + qual_metrics[qual_metrics['Abbreviation'] == driver]['Position_std'].iloc[0])

                metrics[driver] = {
                    'avg_finish_position': 10.0,  # Default values
                    'consistency_score': 0.5,
                    'points_per_race': 0.0,
                    'finishing_rate': 0.8,
                    'total_points': 0.0,
                    'avg_quali_position': avg_quali,
                    'quali_consistency': quali_consistency
                }

    return metrics

# Create performance metrics
driver_metrics = create_driver_performance_metrics(race_data, qualifying_data)

# Convert to DataFrame for easier analysis
metrics_df = pd.DataFrame.from_dict(driver_metrics, orient='index')
metrics_df = metrics_df.reset_index()
metrics_df = metrics_df.rename(columns={'index': 'Driver'})

print("Driver Performance Metrics:")
print(metrics_df.head(10))

Driver Performance Metrics:
  Driver  avg_finish_position  consistency_score  points_per_race  \
0    ALB                15.00           0.200000             0.00   
1    ALO                 9.00           0.333333             2.67   
2    BOT                15.67           0.198413             0.00   
3    COL                17.33           0.257069             0.00   
4    DOO                15.00                NaN             0.00   
5    GAS                10.67           0.109409             5.33   
6    HAM                 6.00           0.158983            10.00   
7    HUL                10.67           0.177936             2.67   
8    LAW                15.67           0.395257             0.00   
9    LEC                 3.00           0.500000            15.00   

   finishing_rate  total_points  avg_quali_position  quali_consistency  
0        0.666667           0.0               16.67           0.465116  
1        1.000000           8.0               11.00           0.16

In [None]:
def get_barcelona_historical_data():
    """
    Get historical performance at Barcelona-Catalunya
    """
    barcelona_data = []

    # Try to get data from previous years at Barcelona
    for year in [2023, 2022, 2021]:
        try:
            print(f"Loading Barcelona data for {year}...")
            session = fastf1.get_session(year, 'Spain', 'R')
            session.load()

            results = session.results
            results['Year'] = year
            barcelona_data.append(results)
        except Exception as e:
            print(f"Error loading Barcelona {year}: {e}")
            continue

    return pd.concat(barcelona_data, ignore_index=True) if barcelona_data else pd.DataFrame()

# Load Barcelona-specific data
barcelona_history = get_barcelona_historical_data()

if not barcelona_history.empty:
    print(f"\nLoaded Barcelona historical data: {len(barcelona_history)} results")

    # Analyze Barcelona performance
    barcelona_performance = barcelona_history.groupby('Abbreviation').agg({
        'Position': ['mean', 'count'],
        'Points': 'sum'
    }).round(2)

    barcelona_performance.columns = ['avg_position_barcelona', 'races_at_barcelona', 'total_points_barcelona']
    barcelona_performance = barcelona_performance.reset_index()

    print("\nBarcelona Historical Performance (Top 10):")
    print(barcelona_performance.sort_values('avg_position_barcelona').head(10))
else:
    print("No Barcelona historical data available")

Loading Barcelona data for 2023...


core           INFO 	Loading data for Spanish Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Spanish Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core         

Loading Barcelona data for 2022...


req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core           INFO 	Processing timing data...
INFO:fastf1.fastf1.core:Processing timing data...
req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '63', '55', '44', '77', '31', '4', '14', '22', '5', '3', '10', '47', '18', '6', '20', '23', '24', '16']
INFO:fas

Loading Barcelona data for 2021...


req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core           INFO 	Processing timing data...
INFO:fastf1.fastf1.core:Processing timing data...
req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: 


Loaded Barcelona historical data: 60 results

Barcelona Historical Performance (Top 10):
   Abbreviation  avg_position_barcelona  races_at_barcelona  \
24          VER                    1.33                   3   
6           HAM                    2.67                   3   
15          PER                    3.67                   3   
20          SAI                    5.33                   3   
19          RUS                    6.67                   3   
14          OCO                    8.00                   3   
18          RIC                    9.00                   2   
2           BOT                    9.33                   3   
22          STR                   10.67                   3   
4           GAS                   11.00                   3   

    total_points_barcelona  
24                    70.0  
6                     53.0  
15                    41.0  
20                    28.0  
19                    30.0  
14                    12.0  
18           

#  Setup Agentic AI System with CrewAI

In [None]:
!pip install langgraph langchain langchain-openai pandas fastf1

import os
import pandas as pd
from langgraph.graph import StateGraph, END
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
from typing import TypedDict, Optional
from datetime import datetime
import json

Collecting langgraph
  Downloading langgraph-0.4.7-py3-none-any.whl.metadata (6.8 kB)
Collecting langgraph-checkpoint>=2.0.26 (from langgraph)
  Downloading langgraph_checkpoint-2.0.26-py3-none-any.whl.metadata (4.6 kB)
Collecting langgraph-prebuilt>=0.2.0 (from langgraph)
  Downloading langgraph_prebuilt-0.2.2-py3-none-any.whl.metadata (4.5 kB)
Collecting langgraph-sdk>=0.1.42 (from langgraph)
  Downloading langgraph_sdk-0.1.70-py3-none-any.whl.metadata (1.5 kB)
Collecting ormsgpack<2.0.0,>=1.8.0 (from langgraph-checkpoint>=2.0.26->langgraph)
  Downloading ormsgpack-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Downloading langgraph-0.4.7-py3-none-any.whl (154 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.9/154.9 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langgraph_checkpoint-2.0.26-py3

In [None]:
# Set your OpenAI API key
OPENAI_API_KEY = "*******************"  # ⚠️ Replace with your actual API key
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

print("🤖 Setting up LangGraph AI System...")
print("📋 Creating F1 prediction workflow...")

# Initialize LLM
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.7)

🤖 Setting up LangGraph AI System...
📋 Creating F1 prediction workflow...


In [None]:
# Define state to track data and results
class F1PredictionState(TypedDict):
    metrics_df: Optional[pd.DataFrame]
    barcelona_performance: Optional[pd.DataFrame]
    performance_analysis: Optional[str]
    weather_analysis: Optional[str]
    circuit_analysis: Optional[str]
    team_dynamics: Optional[str]
    technical_assessment: Optional[str]
    final_prediction: Optional[str]
    error: Optional[str]

# Define tools
@tool
def analyze_driver_performance() -> str:
    """Analyze driver performance metrics for Spanish Grand Prix 2025 using FastF1 API data."""
    try:
        global metrics_df
        if not isinstance(metrics_df, pd.DataFrame):
            return "Error: metrics_df is not a valid DataFrame."
        if metrics_df.empty or 'points_per_race' not in metrics_df.columns:
            return "Driver performance data not available from FastF1 API. Ensure metrics_df is populated with points_per_race column."

        # Use list comprehension for top 5
        drivers = [
            (row['Driver'], row['points_per_race'], row['consistency_score'])
            for _, row in metrics_df.iterrows()
            if pd.notna(row['points_per_race'])
        ]
        drivers.sort(key=lambda x: x[1], reverse=True)
        top_performers = drivers[:5]

        if not top_performers:
            return "No valid driver performance data found in metrics_df."

        analysis = "Top 5 drivers by points per race for Spanish Grand Prix 2025:\n"
        for driver, points, consistency in top_performers:
            analysis += f"- {driver}: {points:.2f} points/race (Consistency: {consistency:.2f})\n"
        return analysis
    except Exception as e:
        return f"Error analyzing driver data: {str(e)}"

@tool
def get_weather_factor() -> str:
    """Provide typical weather conditions for Spanish Grand Prix 2025 based on Barcelona patterns."""
    try:
        analysis = (
            "Weather for Spanish Grand Prix 2025 (Barcelona, May):\n"
            "- Condition: Mostly clear with occasional clouds\n"
            "- Temperature: ~20-25°C\n"
            "- Impact: Clear conditions expected - favors aerodynamically efficient cars and consistent tire performance."
        )
        return analysis
    except Exception as e:
        return f"Error generating weather analysis: {str(e)}"

@tool
def analyze_historical_barcelona_data() -> str:
    """Analyze historical performance at Barcelona circuit using FastF1 API data."""
    try:
        global barcelona_performance
        if not isinstance(barcelona_performance, pd.DataFrame):
            return "Error: barcelona_performance is not a valid DataFrame."
        if barcelona_performance.empty or 'avg_position_barcelona' not in barcelona_performance.columns:
            return "Barcelona historical data not available from FastF1 API. Ensure barcelona_performance is populated with avg_position_barcelona column."

        # Use list comprehension for top 5
        drivers = [
            (row['Abbreviation'], row['avg_position_barcelona'], row['races_at_barcelona'])
            for _, row in barcelona_performance.iterrows()
            if pd.notna(row['avg_position_barcelona'])
        ]
        drivers.sort(key=lambda x: x[1])
        top_barcelona = drivers[:5]

        if not top_barcelona:
            return "No valid historical data found in barcelona_performance."

        analysis = "Top 5 historical performers at Barcelona:\n"
        for driver, avg_pos, races in top_barcelona:
            analysis += f"- {driver}: Avg position {avg_pos:.1f} (Races: {races})\n"
        return analysis
    except Exception as e:
        return f"Error analyzing historical data: {str(e)}"

# Define nodes
def performance_analysis_node(state: F1PredictionState) -> F1PredictionState:
    print("🔍 Running Performance Analysis...")
    try:
        result = analyze_driver_performance.invoke({})
        state["performance_analysis"] = result
    except Exception as e:
        state["error"] = f"Performance analysis failed: {str(e)}"
    return state

def weather_strategy_node(state: F1PredictionState) -> F1PredictionState:
    print("🌤️ Running Weather Strategy Analysis...")
    try:
        result = get_weather_factor.invoke({})
        state["weather_analysis"] = result
    except Exception as e:
        state["error"] = f"Weather strategy failed: {str(e)}"
    return state

def circuit_analysis_node(state: F1PredictionState) -> F1PredictionState:
    print("🏎️ Running Circuit Analysis...")
    try:
        result = analyze_historical_barcelona_data.invoke({})
        state["circuit_analysis"] = result
    except Exception as e:
        state["error"] = f"Circuit analysis failed: {str(e)}"
    return state

def team_dynamics_node(state: F1PredictionState) -> F1PredictionState:
    print("🤝 Running Team Dynamics Analysis...")
    try:
        prompt = (
            "Evaluate team and psychological factors for the Spanish Grand Prix 2025 based on current F1 season context:\n"
            "- Current team form and momentum\n"
            "- Driver confidence and recent performance trends\n"
            "- Inter-team competition dynamics\n"
            "- Pressure situations and driver responses\n"
            "- Team strategic capabilities\n"
            "Provide a concise analysis."
        )
        result = llm.invoke(prompt).content
        state["team_dynamics"] = result
    except Exception as e:
        state["error"] = f"Team dynamics analysis failed: {str(e)}"
    return state

def technical_assessment_node(state: F1PredictionState) -> F1PredictionState:
    print("🔧 Running Technical Assessment...")
    try:
        prompt = (
            "Assess technical and reliability factors for the Spanish Grand Prix 2025:\n"
            "- Car performance characteristics\n"
            "- Reliability concerns and mechanical factors\n"
            "- Aerodynamic advantages at Barcelona\n"
            "- Engine performance and power unit reliability\n"
            "- Technical updates and their impact\n"
            "Provide a concise analysis."
        )
        result = llm.invoke(prompt).content
        state["technical_assessment"] = result
    except Exception as e:
        state["error"] = f"Technical assessment failed: {str(e)}"
    return state

def final_prediction_node(state: F1PredictionState) -> F1PredictionState:
    print("🏁 Generating Final Prediction...")
    try:
        prompt = (
            "Synthesize the following analyses to create a final race prediction for the Spanish Grand Prix 2025:\n"
            f"1. Performance Analysis:\n{state.get('performance_analysis', 'No data')}\n"
            f"2. Weather Analysis:\n{state.get('weather_analysis', 'No data')}\n"
            f"3. Circuit Analysis:\n{state.get('circuit_analysis', 'No data')}\n"
            f"4. Team Dynamics:\n{state.get('team_dynamics', 'No data')}\n"
            f"5. Technical Assessment:\n{state.get('technical_assessment', 'No data')}\n"
            "Provide:\n"
            "- A top-20 finishing order prediction\n"
            "- Podium predictions with confidence levels\n"
            "- Key factors influencing the outcome\n"
            "Format as a concise report."
        )
        result = llm.invoke(prompt).content
        state["final_prediction"] = result
    except Exception as e:
        state["error"] = f"Final prediction failed: {str(e)}"
    return state

# Create the graph
workflow = StateGraph(F1PredictionState)

# Add nodes
workflow.add_node("performance_analysis_node", performance_analysis_node)
workflow.add_node("weather_strategy_node", weather_strategy_node)
workflow.add_node("circuit_analysis_node", circuit_analysis_node)
workflow.add_node("team_dynamics_node", team_dynamics_node)
workflow.add_node("technical_assessment_node", technical_assessment_node)
workflow.add_node("final_prediction_node", final_prediction_node)

# Define edges (sequential execution)
workflow.set_entry_point("performance_analysis_node")
workflow.add_edge("performance_analysis_node", "weather_strategy_node")
workflow.add_edge("weather_strategy_node", "circuit_analysis_node")
workflow.add_edge("circuit_analysis_node", "team_dynamics_node")
workflow.add_edge("team_dynamics_node", "technical_assessment_node")
workflow.add_edge("technical_assessment_node", "final_prediction_node")
workflow.add_edge("final_prediction_node", END)

# Compile the graph
graph = workflow.compile()

print("✅ LangGraph workflow created successfully!")
print("🏁 Ready for Spanish Grand Prix 2025 analysis")


✅ LangGraph workflow created successfully!
🏁 Ready for Spanish Grand Prix 2025 analysis


In [None]:
import pandas as pd
import json
from datetime import datetime
import traceback

print("🚀 Starting LangGraph AI Analysis...")
print("=" * 60)
print(f"SPANISH GRAND PRIX 2025 - AI PREDICTION ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})")
print("=" * 60)

try:
    # Verify FastF1 data
    print("\n📊 Checking FastF1 Data...")
    if not isinstance(metrics_df, pd.DataFrame) or metrics_df.empty or 'points_per_race' not in metrics_df.columns:
        raise ValueError("metrics_df is empty or missing points_per_race column. Check FastF1 API data loading in second cell.")
    if not isinstance(barcelona_performance, pd.DataFrame) or barcelona_performance.empty or 'avg_position_barcelona' not in barcelona_performance.columns:
        print("⚠️ Warning: barcelona_performance is empty or missing avg_position_barcelona column. Historical analysis may be limited.")
    print(f"✓ metrics_df: {len(metrics_df)} rows, columns: {list(metrics_df.columns)}")
    print(f"✓ barcelona_performance: {len(barcelona_performance)} rows, columns: {list(barcelona_performance.columns)}")

    # Initialize state with FastF1 data
    initial_state = {
        "metrics_df": metrics_df,
        "barcelona_performance": barcelona_performance,
        "performance_analysis": None,
        "weather_analysis": None,
        "circuit_analysis": None,
        "team_dynamics": None,
        "technical_assessment": None,
        "final_prediction": None,
        "error": None
    }

    # Execute the workflow
    print("\n🏎️ Executing LangGraph Workflow...")
    final_state = graph.invoke(initial_state)

    # Check for errors
    if final_state.get("error"):
        raise RuntimeError(f"Workflow error: {final_state['error']}")

    # Display individual node results
    print("\n" + "=" * 60)
    print("📋 INDIVIDUAL ANALYSIS RESULTS")
    print("=" * 60)

    analyses = [
        ("Performance Analysis", final_state.get("performance_analysis")),
        ("Weather Strategy", final_state.get("weather_analysis")),
        ("Circuit Analysis", final_state.get("circuit_analysis")),
        ("Team Dynamics", final_state.get("team_dynamics")),
        ("Technical Assessment", final_state.get("technical_assessment"))
    ]

    for name, result in analyses:
        print(f"\n🔍 {name}:")
        print("-" * 40)
        print(result if result else "No output generated")
        print("-" * 40)

    # Display final prediction
    print("\n" + "=" * 60)
    print("🏁 FINAL RACE PREDICTION")
    print("=" * 60)
    final_prediction = final_state.get("final_prediction", "No prediction generated")
    print(final_prediction)

    # Extract and highlight the winner
    winner = "Unknown"
    if final_prediction:
        lines = final_prediction.split("\n")
        for line in lines:
            if line.strip().startswith("1."):
                winner = line.strip().split(":")[0].replace("1.", "").strip()
                break
    print(f"\n🥇 Predicted Winner: {winner}")

    # Save results to JSON
    results_dict = {
        "timestamp": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        "event": "Spanish Grand Prix 2025",
        "metrics_df_columns": list(metrics_df.columns) if isinstance(metrics_df, pd.DataFrame) else [],
        "barcelona_performance_columns": list(barcelona_performance.columns) if isinstance(barcelona_performance, pd.DataFrame) else [],
        "analyses": {name: result for name, result in analyses},
        "final_prediction": final_prediction,
        "predicted_winner": winner
    }
    with open("spanish_gp_2025_results.json", "w") as f:
        json.dump(results_dict, f, indent=4)
    print("\n💾 Results saved to 'spanish_gp_2025_results.json'")

except Exception as e:
    print(f"❌ Error in AI analysis: {str(e)}")
    print("\n📝 Detailed Error Traceback:")
    print("-" * 40)
    print(traceback.format_exc())
    print("-" * 40)
    print("📝 Debugging Information:")
    print("- Verify FastF1 API data (metrics_df and barcelona_performance) is loaded correctly in the second cell.")
    print("- Ensure OpenAI API key is valid and has sufficient quota.")
    print("- Check LangGraph workflow in third cell for node definitions.")

    # Provide fallback analysis
    print("\n🔄 Providing Fallback Analysis...")
    fallback_prediction = f"FALLBACK AI ANALYSIS - Spanish Grand Prix 2025 ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})\n\n"

    if isinstance(metrics_df, pd.DataFrame) and not metrics_df.empty and 'points_per_race' in metrics_df.columns:
        drivers = [
            (row['Driver'], row['points_per_race'], row['consistency_score'])
            for _, row in metrics_df.iterrows()
            if pd.notna(row['points_per_race'])
        ]
        drivers.sort(key=lambda x: x[1], reverse=True)
        top_drivers = drivers[:5]
        if top_drivers:
            fallback_prediction += "🥇 TOP DRIVERS (based on FastF1 data):\n"
            for i, (driver, points, consistency) in enumerate(top_drivers, 1):
                fallback_prediction += f"{i}. {driver}: {points:.2f} points/race (Consistency: {consistency:.2f})\n"
            winner = top_drivers[0][0]
    else:
        fallback_prediction += (
            "🥇 TOP DRIVERS (generic, due to missing data):\n"
            "1. VER: Strong historical form at Barcelona\n"
            "2. LEC: Consistent performer\n"
            "3. NOR: Competitive in recent races\n"
            "4. HAM: Experienced at Barcelona\n"
            "5. SAI: Strong home race potential\n"
        )
        winner = "VER"

    if isinstance(barcelona_performance, pd.DataFrame) and not barcelona_performance.empty and 'avg_position_barcelona' in barcelona_performance.columns:
        drivers = [
            (row['Abbreviation'], row['avg_position_barcelona'], row['races_at_barcelona'])
            for _, row in barcelona_performance.iterrows()
            if pd.notna(row['avg_position_barcelona'])
        ]
        drivers.sort(key=lambda x: x[1])
        top_barcelona = drivers[:5]
        if top_barcelona:
            fallback_prediction += "\n📊 HISTORICAL PERFORMANCE AT BARCELONA:\n"
            for driver, avg_pos, races in top_barcelona:
                fallback_prediction += f"- {driver}: Avg position {avg_pos:.1f} (Races: {races})\n"

    fallback_prediction += (
        "\n🔍 KEY FACTORS:\n"
        "- Weather: Mostly clear (typical for May in Barcelona), favoring aerodynamic efficiency\n"
        "- Tire strategy: Critical due to Barcelona's high tire wear\n"
        "- Circuit: Limited overtaking, grid position is key\n"
        "- Historical performance: Favors experienced drivers with strong car setups\n"
        f"\n🥇 Predicted Winner (Fallback): {winner}"
    )

    print(fallback_prediction)

    # Save fallback results to JSON
    fallback_dict = {
        "timestamp": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        "event": "Spanish Grand Prix 2025",
        "status": "Fallback due to error",
        "error": str(e),
        "traceback": traceback.format_exc(),
        "metrics_df_columns": list(metrics_df.columns) if isinstance(metrics_df, pd.DataFrame) else [],
        "barcelona_performance_columns": list(barcelona_performance.columns) if isinstance(barcelona_performance, pd.DataFrame) else [],
        "fallback_analysis": fallback_prediction,
        "predicted_winner": winner
    }
    with open("spanish_gp_2025_fallback.json", "w") as f:
        json.dump(fallback_dict, f, indent=4)
    print("\n💾 Fallback results saved to 'spanish_gp_2025_fallback.json'")

print("\n✅ AI Analysis Complete!")

🚀 Starting LangGraph AI Analysis...
SPANISH GRAND PRIX 2025 - AI PREDICTION (2025-05-29 02:08:32)

📊 Checking FastF1 Data...
✓ metrics_df: 21 rows, columns: ['Driver', 'avg_finish_position', 'consistency_score', 'points_per_race', 'finishing_rate', 'total_points', 'avg_quali_position', 'quali_consistency']
✓ barcelona_performance: 27 rows, columns: ['Abbreviation', 'avg_position_barcelona', 'races_at_barcelona', 'total_points_barcelona']

🏎️ Executing LangGraph Workflow...
🔍 Running Performance Analysis...
🌤️ Running Weather Strategy Analysis...
🏎️ Running Circuit Analysis...
🤝 Running Team Dynamics Analysis...
🔧 Running Technical Assessment...
🏁 Generating Final Prediction...

📋 INDIVIDUAL ANALYSIS RESULTS

🔍 Performance Analysis:
----------------------------------------
Top 5 drivers by points per race for Spanish Grand Prix 2025:
- RUS: 15.67 points/race (Consistency: 0.32)
- LEC: 15.00 points/race (Consistency: 0.50)
- VER: 14.33 points/race (Consistency: 0.27)
- SAI: 13.67 points/

In [None]:
# F1 Spanish Grand Prix 2025 - Interactive Race Simulation

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import random
from datetime import datetime
import time

# Install required packages (run this first in Colab)
# !pip install plotly kaleido

# Set random seed for reproducible results
random.seed(42)
np.random.seed(42)

class F1RaceSimulator:
    def __init__(self):
        # Driver data based on AI prediction
        self.drivers = {
            'Max Verstappen': {'team': 'Red Bull', 'color': '#0600EF', 'skill': 0.95, 'consistency': 0.9},
            'Lewis Hamilton': {'team': 'Mercedes', 'color': '#00D2BE', 'skill': 0.93, 'consistency': 0.88},
            'George Russell': {'team': 'Mercedes', 'color': '#00D2BE', 'skill': 0.89, 'consistency': 0.85},
            'Charles Leclerc': {'team': 'Ferrari', 'color': '#DC143C', 'skill': 0.91, 'consistency': 0.82},
            'Carlos Sainz': {'team': 'Ferrari', 'color': '#DC143C', 'skill': 0.88, 'consistency': 0.84},
            'Lando Norris': {'team': 'McLaren', 'color': '#FF8700', 'skill': 0.86, 'consistency': 0.83},
            'Sergio Pérez': {'team': 'Red Bull', 'color': '#0600EF', 'skill': 0.84, 'consistency': 0.79},
            'Oscar Piastri': {'team': 'McLaren', 'color': '#FF8700', 'skill': 0.82, 'consistency': 0.81},
            'Fernando Alonso': {'team': 'Aston Martin', 'color': '#006F62', 'skill': 0.85, 'consistency': 0.86},
            'Lance Stroll': {'team': 'Aston Martin', 'color': '#006F62', 'skill': 0.75, 'consistency': 0.77}
        }

        self.total_laps = 66  # Spanish GP lap count
        self.race_data = []

    def simulate_race(self):
        """Simulate the complete race with realistic position changes"""
        print("🏎️ Simulating Spanish Grand Prix 2025...")

        # Initialize starting positions (based on AI prediction)
        positions = list(self.drivers.keys())

        # Store lap-by-lap data
        for lap in range(self.total_laps + 1):
            lap_data = {
                'lap': lap,
                'positions': positions.copy(),
                'times': self._generate_lap_times(positions, lap)
            }
            self.race_data.append(lap_data)

            # Simulate position changes
            if lap < self.total_laps:
                positions = self._simulate_lap_changes(positions, lap)

        print("✅ Race simulation complete!")
        return self.race_data

    def _generate_lap_times(self, positions, lap):
        """Generate realistic lap times for each driver"""
        times = {}
        base_time = 78.0  # Base lap time in seconds

        for i, driver in enumerate(positions):
            driver_data = self.drivers[driver]

            # Add position-based time penalty
            position_penalty = i * 0.1

            # Add skill-based variation
            skill_factor = (1 - driver_data['skill']) * 2

            # Add consistency variation
            consistency_var = np.random.normal(0, 1 - driver_data['consistency'])

            # Tire degradation effect
            tire_deg = lap * 0.01

            lap_time = base_time + position_penalty + skill_factor + consistency_var + tire_deg
            times[driver] = round(lap_time, 3)

        return times

    def _simulate_lap_changes(self, positions, lap):
        """Simulate realistic position changes during the race"""
        new_positions = positions.copy()

        # Pit stop windows (laps 15-25 and 35-45)
        if 15 <= lap <= 25 or 35 <= lap <= 45:
            # Simulate pit stops
            if random.random() < 0.3:  # 30% chance of pit stop
                pit_driver_idx = random.randint(0, min(5, len(positions)-1))
                pit_driver = new_positions.pop(pit_driver_idx)
                # Driver loses positions during pit stop
                new_position = min(len(new_positions), pit_driver_idx + random.randint(2, 4))
                new_positions.insert(new_position, pit_driver)

        # Regular overtaking (more likely in middle of race)
        overtake_probability = 0.15 if 10 <= lap <= 50 else 0.08

        if random.random() < overtake_probability:
            # Random overtake between adjacent positions
            pos_idx = random.randint(1, len(new_positions) - 1)
            if pos_idx < len(new_positions):
                # Swap positions
                new_positions[pos_idx], new_positions[pos_idx-1] = new_positions[pos_idx-1], new_positions[pos_idx]

        return new_positions

    def create_position_chart(self):
        """Create an interactive position progression chart"""
        # Prepare data for plotting
        laps = []
        drivers_data = {driver: [] for driver in self.drivers.keys()}

        for lap_data in self.race_data:
            laps.append(lap_data['lap'])
            positions = lap_data['positions']

            for driver in self.drivers.keys():
                if driver in positions:
                    pos = positions.index(driver) + 1
                else:
                    pos = len(positions)
                drivers_data[driver].append(pos)

        # Create the plot
        fig = go.Figure()

        for driver, data in drivers_data.items():
            if driver in list(self.drivers.keys())[:10]:  # Top 10 drivers
                fig.add_trace(go.Scatter(
                    x=laps,
                    y=data,
                    mode='lines+markers',
                    name=driver,
                    line=dict(color=self.drivers[driver]['color'], width=3),
                    marker=dict(size=6),
                    hovertemplate=f'<b>{driver}</b><br>Lap: %{{x}}<br>Position: %{{y}}<extra></extra>'
                ))

        # Customize layout
        fig.update_layout(
            title={
                'text': '🏁 Spanish Grand Prix 2025 - Race Position Progression<br><sub>AI-Powered Simulation Results</sub>',
                'x': 0.5,
                'font': {'size': 24, 'color': '#FF1E00'}
            },
            xaxis_title='Lap',
            yaxis_title='Position',
            yaxis=dict(
                autorange='reversed',
                dtick=1,
                gridcolor='rgba(128, 128, 128, 0.3)'
            ),
            xaxis=dict(
                dtick=5,
                gridcolor='rgba(128, 128, 128, 0.3)'
            ),
            plot_bgcolor='rgba(0, 0, 0, 0.9)',
            paper_bgcolor='rgba(0, 0, 0, 0.95)',
            font=dict(color='white', family='Arial'),
            legend=dict(
                orientation="v",
                yanchor="top",
                y=1,
                xanchor="left",
                x=1.02,
                bgcolor='rgba(0, 0, 0, 0.8)',
                bordercolor='rgba(255, 255, 255, 0.2)',
                borderwidth=1
            ),
            width=1200,
            height=700,
            margin=dict(r=150)
        )

        # Add race phases annotations
        fig.add_vrect(x0=15, x1=25, fillcolor="yellow", opacity=0.1,
                     annotation_text="Pit Window 1", annotation_position="top left")
        fig.add_vrect(x0=35, x1=45, fillcolor="yellow", opacity=0.1,
                     annotation_text="Pit Window 2", annotation_position="top left")

        return fig

    def create_final_results_chart(self):
        """Create a final results bar chart"""
        final_positions = self.race_data[-1]['positions'][:10]

        # Create bar chart
        fig = go.Figure(data=[
            go.Bar(
                x=[f"{i+1}. {driver}" for i, driver in enumerate(final_positions)],
                y=list(range(10, 0, -1)),
                marker_color=[self.drivers[driver]['color'] for driver in final_positions],
                text=[f"P{i+1}" for i in range(10)],
                textposition='auto',
                hovertemplate='<b>%{x}</b><br>Final Position: P%{customdata}<extra></extra>',
                customdata=[i+1 for i in range(10)]
            )
        ])

        fig.update_layout(
            title={
                'text': '🏆 Spanish Grand Prix 2025 - Final Results<br><sub>Top 10 Finishers</sub>',
                'x': 0.5,
                'font': {'size': 20, 'color': '#FF1E00'}
            },
            xaxis_title='Driver',
            yaxis_title='Points Value',
            plot_bgcolor='rgba(0, 0, 0, 0.9)',
            paper_bgcolor='rgba(0, 0, 0, 0.95)',
            font=dict(color='white', family='Arial'),
            width=1000,
            height=500,
            xaxis=dict(tickangle=45)
        )

        return fig

    def create_lap_time_analysis(self):
        """Create lap time analysis chart"""
        # Get lap times for top 5 drivers
        top_5_drivers = list(self.drivers.keys())[:5]

        fig = go.Figure()

        for driver in top_5_drivers:
            lap_times = []
            laps = []

            for lap_data in self.race_data[1:]:  # Skip lap 0
                if driver in lap_data['times']:
                    lap_times.append(lap_data['times'][driver])
                    laps.append(lap_data['lap'])

            fig.add_trace(go.Scatter(
                x=laps,
                y=lap_times,
                mode='lines',
                name=driver,
                line=dict(color=self.drivers[driver]['color'], width=2),
                hovertemplate=f'<b>{driver}</b><br>Lap: %{{x}}<br>Time: %{{y:.3f}}s<extra></extra>'
            ))

        fig.update_layout(
            title={
                'text': '⏱️ Lap Time Analysis - Top 5 Drivers<br><sub>Performance Throughout the Race</sub>',
                'x': 0.5,
                'font': {'size': 18, 'color': '#FF1E00'}
            },
            xaxis_title='Lap',
            yaxis_title='Lap Time (seconds)',
            plot_bgcolor='rgba(0, 0, 0, 0.9)',
            paper_bgcolor='rgba(0, 0, 0, 0.95)',
            font=dict(color='white', family='Arial'),
            width=1000,
            height=500,
            legend=dict(bgcolor='rgba(0, 0, 0, 0.8)')
        )

        return fig

# Initialize and run simulation
def run_f1_simulation():
    """Main function to run the complete F1 simulation"""
    print("🏎️ Starting F1 Spanish Grand Prix 2025 Simulation...")
    print("=" * 60)

    # Create simulator instance
    simulator = F1RaceSimulator()

    # Run the race simulation
    race_data = simulator.simulate_race()

    # Create visualizations
    print("\n📊 Creating interactive visualizations...")

    # Position progression chart
    position_fig = simulator.create_position_chart()
    position_fig.show()

    # Final results
    results_fig = simulator.create_final_results_chart()
    results_fig.show()

    # Lap time analysis
    laptime_fig = simulator.create_lap_time_analysis()
    laptime_fig.show()

    # Print final results
    final_positions = race_data[-1]['positions']
    print("\n🏁 FINAL RACE RESULTS:")
    print("=" * 40)
    for i, driver in enumerate(final_positions[:10]):
        print(f"P{i+1:2d}: {driver} ({simulator.drivers[driver]['team']})")

    print(f"\n🥇 Race Winner: {final_positions[0]}")
    print(f"🥈 Second Place: {final_positions[1]}")
    print(f"🥉 Third Place: {final_positions[2]}")

    return simulator, race_data

# Run the simulation
if __name__ == "__main__":
    simulator, race_data = run_f1_simulation()

# Additional analysis functions
def get_race_statistics(simulator, race_data):
    """Get detailed race statistics"""
    final_positions = race_data[-1]['positions']

    stats = {
        'winner': final_positions[0],
        'podium': final_positions[:3],
        'points_finishers': final_positions[:10],
        'total_laps': len(race_data) - 1,
        'race_duration': f"{len(race_data) * 1.3:.1f} minutes",
    }

    return stats

# Example usage for additional insights
print("\n📈 Additional Race Statistics:")
stats = get_race_statistics(simulator, race_data)
for key, value in stats.items():
    print(f"{key.replace('_', ' ').title()}: {value}")

print("\n🎯 Ready for your LinkedIn post! 🎯")

🏎️ Starting F1 Spanish Grand Prix 2025 Simulation...
🏎️ Simulating Spanish Grand Prix 2025...
✅ Race simulation complete!

📊 Creating interactive visualizations...



🏁 FINAL RACE RESULTS:
P 1: Max Verstappen (Red Bull)
P 2: Charles Leclerc (Ferrari)
P 3: Sergio Pérez (Red Bull)
P 4: Carlos Sainz (Ferrari)
P 5: George Russell (Mercedes)
P 6: Oscar Piastri (McLaren)
P 7: Fernando Alonso (Aston Martin)
P 8: Lando Norris (McLaren)
P 9: Lewis Hamilton (Mercedes)
P10: Lance Stroll (Aston Martin)

🥇 Race Winner: Max Verstappen
🥈 Second Place: Charles Leclerc
🥉 Third Place: Sergio Pérez

📈 Additional Race Statistics:
Winner: Max Verstappen
Podium: ['Max Verstappen', 'Charles Leclerc', 'Sergio Pérez']
Points Finishers: ['Max Verstappen', 'Charles Leclerc', 'Sergio Pérez', 'Carlos Sainz', 'George Russell', 'Oscar Piastri', 'Fernando Alonso', 'Lando Norris', 'Lewis Hamilton', 'Lance Stroll']
Total Laps: 66
Race Duration: 87.1 minutes

🎯 Ready for your LinkedIn post! 🎯


In [40]:
 !pip install -U kaleido


