In [45]:
import fastf1
import os
import pandas as pd
from matplotlib import pyplot as plt
from dotenv import load_dotenv
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from fuzzywuzzy import process
import numpy as np
from datetime import timedelta
from collections import defaultdict

# Get the base directory path
fastf1.Cache.enable_cache('../data')  # Cache in /data

In [46]:
# === CONFIG ===
DRY_CONDITION = "dry"
WET_CONDITION = "wet"
CURRENT_YEAR = 2025
MIN_RACES_REQUIRED = 2
MAX_POINTS = 25
MAX_GRID = 20
ALPHA = 0.7  # weight for normalized points
BETA = 0.3   # weight for normalized position delta
POINTS_TABLE = {1: 25, 2: 18, 3: 15, 4: 12, 5: 10, 6: 8, 7: 6, 8: 4, 9: 2, 10: 1}

In [None]:
# === Mocked functions for demo - Replace with real API data ===
def get_races():
    """Return list of races and their weather condition"""
    # Example: [{"year": 2025, "session_name": "British Grand Prix", "weather": "wet"}, {"year": 2025, "session_name": "Italian Grand Prix", "weather": "dry"}]
    return [
        {"year": 2025, "session_name": "Silverstone", "weather": "wet"},
        {"year": 2025, "session_name": "Australia", "weather": "wet"},
        {"year": 2025, "session_name": "Belgium", "weather": "wet"},
        {"year": 2024, "session_name": "Sao Paulo", "weather": "wet"},
        {"year": 2025, "session_name": "China", "weather": "dry"},
        {"year": 2025, "session_name": "Bahrain", "weather": "dry"},
        {"year": 2025, "session_name": "Miami", "weather": "dry"},
        {"year": 2025, "session_name": "Monaco", "weather": "dry"},
        {"year": 2025, "session_name": "Canada", "weather": "dry"},
        {"year": 2025, "session_name": "Austria", "weather": "dry"},
        {"year": 2025, "session_name": "Japan", "weather": "dry"}
        # Add more races as needed
    ]

def get_race_results(year: int, session_name: str = 'British Grand Prix'):
    """Return list of dicts with Driver and finishing_position"""
    # Example: [{"Driver": "VER", "position": 1}, {"Driver": "LEC", "position": 2}]
    if not isinstance(year, int) or year < 1950:
        raise ValueError("Year must be a valid integer (>=1950).")
    
    try:
        session = fastf1.get_session(year, session_name, 'R')
        session.load()
    except Exception as e:
        raise ValueError(f"Failed to load session data: {e}")
    # Return driver results as list of dicts
    results_df = session.results
    return [
        {
            "Driver": row['Abbreviation'], 
            "position": row['Position'],
            "grid_position": row['GridPosition'],
        }
        for _, row in results_df.iterrows()
    ]

In [None]:
# === Normalized Points + Gains ===
def calculate_performance_factors():
    driver_stats = defaultdict(lambda: {
        "wet_scores": [],
        "dry_scores": [],
        "wet_points": 0, 
        "wet_races": 0,
        "dry_points": 0, 
        "dry_races": 0
    })

    races = get_races()

    for race in races:
        year = race["year"]
        session_name = race["session_name"]
        weather = race["weather"]  # "wet" or "dry"
        results = get_race_results(year, session_name)

        for result in results:
            driver = result["Driver"]
            grid = result.get("grid_position")
            position = result.get("position")
            points = result.get("points", 0)

             # === Handle pit lane or missing grid values ===
            if grid is None or grid <= 0:
                grid = MAX_GRID  # assume back of the grid start

            if position is None or position <= 0:
                continue

            # Normalize
            norm_points = points / MAX_POINTS
            norm_gain = (grid - position) / (MAX_GRID - 1)

            hybrid_score = ALPHA * norm_points + BETA * norm_gain

            if weather == "wet":
                driver_stats[driver]["wet_scores"].append(hybrid_score)
                driver_stats[driver]["wet_points"] += points
                driver_stats[driver]["wet_races"] += 1
            elif weather == "dry":
                driver_stats[driver]["dry_scores"].append(hybrid_score)
                driver_stats[driver]["dry_points"] += points
                driver_stats[driver]["dry_races"] += 1

    # Compute averages
    wet_averages = {}
    dry_averages = {}
    wet_deltas = {}

    for driver, stats in driver_stats.items():
        wet_scores = stats["wet_scores"]
        dry_scores = stats["dry_scores"]

        wet_avg = sum(wet_scores) / len(wet_scores) if len(wet_scores) >= MIN_RACES_REQUIRED else None
        dry_avg = sum(dry_scores) / len(dry_scores) if len(dry_scores) >= MIN_RACES_REQUIRED else None

        wet_averages[driver] = wet_avg
        dry_averages[driver] = dry_avg

        if wet_avg is not None and dry_avg is not None:
            wet_deltas[driver] = wet_avg - dry_avg
        else:
            wet_deltas[driver] = None

    # Min-max scaling
    def min_max_scale(value, min_val, max_val):
        if value is None or max_val == min_val:
            return 0.5
        return round((value - min_val) / (max_val - min_val), 3)

    valid_deltas = [v for v in wet_deltas.values() if v is not None]
    valid_dry = [v for v in dry_averages.values() if v is not None]

    min_delta = min(valid_deltas) if valid_deltas else 0
    max_delta = max(valid_deltas) if valid_deltas else 0
    min_dry = min(valid_dry) if valid_dry else 0
    max_dry = max(valid_dry) if valid_dry else 0

    # Final output
    rows = []
    for driver, stats in driver_stats.items():
        wet_factor = min_max_scale(wet_deltas[driver], min_delta, max_delta)
        dry_factor = min_max_scale(dry_averages[driver], min_dry, max_dry)

        rows.append({
            "Driver": driver,
            "wet_performance_factor": wet_factor,
            "dry_performance_factor": dry_factor,
            "wet_points": stats["wet_points"],
            "dry_points": stats["dry_points"],
            "wet_races": stats["wet_races"],
            "dry_races": stats["dry_races"],
            "wet_deltas": wet_deltas[driver],
            "dry_averages": dry_averages[driver],
        })

    return pd.DataFrame(rows)

In [51]:
df = calculate_performance_factors()

core           INFO 	Loading data for British Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '81', '27', '44', '1', '10', '18', '23', '14', '63', '87', '55', '31', '16', '22', '12', '6', '5', '30', '43']
core           INFO 	Loading data for Australian Grand Prix - 

In [54]:
df

Unnamed: 0,driver_id,wet_performance_factor,dry_performance_factor,wet_points,dry_points,wet_races,dry_races,wet_deltas,dry_averages
0,NOR,0.559,0.231,0,0,4,7,3.469447e-18,-0.015789
1,PIA,0.41,0.423,0,0,4,7,-0.03045113,0.006767
2,HUL,0.994,0.481,0,0,4,7,0.08909774,0.013534
3,HAM,0.873,0.288,0,0,4,7,0.06428571,-0.009023
4,VER,0.953,0.115,0,0,4,7,0.0806391,-0.029323
5,GAS,0.843,0.308,0,0,4,7,0.05808271,-0.006767
6,STR,0.586,0.654,0,0,4,7,0.005639098,0.033835
7,ALB,0.52,0.231,0,0,4,7,-0.007894737,-0.015789
8,ALO,0.528,0.115,0,0,4,7,-0.006203008,-0.029323
9,RUS,0.388,0.462,0,0,4,7,-0.03496241,0.011278


In [55]:
# save the dataframe to a CSV file
df.to_csv('../data/performance/wet_performance_factors.csv', index=False)