# PitWall Live - Driver Performance Analysis

This notebook develops comprehensive driver performance analysis tools:
1. Head-to-head driver comparisons
2. Teammate performance battles
3. Performance consistency metrics
4. Circuit-specific driver strength analysis
5. Season trend analysis
6. Driver skill decomposition (qualifying, race pace, tire management)

## Analysis Objectives
- Quantify driver performance across multiple dimensions
- Compare teammates to isolate driver skill from car performance
- Identify driver strengths and weaknesses
- Track performance trends over time

In [None]:
import os
import sys
from pathlib import Path

# Add src to path
sys.path.insert(0, str(Path.cwd().parent / 'src'))

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from tqdm.notebook import tqdm
from typing import List, Dict, Tuple, Optional
from dataclasses import dataclass
from scipy import stats

import fastf1
from fastf1 import get_session, get_event_schedule

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# Plot defaults shared by every figure in the notebook
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'figure.figsize': (14, 6),
    'font.size': 11,
})

# FastF1 keeps downloaded session data on disk; create the folder on first run
CACHE_DIR = Path('../data/cache')
CACHE_DIR.mkdir(parents=True, exist_ok=True)
fastf1.Cache.enable_cache(str(CACHE_DIR))

print("Driver Performance Analysis - Setup Complete")

## 1. Load Driver Performance Data

In [None]:
@dataclass
class DriverRacePerformance:
    """Structured driver race performance data.

    One record describes a single driver's result in a single race.

    NOTE(review): nothing in the visible part of this notebook instantiates
    this class; the loader emits plain dicts with similarly named keys.
    """
    # Driver / race identification
    driver: str
    team: str
    season: int
    round_num: int
    event: str
    # Race outcome
    grid_position: int
    finish_position: int
    points: float
    positions_gained: int  # presumably grid_position - finish_position (positive = gained) - confirm
    # Lap-time statistics; presumably in seconds like the loader's metrics - confirm
    avg_lap_time: float
    best_lap_time: float
    lap_time_consistency: float
    dnf: bool
    # Optional context, None when not available
    quali_position: Optional[int] = None
    teammate: Optional[str] = None
    teammate_finish: Optional[int] = None


class DriverDataLoader:
    """Load per-driver race performance rows through FastF1.

    Every row describes one driver in one race: grid and finish position,
    points, lap-time statistics, qualifying position and the teammate's
    finishing position.
    """

    # Statuses (besides '+N Lap(s)') that mean the driver reached the flag.
    # NOTE(review): 'Lapped' is what newer result feeds appear to report
    # instead of '+N Laps' - confirm against the FastF1 version in use.
    _CLASSIFIED_STATUSES = ('Finished', 'Lapped')

    def __init__(self, cache_dir: Path = CACHE_DIR):
        self.cache_dir = cache_dir

    def load_season_data(self, year: int, max_races: Optional[int] = None) -> pd.DataFrame:
        """Load all driver performance data for a season.

        Args:
            year: Championship season to load.
            max_races: Keep only the first ``max_races`` non-testing events;
                ``None`` (or 0) loads the whole schedule.

        Returns:
            One row per driver per race. Races that fail to load are reported
            with ``print`` and skipped, so the frame may be empty (and then
            has no columns).
        """
        schedule = get_event_schedule(year)
        race_events = schedule[schedule['EventFormat'] != 'testing']

        if max_races:
            race_events = race_events.head(max_races)

        all_data = []
        progress = tqdm(race_events.iterrows(),
                        total=len(race_events),
                        desc=f"Loading {year}")

        for _, event in progress:
            try:
                all_data.extend(self._load_race_performance(year, event))
            except Exception as e:
                # Best effort: one unavailable race must not abort the season.
                print(f"Error loading {event['EventName']}: {e}")

        return pd.DataFrame(all_data)

    def _load_quali_positions(self, year: int, event: pd.Series) -> Optional[pd.DataFrame]:
        """Return qualifying positions (Abbreviation, QualiPosition) or None.

        Qualifying is optional context, so any load failure is reported and
        the race rows are still produced without a qualifying position.
        """
        try:
            quali_session = get_session(year, event['EventName'], 'Q')
            quali_session.load()
            positions = quali_session.results[['Abbreviation', 'Position']]
        except Exception as e:
            print(f"Qualifying unavailable for {event['EventName']}: {e}")
            return None
        return positions.rename(columns={'Position': 'QualiPosition'})

    @staticmethod
    def _lap_time_metrics(driver_laps: pd.DataFrame) -> Tuple[float, float, float]:
        """Return (mean, best, std) lap time in seconds over clean laps.

        Clean laps are between 60 s and 180 s with no pit entry or exit.
        The std is only reported when more than three clean laps exist.
        """
        seconds = driver_laps['LapTime'].dt.total_seconds()
        clean = seconds[
            (seconds > 60) &
            (seconds < 180) &
            (driver_laps['PitInTime'].isna()) &
            (driver_laps['PitOutTime'].isna())
        ]

        if len(clean) == 0:
            return np.nan, np.nan, np.nan

        spread = clean.std() if len(clean) > 3 else np.nan
        return clean.mean(), clean.min(), spread

    @classmethod
    def _is_dnf(cls, status) -> bool:
        """Return True unless the status describes a classified finish.

        'Finished', 'Lapped' and any '+N Lap(s)' status count as finished;
        everything else (including a missing status) is treated as a DNF.
        """
        if not isinstance(status, str):
            return True
        if status in cls._CLASSIFIED_STATUSES:
            return False
        laps_down = status.startswith('+') and status.endswith(('Lap', 'Laps'))
        return not laps_down

    def _load_race_performance(self, year: int, event: pd.Series) -> List[Dict]:
        """Load performance data for a single race.

        Returns one dict per classified driver; raises whatever FastF1 raises
        when the race session itself cannot be loaded.
        """
        race_session = get_session(year, event['EventName'], 'R')
        race_session.load()

        results = race_session.results
        laps = race_session.laps

        quali_results = self._load_quali_positions(year, event)

        # Team -> list of driver abbreviations, used to find each teammate
        team_drivers = results.groupby('TeamName')['Abbreviation'].apply(list).to_dict()

        race_data = []

        for _, driver_result in results.iterrows():
            driver = driver_result['Abbreviation']
            team = driver_result['TeamName']

            avg_lap_time, best_lap_time, lap_consistency = self._lap_time_metrics(
                laps[laps['Driver'] == driver]
            )

            # First other driver entered by the same team, if any
            others = [t for t in team_drivers.get(team, []) if t != driver]
            teammate = others[0] if others else None
            teammate_finish = None
            if teammate:
                teammate_result = results[results['Abbreviation'] == teammate]
                if len(teammate_result) > 0:
                    teammate_finish = teammate_result['Position'].iloc[0]

            quali_pos = None
            if quali_results is not None:
                quali_row = quali_results[quali_results['Abbreviation'] == driver]
                if len(quali_row) > 0:
                    quali_pos = quali_row['QualiPosition'].iloc[0]

            status = driver_result.get('Status', '')

            race_data.append({
                'Driver': driver,
                'Team': team,
                'Season': year,
                'Round': event['RoundNumber'],
                'Event': event['EventName'],
                'Circuit': event['Location'],
                'GridPosition': driver_result['GridPosition'],
                'FinishPosition': driver_result['Position'],
                'Points': driver_result['Points'],
                # NOTE(review): a GridPosition of 0 presumably marks a pit-lane
                # start, which would make this strongly negative - confirm.
                'PositionsGained': driver_result['GridPosition'] - driver_result['Position'],
                'AvgLapTime': avg_lap_time,
                'BestLapTime': best_lap_time,
                'LapTimeConsistency': lap_consistency,
                'DNF': self._is_dnf(status),
                'QualiPosition': quali_pos,
                'Teammate': teammate,
                'TeammateFinish': teammate_finish,
                'Status': status
            })

        return race_data


# Initialize loader (uses the default cache directory, CACHE_DIR)
driver_loader = DriverDataLoader()

In [None]:
# Load 2023 season data (limited to the first 10 events for the demo)
print("Loading driver performance data...")
performance_df = driver_loader.load_season_data(2023, max_races=10)
print(f"\nLoaded {len(performance_df)} driver-race entries")

# If every race failed to load the frame has no columns at all, so only
# summarise by column when there is something to summarise.
if len(performance_df) > 0:
    print(f"Unique drivers: {performance_df['Driver'].nunique()}")
    print(f"Races covered: {performance_df['Event'].nunique()}")
else:
    print("No race data could be loaded - check the errors reported above.")

In [None]:
# Preview data: first 10 driver-race rows, rendered as the cell output
performance_df.head(10)

## 2. Driver Performance Metrics Calculator

In [None]:
class DriverMetricsCalculator:
    """Aggregate per-race driver rows into season metrics and skill scores."""

    # Columns of the frame returned by calculate_season_metrics, in order.
    _METRIC_COLUMNS = [
        'Driver', 'Team', 'Races', 'Wins', 'WinRate', 'Podiums', 'PodiumRate',
        'PointsFinishes', 'TotalPoints', 'PointsPerRace', 'AvgFinishPosition',
        'AvgQualiPosition', 'Poles', 'AvgRacePace', 'Consistency',
        'AvgPositionsGained', 'BeatTeammateRate', 'DNFRate',
    ]

    # Columns added on top of the metrics by calculate_skill_scores.
    _SKILL_COLUMNS = [
        'QualiSkill', 'RacePaceSkill', 'ConsistencySkill',
        'RacecraftSkill', 'OverallSkill',
    ]

    def __init__(self):
        self.metrics = {}

    def calculate_season_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
        """Calculate aggregated season metrics for each driver.

        Drivers with fewer than three rows are skipped.

        Args:
            df: Per-race rows as produced by the data loader.

        Returns:
            One row per driver, sorted by ``TotalPoints`` descending. When no
            driver qualifies (or ``df`` has no ``Driver`` column) an empty
            frame with the usual columns is returned instead of raising.
        """
        if 'Driver' not in df.columns:
            return pd.DataFrame(columns=self._METRIC_COLUMNS)

        metrics = []

        for driver in df['Driver'].unique():
            driver_data = df[df['Driver'] == driver]

            # Too few races for the averages to be meaningful
            if len(driver_data) < 3:
                continue

            races = len(driver_data)
            wins = (driver_data['FinishPosition'] == 1).sum()
            podiums = (driver_data['FinishPosition'] <= 3).sum()
            total_points = driver_data['Points'].sum()

            # Qualifying: only races where a qualifying position is known
            quali_data = driver_data[driver_data['QualiPosition'].notna()]
            has_quali = len(quali_data) > 0

            # Race pace: only races with at least one clean lap
            pace_data = driver_data[driver_data['AvgLapTime'].notna()]
            has_pace = len(pace_data) > 0

            # Share of races finished ahead of the teammate
            teammate_races = driver_data[driver_data['TeammateFinish'].notna()]
            if len(teammate_races) > 0:
                beat_teammate = (
                    teammate_races['FinishPosition'] < teammate_races['TeammateFinish']
                ).mean()
            else:
                beat_teammate = np.nan

            metrics.append({
                'Driver': driver,
                'Team': driver_data['Team'].iloc[0],
                'Races': races,
                'Wins': wins,
                'WinRate': wins / races,
                'Podiums': podiums,
                'PodiumRate': podiums / races,
                'PointsFinishes': (driver_data['FinishPosition'] <= 10).sum(),
                'TotalPoints': total_points,
                'PointsPerRace': total_points / races,
                # DNFs are excluded so retirements do not distort the average
                'AvgFinishPosition': driver_data[~driver_data['DNF']]['FinishPosition'].mean(),
                'AvgQualiPosition': quali_data['QualiPosition'].mean() if has_quali else np.nan,
                'Poles': (quali_data['QualiPosition'] == 1).sum() if has_quali else 0,
                'AvgRacePace': pace_data['AvgLapTime'].mean() if has_pace else np.nan,
                'Consistency': pace_data['LapTimeConsistency'].mean() if has_pace else np.nan,
                'AvgPositionsGained': driver_data['PositionsGained'].mean(),
                'BeatTeammateRate': beat_teammate,
                'DNFRate': driver_data['DNF'].mean()
            })

        season = pd.DataFrame(metrics, columns=self._METRIC_COLUMNS)
        return season.sort_values('TotalPoints', ascending=False)

    @staticmethod
    def _inverted_score(values: pd.Series) -> np.ndarray:
        """Scale a lower-is-better metric to 0-100 where 100 is the best.

        Missing values are replaced by the worst (largest) observed value.
        """
        filled = values.fillna(values.max()).to_frame()
        return (1 - MinMaxScaler().fit_transform(filled)).flatten() * 100

    def calculate_skill_scores(self, df: pd.DataFrame) -> pd.DataFrame:
        """Calculate normalized skill scores across dimensions.

        Each skill is min-max scaled to 0-100 across the drivers in ``df``;
        a skill with no data at all defaults to 50 for everyone.
        ``OverallSkill`` weights qualifying 0.25, race pace 0.30,
        consistency 0.20 and racecraft 0.25.

        Returns:
            The season metrics plus the skill columns; an empty frame with
            those columns when there are no season metrics.
        """
        metrics_df = self.calculate_season_metrics(df)

        if len(metrics_df) == 0:
            return metrics_df.reindex(columns=self._METRIC_COLUMNS + self._SKILL_COLUMNS)

        # Lower-is-better metrics: position, lap time, lap-time spread
        for skill, source in (('QualiSkill', 'AvgQualiPosition'),
                              ('RacePaceSkill', 'AvgRacePace'),
                              ('ConsistencySkill', 'Consistency')):
            if metrics_df[source].notna().sum() > 0:
                metrics_df[skill] = self._inverted_score(metrics_df[source])
            else:
                metrics_df[skill] = 50

        # Racecraft: more positions gained is better, so no inversion
        racecraft = MinMaxScaler().fit_transform(
            metrics_df[['AvgPositionsGained']].fillna(0)
        )
        metrics_df['RacecraftSkill'] = racecraft.flatten() * 100

        metrics_df['OverallSkill'] = (
            metrics_df['QualiSkill'] * 0.25 +
            metrics_df['RacePaceSkill'] * 0.30 +
            metrics_df['ConsistencySkill'] * 0.20 +
            metrics_df['RacecraftSkill'] * 0.25
        )

        return metrics_df


# Initialize calculator (instance used by the metric cells that follow)
metrics_calc = DriverMetricsCalculator()

In [None]:
# Season aggregates per driver; show the headline columns of the first 15 rows
season_metrics = metrics_calc.calculate_season_metrics(performance_df)
print("Season Metrics:")
season_metrics.head(15)[[
    'Driver', 'Team', 'Races', 'Wins', 'Podiums', 'TotalPoints',
    'AvgFinishPosition', 'AvgQualiPosition', 'BeatTeammateRate',
]]

In [None]:
# Normalised 0-100 skill scores; list the 15 highest overall scores
skill_scores = metrics_calc.calculate_skill_scores(performance_df)
print("Driver Skill Scores:")
(
    skill_scores
    .sort_values('OverallSkill', ascending=False)
    .loc[:, ['Driver', 'Team', 'QualiSkill', 'RacePaceSkill',
             'ConsistencySkill', 'RacecraftSkill', 'OverallSkill']]
    .head(15)
)

## 3. Teammate Comparison Analysis

In [None]:
class TeammateAnalyzer:
    """Analyze head-to-head teammate performance."""

    def __init__(self):
        pass

    def calculate_teammate_battles(self, df: pd.DataFrame) -> pd.DataFrame:
        """Calculate teammate head-to-head statistics.

        Every pair of drivers that appeared for the same team is compared
        over the races both took part in; pairs with fewer than three common
        races are left out.

        Returns:
            One row per pair (see ``_analyze_pair`` for the columns) plus a
            ``Team`` column; an empty frame when nothing can be compared.
        """
        if 'Team' not in df.columns:
            return pd.DataFrame()

        battles = []

        for team in df['Team'].unique():
            team_data = df[df['Team'] == team]
            drivers = team_data['Driver'].unique()

            if len(drivers) < 2:
                continue

            # Every unordered pair of drivers within the team
            for idx, driver1 in enumerate(drivers):
                for driver2 in drivers[idx + 1:]:
                    battle = self._analyze_pair(team_data, driver1, driver2)
                    if battle is not None:
                        battle['Team'] = team
                        battles.append(battle)

        return pd.DataFrame(battles)

    @staticmethod
    def _rows_by_race(driver_rows: pd.DataFrame, key_cols: List[str]) -> Dict[tuple, pd.Series]:
        """Index a driver's rows by race key, keeping the first row per race."""
        indexed = {}
        for _, row in driver_rows.iterrows():
            indexed.setdefault(tuple(row[col] for col in key_cols), row)
        return indexed

    def _analyze_pair(self, team_data: pd.DataFrame, driver1: str, driver2: str) -> Optional[Dict]:
        """Analyze head-to-head between two teammates.

        Races are matched on (Season, Event) when a ``Season`` column exists,
        so the same Grand Prix in different years is not merged; without it
        the event name alone is used.

        Returns:
            A dict of head-to-head figures from ``driver1``'s point of view
            (gaps are ``driver2`` position minus ``driver1`` position), or
            None when there are fewer than three common races or nothing
            comparable in them.
        """
        key_cols = ['Season', 'Event'] if 'Season' in team_data.columns else ['Event']
        d1_races = self._rows_by_race(team_data[team_data['Driver'] == driver1], key_cols)
        d2_races = self._rows_by_race(team_data[team_data['Driver'] == driver2], key_cols)

        common_races = d1_races.keys() & d2_races.keys()

        if len(common_races) < 3:
            return None

        quali_wins_d1 = 0
        race_wins_d1 = 0
        total_quali_gap = 0
        total_race_gap = 0
        valid_quali = 0
        valid_race = 0

        for race in common_races:
            d1_race = d1_races[race]
            d2_race = d2_races[race]

            # Qualifying comparison (both positions must be known)
            if pd.notna(d1_race['QualiPosition']) and pd.notna(d2_race['QualiPosition']):
                if d1_race['QualiPosition'] < d2_race['QualiPosition']:
                    quali_wins_d1 += 1
                total_quali_gap += d2_race['QualiPosition'] - d1_race['QualiPosition']
                valid_quali += 1

            # Race comparison only counts when both cars finished
            if not d1_race['DNF'] and not d2_race['DNF']:
                if d1_race['FinishPosition'] < d2_race['FinishPosition']:
                    race_wins_d1 += 1
                total_race_gap += d2_race['FinishPosition'] - d1_race['FinishPosition']
                valid_race += 1

        if valid_quali == 0 and valid_race == 0:
            return None

        return {
            'Driver1': driver1,
            'Driver2': driver2,
            'CommonRaces': len(common_races),
            'QualiH2H': f"{quali_wins_d1}-{valid_quali - quali_wins_d1}" if valid_quali > 0 else 'N/A',
            'RaceH2H': f"{race_wins_d1}-{valid_race - race_wins_d1}" if valid_race > 0 else 'N/A',
            'D1QualiWinRate': quali_wins_d1 / valid_quali if valid_quali > 0 else np.nan,
            'D1RaceWinRate': race_wins_d1 / valid_race if valid_race > 0 else np.nan,
            'AvgQualiGap': total_quali_gap / valid_quali if valid_quali > 0 else np.nan,
            'AvgRaceGap': total_race_gap / valid_race if valid_race > 0 else np.nan
        }

    def plot_teammate_battle(self, df: pd.DataFrame, team: str):
        """Visualize teammate battle over the season.

        Draws a 2x2 figure for the first two drivers found for ``team``:
        race results, qualifying results, cumulative points and average race
        pace. Prints a message and returns when the team has fewer than two
        drivers.
        """
        team_data = df[df['Team'] == team].copy()
        drivers = team_data['Driver'].unique()

        if len(drivers) < 2:
            print(f"Not enough drivers for {team}")
            return

        driver1, driver2 = drivers[0], drivers[1]
        palette = {driver1: 'steelblue', driver2: 'coral'}

        _, axes = plt.subplots(2, 2, figsize=(14, 10))
        results_ax, quali_ax = axes[0, 0], axes[0, 1]
        points_ax, pace_ax = axes[1, 0], axes[1, 1]

        for driver, color in palette.items():
            driver_data = team_data[team_data['Driver'] == driver].sort_values('Round')

            results_ax.plot(driver_data['Round'], driver_data['FinishPosition'],
                            marker='o', label=driver, color=color)

            quali_data = driver_data[driver_data['QualiPosition'].notna()]
            quali_ax.plot(quali_data['Round'], quali_data['QualiPosition'],
                          marker='s', label=driver, color=color)

            points_ax.plot(driver_data['Round'], driver_data['Points'].cumsum(),
                           marker='o', label=driver, color=color)

        results_ax.set_xlabel('Round')
        results_ax.set_ylabel('Finish Position')
        results_ax.set_title(f'{team} - Race Results')
        results_ax.legend()
        results_ax.invert_yaxis()  # P1 at the top

        quali_ax.set_xlabel('Round')
        quali_ax.set_ylabel('Qualifying Position')
        quali_ax.set_title(f'{team} - Qualifying Results')
        quali_ax.legend()
        quali_ax.invert_yaxis()

        points_ax.set_xlabel('Round')
        points_ax.set_ylabel('Cumulative Points')
        points_ax.set_title(f'{team} - Points Progression')
        points_ax.legend()

        # Average race pace for every driver the team fielded
        avg_pace = team_data.groupby('Driver')['AvgLapTime'].mean()
        colors = ['steelblue' if d == driver1 else 'coral' for d in avg_pace.index]
        pace_ax.bar(avg_pace.index, avg_pace.values, color=colors)
        pace_ax.set_ylabel('Average Lap Time (s)')
        pace_ax.set_title(f'{team} - Average Race Pace')

        plt.tight_layout()
        plt.show()


# Initialize analyzer (instance used by the teammate cells that follow)
teammate_analyzer = TeammateAnalyzer()

In [None]:
# Calculate teammate battles across all loaded races
teammate_battles = teammate_analyzer.calculate_teammate_battles(performance_df)
print("Teammate Head-to-Head Battles:")
# Trailing bare expression: rendered as the cell output
teammate_battles

In [None]:
# Plot the battle for the team with the most driver-race rows
if len(performance_df) > 0:
    sample_team = performance_df['Team'].value_counts().idxmax()
    teammate_analyzer.plot_teammate_battle(performance_df, sample_team)

## 4. Circuit-Specific Performance Analysis

In [None]:
class CircuitPerformanceAnalyzer:
    """Analyze driver performance by circuit type and characteristics."""

    # Circuit classifications, matched against the 'Circuit' column.
    # The loader fills 'Circuit' from the schedule's Location field, which
    # appears to hold city-style names ('Sakhir', 'Budapest', 'Marina Bay',
    # 'Spa-Francorchamps'), so both spellings are listed.
    # NOTE(review): verify the Location spellings for the seasons in use.
    CIRCUIT_TYPES = {
        'Street': ['Monaco', 'Singapore', 'Marina Bay', 'Jeddah', 'Baku',
                   'Las Vegas', 'Melbourne'],
        'High-Speed': ['Monza', 'Spa', 'Spa-Francorchamps', 'Silverstone',
                       'Bahrain', 'Sakhir', 'Jeddah'],
        'Technical': ['Monaco', 'Hungary', 'Budapest', 'Singapore',
                      'Marina Bay', 'Zandvoort'],
        'High-Downforce': ['Monaco', 'Hungary', 'Budapest', 'Singapore',
                           'Marina Bay', 'Barcelona'],
        'Low-Downforce': ['Monza', 'Spa', 'Spa-Francorchamps', 'Baku']
    }

    def __init__(self):
        pass

    def analyze_circuit_performance(self, df: pd.DataFrame, driver: str) -> pd.DataFrame:
        """Analyze driver's performance across different circuits.

        Returns:
            One row per circuit (index ``Circuit``) with AvgFinish,
            BestFinish, Races, AvgQuali, TotalPoints and AvgPosGained,
            rounded to two decimals and sorted by best average finish.
        """
        driver_data = df[df['Driver'] == driver]

        circuit_stats = driver_data.groupby('Circuit').agg(
            AvgFinish=('FinishPosition', 'mean'),
            BestFinish=('FinishPosition', 'min'),
            Races=('FinishPosition', 'count'),
            AvgQuali=('QualiPosition', 'mean'),
            TotalPoints=('Points', 'sum'),
            AvgPosGained=('PositionsGained', 'mean'),
        ).round(2)

        return circuit_stats.sort_values('AvgFinish')

    def analyze_circuit_type_performance(self, df: pd.DataFrame) -> pd.DataFrame:
        """Analyze performance by circuit type for all drivers.

        Returns:
            One row per (driver, circuit type) with at least one race; a
            circuit listed under several types counts towards each of them.
        """
        results = []

        for driver in df['Driver'].unique():
            driver_data = df[df['Driver'] == driver]

            for circuit_type, circuits in self.CIRCUIT_TYPES.items():
                type_data = driver_data[driver_data['Circuit'].isin(circuits)]

                if len(type_data) == 0:
                    continue

                results.append({
                    'Driver': driver,
                    'CircuitType': circuit_type,
                    'Races': len(type_data),
                    'AvgFinish': type_data['FinishPosition'].mean(),
                    'AvgQuali': type_data['QualiPosition'].mean(),
                    'Points': type_data['Points'].sum()
                })

        return pd.DataFrame(results)

    def find_specialist_drivers(self, df: pd.DataFrame, circuit_type: str,
                                min_races: int = 1) -> pd.DataFrame:
        """Find drivers who excel at specific circuit types.

        Args:
            df: Per-race rows with a ``Circuit`` column.
            circuit_type: A key of ``CIRCUIT_TYPES``; unknown keys give an
                empty result.
            min_races: Minimum number of races at this circuit type a driver
                needs to be listed (default 1, i.e. everyone who raced there).

        Returns:
            Driver, AvgFinish, AvgQuali, TotalPoints and Races, sorted by
            best average finish; an empty frame when no race matches.
        """
        circuits = self.CIRCUIT_TYPES.get(circuit_type, [])
        type_data = df[df['Circuit'].isin(circuits)]

        if len(type_data) == 0:
            return pd.DataFrame()

        specialist_stats = type_data.groupby('Driver').agg(
            AvgFinish=('FinishPosition', 'mean'),
            AvgQuali=('QualiPosition', 'mean'),
            TotalPoints=('Points', 'sum'),
            Races=('Event', 'count'),
        ).reset_index()

        # Drop drivers with too small a sample at this circuit type
        specialist_stats = specialist_stats[specialist_stats['Races'] >= min_races]

        return specialist_stats.sort_values('AvgFinish')


# Initialize analyzer (instance used by the circuit cells that follow)
circuit_analyzer = CircuitPerformanceAnalyzer()

In [None]:
# Per-circuit performance for the driver with the most loaded entries
circuit_perf = None
if len(performance_df) > 0:
    # Pick a driver with data
    sample_driver = performance_df['Driver'].value_counts().index[0]
    circuit_perf = circuit_analyzer.analyze_circuit_performance(performance_df, sample_driver)
    print(f"Circuit Performance for {sample_driver}:")

# Only the last top-level expression of a cell is rendered, so the table must
# be referenced here rather than inside the `if` body (None renders nothing).
circuit_perf

In [None]:
# Find specialists for high-speed circuits (sorted by best average finish)
high_speed_specialists = circuit_analyzer.find_specialist_drivers(performance_df, 'High-Speed')
print("High-Speed Circuit Specialists:")
# Trailing bare expression: first 10 rows rendered as the cell output
high_speed_specialists.head(10)

## 5. Driver Performance Radar Chart

In [None]:
def create_driver_radar(skill_df: pd.DataFrame, drivers: List[str], title: str = "Driver Skill Comparison"):
    """Draw one radar (polar) polygon per driver over the four skill scores.

    Args:
        skill_df: Frame with a ``Driver`` column and the four ``*Skill``
            columns used as radar axes.
        drivers: Drivers to draw; names missing from ``skill_df`` are skipped.
        title: Figure title.
    """
    skill_columns = ['QualiSkill', 'RacePaceSkill', 'ConsistencySkill', 'RacecraftSkill']
    axis_labels = ['Qualifying', 'Race Pace', 'Consistency', 'Racecraft']

    # One evenly spaced spoke per skill; repeat the first to close the polygon
    axis_count = len(skill_columns)
    spoke_angles = [n / float(axis_count) * 2 * np.pi for n in range(axis_count)]
    closed_angles = spoke_angles + spoke_angles[:1]

    fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

    # One colour per requested driver, spread across the Set1 colormap
    palette = plt.cm.Set1(np.linspace(0, 1, len(drivers)))

    for driver, color in zip(drivers, palette):
        driver_rows = skill_df[skill_df['Driver'] == driver]
        if len(driver_rows) == 0:
            continue

        scores = driver_rows[skill_columns].values.flatten().tolist()
        polygon = scores + scores[:1]

        ax.plot(closed_angles, polygon, 'o-', linewidth=2, label=driver, color=color)
        ax.fill(closed_angles, polygon, alpha=0.15, color=color)

    ax.set_xticks(spoke_angles)
    ax.set_xticklabels(axis_labels, size=12)

    # Radial axis spans 0-100
    ax.set_ylim(0, 100)

    # Legend sits outside the plot so it does not cover the polygons
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0))

    plt.title(title, size=14, y=1.08)
    plt.tight_layout()
    plt.show()


# Create radar chart for top drivers (the five highest OverallSkill scores);
# skipped when no skill scores could be computed
if len(skill_scores) > 0:
    top_drivers = skill_scores.nlargest(5, 'OverallSkill')['Driver'].tolist()
    create_driver_radar(skill_scores, top_drivers, "Top 5 Drivers - Skill Comparison")

## 6. Performance Trend Analysis

In [None]:
class PerformanceTrendAnalyzer:
    """Analyze driver performance trends over time."""

    def __init__(self):
        pass

    def calculate_rolling_performance(self,
                                      df: pd.DataFrame,
                                      driver: str,
                                      window: int = 5) -> pd.DataFrame:
        """Calculate rolling performance metrics for a driver.

        Args:
            df: Per-race rows with Season, Round, FinishPosition, Points and
                AvgLapTime columns.
            driver: Driver to select.
            window: Number of races in the rolling window.

        Returns:
            The driver's rows in (Season, Round) order with RollingAvgFinish,
            RollingPoints and RollingAvgPace added. The columns are always
            present: with fewer rows than ``window`` the window simply covers
            the races available so far.
        """
        driver_data = df[df['Driver'] == driver].sort_values(['Season', 'Round']).copy()

        # min_periods=1 lets the first races of a season produce a value too
        def rolling(column: str):
            return driver_data[column].rolling(window=window, min_periods=1)

        driver_data['RollingAvgFinish'] = rolling('FinishPosition').mean()
        driver_data['RollingPoints'] = rolling('Points').sum()
        driver_data['RollingAvgPace'] = rolling('AvgLapTime').mean()

        return driver_data

    def detect_performance_shifts(self,
                                  df: pd.DataFrame,
                                  driver: str,
                                  threshold: float = 2.0) -> List[Dict]:
        """Detect significant shifts in driver performance.

        Compares a 3-race average finish with a 7-race average (at least
        three races). From the fourth race on, a race is flagged when the
        short-term average is more than ``threshold`` positions better
        ('IMPROVEMENT') or worse ('DECLINE') than the long-term one.

        Returns:
            A list of dicts (Round, Event, Type, ShortTermAvg, LongTermAvg);
            empty when the driver has fewer than five races.
        """
        driver_data = df[df['Driver'] == driver].sort_values(['Season', 'Round'])

        if len(driver_data) < 5:
            return []

        driver_data = driver_data.copy()
        driver_data['ShortTermAvg'] = driver_data['FinishPosition'].rolling(3, min_periods=1).mean()
        driver_data['LongTermAvg'] = driver_data['FinishPosition'].rolling(7, min_periods=3).mean()

        shifts = []

        for _, row in driver_data.iloc[3:].iterrows():
            # Lower finishing position is better, hence "<" means improvement
            if row['ShortTermAvg'] < row['LongTermAvg'] - threshold:
                shift_type = 'IMPROVEMENT'
            elif row['ShortTermAvg'] > row['LongTermAvg'] + threshold:
                shift_type = 'DECLINE'
            else:
                continue

            shifts.append({
                'Round': row['Round'],
                'Event': row['Event'],
                'Type': shift_type,
                'ShortTermAvg': row['ShortTermAvg'],
                'LongTermAvg': row['LongTermAvg']
            })

        return shifts

    def plot_performance_trend(self, df: pd.DataFrame, driver: str, window: int = 5):
        """Plot driver's performance trend over the season.

        Draws a 2x2 figure: finish position with its rolling average,
        cumulative points, qualifying vs. finish position, and the
        distribution of positions gained.

        Args:
            df: Per-race rows as produced by the data loader.
            driver: Driver to plot.
            window: Rolling window (in races) for the average finish line.
        """
        driver_data = self.calculate_rolling_performance(df, driver, window)
        rounds = driver_data['Round']

        _, axes = plt.subplots(2, 2, figsize=(14, 10))
        finish_ax, points_ax = axes[0, 0], axes[0, 1]
        quali_ax, gained_ax = axes[1, 0], axes[1, 1]

        # Finish position trend
        finish_ax.plot(rounds, driver_data['FinishPosition'],
                       'o-', alpha=0.5, label='Actual')
        finish_ax.plot(rounds, driver_data['RollingAvgFinish'],
                       '-', linewidth=2, label=f'{window}-Race Avg')
        finish_ax.set_xlabel('Round')
        finish_ax.set_ylabel('Finish Position')
        finish_ax.set_title(f'{driver} - Finish Position Trend')
        finish_ax.invert_yaxis()  # P1 at the top
        finish_ax.legend()

        # Points accumulation
        cumulative_points = driver_data['Points'].cumsum()
        points_ax.fill_between(rounds, 0, cumulative_points, alpha=0.3)
        points_ax.plot(rounds, cumulative_points, 'o-')
        points_ax.set_xlabel('Round')
        points_ax.set_ylabel('Cumulative Points')
        points_ax.set_title(f'{driver} - Points Progression')

        # Qualifying vs race; points below the diagonal gained places
        quali_data = driver_data[driver_data['QualiPosition'].notna()]
        quali_ax.scatter(quali_data['QualiPosition'], quali_data['FinishPosition'],
                         c=quali_data['Round'], cmap='viridis', s=100)
        quali_ax.plot([1, 20], [1, 20], 'r--', alpha=0.5, label='Equal')
        quali_ax.set_xlabel('Qualifying Position')
        quali_ax.set_ylabel('Finish Position')
        quali_ax.set_title(f'{driver} - Quali vs Race')
        quali_ax.legend()

        # Positions gained distribution
        mean_gained = driver_data['PositionsGained'].mean()
        gained_ax.hist(driver_data['PositionsGained'], bins=15, edgecolor='black')
        gained_ax.axvline(x=0, color='red', linestyle='--')
        gained_ax.axvline(x=mean_gained, color='green',
                          linestyle='-', label=f'Avg: {mean_gained:.1f}')
        gained_ax.set_xlabel('Positions Gained/Lost')
        gained_ax.set_ylabel('Frequency')
        gained_ax.set_title(f'{driver} - Positions Gained Distribution')
        gained_ax.legend()

        plt.tight_layout()
        plt.show()


# Initialize analyzer (instance used by the trend cell that follows)
trend_analyzer = PerformanceTrendAnalyzer()

In [None]:
# Plot performance trend for a top driver: the one with the highest points
# total across the loaded races; skipped when nothing was loaded
if len(performance_df) > 0:
    top_driver = performance_df.groupby('Driver')['Points'].sum().idxmax()
    trend_analyzer.plot_performance_trend(performance_df, top_driver)

## 7. Driver Comparison Tool

In [None]:
class DriverComparisonTool:
    """Compare two drivers numerically and visually.

    Both methods expect a race-level DataFrame with one row per driver per
    race and the columns ``Driver``, ``Event``, ``Points``, ``FinishPosition``,
    ``QualiPosition``, ``DNF`` and ``PositionsGained``. ``plot_comparison``
    additionally sorts by ``Season`` and ``Round``.
    """

    def __init__(self):
        # The tool keeps no state; every method works on the frame it is given.
        pass

    @staticmethod
    def _summarize(races: pd.DataFrame) -> Dict:
        """Aggregate one driver's rows into the per-driver comparison metrics."""
        return {
            'TotalPoints': races['Points'].sum(),
            'AvgFinish': races['FinishPosition'].mean(),
            'AvgQuali': races['QualiPosition'].mean(),
            'Wins': (races['FinishPosition'] == 1).sum(),
            'Podiums': (races['FinishPosition'] <= 3).sum(),
            'DNFs': races['DNF'].sum(),
            'AvgPosGained': races['PositionsGained'].mean()
        }

    @staticmethod
    def _count_ahead(first: pd.Series,
                     second: pd.Series,
                     eligible: pd.Series) -> Tuple[int, int]:
        """Count eligible rows where each side holds the strictly lower position."""
        a = first[eligible]
        b = second[eligible]
        # Strict comparisons in both directions: ties (and NaN) score for nobody.
        return int((a < b).sum()), int((b < a).sum())

    def compare_drivers(self,
                       df: pd.DataFrame,
                       driver1: str,
                       driver2: str) -> Dict:
        """Generate comprehensive comparison between two drivers.

        Only races that both drivers entered are considered. A race is
        identified by ``(Season, Event)`` when ``df`` has a ``Season`` column
        (event names repeat every year), otherwise by ``Event`` alone.

        Args:
            df: Race-level performance data (see class docstring for columns).
            driver1: Value of the ``Driver`` column for the first driver.
            driver2: Value of the ``Driver`` column for the second driver.

        Returns:
            A dict with:
              * ``'CommonRaces'``: number of races both drivers entered.
              * ``driver1`` / ``driver2``: per-driver metrics over those races
                (``TotalPoints``, ``AvgFinish``, ``AvgQuali``, ``Wins``,
                ``Podiums``, ``DNFs``, ``AvgPosGained``).
              * ``'HeadToHead'``: ``'Qualifying'`` and ``'Race'`` scores
                formatted as ``"<driver1 wins> - <driver2 wins>"``. A
                qualifying duel counts only when both positions are known; a
                race duel counts only when neither driver retired. Ties and
                excluded races score for neither driver.
        """
        race_key = ['Season', 'Event'] if 'Season' in df.columns else ['Event']

        d1_data = df[df['Driver'] == driver1]
        d2_data = df[df['Driver'] == driver2]

        # One row per common race with both drivers' results side by side.
        # Should a driver have several rows for one race, the first is used
        # for the head-to-head duel.
        paired = d1_data.drop_duplicates(subset=race_key).merge(
            d2_data.drop_duplicates(subset=race_key),
            on=race_key,
            suffixes=('_1', '_2'),
        )
        common_races = paired[race_key]

        # Inner joins restrict each driver's rows to the shared races.
        d1_common = d1_data.merge(common_races, on=race_key)
        d2_common = d2_data.merge(common_races, on=race_key)

        comparison = {
            'CommonRaces': len(paired),
            driver1: self._summarize(d1_common),
            driver2: self._summarize(d2_common)
        }

        # Head-to-head: each driver's wins are counted explicitly so that
        # excluded or tied races are not credited to the second driver.
        quali_known = (paired['QualiPosition_1'].notna()
                       & paired['QualiPosition_2'].notna())
        quali_1, quali_2 = self._count_ahead(
            paired['QualiPosition_1'], paired['QualiPosition_2'], quali_known
        )

        both_finished = ~(paired['DNF_1'].astype(bool)
                          | paired['DNF_2'].astype(bool))
        race_1, race_2 = self._count_ahead(
            paired['FinishPosition_1'], paired['FinishPosition_2'], both_finished
        )

        comparison['HeadToHead'] = {
            'Qualifying': f"{quali_1} - {quali_2}",
            'Race': f"{race_1} - {race_2}"
        }

        return comparison

    def plot_comparison(self,
                       df: pd.DataFrame,
                       driver1: str,
                       driver2: str):
        """Visualize driver comparison.

        Draws a 2x2 figure: key-metric bars, cumulative points, finish
        position histogram and qualifying position histogram. Unlike
        ``compare_drivers``, every row of each driver is used, not only the
        races they have in common.

        Args:
            df: Race-level performance data; must include ``Season`` and
                ``Round`` so rows can be put in chronological order.
            driver1: Value of the ``Driver`` column for the first driver.
            driver2: Value of the ``Driver`` column for the second driver.
        """
        d1_data = df[df['Driver'] == driver1].sort_values(['Season', 'Round'])
        d2_data = df[df['Driver'] == driver2].sort_values(['Season', 'Round'])

        fig, axes = plt.subplots(2, 2, figsize=(14, 10))

        # Key metrics side by side (note: totals and averages share one axis)
        metric_labels = ['Total Points', 'Avg Finish', 'Avg Quali', 'Avg Pos Gained']
        d1_values = [
            d1_data['Points'].sum(),
            d1_data['FinishPosition'].mean(),
            d1_data['QualiPosition'].mean(),
            d1_data['PositionsGained'].mean()
        ]
        d2_values = [
            d2_data['Points'].sum(),
            d2_data['FinishPosition'].mean(),
            d2_data['QualiPosition'].mean(),
            d2_data['PositionsGained'].mean()
        ]

        x = np.arange(len(metric_labels))
        width = 0.35

        axes[0, 0].bar(x - width/2, d1_values, width, label=driver1, color='steelblue')
        axes[0, 0].bar(x + width/2, d2_values, width, label=driver2, color='coral')
        axes[0, 0].set_xticks(x)
        axes[0, 0].set_xticklabels(metric_labels)
        axes[0, 0].set_title('Key Metrics Comparison')
        axes[0, 0].legend()

        # Season progression: x is the driver's own race count, so the two
        # lines are aligned by "n-th race entered", not by calendar round.
        d1_cumsum = d1_data['Points'].cumsum()
        d2_cumsum = d2_data['Points'].cumsum()

        axes[0, 1].plot(range(len(d1_cumsum)), d1_cumsum, 'o-', label=driver1, color='steelblue')
        axes[0, 1].plot(range(len(d2_cumsum)), d2_cumsum, 'o-', label=driver2, color='coral')
        axes[0, 1].set_xlabel('Race')
        axes[0, 1].set_ylabel('Cumulative Points')
        axes[0, 1].set_title('Points Progression')
        axes[0, 1].legend()

        # Finish position distribution
        axes[1, 0].hist(d1_data['FinishPosition'], bins=20, alpha=0.5, label=driver1, color='steelblue')
        axes[1, 0].hist(d2_data['FinishPosition'], bins=20, alpha=0.5, label=driver2, color='coral')
        axes[1, 0].set_xlabel('Finish Position')
        axes[1, 0].set_ylabel('Frequency')
        axes[1, 0].set_title('Finish Position Distribution')
        axes[1, 0].legend()

        # Qualifying position distribution (rows without a quali result are dropped)
        d1_quali = d1_data[d1_data['QualiPosition'].notna()]['QualiPosition']
        d2_quali = d2_data[d2_data['QualiPosition'].notna()]['QualiPosition']

        axes[1, 1].hist(d1_quali, bins=20, alpha=0.5, label=driver1, color='steelblue')
        axes[1, 1].hist(d2_quali, bins=20, alpha=0.5, label=driver2, color='coral')
        axes[1, 1].set_xlabel('Qualifying Position')
        axes[1, 1].set_ylabel('Frequency')
        axes[1, 1].set_title('Qualifying Position Distribution')
        axes[1, 1].legend()

        plt.tight_layout()
        plt.show()


# Module-level DriverComparisonTool instance; the comparison cell calls its
# compare_drivers and plot_comparison methods.
comparison_tool = DriverComparisonTool()

In [None]:
# Head-to-head report for the two highest-scoring drivers in the dataset.
if len(performance_df) > 0:
    top_2_drivers = (
        performance_df.groupby('Driver')['Points']
        .sum()
        .nlargest(2)
        .index
        .tolist()
    )

    # Fewer than two drivers in the data means there is nothing to compare.
    if len(top_2_drivers) >= 2:
        driver1, driver2 = top_2_drivers[:2]

        print(f"Comparing {driver1} vs {driver2}:")
        comparison = comparison_tool.compare_drivers(performance_df, driver1, driver2)

        head_to_head = comparison['HeadToHead']
        print(f"\nCommon races: {comparison['CommonRaces']}")
        print("\nHead-to-Head:")
        print(f"  Qualifying: {head_to_head['Qualifying']}")
        print(f"  Race: {head_to_head['Race']}")

        # Same metric listing for both drivers; floats are shown to 2 decimals.
        for name in (driver1, driver2):
            print(f"\n{name}:")
            for k, v in comparison[name].items():
                if isinstance(v, float):
                    print(f"  {k}: {v:.2f}")
                else:
                    print(f"  {k}: {v}")

        # Plot comparison
        comparison_tool.plot_comparison(performance_df, driver1, driver2)

## 8. Save Analysis Results

In [None]:
import json

# Destination folder for every exported artefact (created on demand).
output_dir = Path('../data/processed/driver_analysis')
output_dir.mkdir(parents=True, exist_ok=True)


def _export_frame(frame, csv_name=None, parquet_name=None):
    """Write ``frame`` into ``output_dir`` under the given file names.

    Nothing is written when ``frame`` has no rows. CSV is written before
    Parquet; a format whose file name is ``None`` is skipped.
    """
    if len(frame) == 0:
        return
    if csv_name is not None:
        frame.to_csv(output_dir / csv_name, index=False)
    if parquet_name is not None:
        frame.to_parquet(output_dir / parquet_name, index=False)


# Season metrics and skill scores: CSV + Parquet
_export_frame(season_metrics, 'season_metrics.csv', 'season_metrics.parquet')
_export_frame(skill_scores, 'skill_scores.csv', 'skill_scores.parquet')

# Teammate battles: CSV only
_export_frame(teammate_battles, csv_name='teammate_battles.csv')

# Raw performance data: Parquet only
_export_frame(performance_df, parquet_name='performance_data.parquet')

print(f"Analysis results saved to {output_dir}")

## Summary

### Analysis Components Developed:

1. **DriverDataLoader**: Loads comprehensive performance data from FastF1
2. **DriverMetricsCalculator**: Calculates season-level metrics and skill scores
3. **TeammateAnalyzer**: Head-to-head teammate comparison
4. **CircuitPerformanceAnalyzer**: Circuit-specific performance analysis
5. **PerformanceTrendAnalyzer**: Trend detection and rolling metrics
6. **DriverComparisonTool**: Comprehensive driver-to-driver comparison

### Key Metrics Tracked:
- **Qualifying**: Average position, poles, Q3 rate
- **Race Pace**: Average lap time, best lap, consistency
- **Results**: Wins, podiums, points, positions gained
- **Reliability**: DNF rate, mechanical failures
- **Skill Scores**: Normalized 0-100 scores across dimensions

### Visualization Features:
- Radar charts for skill comparison
- Season progression plots
- Teammate battle visualizations
- Distribution histograms

### Integration Points:
- Live race commentary generation
- Pre-race predictions
- Post-race analysis reports