In [9]:
# Race Performance Modeling & Driver Behavior Clustering in F1 Telemetry
# --------------------------------------------------------

In [10]:
# 1. Set Up Environment
import fastf1
from fastf1 import plotting
from fastf1.core import Laps
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns   
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
import warnings
warnings.filterwarnings('ignore')

In [11]:
# Enable cache
fastf1.Cache.enable_cache('./cache')

In [12]:
# 2. Driver & Lap Selection
session = fastf1.get_session(2023, 'Monza', 'Q')
session.load()

core           INFO 	Loading data for Italian Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for car_data
req  

In [13]:
# Pick top 5 drivers based on fastest lap using user-provided session parameters
def get_user_session(year, event, session_type):
    s = fastf1.get_session(year, event, session_type)
    s.load()
    return s

# Get session and top drivers
user_session = get_user_session(2023, 'Monza', 'Q')

# Get top 5 drivers with valid lap times
valid_laps = user_session.laps.pick_quicklaps().dropna(subset=['LapTime'])
top5_drivers = valid_laps.groupby('Driver')['LapTime'].min().nsmallest(5).index.tolist()

# Get fastest lap for each driver
laps = {}
for driver in top5_drivers:
    driver_laps = user_session.laps.pick_drivers([driver]).pick_quicklaps()
    if not driver_laps.empty:
        fastest_lap = driver_laps.pick_fastest()
        laps[driver] = fastest_lap

print(f"Top 5 drivers: {top5_drivers}")

core           INFO 	Loading data for Italian Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for car_data
req  

Top 5 drivers: ['SAI', 'VER', 'LEC', 'RUS', 'PER']


In [14]:
# 3. Data Engineering (Car data extraction + derived metrics)
def extract_features(lap):
    try:
        tel = lap.get_car_data().add_distance()
        
        # Time calculations
        tel['Time_s'] = (tel['Time'] - tel['Time'].iloc[0]).dt.total_seconds()
        
        # Speed and acceleration
        tel['DeltaSpeed'] = tel['Speed'].diff().fillna(0)
        tel['DeltaTime'] = tel['Time_s'].diff().fillna(0.001)  # Avoid division by zero
        tel['Acceleration'] = tel['DeltaSpeed'] / tel['DeltaTime']
        tel['Acceleration'] = tel['Acceleration'].replace([np.inf, -np.inf], 0).fillna(0)
        
        # Calculate aggression index safely
        throttle_mean = tel['Throttle'].mean()
        brake_mean = tel['Brake'].mean()
        aggression_index = throttle_mean / (brake_mean + 1e-6) if brake_mean > 0 else throttle_mean
        
        # Extract features with proper null handling
        features = {
            'Speed_mean': tel['Speed'].mean(),
            'Speed_std': tel['Speed'].std(),
            'Throttle_mean': tel['Throttle'].mean(),
            'Throttle_std': tel['Throttle'].std(),
            'Brake_mean': tel['Brake'].mean(),
            'Brake_std': tel['Brake'].std(),
            'Steer_mean': abs(tel['Steer']).mean(),  # Use absolute steering
            'Steer_std': tel['Steer'].std(),
            'Gear_mean': tel['nGear'].mean(),
            'RPM_mean': tel['RPM'].mean(),
            'DRS_pct': tel['DRS'].mean(),
            'Acceleration_mean': tel['Acceleration'].mean(),
            'AggressionIndex': aggression_index,
            'LapTime': lap['LapTime'].total_seconds()
        }
        
        # Fill any NaN values
        for key, value in features.items():
            if pd.isna(value):
                features[key] = 0
                
        return features, tel
    
    except Exception as e:
        print(f"Error extracting features: {e}")
        return None, None

# Extract features for all drivers
driver_features = {}
driver_telemetry = {}

for driver in top5_drivers:
    if driver in laps:
        features, tel = extract_features(laps[driver])
        if features is not None:
            driver_features[driver] = features
            driver_telemetry[driver] = tel

# Create features dataframe
features_df = pd.DataFrame(driver_features).T
print(f"Features extracted for {len(features_df)} drivers")
print(features_df.head())

Error extracting features: 'Steer'
Error extracting features: 'Steer'
Error extracting features: 'Steer'
Error extracting features: 'Steer'
Error extracting features: 'Steer'
Features extracted for 0 drivers
Empty DataFrame
Columns: []
Index: []


In [15]:
# 4. Driving Style Clustering
if len(features_df) > 0:
    # Select features for clustering (exclude LapTime)
    cluster_features = ['Speed_mean', 'Throttle_mean', 'Brake_mean', 'Steer_mean', 
                       'Gear_mean', 'RPM_mean', 'DRS_pct', 'Acceleration_mean', 'AggressionIndex']
    
    # Normalize features
    scaler = StandardScaler()
    X = scaler.fit_transform(features_df[cluster_features])
    
    # KMeans clustering (adjust n_clusters based on data size)
    n_clusters = min(3, len(features_df))
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    clusters = kmeans.fit_predict(X)
    features_df['Cluster'] = clusters
    
    # Cluster labels
    cluster_names = {0: 'Smooth', 1: 'Aggressive', 2: 'Balanced'}
    features_df['Style'] = features_df['Cluster'].map(cluster_names)
    
    print("Clustering completed:")
    print(features_df[['Style', 'Cluster']].head())

In [16]:
# 5. Lap Time Prediction Model
if len(features_df) > 2:  # Need at least 3 samples for training
    regress_features = ['Speed_mean', 'Throttle_mean', 'Brake_mean', 'Steer_mean', 
                       'Gear_mean', 'RPM_mean', 'DRS_pct', 'Acceleration_mean', 'AggressionIndex']
    
    # Prepare data for regression
    X_reg = features_df[regress_features]
    y_reg = features_df['LapTime']
    
    # Train model
    rf = RandomForestRegressor(n_estimators=50, random_state=42)  # Reduced estimators for small dataset
    rf.fit(X_reg, y_reg)
    
    # Predict
    lap_time_pred = rf.predict(X_reg)
    features_df['LapTime_pred'] = lap_time_pred
    
    # Feature importance
    feature_importance = pd.DataFrame({
        'feature': regress_features,
        'importance': rf.feature_importances_
    }).sort_values('importance', ascending=False)
    
    print("Feature importance:")
    print(feature_importance)
else:
    print("Not enough data for regression model")
    # Only set LapTime_pred if LapTime exists
    if 'LapTime' in features_df.columns:
        features_df['LapTime_pred'] = features_df['LapTime']
    else:
        print("LapTime column not found in features dataframe")
        features_df['LapTime_pred'] = 0  # Set default value

Not enough data for regression model
LapTime column not found in features dataframe


In [17]:
# 6. Visualization
if len(driver_telemetry) > 0:
    # Subplot: Speed, Throttle, Brake by distance
    fig, axs = plt.subplots(3, 1, figsize=(12, 10), sharex=True)
    
    colors = plt.cm.Set1(np.linspace(0, 1, len(top5_drivers)))
    
    for i, driver in enumerate(top5_drivers):
        if driver in driver_telemetry:
            tel = driver_telemetry[driver]
            color = colors[i]
            
            axs[0].plot(tel['Distance'], tel['Speed'], label=driver, color=color, alpha=0.8)
            axs[1].plot(tel['Distance'], tel['Throttle'], label=driver, color=color, alpha=0.8)
            axs[2].plot(tel['Distance'], tel['Brake'], label=driver, color=color, alpha=0.8)
    
    axs[0].set_ylabel('Speed [km/h]')
    axs[0].set_title('Speed Comparison')
    axs[0].grid(True, alpha=0.3)
    axs[0].legend()
    
    axs[1].set_ylabel('Throttle [%]')
    axs[1].set_title('Throttle Comparison')
    axs[1].grid(True, alpha=0.3)
    
    axs[2].set_ylabel('Brake [%]')
    axs[2].set_title('Brake Comparison')
    axs[2].set_xlabel('Distance [m]')
    axs[2].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    # Radar chart for driver comparison
    if len(features_df) > 0:
        categories = regress_features
        N = len(categories)
        angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()
        angles += angles[:1]
        
        fig, ax = plt.subplots(figsize=(10, 8), subplot_kw=dict(projection='polar'))
        
        # Normalize data for radar chart
        radar_data = features_df[regress_features].copy()
        for col in radar_data.columns:
            radar_data[col] = (radar_data[col] - radar_data[col].min()) / (radar_data[col].max() - radar_data[col].min())
        
        for driver in radar_data.index:
            values = radar_data.loc[driver].tolist()
            values += values[:1]
            ax.plot(angles, values, 'o-', linewidth=2, label=driver, alpha=0.8)
            ax.fill(angles, values, alpha=0.1)
        
        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(categories)
        ax.set_ylim(0, 1)
        ax.set_title('Driver Style Comparison (Normalized)', size=14, pad=20)
        ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0))
        ax.grid(True)
        
        plt.tight_layout()
        plt.show()

In [18]:
# 7. Reporting (Textual)
print("="*60)
print("DRIVER PERFORMANCE AND STYLE ANALYSIS")
print("="*60)

for driver in top5_drivers:
    if driver in features_df.index:
        print(f"\nDriver: {driver}")
        print(f"  Lap Time: {features_df.loc[driver, 'LapTime']:.3f}s")
        if 'LapTime_pred' in features_df.columns:
            pred_time = features_df.loc[driver, 'LapTime_pred']
            actual_time = features_df.loc[driver, 'LapTime']
            error = abs(pred_time - actual_time)
            print(f"  Predicted Time: {pred_time:.3f}s (Error: {error:.3f}s)")
        
        if 'Style' in features_df.columns:
            print(f"  Driving Style: {features_df.loc[driver, 'Style']}")
        
        print(f"  Key Metrics:")
        print(f"    - Average Speed: {features_df.loc[driver, 'Speed_mean']:.1f} km/h")
        print(f"    - Throttle Usage: {features_df.loc[driver, 'Throttle_mean']:.1f}%")
        print(f"    - Brake Usage: {features_df.loc[driver, 'Brake_mean']:.1f}%")
        print(f"    - Aggression Index: {features_df.loc[driver, 'AggressionIndex']:.2f}")
        print("-" * 50)

# Summary statistics
if len(features_df) > 0:
    print(f"\nSUMMARY:")
    print(f"Fastest Lap: {features_df['LapTime'].min():.3f}s ({features_df['LapTime'].idxmin()})")
    print(f"Slowest Lap: {features_df['LapTime'].max():.3f}s ({features_df['LapTime'].idxmax()})")
    print(f"Average Lap Time: {features_df['LapTime'].mean():.3f}s")
    print(f"Time Spread: {features_df['LapTime'].max() - features_df['LapTime'].min():.3f}s")

DRIVER PERFORMANCE AND STYLE ANALYSIS


In [19]:
# Advanced Analysis Ideas:
print("NEXT STEPS FOR DEEPER ANALYSIS:")
print("1. Sector Analysis: Compare sector times to identify strengths/weaknesses")
print("2. Tire Compound Impact: Analyze performance by tire type")
print("3. Track Position: Study overtaking zones and defensive driving")
print("4. Weather Conditions: Factor in track temperature and conditions")
print("5. Setup Correlation: Link car setup to driving style preferences")

# Example: Quick sector analysis if data is available
try:
    sector_data = session.laps.pick_drivers(top5_drivers)[['Driver', 'Sector1Time', 'Sector2Time', 'Sector3Time']].dropna()
    if not sector_data.empty:
        print(f"\nSector Times Available for {len(sector_data)} laps")
        sector_summary = sector_data.groupby('Driver').mean()
        print(sector_summary)
except Exception as e:
    print(f"Sector analysis not available: {e}")

NEXT STEPS FOR DEEPER ANALYSIS:
1. Sector Analysis: Compare sector times to identify strengths/weaknesses
2. Tire Compound Impact: Analyze performance by tire type
3. Track Position: Study overtaking zones and defensive driving
4. Weather Conditions: Factor in track temperature and conditions
5. Setup Correlation: Link car setup to driving style preferences

Sector Times Available for 73 laps
                     Sector1Time               Sector2Time  \
Driver                                                       
LEC    0 days 00:00:33.836266666 0 days 00:00:31.662333333   
PER    0 days 00:00:31.119214285 0 days 00:00:31.199071428   
RUS    0 days 00:00:34.312428571 0 days 00:00:31.171785714   
SAI    0 days 00:00:37.824866666 0 days 00:00:32.021866666   
VER       0 days 00:00:35.181600    0 days 00:00:32.317600   

                     Sector3Time  
Driver                            
LEC    0 days 00:00:32.758533333  
PER       0 days 00:00:32.254000  
RUS    0 days 00:00:32.244571