In [1]:
# Race Performance Modeling & Driver Behavior Clustering in F1 Telemetry
# --------------------------------------------------------

In [2]:
# 1. Set Up Environment
import fastf1
from fastf1 import plotting
from fastf1.core import Laps
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor

In [4]:
# Enable cache
fastf1.Cache.enable_cache('./cache')

In [5]:
# 2. Driver & Lap Selection
session = fastf1.get_session(2023, 'Monza', 'Q')
session.load()

core           INFO 	Loading data for Italian Grand Prix - Qualifying [v3.6.1]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
_api           INFO 	Parsing timing data...
req            INFO 

In [6]:
# Pick top 5 drivers based on fastest lap using user-provided session parameters
# Accepts: year (int), event (str), session_type (str) as per FastF1 documentation

def get_user_session(year, event, session_type):
    s = fastf1.get_session(year, event, session_type)
    s.load()
    return s

# Example usage: user_session = get_user_session(2023, 'Monza', 'Q')
# Replace the arguments below with user input as needed
user_session = get_user_session(2023, 'Monza', 'Q')

top5_drivers = user_session.laps.pick_quicklaps().sort_values('LapTime')['Driver'].unique()[:5]
laps = {driver: user_session.laps.pick_drivers([driver]).pick_fastest() for driver in top5_drivers}

core           INFO 	Loading data for Italian Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '1', '16', '63', '11', '23', '81', '44', '4', '14', '22', '40', '27', '77', '2', '24', '10', '31', '20', '18']


In [7]:
# 3. Data Engineering (Car data extraction + derived metrics)
def extract_features(lap):
    tel = lap.get_car_data().add_distance()
    tel['Time_s'] = (tel['Time'] - tel['Time'].iloc[0]).dt.total_seconds()
    tel['DeltaSpeed'] = tel['Speed'].diff()
    tel['DeltaTime'] = tel['Time_s'].diff()
    tel['Acceleration'] = tel['DeltaSpeed'] / tel['DeltaTime']
    tel['Throttle_pct'] = tel['Throttle']
    tel['Brake_pct'] = tel['Brake']
    tel['AggressionIndex'] = (tel['Throttle'].mean() / (tel['Brake'].mean() + 1e-6))
    # Add sector info
    tel['Sector'] = lap.loc['Sector1Time': 'Sector3Time'].index.get_indexer(tel.index, method='ffill')
    # Add derived summary stats
    features = {
        'Speed_mean': tel['Speed'].mean(),
        'Speed_std': tel['Speed'].std(),
        'Throttle_mean': tel['Throttle'].mean(),
        'Throttle_std': tel['Throttle'].std(),
        'Brake_mean': tel['Brake'].mean(),
        'Brake_std': tel['Brake'].std(),
        'Steer_mean': tel['Steer'].mean(),
        'Steer_std': tel['Steer'].std(),
        'Gear_mean': tel['nGear'].mean(),
        'RPM_mean': tel['RPM'].mean(),
        'DRS_pct': tel['DRS'].mean(),
        'Acceleration_mean': tel['Acceleration'].mean(),
        'AggressionIndex': tel['AggressionIndex'].mean()
    }
    return features, tel

driver_features = {}
driver_telemetry = {}
for driver in top5_drivers:
    features, tel = extract_features(laps[driver])
    driver_features[driver] = features
    driver_telemetry[driver] = tel

features_df = pd.DataFrame(driver_features).T

TypeError: '<' not supported between instances of 'int' and 'str'

In [None]:
# 4. Driving Style Clustering
# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(features_df)

# KMeans clustering
kmeans = KMeans(n_clusters=3, random_state=42)
clusters = kmeans.fit_predict(X)
features_df['Cluster'] = clusters

# Cluster labels (example: 0-late brakers, 1-smooth, 2-aggressive)
cluster_names = {0: 'Late Braker', 1: 'Smooth', 2: 'Aggressive'}
features_df['Style'] = features_df['Cluster'].map(cluster_names)

In [None]:
# 5. Lap Time Prediction Model
# Build dataframe for regression: lap time + features
lap_time_df = session.laps.pick_drivers(top5_drivers).copy()
lap_time_df = lap_time_df.assign(**features_df)
regress_features = ['Speed_mean','Throttle_mean','Brake_mean','Steer_mean','Gear_mean','RPM_mean','DRS_pct','Acceleration_mean','AggressionIndex']

rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(features_df[regress_features], session.laps.pick_drivers(top5_drivers)['LapTime'].dt.total_seconds().values)
lap_time_pred = rf.predict(features_df[regress_features])
features_df['LapTime_pred'] = lap_time_pred

In [None]:
# 6. Visualization

# Subplot: Speed, Throttle, Brake by sector
fig, axs = plt.subplots(3, 1, figsize=(12, 10), sharex=True)
for driver in top5_drivers:
    tel = driver_telemetry[driver]
    axs[0].plot(tel['Distance'], tel['Speed'], label=driver)
    axs[1].plot(tel['Distance'], tel['Throttle'], label=driver)
    axs[2].plot(tel['Distance'], tel['Brake'], label=driver)
axs[0].set_ylabel('Speed [km/h]')
axs[1].set_ylabel('Throttle [%]')
axs[2].set_ylabel('Brake [%]')
axs[2].set_xlabel('Distance [m]')
axs[0].set_title('Speed Comparison')
axs[1].set_title('Throttle Comparison')
axs[2].set_title('Brake Comparison')
for ax in axs: ax.legend()
plt.tight_layout()
plt.show()

# Radar chart: Each driver's style summary
import matplotlib
categories = regress_features
N = len(categories)
angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()
angles += angles[:1]
fig = plt.figure(figsize=(8, 8))
ax = plt.subplot(111, polar=True)
for driver in top5_drivers:
    values = features_df.loc[driver, regress_features].tolist()
    values += values[:1]
    ax.plot(angles, values, label=driver)
ax.set_thetagrids(np.degrees(angles), categories)
plt.title('Driver Style Radar Chart')
plt.legend(loc='upper right')
plt.show()

In [None]:
# 7. Reporting (Textual)
for driver in top5_drivers:
    print(f"Driver: {driver}")
    print(f"  Lap Time (actual/predicted): {laps[driver]['LapTime']} / {features_df.loc[driver, 'LapTime_pred']:.2f}s")
    print(f"  Driving Style Cluster: {features_df.loc[driver, 'Style']}")
    print("  Feature Summary:")
    print(features_df.loc[driver, regress_features])
    print("-" * 50)

In [None]:
# To dive deeper:
# - Sector gain/loss: Use session.laps.pick_driver(driver)['Sector1Time'], etc.
# - Link style to tire wear: requires tire data (session.laps['Compound'])
# - Compare car setup: see session.laps['Setup'] if available