In [1]:
!pip install fastf1 matplotlib seaborn pandas

Collecting fastf1
  Downloading fastf1-3.5.3-py3-none-any.whl.metadata (4.6 kB)
Collecting rapidfuzz (from fastf1)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting requests-cache>=1.0.0 (from fastf1)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting timple>=0.1.6 (from fastf1)
  Downloading timple-0.1.8-py3-none-any.whl.metadata (2.0 kB)
Collecting websockets<14,>=10.3 (from fastf1)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting cattrs>=22.2 (from requests-cache>=1.0.0->fastf1)
  Downloading cattrs-24.1.3-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache>=1.0.0->fastf1)
  Downloading url_normalize-2.2.0-py3-none-any.whl.metadata (4.9 kB)
Downloading fastf1-3.5.3-py3-none-any.whl (151 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [2]:
import fastf1
import os
import pandas as pd
import numpy as np

# Setup: make sure the on-disk cache directory exists before pointing FastF1 at it.
os.makedirs('data/cache', exist_ok=True)
fastf1.Cache.enable_cache('data/cache')

# Laps with a LapTime at or below this threshold are discarded.
# NOTE(review): presumably a sanity cut-off for implausibly short laps - confirm.
MIN_LAP_TIME = pd.Timedelta(seconds=50)

# Load session (Q or R)
session = fastf1.get_session(2023, 'Monza', 'Q')
session.load()

# Keep only laps that have a recorded LapTime above the threshold.
raw_laps = session.laps
has_valid_time = raw_laps['LapTime'].notna() & (raw_laps['LapTime'] > MIN_LAP_TIME)

# Take an explicit copy of the filtered rows so that adding columns below writes
# to an independent frame instead of a slice of `session.laps`
# (avoids pandas' SettingWithCopyWarning).
laps = raw_laps.loc[has_valid_time].copy()
laps['LapTimeSeconds'] = laps['LapTime'].dt.total_seconds()

# Renumber rows 0..n-1 after filtering; rebinding instead of inplace=True keeps
# the cell safe to re-run.
laps = laps.reset_index(drop=True)


core           INFO 	Loading data for Italian Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Italian Grand Prix - Qualifying [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for sess

In [3]:
# Features: integer codes for the categorical columns
# (driver -> DriverID, team -> TeamID, tyre compound -> TyreCode).
for source_col, code_col in (
    ('Driver', 'DriverID'),
    ('Team', 'TeamID'),
    ('Compound', 'TyreCode'),
):
    codes, _ = pd.factorize(laps[source_col])
    laps[code_col] = codes

# Feature: stint number min-max scaled over the laps in this frame.
stint_lo = laps['Stint'].min()
stint_hi = laps['Stint'].max()
laps['StintNorm'] = (laps['Stint'] - stint_lo) / (stint_hi - stint_lo)

# Feature: lap number as a fraction of the highest lap number present.
laps['LapTrend'] = laps['LapNumber'].div(laps['LapNumber'].max())

# Feature: the 'Time' timedelta column expressed in seconds.
laps['TimeSeconds'] = laps['Time'].dt.total_seconds()

# Preview the engineered columns for the first few rows.
preview_cols = [
    'Driver', 'LapTimeSeconds', 'DriverID', 'TeamID',
    'TyreCode', 'StintNorm', 'LapTrend', 'TimeSeconds',
]
laps[preview_cols].head()


Unnamed: 0,Driver,LapTimeSeconds,DriverID,TeamID,TyreCode,StintNorm,LapTrend,TimeSeconds
0,SAI,82.321,0,0,0,0.0,0.086957,1314.042
1,SAI,124.404,0,0,0,0.0,0.130435,1438.446
2,SAI,142.338,0,0,0,0.166667,0.173913,1580.784
3,SAI,81.965,0,0,0,0.166667,0.217391,1662.749
4,SAI,119.905,0,0,0,0.166667,0.26087,1782.654


In [4]:
# Write the current laps frame to CSV, leaving out the row index.
laps.to_csv(path_or_buf='data/processed_lap_data.csv', index=False)


In [5]:
# Lap times in seconds, plus the same values grouped per driver.
lap_seconds = laps['LapTimeSeconds']
per_driver = lap_seconds.groupby(laps['Driver'])

# Gap of each lap to that driver's own fastest lap in this frame.
laps['PaceDeviation'] = lap_seconds - per_driver.transform('min')

# Gap of each lap to the fastest lap of any driver in this frame.
session_best = lap_seconds.min()
laps['DeltaToBest'] = lap_seconds - session_best

# Mean of each lap and the same driver's previous lap (the driver's first lap
# is averaged on its own because min_periods=1).
laps['RollingAvgDriver'] = per_driver.transform(
    lambda times: times.rolling(2, min_periods=1).mean()
)


In [8]:
# Write the laps frame, as it stands at this point, to a second CSV without the row index.
laps.to_csv(path_or_buf='data/processed_lap_data1.csv', index=False)