In [7]:
# 📒 02_build_features_fastf1.ipynb
# Enrich 2025 race data with qualifying times, weather, and optional telemetry features

import fastf1
import pandas as pd
from tqdm import tqdm
from fastf1.core import Laps
from datetime import datetime

# Enable FastF1 cache
fastf1.Cache.enable_cache("../cache")

# Load base dataset
df = pd.read_csv("../data/f1_features_2025_fastf1.csv")
df['date'] = pd.to_datetime(df['date'])

# Extract unique rounds to process
rounds = df['round'].unique()
season = 2025

# New feature containers
qualifying_times = []
weather_data = []
telemetry_data = []

for rnd in tqdm(rounds, desc="Processing Rounds"):
    try:
        session_q = fastf1.get_session(season, rnd, 'Q')
        session_q.load()

        # 1. Build full name lookup from session results
        full_name_lookup = session_q.results.set_index('Abbreviation')['FullName'].to_dict()

        # 2. Get best lap time per driver
        laps_q = session_q.laps.pick_quicklaps()
        best_laps = laps_q.groupby('Driver')['LapTime'].min().reset_index()
        print(f"🔎 Best laps (Driver column) for round {rnd}:")
        print(best_laps['Driver'].unique())

        best_laps['qualifying_time'] = best_laps['LapTime'].dt.total_seconds()

        # 3. Map abbreviation to full name
        best_laps['driver'] = best_laps['Driver'].map(full_name_lookup)
        best_laps = best_laps.dropna(subset=['driver'])  # safety check

        # 4. Finalize and store
        best_laps['round'] = rnd
        best_laps = best_laps[['driver', 'qualifying_time', 'round']]
        qualifying_times.append(best_laps)

        # 5. Weather data
        weather = session_q.weather_data.iloc[0]
        weather_data.append({
            'round': rnd,
            'air_temp': weather['AirTemp'],
            'track_temp': weather['TrackTemp'],
            'humidity': weather['Humidity']
        })

        # 6. Optional telemetry
        try:
            telemetry_avg = laps_q.groupby("Driver").agg({
                'SpeedST': 'mean',
                'Throttle': 'mean',
                'Brake': 'mean'
            }).reset_index()
            telemetry_avg['round'] = rnd
            telemetry_data.append(telemetry_avg)
        except KeyError:
            print(f"⚠️ Telemetry missing for round {rnd} — skipping telemetry columns.")

    except Exception as e:
        print(f"⚠️ Qualifying data missing for round {rnd}: {e}")
        continue

# Merge all data
if qualifying_times:
    df_q = pd.concat(qualifying_times, ignore_index=True)
    df_enriched = df.merge(df_q, on=['round', 'driver'], how='left')
else:
    print("❌ No qualifying data loaded. Exiting.")
    raise SystemExit

df_weather = pd.DataFrame(weather_data)
df_enriched = df_enriched.merge(df_weather, on='round', how='left')

if telemetry_data:
    df_tele = pd.concat(telemetry_data, ignore_index=True)
    df_enriched = df_enriched.merge(df_tele, left_on=['round', 'driver'], right_on=['round', 'Driver'], how='left')
    df_enriched.drop(['Driver'], axis=1, inplace=True)
else:
    print("⚠️ No telemetry data found — skipping telemetry merge.")

# Save final enriched dataset
df_enriched.to_csv("../data/f1_features_2025_enriched.csv", index=False)
print("✅ Enriched dataset saved as 'f1_features_2025_enriched.csv'")
df_enriched.head()


Processing Rounds:   0%|                                                                         | 0/7 [00:00<?, ?it/s]core           INFO 	Loading data for Australian Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '81', '1', '63', '22', '23', '16', '44', '10', '55', '6', '14', '18', '7', '5', '12', '27', '30', 

🔎 Best laps (Driver column) for round 1:
['ALB' 'ALO' 'ANT' 'BOR' 'DOO' 'GAS' 'HAD' 'HAM' 'HUL' 'LAW' 'LEC' 'NOR'
 'OCO' 'PIA' 'RUS' 'SAI' 'STR' 'TSU' 'VER']
⚠️ Telemetry missing for round 1 — skipping telemetry columns.


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '4', '1', '44', '16', '6', '12', '22', '23', '31', '27', '14', '18', '55', '10', '87', '7', '5', '30']
Processing Rounds:  29%|██████████████████▌                                              | 2/7 [00:02<00:06,  1.32s/it]core           INFO 	Loading data for Japanese Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timin

🔎 Best laps (Driver column) for round 2:
['ALB' 'ALO' 'ANT' 'BEA' 'BOR' 'DOO' 'GAS' 'HAD' 'HAM' 'HUL' 'LAW' 'LEC'
 'NOR' 'OCO' 'PIA' 'RUS' 'SAI' 'STR' 'TSU' 'VER']
⚠️ Telemetry missing for round 2 — skipping telemetry columns.


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '63', '12', '6', '44', '23', '87', '10', '55', '14', '30', '22', '27', '5', '31', '7', '18']
Processing Rounds:  43%|███████████████████████████▊                                     | 3/7 [00:03<00:05,  1.29s/it]core           INFO 	Loading data for Bahrain Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing

🔎 Best laps (Driver column) for round 3:
['ALB' 'ALO' 'ANT' 'BEA' 'BOR' 'DOO' 'GAS' 'HAD' 'HAM' 'HUL' 'LAW' 'LEC'
 'NOR' 'OCO' 'PIA' 'RUS' 'SAI' 'STR' 'TSU' 'VER']
⚠️ Telemetry missing for round 3 — skipping telemetry columns.


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '16', '12', '10', '4', '1', '55', '44', '22', '7', '6', '14', '31', '23', '27', '30', '5', '18', '87']
Processing Rounds:  57%|█████████████████████████████████████▏                           | 4/7 [00:05<00:04,  1.37s/it]core           INFO 	Loading data for Saudi Arabian Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing 

🔎 Best laps (Driver column) for round 4:
['ALB' 'ALO' 'ANT' 'BEA' 'BOR' 'DOO' 'GAS' 'HAD' 'HAM' 'HUL' 'LAW' 'LEC'
 'NOR' 'OCO' 'PIA' 'RUS' 'SAI' 'STR' 'TSU' 'VER']
⚠️ Telemetry missing for round 4 — skipping telemetry columns.


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '81', '63', '16', '12', '55', '44', '22', '10', '4', '23', '30', '14', '6', '87', '18', '7', '27', '31', '5']
Processing Rounds:  71%|██████████████████████████████████████████████▍                  | 5/7 [00:06<00:02,  1.36s/it]core           INFO 	Loading data for Miami Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing d

🔎 Best laps (Driver column) for round 5:
['ALB' 'ALO' 'ANT' 'BEA' 'BOR' 'DOO' 'GAS' 'HAD' 'HAM' 'HUL' 'LAW' 'LEC'
 'NOR' 'OCO' 'PIA' 'RUS' 'SAI' 'STR' 'TSU' 'VER']
⚠️ Telemetry missing for round 5 — skipping telemetry columns.


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '12', '81', '63', '55', '23', '16', '31', '22', '6', '44', '5', '7', '30', '27', '14', '10', '18', '87']
Processing Rounds:  86%|███████████████████████████████████████████████████████▋         | 6/7 [00:07<00:01,  1.32s/it]core           INFO 	Loading data for Emilia Romagna Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing

🔎 Best laps (Driver column) for round 6:
['ALB' 'ALO' 'ANT' 'BEA' 'BOR' 'DOO' 'GAS' 'HAD' 'HAM' 'HUL' 'LAW' 'LEC'
 'NOR' 'OCO' 'PIA' 'RUS' 'SAI' 'STR' 'TSU' 'VER']
⚠️ Telemetry missing for round 6 — skipping telemetry columns.


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '1', '63', '4', '14', '55', '23', '18', '6', '10', '16', '44', '12', '5', '43', '30', '27', '31', '87', '22']
Processing Rounds: 100%|█████████████████████████████████████████████████████████████████| 7/7 [00:09<00:00,  1.35s/it]

🔎 Best laps (Driver column) for round 7:
['ALB' 'ALO' 'ANT' 'BEA' 'BOR' 'COL' 'GAS' 'HAD' 'HAM' 'HUL' 'LAW' 'LEC'
 'NOR' 'OCO' 'PIA' 'RUS' 'SAI' 'STR' 'VER']
⚠️ Telemetry missing for round 7 — skipping telemetry columns.
⚠️ No telemetry data found — skipping telemetry merge.
✅ Enriched dataset saved as 'f1_features_2025_enriched.csv'





Unnamed: 0,date,season,round,circuit,driver,constructor,grid,position,qualifying_time,air_temp,track_temp,humidity
0,2025-03-16,2025,1,Australian Grand Prix,Lando Norris,McLaren,1.0,1.0,75.096,32.5,40.5,41.0
1,2025-03-16,2025,1,Australian Grand Prix,Max Verstappen,Red Bull Racing,3.0,2.0,75.481,32.5,40.5,41.0
2,2025-03-16,2025,1,Australian Grand Prix,George Russell,Mercedes,4.0,3.0,75.546,32.5,40.5,41.0
3,2025-03-16,2025,1,Australian Grand Prix,Andrea Kimi Antonelli,Mercedes,16.0,4.0,76.525,32.5,40.5,41.0
4,2025-03-16,2025,1,Australian Grand Prix,Alexander Albon,Williams,6.0,5.0,75.737,32.5,40.5,41.0
