In [22]:
# FASTF1 Setup & 2023 Data Export
import fastf1
from fastf1 import get_event_schedule, get_session
import pandas as pd
import os
import logging


In [23]:
# --- Logging Setup ---
logging.basicConfig(filename='fastf1_data_export.log', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')


In [24]:
# --- FastF1 Cache ---
cache_dir = '../.fastf1_cache'
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)


In [25]:
output_dir = '../data/processed'
os.makedirs(output_dir, exist_ok=True)

In [26]:

# === CONFIG ===
SEASON = 2023  # or 2024
combined_export_path = f"{output_dir}/all_races_combined_{SEASON}.csv"

schedule = get_event_schedule(SEASON, include_testing=False)
race_events = schedule[schedule['EventFormat'] == 'conventional']
compound_map = {
    'SOFT': 'Soft', 'MEDIUM': 'Medium', 'HARD': 'Hard',
    'INTERMEDIATE': 'Intermediate', 'WET': 'Wet'
}

all_races = []

In [27]:
for _, row in race_events.iterrows():
    round_num = row['RoundNumber']
    gp_name = row['EventName'].lower().replace(" ", "_")

    try:
        session = get_session(SEASON, round_num, 'R')
        session.load()
    except Exception as e:
        logging.error(f"[LOAD FAIL] {gp_name}: {e}")
        print(f"❌ Failed to load session for {gp_name} — {e}")
        continue

    try:
        laps = session.laps.reset_index(drop=True)
        selected_cols = [
            'Driver', 'Team', 'LapNumber', 'LapTime',
            'Sector1Time', 'Sector2Time', 'Sector3Time',
            'Compound', 'TyreLife', 'Stint',
            'PitInTime', 'PitOutTime', 'TrackStatus',
            'IsAccurate', 'Time'
        ]
        lap_data = laps[selected_cols].copy()

        # Time conversions
        for col in ['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time']:
            lap_data[f'{col}Seconds'] = lap_data[col].dt.total_seconds()
        lap_data['LapStartTime'] = lap_data['Time'].dt.total_seconds()

        # Weather merge
        weather = session.weather_data.rename(columns={'Time': 'WeatherTime'})
        weather['WeatherTime'] = weather['WeatherTime'].dt.total_seconds()
        lap_data = pd.merge_asof(
            lap_data.sort_values('LapStartTime'),
            weather.sort_values('WeatherTime'),
            left_on='LapStartTime', right_on='WeatherTime',
            direction='nearest'
        )

        # Rain flags
        lap_data['IsWetLap'] = lap_data['Rainfall'] > 0.1
        lap_data['IsDryLap'] = lap_data['Rainfall'] <= 0.1

        # --- 🏁 Circuit Metadata Injection ---
        try:
            circuit_info = session.get_circuit_info()  # ✅ Preferred for FastF1 3.5+
            lap_data['CircuitName'] = circuit_info.name
            lap_data['CircuitShort'] = circuit_info.location
            lap_data['CircuitCountry'] = circuit_info.country
            lap_data['TrackLengthKM'] = circuit_info.length / 1000 if circuit_info.length else None
            lap_data['AltitudeM'] = circuit_info.altitude
        except Exception:
            event = session.event
            lap_data['CircuitName'] = event.get('OfficialEventName', gp_name.title())
            lap_data['CircuitShort'] = event.get('Location', 'Unknown')
            lap_data['CircuitCountry'] = event.get('Country', 'Unknown')
            lap_data['TrackLengthKM'] = None
            lap_data['AltitudeM'] = None

        lap_data['CircuitType'] = lap_data['CircuitShort'].apply(lambda name: (
            "Street" if isinstance(name, str) and any(x in name.lower() for x in ['monaco', 'baku', 'miami', 'jeddah'])
            else "Hybrid" if isinstance(name, str) and 'marina' in name.lower()
            else "Permanent"
        ))

        # Safe types
        lap_data['TrackStatus'] = lap_data['TrackStatus'].astype(str)
        lap_data['IsAccurate'] = lap_data['IsAccurate'].astype(bool)
        lap_data['LapNumber'] = lap_data['LapNumber'].fillna(0).astype(int)
        lap_data['TyreLife'] = lap_data['TyreLife'].fillna(0).astype(int)
        lap_data['Stint'] = lap_data['Stint'].fillna(0).astype(int)

        lap_data.dropna(subset=[
            'LapTimeSeconds', 'Sector1TimeSeconds', 'Sector2TimeSeconds',
            'Sector3TimeSeconds', 'Compound'
        ], inplace=True)

        lap_data['Compound'] = lap_data['Compound'].str.upper().map(compound_map).fillna(lap_data['Compound'])
        lap_data['PitLap'] = lap_data.apply(
            lambda row: row['LapNumber'] if pd.notna(row['PitInTime']) else None, axis=1
        )
        lap_data['PitDuration'] = (lap_data['PitOutTime'] - lap_data['PitInTime']).dt.total_seconds()

        # Sector & stint features
        lap_data['Sector1Pct'] = lap_data['Sector1TimeSeconds'] / lap_data['LapTimeSeconds']
        lap_data['Sector2Pct'] = lap_data['Sector2TimeSeconds'] / lap_data['LapTimeSeconds']
        lap_data['Sector3Pct'] = lap_data['Sector3TimeSeconds'] / lap_data['LapTimeSeconds']
        lap_data['BestSector'] = lap_data[['Sector1TimeSeconds', 'Sector2TimeSeconds', 'Sector3TimeSeconds']].idxmin(axis=1)
        lap_data['BestSector'] = lap_data['BestSector'].str.extract(r'(\d)').astype(int)
        lap_data['IsValidLap'] = (lap_data['TrackStatus'] == 'Green') & lap_data['IsAccurate']

        lap_data['GrandPrix'] = gp_name
        lap_data['SeasonYear'] = SEASON

        if 'CarNumber' in laps.columns:             #car number 
            lap_data['CarNumber'] = laps['CarNumber']

        stint_summary = lap_data.groupby(['Driver', 'Stint']).agg(
            AvgLapTime=('LapTimeSeconds', 'mean'),
            StintLength=('LapNumber', 'count')
        ).reset_index()
        lap_data = lap_data.merge(stint_summary, on=['Driver', 'Stint'], how='left')

        stint_max_map = lap_data.groupby('Driver')['Stint'].max().to_dict()
        lap_data['StintType'] = lap_data.apply(
            lambda row: "Opening" if row['Stint'] == 1 else
                        "Closing" if row['Stint'] == stint_max_map.get(row['Driver'], 3) else "Mid", axis=1
        )

        fastest_per_driver = lap_data.groupby("Driver")["LapTimeSeconds"].min().to_dict()         # Delta to fastest(per lap)
        lap_data['DeltaToFastestLap'] = lap_data.apply(
            lambda row: row["LapTimeSeconds"] - fastest_per_driver.get(row["Driver"], row["LapTimeSeconds"]),
            axis=1
        )

        lap_data["IsSC"] = lap_data["TrackStatus"].str.contains("4").fillna(False)
        lap_data["IsVSC"] = lap_data["TrackStatus"].str.contains("8").fillna(False)
        lap_data["IsRedFlag"] = lap_data["TrackStatus"].str.contains("16").fillna(False)
        race_max_lap = lap_data["LapNumber"].max()
        lap_data["IsDNF"] = lap_data["LapNumber"] < (race_max_lap - 3)

        # Export
        race_csv = f"{output_dir}/race_summary_{SEASON}_{gp_name}.csv"
        lap_data.to_csv(race_csv, index=False)
        all_races.append(lap_data)
        logging.info(f"[EXPORT OK] {gp_name}")
        print(f"✅ Exported: {gp_name}")

    except Exception as e:
        logging.error(f"[PROCESS FAIL] {gp_name}: {e}")
        print(f"❌ Failed processing for {gp_name} — {e}")

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '55', '44', '18', '63', '77', '10', '23', '22', '2', '20', '21', '27', '24', '4', '31', '16', '81']
core           INFO 	Loading data for Saudi Arabian Grand Pri

✅ Exported: bahrain_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['11', '1', '14', '63', '44', '55', '16', '31', '10', '20', '22', '27', '24', '21', '81', '2', '4', '77', '23', '18']
core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: saudi_arabian_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '44', '14', '18', '11', '4', '27', '81', '24', '22', '77', '55', '10', '31', '21', '2', '20', '63', '23', '16']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: australian_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '63', '55', '44', '16', '10', '31', '20', '22', '18', '77', '23', '27', '24', '4', '21', '81', '2']
core           INFO 	Loading data for Monaco Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: miami_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '14', '31', '44', '63', '16', '10', '55', '4', '81', '77', '21', '24', '23', '22', '11', '27', '2', '20', '18']
core           INFO 	Loading data for Spanish Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: monaco_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '44', '63', '11', '55', '18', '14', '31', '24', '10', '16', '22', '81', '21', '27', '23', '4', '20', '77', '2']
core           INFO 	Loading data for Canadian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: spanish_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '14', '44', '16', '55', '11', '23', '31', '18', '77', '81', '10', '4', '22', '27', '24', '20', '21', '63', '2']
core           INFO 	Loading data for British Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: canadian_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '81', '63', '11', '14', '23', '16', '55', '2', '77', '27', '18', '24', '22', '21', '10', '20', '31']
core           INFO 	Loading data for Hungarian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: british_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '11', '44', '81', '63', '16', '55', '14', '18', '23', '77', '3', '27', '22', '24', '20', '2', '31', '10']
core           INFO 	Loading data for Dutch Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: hungarian_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '14', '10', '11', '55', '44', '4', '23', '81', '31', '18', '27', '40', '77', '22', '20', '63', '24', '16', '2']
core           INFO 	Loading data for Italian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: dutch_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '44', '23', '4', '14', '77', '40', '81', '2', '24', '10', '18', '27', '20', '31', '22']
core           INFO 	Loading data for Singapore Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: italian_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '4', '44', '16', '1', '10', '81', '11', '40', '20', '23', '24', '27', '2', '14', '63', '77', '31', '22', '18']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: singapore_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '44', '55', '63', '14', '31', '10', '40', '22', '24', '27', '20', '23', '2', '18', '11', '77']
core           INFO 	Loading data for Mexico City Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: japanese_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '44', '16', '55', '4', '63', '3', '81', '23', '31', '10', '22', '27', '24', '77', '2', '18', '14', '20', '11']
core           INFO 	Loading data for Las Vegas Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: mexico_city_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '11', '31', '18', '55', '44', '63', '14', '81', '10', '23', '20', '3', '24', '2', '77', '22', '27', '4']
core           INFO 	Loading data for Abu Dhabi Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...


✅ Exported: las_vegas_grand_prix


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '63', '11', '4', '81', '14', '22', '44', '18', '3', '31', '10', '23', '27', '2', '24', '55', '77', '20']


✅ Exported: abu_dhabi_grand_prix


In [28]:
# Final combined export
if all_races:
    combined_df = pd.concat(all_races, ignore_index=True)
    combined_df.to_csv(combined_export_path, index=False)
    print(f"\n🎉 Combined season export → {combined_export_path}")
else:
    print("⚠️ No races were processed.")


🎉 Combined season export → ../data/processed/all_races_combined_2023.csv
