In [None]:
import logging
import fastf1
import pandas as pd

logging.getLogger('fastf1').setLevel(logging.CRITICAL)
fastf1.Cache.enable_cache('cache')

season = 2023
races_per_season = {2023: 22}

all_laps = []
all_weather = []
all_trackStatus = []
all_telemetry = []
all_messages = []

for round_num in range(1, races_per_season[season] + 1):
    print(f"Loading Season {season} Round {round_num}")
    session = fastf1.get_session(season, round_num, "R")
    session.load(laps=True, telemetry=True, weather=True, messages=True)

    laps = session.laps.copy()
    laps["Season"] = season
    laps["Round"] = round_num
    all_laps.append(laps)

    weather = session.weather_data.copy()
    weather["Season"] = season
    weather["Round"] = round_num
    all_weather.append(weather)

    trackStatus = session.track_status.copy()
    trackStatus["Season"] = season
    trackStatus["Round"] = round_num
    all_trackStatus.append(trackStatus)

    messages = session.race_control_messages.copy()
    messages["Season"] = season
    messages["Round"] = round_num
    all_messages.append(messages)

    for i, lap in laps.iterrows():
        try:
            lap_telem = lap.get_telemetry()
            lap_telem = lap_telem.set_index('SessionTime')
            lap_telem["Season"] = season
            lap_telem["Round"] = round_num
            lap_telem["Driver"] = lap["Driver"]
            lap_telem["LapNumber"] = lap["LapNumber"]
            lap_telem["LapTime"] = lap["LapTime"]
            lap_telem["TyreLife"] = lap["TyreLife"] if "TyreLife" in lap else None

            agg_dict = {
                'Season': 'first',
                'Round': 'first',
                'Driver': 'first',
                'LapNumber': 'first',
                'LapTime': 'first',
                'Distance': 'max',
                'RelativeDistance': 'max',
                'TyreLife': 'first',
                'Speed': 'mean',
                'Throttle': 'mean',
                'Brake': 'max',
            }

            lap_telem_resampled = lap_telem.resample('3s').agg(agg_dict)
            lap_telem_resampled = lap_telem_resampled.reset_index()
            all_telemetry.append(lap_telem_resampled)

        except Exception as e:
            print(f"Could not load telemetry for Lap {lap['LapNumber']} ({lap['Driver']}): {e}")

laps_all = pd.concat(all_laps, ignore_index=True)
laps_all = laps_all[['Season', 'Round', 'Driver', 'LapNumber', 'LapTime', 'TyreLife','Sector1Time','Sector2Time','Sector3Time']]

weather_all = pd.concat(all_weather, ignore_index=True)
trackStatus_all = pd.concat(all_trackStatus, ignore_index=True)
telemetry_all = pd.concat(all_telemetry, ignore_index=True)
messages_all = pd.concat(all_messages, ignore_index=True)

laps_all_sorted = laps_all.sort_values('LapNumber')

telemetry_merged = telemetry_all.merge(
    laps_all_sorted,
    on=['Season', 'Round', 'Driver', 'LapNumber'],
    how='left'
)

weather_all_sorted = weather_all.sort_values('SessionTime')
telemetry_merged_sorted = telemetry_merged.sort_values('SessionTime')
messages_all_sorted = messages_all.sort_values('Time')

telemetry_with_weather = pd.merge_asof(
    telemetry_merged_sorted,
    weather_all_sorted,
    on='SessionTime',
    direction='backward',
    by=['Season', 'Round']
)

telemetry_with_messages = pd.merge_asof(
    telemetry_with_weather,
    messages_all_sorted[['Time', 'Message', 'Type']],
    left_on='SessionTime',
    right_on='Time',
    direction='backward',
    by=['Season', 'Round']
)

trackStatus_all_sorted = trackStatus_all.sort_values('SessionTime')

full_data = pd.merge_asof(
    telemetry_with_messages,
    trackStatus_all_sorted,
    on='SessionTime',
    direction='backward',
    by=['Season', 'Round']
)

full_data.to_csv('f1_2023_race_data_with_messages_3s.csv', index=False)
print("âœ… Full merged dataset with race control messages saved to 'f1_2023_race_data_with_messages_3s.csv'")


Loading Season 2023 Round 1
Loading Season 2023 Round 2
Loading Season 2023 Round 3


In [None]:
import fastf1
import pandas as pd

fastf1.Cache.enable_cache('cache')

year = 2023
round_num = 1

print(f"Loading session: {year}, Round {round_num}, Race")
session = fastf1.get_session(year, round_num, 'R')
session.load(laps=True, telemetry=True, weather=True)
print("Session data loaded.")

laps = session.laps.copy()
weather = session.weather_data.copy()
track_status = session.track_status.copy()  # <-- NEW

# Normalize column names for track_status
track_status.columns = [col.strip() for col in track_status.columns]
if 'Status' in track_status.columns:
    track_status.rename(columns={'Status': 'TrackStatus'}, inplace=True)
elif 'TrackStatus' not in track_status.columns:
    # Fallback â€” create an empty TrackStatus column if missing
    track_status['TrackStatus'] = None

all_laps_data = pd.DataFrame()

for i, lap in laps.iterrows():
    try:
        lap_telem = lap.get_telemetry()
    except Exception as e:
        print(f"Warning: Could not get telemetry for lap {lap['LapNumber']} by {lap['Driver']}. Error: {e}")
        continue

    if not lap_telem.empty:
        # Merge weather
        if not weather.empty:
            lap_telem = pd.merge_asof(
                lap_telem.sort_values('SessionTime'),
                weather.sort_values('Time'),
                left_on='SessionTime',
                right_on='Time',
                direction='backward',
                suffixes=('_telem', '_weather')
            )

        # Merge track status
        if not track_status.empty:
            lap_telem = pd.merge_asof(
                lap_telem.sort_values('SessionTime'),
                track_status.sort_values('Time'),
                left_on='SessionTime',
                right_on='Time',
                direction='backward'
            )

        # Add lap info
        lap_telem['Driver'] = lap['Driver']
        lap_telem['LapNumber'] = lap['LapNumber']
        lap_telem['LapTime'] = lap['LapTime']
        lap_telem['Sector1Time'] = lap['Sector1Time']
        lap_telem['Sector2Time'] = lap['Sector2Time']
        lap_telem['Sector3Time'] = lap['Sector3Time']

        if 'Compound' in lap:
            lap_telem['TyreCompound'] = lap['Compound']
        if 'TyreLife' in lap:
            lap_telem['TyreLife'] = lap['TyreLife']

        lap_telem = lap_telem.set_index('SessionTime')

        # Create aggregation dictionary dynamically
        agg_dict = {
    'Driver': 'first',
    'LapNumber': 'first',
    'LapTime': 'first',
    'Sector1Time': 'first',
    'Sector2Time': 'first',
    'Sector3Time': 'first',
    'Distance': 'max',
    'RelativeDistance': 'max',
    'TyreLife': 'first',
    'Speed': 'mean',
    'Throttle': 'mean',
    'Brake': 'max',
    'AirTemp': 'mean',
    'Humidity': 'mean',
    'Pressure': 'mean',
    'TrackTemp': 'mean',
    'WindDirection': 'mean',
    'WindSpeed': 'mean'
}

if 'TrackStatus' in lap_telem.columns:
    agg_dict['TrackStatus'] = 'last'

lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict)

# Then separately compute rainfall min & max
rainfall_stats = lap_telem.resample('3S')['Rainfall'].agg(
    RainfallMin='min',
    RainfallMax='max'
)

# Combine them together
lap_telem_resampled = lap_telem_resampled.join(rainfall_stats)

lap_telem_resampled = lap_telem_resampled.dropna(how='all')

all_laps_data = pd.concat([all_laps_data, lap_telem_resampled])

# Reset and reorder columns
all_laps_data = all_laps_data.reset_index()
cols = all_laps_data.columns.tolist()
cols.insert(0, cols.pop(cols.index('Driver')))
all_laps_data = all_laps_data[cols]

all_laps_data.to_csv('all_drivers_lap_telem_with_trackstatus.csv', index=False)
print("âœ… Data saved to 'all_drivers_lap_telem_with_trackstatus.csv'.")


Loading session: 2023, Round 1, Race


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '55', '44', '18', '63', '77', '10', '23', '22', '2', '20', '21', '27', '24', '4', '31', '16', '81']


Session data loaded.


  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict).dropna(how='all')
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict).dropna(how='all')
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict).dropna(how='all')
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict).dropna(how='all')
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict).dropna(how='all')
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict).dropna(how='all')
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict).dropna(how='all')
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict).dropna(how='all')
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict).dropna(how='all')
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict).dropna(how='all')
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict).dropna(how='all')
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict).dropna(how='all')
  lap_telem_resampled = lap_

âœ… Data saved to 'all_drivers_lap_telem_with_trackstatus.csv'.


In [18]:
import fastf1
import pandas as pd

# Enable cache
fastf1.Cache.enable_cache('cache')

year = 2023
round_num = 1

print(f"Loading session: {year}, Round {round_num}, Race")
session = fastf1.get_session(year, round_num, 'R')
session.load(laps=True, telemetry=True, weather=True)
print("Session data loaded.")

laps = session.laps.copy()
weather = session.weather_data.copy()
track_status = session.track_status.copy()  # <-- NEW

# Normalize column names for track_status
track_status.columns = [col.strip() for col in track_status.columns]
if 'Status' in track_status.columns:
    track_status.rename(columns={'Status': 'TrackStatus'}, inplace=True)
elif 'TrackStatus' not in track_status.columns:
    # Fallback â€” create an empty TrackStatus column if missing
    track_status['TrackStatus'] = None

all_laps_data = pd.DataFrame()

for i, lap in laps.iterrows():
    try:
        lap_telem = lap.get_telemetry()
    except Exception as e:
        print(f"Warning: Could not get telemetry for lap {lap['LapNumber']} by {lap['Driver']}. Error: {e}")
        continue

    if not lap_telem.empty:
        # Merge weather
        if not weather.empty:
            lap_telem = pd.merge_asof(
                lap_telem.sort_values('SessionTime'),
                weather.sort_values('Time'),
                left_on='SessionTime',
                right_on='Time',
                direction='backward',
                suffixes=('_telem', '_weather')
            )

        # Merge track status
        if not track_status.empty:
            lap_telem = pd.merge_asof(
                lap_telem.sort_values('SessionTime'),
                track_status.sort_values('Time'),
                left_on='SessionTime',
                right_on='Time',
                direction='backward'
            )

        # Add lap info
        lap_telem['Driver'] = lap['Driver']
        lap_telem['LapNumber'] = lap['LapNumber']
        lap_telem['LapTime'] = lap['LapTime']
        lap_telem['Sector1Time'] = lap['Sector1Time']
        lap_telem['Sector2Time'] = lap['Sector2Time']
        lap_telem['Sector3Time'] = lap['Sector3Time']

        if 'Compound' in lap:
            lap_telem['TyreCompound'] = lap['Compound']
        if 'TyreLife' in lap:
            lap_telem['TyreLife'] = lap['TyreLife']

        lap_telem = lap_telem.set_index('SessionTime')

        # Base aggregation dictionary
        agg_dict = {
            'Driver': 'first',
            'LapNumber': 'first',
            'LapTime': 'first',
            'Sector1Time': 'first',
            'Sector2Time': 'first',
            'Sector3Time': 'first',
            'Distance': 'max',
            'RelativeDistance': 'max',
            'TyreLife': 'first',
            'Speed': 'mean',
            'Throttle': 'mean',
            'Brake': 'max',
            'AirTemp': 'mean',
            'Humidity': 'mean',
            'Pressure': 'mean',
            'TrackTemp': 'mean',
            'WindDirection': 'mean',
            'WindSpeed': 'mean'
        }

        # Only include TrackStatus if it exists
        if 'TrackStatus' in lap_telem.columns:
            agg_dict['TrackStatus'] = 'last'

        # Aggregate main telemetry
        lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict)

        # Compute Rainfall min and max separately
        if 'Rainfall' in lap_telem.columns:
            rainfall_stats = lap_telem.resample('3S')['Rainfall'].agg(
                RainfallMin='min',
                RainfallMax='max'
            )
            # Join back to main DataFrame
            lap_telem_resampled = lap_telem_resampled.join(rainfall_stats)

        lap_telem_resampled = lap_telem_resampled.dropna(how='all')

        all_laps_data = pd.concat([all_laps_data, lap_telem_resampled])

# Reset and reorder columns
all_laps_data = all_laps_data.reset_index()
cols = all_laps_data.columns.tolist()
cols.insert(0, cols.pop(cols.index('Driver')))
all_laps_data = all_laps_data[cols]

# Save final dataset
all_laps_data.to_csv('all_drivers_lap_telem_with_trackstatus.csv', index=False)
print("âœ… Data saved to 'all_drivers_lap_telem_with_trackstatus.csv'.")


Loading session: 2023, Round 1, Race


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '55', '44', '18', '63', '77', '10', '23', '22', '2', '20', '21', '27', '24', '4', '31', '16', '81']
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict

Session data loaded.


  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict)
  rainfall_stats = lap_telem.resample('3S')['Rainfall'].agg(
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict)
  rainfall_stats = lap_telem.resample('3S')['Rainfall'].agg(
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict)
  rainfall_stats = lap_telem.resample('3S')['Rainfall'].agg(
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict)
  rainfall_stats = lap_telem.resample('3S')['Rainfall'].agg(
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict)
  rainfall_stats = lap_telem.resample('3S')['Rainfall'].agg(
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict)
  rainfall_stats = lap_telem.resample('3S')['Rainfall'].agg(
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict)
  rainfall_stats = lap_telem.resample('3S')['Rainfall'].agg(
  lap_telem_resampled = lap_telem.resample('3S').agg(agg_dict)
  rainfall_stats = lap_telem.resample('3S')['Rainfall'].agg(
  lap_te

âœ… Data saved to 'all_drivers_lap_telem_with_trackstatus.csv'.
