In [2]:
# we import.
# we enable caching.
# we load seasaon and races.
# we create two empty lists for all laps and for the messages for trakc status.


import fastf1
import pandas as pd





seasons = [2022, 2023]
races_per_season = {2022: 21, 2023: 21}

all_laps = []
all_messages = []


In [None]:
import fastf1
import pandas as pd

fastf1.Cache.enable_cache('cache')

seasons = [2023]
races_per_season = {2023: 22}

all_laps = []
all_messages = []
all_telemetry = []

for year in seasons:
    for round_num in range(1, races_per_season[year] + 1):
        try:
            session = fastf1.get_session(year, round_num, 'R')
            session.load(laps=True, telemetry=True, messages=True, weather=True)
        except Exception as e:
            print(f"Skipped {year} Round {round_num}: {e}")
            continue

        laps = session.laps.copy()
        messages = session.race_control_messages.copy()
        weather = session.weather_data.copy()

        # Merge weather into laps
        if not laps.empty and not weather.empty:
            laps = pd.merge_asof(
                laps.sort_values('LapStartTime'),
                weather.sort_values('Time'),
                left_on='LapStartTime',
                right_on='Time',
                direction='backward'
            )

        # Add year and round info
        laps['Year'] = year
        laps['Round'] = round_num
        messages['Year'] = year
        messages['Round'] = round_num

        # -----------------------------
        # Resample telemetry every 20 seconds
        # -----------------------------
        for idx, lap in session.laps.iterlaps():
            try:
                lap_telem = lap.get_telemetry()
                if lap_telem.empty:
                    continue

                # Create a lap-relative time column in seconds
                lap_telem['LapTimeSec'] = lap_telem['Time'] - lap['LapStartTime']
                lap_telem.set_index('LapTimeSec', inplace=True)

                # Resample in 20-second windows
                lap_telem_resampled = lap_telem.resample('20S').agg({
                    'Speed': 'mean',
                    'Throttle': 'mean',
                    'Brake': 'max',
                    'nGear': 'max',
                    'Distance': 'max',
                    'RelativeDistance': 'max'
                })

                # Add identifying info
                lap_telem_resampled['LapNumber'] = lap.LapNumber
                lap_telem_resampled['Driver'] = lap['Driver']
                lap_telem_resampled['Year'] = year
                lap_telem_resampled['Round'] = round_num

                all_telemetry.append(lap_telem_resampled)

            except Exception as e:
                print(f"Telemetry failed for lap {idx}: {e}")

        all_laps.append(laps)
        all_messages.append(messages)

# Combine all data
all_laps_df = pd.concat(all_laps, ignore_index=True)
all_messages_df = pd.concat(all_messages, ignore_index=True)
all_telemetry_df = pd.concat(all_telemetry, ignore_index=False)  # Keep index as time

# Save to CSV
all_laps_df.to_csv('all_races_laps.csv', index=False)
all_messages_df.to_csv('race_control_messages.csv', index=False)
all_telemetry_df.to_csv('telemetry_20s.csv')

print(f"Saved all laps ({len(all_laps_df)})")
print(f"Saved all messages ({len(all_messages_df)})")
print(f"Saved all telemetry segments ({len(all_telemetry_df)})")


In [3]:
# drop unnecesary columns 
cols_to_drop = [ 'SpeedFL', 
                'IsPersonalBest','PitOutTime', 'PitInTime']
all_laps_df.drop(columns=cols_to_drop, inplace=True, errors='ignore')
all_laps_df['AvgSectorSpeed'] = all_laps_df[['SpeedI1', 'SpeedI2', 'SpeedST']].mean(axis=1)
# Save to CSV
all_laps_df.to_csv('all_races_laps.csv', index=False)
all_messages_df.to_csv('race_control_messages.csv', index=False)
print(all_laps_df.columns)
print(f"Saved all laps to 'all_races_laps.csv' ({len(all_laps_df)} rows).")
print(f"Saved all race control messages to 'race_control_messages.csv' ({len(all_messages_df)} rows).")


NameError: name 'all_laps_df' is not defined

In [45]:
import fastf1
import pandas as pd

fastf1.Cache.enable_cache('cache')

year = 2023
round_num = 1

# Load session data
session = fastf1.get_session(year, round_num, 'R')
session.load(laps=True, telemetry=True)

# Get all laps, weather, and race control messages data
laps = session.laps.copy()  # All laps
weather = session.weather_data.copy()  # Weather data
messages = session.race_control_messages.copy()  # Race control messages

# DataFrame to store all telemetry data
all_laps_data = pd.DataFrame()

# --- Loop through all laps ---
for i, lap in laps.iterrows():
    # Get telemetry for each lap
    lap_telem = lap.get_telemetry()
    lap_telem['SessionTime'] = session.session_start_time + lap_telem['SessionTime']

    # --- Merge telemetry with weather data ---
    if not weather.empty:
        lap_telem = pd.merge_asof(
            lap_telem.sort_values('SessionTime'),
            weather.sort_values('Time'),
            left_on='SessionTime',
            right_on='Time',
            direction='backward'
        )

    # --- Merge race control messages ---
    if not messages.empty:
        # Ensure both 'SessionTime' and 'Time' are datetime types
        lap_telem['SessionTime'] = pd.to_datetime(lap_telem['SessionTime'])
        messages['Time'] = pd.to_datetime(messages['Time'])

        lap_telem = pd.merge_asof(
            lap_telem.sort_values('SessionTime'),
            messages.sort_values('Time'),
            left_on='SessionTime',
            right_on='Time',
            direction='backward'
        )

    # --- Add LapNumber to telemetry ---
    lap_telem['LapNumber'] = lap['LapNumber']  # Add the LapNumber to telemetry data

    # --- Merge lap-level data ---
    lap_data = laps[['LapNumber', 'Driver', 'LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time']].copy()

    # Sort lap data by 'LapNumber' before merging
    lap_data_sorted = lap_data.sort_values('LapNumber')

    # Merge lap-level data (like LapNumber, Driver, etc.) into the telemetry-weather merged data
    lap_telem = pd.merge_asof(
        lap_telem.sort_values('SessionTime'),
        lap_data_sorted,  # Now lap_data is a sorted DataFrame
        left_on='LapNumber',
        right_on='LapNumber',
        direction='backward'
    )

    # --- Ensure correct sorting of data ---
    lap_telem = lap_telem.sort_values(by=['LapNumber', 'SessionTime'])  # Sort first by LapNumber, then SessionTime

    # --- Drop unnecessary columns like Time_x and Time_y ---
    lap_telem.drop(columns=['Time_x', 'Time_y'], errors='ignore', inplace=True)

    # --- Set SessionTime as the index ---
    lap_telem = lap_telem.set_index('SessionTime')

    # --- Resample in 5-second windows ---
    lap_telem_resampled = lap_telem.resample('5S').agg({
        'Driver': 'first',  # Ensure 'Driver' info is correctly preserved
        'DriverAhead': 'first',  # Add DriverAhead to the resampled data
        'Speed': 'mean',
        'Throttle': 'mean',
        'Brake': 'max',
        'Distance': 'max',
        'RelativeDistance': 'max',  # Keep RelativeDistance for tracking relative position
        'LapNumber': 'first',  # Retain LapNumber
        'LapTime': 'first',
        'Sector1Time': 'first',
        'Sector2Time': 'first',
        'Sector3Time': 'first',
        
        # Weather data aggregation
        'AirTemp': 'mean',
        'Humidity': 'mean',
        'Pressure': 'mean',
        'Rainfall': 'mean',
        'TrackTemp': 'mean',
        'WindDirection': 'mean',
        'WindSpeed': 'mean',
        
        # Race control messages
        'Message': 'first',  # Keep the first message in the 5-second window (or adjust based on your needs)
    })

    # Append the resampled data for each lap to the final DataFrame
    all_laps_data = pd.concat([all_laps_data, lap_telem_resampled])

# Save all the merged and resampled telemetry data for all laps
all_laps_data.to_csv('all_drivers_lap_telem_resampled_with_weather_and_messages.csv', index=True)

# Output the resampled data for all drivers
print(all_laps_data)


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '55', '44', '18', '63', '77', '10', '23', '22', '2', '20', '21', '27', '24', '4', '31', '16', '81']


TypeError: dtype timedelta64[ns] cannot be converted to datetime64[ns]