In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import fastf1 as ff1

ff1.Cache.enable_cache('cache')
ff1.__version__

'3.0.4'

In [3]:
import warnings
warnings.filterwarnings('ignore')
# ignore info messages
import logging
logging.getLogger('fastf1').setLevel(logging.WARNING)

In [10]:
def extract_number(s):
    if isinstance(s, str) and s.endswith('L'):
        return int(s.split()[0])
    else:
        return 0


def load_api_data(session):
    """
    Loads the data from the API and returns it as a pandas DataFrame.
    """
    laps_data, stream_data = ff1.core.api.timing_data(session.api_path)
    laps_data.dropna(subset=['Time'], inplace=True)
    
    api_data = pd.merge_asof(laps_data.sort_values('Time'), stream_data.sort_values('Time'), on='Time', by='Driver')

    merged_laps = pd.merge(session.laps, api_data[['Driver', 'Time', 'NumberOfLaps', 'NumberOfPitStops', 'GapToLeader', 'IntervalToPositionAhead']], left_on=['LapNumber', 'DriverNumber'], right_on=['NumberOfLaps', 'Driver'])

    merged_laps['LapsToLeader'] = merged_laps['GapToLeader'].map(extract_number)

    merged_laps.replace(regex=r'^LAP', value=0, inplace=True)
    merged_laps.replace(regex=r'^\+', value='', inplace=True)
    merged_laps.replace(regex=r'^(\d+)\sL$', value=np.nan, inplace=True)

    merged_laps.rename(columns={'Time_x': 'Time', 'Driver_x':'Driver'}, inplace=True)
    merged_laps = merged_laps.astype({'GapToLeader': 'float64', 'IntervalToPositionAhead': 'float64'})

    return merged_laps[['Time', 'Driver', 'DriverNumber', 'LapTime', 'LapNumber', 'PitOutTime',
       'PitInTime','Compound', 'TyreLife', 'Stint', 'LapStartTime', 'Team',
        'TrackStatus', 'IsAccurate', 'LapStartDate', 'NumberOfPitStops', 'Position', 'GapToLeader',
       'IntervalToPositionAhead', 'LapsToLeader']]


In [5]:
def get_empty_dataframe():
    return pd.DataFrame(
            columns=[
                'LapStartTime',
                'LapNumber',
                'LapTime',
                'DriverNumber',
                'Team',
                'Compound',
                'TyreLife',
                'TrackStatus',
                'Stint',
                'DistanceToDriverAhead',
                'DriverAhead',
                'PitStatus',
                'IsAccurate',
                'NumberOfPitStops',
                'Position',
                'GapToLeader',
                'IntervalToPositionAhead',
                'LapsToLeader',
                ])

In [6]:
# Exception that is raised when there is no telemetry data for a given lap.
class NoTelemetryException(Exception):
    pass

# Find the telemetry data (Driver Ahead, Distance to Driver Ahead) for the start of a lap and merge it with the lap data.


def get_telemetry_at_start_of_lap(lap, telemetry):
    # Find the telemetry data for the start of the lap by creating a 1 second window around the lap start time.
    mask = (telemetry['Date'] > lap['LapStartDate']) & (
        telemetry['Date'] <= lap['LapStartDate'] + pd.Timedelta(seconds=1))
    rows = telemetry.loc[mask]
    # If there is no telemetry data for the lap, raise an exception.
    if rows.empty:
        raise NoTelemetryException("No telemetry data found for lap " + str(
            lap['LapNumber']) + " of " + str(lap['Driver']) + " at " + str(lap['LapStartDate']))
    # There can be multiple telemetry samples in the 1 second window, so take the first one.
    row = rows.iloc[0]
    # Get the telemetry data we are interested in.
    telemetryInfo = row[['DriverAhead', 'DistanceToDriverAhead']]
    lapInfo = lap[['LapStartTime', 'LapNumber', 'LapTime',
                   'DriverNumber', 'Team', 'Compound', 
                   'TyreLife', 'Stint', 'TrackStatus',
                   'IsAccurate', 'NumberOfPitStops', 'Position',
                   'GapToLeader', 'IntervalToPositionAhead', 'LapsToLeader',]]  # Get the lap data we are interested in.
    # Convert the pit out time to seconds.
    lap['PitOutTime'] = lap['PitOutTime'].total_seconds() if lap['PitOutTime'] is not None else 0
    # Convert the pit in time to seconds.
    lap['PitInTime'] = lap['PitInTime'].total_seconds() if lap['PitInTime'] is not None else 0
    # Get the pit status.
    lapInfo['PitStatus'] = 'OutLap' if lap['PitOutTime'] > 0 else 'InLap' if lap['PitInTime'] > 0 else 'NoPit'
    telemetryInfo.rename("Telemetry", inplace=True)
    lapInfo.rename("Lap", inplace=True)
    # Merge the telemetry and lap data.
    merge = pd.concat([telemetryInfo, lapInfo])
    return merge


In [7]:
# Exception that is raised when there is no data for a given lap.
class NoLapException(Exception):
    pass

# Find all the laps for a given driver and merge the telemetry data for the start of each lap.
def get_laps_of_driver(driver, laps):
    driver_laps = laps.loc[laps['DriverNumber'] == driver]
    if len(driver_laps['DriverNumber']) == 0:
        raise NoLapException("No laps for driver " + driver)
    driver_laps_telemetry = driver_laps.get_car_data()
    if len(driver_laps_telemetry) == 0:
        raise NoLapException("No telemetry for driver " + driver)
    try:
        driver_laps_telemetry = driver_laps_telemetry.add_driver_ahead()
        transformed_laps = []
        for index, row in driver_laps.iterrows():
            try:
                transformed_laps.append(get_telemetry_at_start_of_lap(row, driver_laps_telemetry))
            except NoTelemetryException as e:
                print(e)
    except ValueError as e:
        print("error : ", e)
        print(driver_laps_telemetry)
        return get_empty_dataframe()
    return pd.DataFrame(transformed_laps)

# Add weather data that would have been available at the start of each lap.
def add_weather_to_laps(laps, weather):
    if laps.empty:
        raise NoLapException("Laps dataframe is empty")
    # Effectuer une jointure basée sur une plage de temps
    lapsWithWeather = pd.merge_asof(laps.sort_values('LapStartTime'), 
                                    weather.sort_values('Time'), 
                                    left_on='LapStartTime', 
                                    right_on='Time', 
                                    by=None, 
                                    direction='backward')

    return lapsWithWeather

In [8]:
import os

# Get the data for all the races within a given year and save it to csv files.
def get_season_data(year, save_all_races=True):
    schedule = ff1.get_event_schedule(year, include_testing=False)
    
    path = 'data/' + str(year)
    # Create a directory for the year if it doesn't exist
    if not os.path.exists(path):
        os.makedirs(path)

    df_season = get_empty_dataframe()
    for index, event in schedule.iterrows():
        if year == 2018 and event['RoundNumber'] < 3: # The 2 first races of 2018 do not have telemetry data
            continue
        print("Processing race round - ", event['RoundNumber'])
        race = event.get_race()
        race.load()
        df_event = get_empty_dataframe()
        api_laps = load_api_data(race)
        

        for driver in race.drivers:
            print("     Processing driver - ", driver)
            try:
                df_event = pd.concat([df_event, get_laps_of_driver(driver, api_laps)])
            except NoLapException as e:
                print(e)
    
        df_event['Track'] = event['Location']
        df_event['TotalLaps'] = api_laps['LapNumber'].max()
        df_event['Year'] = year
        try:
            df_event = add_weather_to_laps(df_event, race.weather_data)
            df_event = df_event.drop(columns=['Time'])
            # Convert the laptime column to total seconds if it's not a NAN value
            df_event['LapTime'] = df_event['LapTime'].apply(lambda x: x.total_seconds() if not pd.isna(x) else x)
            df_event['LapStartTime'] = df_event['LapStartTime'].apply(lambda x: x.total_seconds() if not pd.isna(x) else x)
        except NoLapException as e:
            print(e)
        if save_all_races:
            # Save it to a csv file
            df_event.to_csv(path + '/' + event.EventName.replace(' ', '_').lower() + '.csv', index=False)
        df_season = pd.concat([df_season, df_event], axis=0)
    # Save the data for the whole season
    df_season.to_csv(path + '/season.csv', index=False)
    return df_season

In [11]:
for year in range(2023, 2024):
    get_season_data(year)

Processing race round -  1
     Processing driver -  1
     Processing driver -  11
     Processing driver -  14
     Processing driver -  55
     Processing driver -  44
     Processing driver -  18
     Processing driver -  63
     Processing driver -  77
     Processing driver -  10
     Processing driver -  23
     Processing driver -  22
     Processing driver -  2
     Processing driver -  20
     Processing driver -  21
     Processing driver -  27
     Processing driver -  24
No telemetry data found for lap 25.0 of ZHO at 2023-03-05 15:44:15.728000
     Processing driver -  4
     Processing driver -  31
     Processing driver -  16
     Processing driver -  81
Processing race round -  2




     Processing driver -  11
     Processing driver -  1
     Processing driver -  14
     Processing driver -  63
     Processing driver -  44
     Processing driver -  55
     Processing driver -  16
     Processing driver -  31
     Processing driver -  10
     Processing driver -  20
     Processing driver -  22
     Processing driver -  27
     Processing driver -  24
     Processing driver -  21
     Processing driver -  81
     Processing driver -  2
     Processing driver -  4
     Processing driver -  77
     Processing driver -  23
     Processing driver -  18
Processing race round -  3




     Processing driver -  1
     Processing driver -  44
No telemetry data found for lap 57.0 of HAM at 2023-04-02 07:00:01.392000
     Processing driver -  14
     Processing driver -  18
     Processing driver -  11
     Processing driver -  4
     Processing driver -  27
     Processing driver -  81
     Processing driver -  24
     Processing driver -  22
     Processing driver -  77
     Processing driver -  55
     Processing driver -  10
     Processing driver -  31
     Processing driver -  21
     Processing driver -  2
No telemetry data found for lap 8.0 of SAR at 2023-04-02 05:15:33.477000
     Processing driver -  20
     Processing driver -  63
     Processing driver -  23
     Processing driver -  16
No laps for driver 16
Processing race round -  4




     Processing driver -  11
     Processing driver -  1
     Processing driver -  16
     Processing driver -  14
     Processing driver -  55
     Processing driver -  44
     Processing driver -  18
     Processing driver -  63
     Processing driver -  4
     Processing driver -  22
     Processing driver -  81
     Processing driver -  23
     Processing driver -  20
     Processing driver -  10
     Processing driver -  31
     Processing driver -  2
No telemetry data found for lap 39.0 of SAR at 2023-04-30 12:15:14.676000
     Processing driver -  27
     Processing driver -  77
     Processing driver -  24
     Processing driver -  21
Processing race round -  5
     Processing driver -  1
     Processing driver -  11
     Processing driver -  14
     Processing driver -  63
     Processing driver -  55
     Processing driver -  44
     Processing driver -  16
     Processing driver -  10
     Processing driver -  31
     Processing driver -  20
No telemetry data found for lap 1



     Processing driver -  1
     Processing driver -  14
     Processing driver -  44
     Processing driver -  16
     Processing driver -  55
     Processing driver -  11
     Processing driver -  23
     Processing driver -  31
     Processing driver -  18
     Processing driver -  77
     Processing driver -  81
     Processing driver -  10
     Processing driver -  4
     Processing driver -  22
     Processing driver -  27
     Processing driver -  24
     Processing driver -  20
     Processing driver -  21
     Processing driver -  63
     Processing driver -  2




Processing race round -  9




DataNotLoadedError: The data you are trying to access has not been loaded yet. See `Session.load`

'3.0.4'