In [1]:
import fastf1 as ff1
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import matplotlib.patches as mpatches
from race import *
from qualifying import *
from constants import *
from practice import *
from race_sim import *
from scipy import stats
from scipy.optimize import curve_fit

# Enable the cache by providing the name of the cache folder
ff1.Cache.enable_cache('cache')

Get the race data for each driver

We want to get each driver's average stint and laptime standard deviation for each tyre and in general

In [108]:
driver_stats_df = pd.DataFrame(
    columns=['Driver', 
             'SoftTimeAll', 'SoftLapsAll', 'SoftSdAll', 'SoftStintsAll', 'SoftAvgTimeAll', 
             'SoftTimeRep', 'SoftLapsRep', 'SoftSdRep', 'SoftStintsRep', 'SoftAvgTimeRep',
             'MediumTimeAll', 'MediumLapsAll', 'MediumSdAll', 'MediumStintsAll', 'MediumAvgTimeAll', 
             'MediumTimeRep', 'MediumLapsRep', 'MediumSdRep', 'MediumStintsRep', 'MediumAvgTimeRep', 
             'HardTimeAll', 'HardLapsAll', 'HardSdAll', 'HardStintsAll', 'HardAvgTimeAll', 
             'HardTimeRep', 'HardLapsRep', 'HardSdRep', 'HardStintsRep', 'HardAvgTimeRep',
             'TotalTimeAll', 'TotalLapsAll', 'TotalSdAll', 'TotalStintsAll', 'TotalAvgTimeAll',
            'TotalTimeRep', 'TotalLapsRep', 'TotalSdRep', 'TotalStintsRep', 'TotalAvgTimeRep'])
driver_stats_df.Driver = ['LEC', 'SAI', 'HAM', 'RUS', 'MAG', 'BOT', 'OCO', 'TSU', 'ALO', 'ZHO', 'MSC', 'STR', 'ALB', 'RIC', 'NOR', 'LAT', 'HUL', 'PER', 'VER', 'GAS', 'VET', 'DEV']
driver_stats_df = driver_stats_df.fillna(0)
driver_stats_df = driver_stats_df.astype('object')

# driver_stats_df['SoftSdAll'] = [[]] * 20
# driver_stats_df['SoftSdAll'] = driver_stats_df['SoftSdAll'].astype('object')
# driver_stats_df['SoftSdRep'] = [[]] * 20
# driver_stats_df['MediumSdAll'] = [[]] * 20
# driver_stats_df['MediumSdRep'] = [[]] * 20
# driver_stats_df['HardSdAll'] = [[]] * 20
# driver_stats_df['HardSdRep'] = [[]] * 20
# driver_stats_df['TotalSdAll'] = [[]] * 20
# driver_stats_df['TotalSdRep'] = [[]] * 20


# driver_stats_df

In [109]:
# local variables for calculating the standard deviation of lap times for each tyres
# soft_sd_all_dict = {}
# soft_sd_rep_dict = {}
# medium_sd_all_dict = {}
# medium_sd_rep_dict = {}
# hard_sd_all_dict = {}
# hard_sd_rep_dict = {}
# total_sd_all_dict = {}
# total_sd_rep_dict = {}



for grand_prix_number in range(1, 2):
    gp = ff1.get_session(2022, grand_prix_number, 'R')
    gp.load()
    laps = gp.laps
    convert_laptime_to_seconds(laps)
    
    for driver in get_all_driver_names(laps):
        # We will divide the laps into two (overlapping) groups, one containing all laps recorded and another with representative laps only.
        
        # All laps
        all_laps = laps.pick_driver(driver)
        
        # Calculate the stint counts
        stint_tuples_all = get_stint_lengths(all_laps.Compound.tolist())
        for stint in stint_tuples_all:
            if stint[0] not in DRY_TYRES:
                continue
            
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, stint[0].title() + 'StintsAll'] += 1
        
        
        for compound in all_laps.Compound.unique():
            if compound not in DRY_TYRES:
                continue
        
            # Calculate the total lap time and laps done for each compound
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, compound.title() + 'TimeAll'] += all_laps.groupby('Compound').sum().loc[compound, 'LapTime']
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, compound.title() + 'LapsAll'] += all_laps.groupby('Compound').size()[compound]
                
            # Calculate the standard deviation for each tyre compound
            laptimes = all_laps[all_laps['Compound'] == compound].LapTime.dropna().tolist()
            
            driver_index_list = driver_stats_df.index[driver_stats_df['Driver'] == driver].tolist()
            if len(driver_index_list) > 0:
                driver_index = driver_stats_df.index[driver_stats_df['Driver'] == driver].tolist()[0]
            
            if driver_stats_df.at[driver_index, compound.title() + 'SdAll'] == 0:
                driver_stats_df.at[driver_index, compound.title() + 'SdAll'] = laptimes
            else:
                driver_stats_df.at[driver_index, compound.title() + 'SdAll'] += laptimes
            
            
#             print( driver_stats_df.loc[driver_stats_df['Driver'] == driver, compound.title() + 'SdAll'])
#             gg  = driver_stats_df.loc[driver_stats_df['Driver'] == driver, compound.title() + 'SdAll'].tolist() + (laptimes)
#             print(gg)
            
        
        # Representative laps
        rep_laps = laps.pick_driver(driver).pick_accurate().pick_wo_box().pick_track_status('1')
        
        # Calculate the stint counts
        stint_tuples_rep = get_stint_lengths(rep_laps.Compound.tolist())
        for stint in stint_tuples_rep:
            if stint[0] not in DRY_TYRES:
                continue
                
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, stint[0].title() + 'StintsRep'] += 1
        
        
        for compound in rep_laps.Compound.unique():
            if compound not in DRY_TYRES:
                continue
            
            # Calculate the total lap time and laps done for each compound
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, compound.title() + 'TimeRep'] += rep_laps.groupby('Compound').sum().loc[compound, 'LapTime']
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, compound.title() + 'LapsRep'] += rep_laps.groupby('Compound').size()[compound]
        
            # Calculate the standard deviation for each tyre compound
            laptimes = rep_laps[rep_laps['Compound'] == compound].LapTime.dropna().tolist()
            
            driver_index_list = driver_stats_df.index[driver_stats_df['Driver'] == driver].tolist()
            if len(driver_index_list) > 0:
                driver_index = driver_stats_df.index[driver_stats_df['Driver'] == driver].tolist()[0]
            
            if driver_stats_df.at[driver_index, compound.title() + 'SdRep'] == 0:
                driver_stats_df.at[driver_index, compound.title() + 'SdRep'] = laptimes
            else:
                driver_stats_df.at[driver_index, compound.title() + 'SdRep'] += laptimes
        
# Calculate the average time for each compound
for compound in DRY_TYRES:
    for type in {'All', 'Rep'}:
        time = compound.title() + 'Time' + type
        laps = compound.title() + 'Laps' + type
        avg = compound.title() + 'AvgTime' + type
        
        try:
            driver_stats_df[avg] = driver_stats_df[time] / driver_stats_df[laps]
        except:
            driver_stats_df[avg] = np.inf

# Calculate the total time statistics
for type in {'All', 'Rep'}:
    time = 'Total' + 'Time' + type
    laps = 'Total' + 'Laps' + type
    avg = 'Total' + 'AvgTime' + type
        
    for compound in DRY_TYRES:
        driver_stats_df[time] += driver_stats_df[compound.title() + 'Time' + type]
        driver_stats_df[laps] += driver_stats_df[compound.title() + 'Laps' + type]
        
        
        try:
            driver_stats_df[avg] = driver_stats_df[time] / driver_stats_df[laps]
        except:
            driver_stats_df[avg] = np.inf
            

core           INFO 	Loading data for Bahrain Grand Prix - Race [v2.2.9]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for weather_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '44', '63', '20', '77', '31', '22', '14', '24', '47', '18', '23', '3', '4', '6', '27', '11', '1', '10']


In [110]:
# driver_stats_df['TotalSdAll'] = driver_stats_df.loc[:, ['SoftSdAll', 'MediumSdAll', 'HardSdAll']].apply(lambda x: )

# list(driver_stats_df['SoftSdAll']) + list(driver_stats_df['MediumSdAll']) + list(driver_stats_df['HardSdAll'])


for driver in get_all_driver_names(driver_stats_df):
    for compound in DRY_TYRES:
        for type in {'All', 'Rep'}:
            laptime_list = driver_stats_df.loc[driver_stats_df['Driver'] == driver, compound.title() + 'Sd' + type].tolist()
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, compound.title() + 'Sd' + type] = np.array(laptime_list).std()


In [120]:
# driver_stats_df[['Driver', 'TotalAvgTimeAll']].sort_values(by=['TotalAvgTimeAll'])

%store driver_stats_df

Stored 'driver_stats_df' (DataFrame)


In [111]:
driver_stats_df

Unnamed: 0,Driver,SoftTimeAll,SoftLapsAll,SoftSdAll,SoftStintsAll,SoftAvgTimeAll,SoftTimeRep,SoftLapsRep,SoftSdRep,SoftStintsRep,...,TotalTimeAll,TotalLapsAll,TotalSdAll,TotalStintsAll,TotalAvgTimeAll,TotalTimeRep,TotalLapsRep,TotalSdRep,TotalStintsRep,TotalAvgTimeRep
0,LEC,3938.309,42,10.880754,2,inf,3231.843,33,1.301201,2,...,5439.304,57,0,0,inf,4491.362,46,0,0,inf
1,SAI,4668.802,46,12.925293,2,inf,3440.487,35,1.268301,2,...,5758.176,57,0,0,inf,4313.061,44,0,0,inf
2,HAM,2463.791,24,15.027259,2,inf,1475.801,15,1.682563,2,...,5761.704,57,0,0,inf,4226.759,43,0,0,inf
3,RUS,2596.43,27,11.812134,2,inf,1778.501,18,1.606227,2,...,5610.801,57,0,0,inf,4340.046,44,0,0,inf
4,MAG,4515.444,45,9.297635,2,inf,3579.003,36,1.394147,2,...,5766.005,57,0,0,inf,4459.548,45,0,0,inf
5,BOT,2620.038,26,12.596933,2,inf,1796.167,18,1.891295,2,...,5760.508,57,0,0,inf,4473.594,45,0,0,inf
6,OCO,2839.734,28,10.886247,2,inf,1793.598,18,1.53538,2,...,5766.837,57,0,0,inf,4269.994,43,0,0,inf
7,TSU,4345.732,43,10.583644,2,inf,3078.674,31,1.391501,2,...,5765.842,57,0,0,inf,4273.99,43,0,0,inf
8,ALO,2635.136,26,10.639374,2,inf,1591.643,16,1.789126,2,...,5770.8,57,0,0,inf,4180.867,42,0,0,inf
9,ZHO,2567.92,27,7.147836,2,inf,1800.819,18,1.806846,2,...,5609.646,57,0,0,inf,4376.336,44,0,0,inf


In [147]:
per = laps.pick_driver('PER')
per[per['Compound'] == SOFT].LapTime.dropna().tolist()

[99.09200000000001,
 99.47300000000001,
 98.741,
 98.861,
 99.218,
 99.26400000000001,
 99.825,
 100.215,
 99.80600000000001,
 99.93100000000001,
 99.754,
 99.992,
 100.03500000000001,
 102.44500000000001,
 117.05600000000001,
 96.302,
 96.51100000000001,
 97.119,
 96.75500000000001,
 96.98700000000001,
 97.17800000000001,
 97.63900000000001,
 97.25500000000001,
 100.22800000000001,
 116.521,
 103.56200000000001,
 128.4,
 132.05,
 136.085,
 145.098,
 141.747,
 96.38900000000001,
 96.08900000000001,
 96.113,
 96.75500000000001,
 96.436,
 96.286]

In [148]:
[[]]*20

[[],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 []]

In [111]:
gp = ff1.get_session(2022, 1, 'R')
gp.load()

core           INFO 	Loading data for Bahrain Grand Prix - Race [v2.2.9]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for weather_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '44', '63', '20', '77', '31', '22', '14', '24', '47', '18', '23', '3', '4', '6', '27', '11', '1', '10']


In [113]:
laps = gp.laps
convert_laptime_to_seconds(laps)
laps

Unnamed: 0,Time,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,...,IsPersonalBest,Compound,TyreLife,FreshTyre,LapStartTime,Team,Driver,TrackStatus,IsAccurate,LapStartDate
0,0 days 01:04:14.256000,16,,1,1,0 days 00:24:54.765000,NaT,NaT,0 days 00:00:42.120000,0 days 00:00:23.984000,...,False,SOFT,1.0,True,0 days 01:02:34.872000,Ferrari,LEC,1,False,2022-03-20 15:03:34.889
1,0 days 01:05:52.109000,16,97.853,2,1,NaT,NaT,0 days 00:00:31.582000,0 days 00:00:42.159000,0 days 00:00:24.112000,...,False,SOFT,2.0,True,0 days 01:04:14.256000,Ferrari,LEC,1,True,2022-03-20 15:05:14.273
2,0 days 01:07:30.381000,16,98.272,3,1,NaT,NaT,0 days 00:00:31.635000,0 days 00:00:42.404000,0 days 00:00:24.233000,...,False,SOFT,3.0,True,0 days 01:05:52.109000,Ferrari,LEC,1,True,2022-03-20 15:06:52.126
3,0 days 01:09:08.795000,16,98.414,4,1,NaT,NaT,0 days 00:00:31.619000,0 days 00:00:42.482000,0 days 00:00:24.313000,...,False,SOFT,4.0,True,0 days 01:07:30.381000,Ferrari,LEC,1,True,2022-03-20 15:08:30.398
4,0 days 01:10:47.266000,16,98.471,5,1,NaT,NaT,0 days 00:00:31.590000,0 days 00:00:42.565000,0 days 00:00:24.316000,...,False,SOFT,5.0,True,0 days 01:09:08.795000,Ferrari,LEC,1,True,2022-03-20 15:10:08.812
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1118,0 days 02:09:55.703000,10,98.204,40,3,NaT,NaT,0 days 00:00:31.492000,0 days 00:00:42.613000,0 days 00:00:24.099000,...,False,HARD,9.0,True,0 days 02:08:17.499000,AlphaTauri,GAS,1,True,2022-03-20 16:09:17.516
1119,0 days 02:11:34.004000,10,98.301,41,3,NaT,NaT,0 days 00:00:31.523000,0 days 00:00:42.621000,0 days 00:00:24.157000,...,False,HARD,10.0,True,0 days 02:09:55.703000,AlphaTauri,GAS,1,True,2022-03-20 16:10:55.720
1120,0 days 02:13:12.333000,10,98.329,42,3,NaT,NaT,0 days 00:00:31.453000,0 days 00:00:42.692000,0 days 00:00:24.184000,...,False,HARD,11.0,True,0 days 02:11:34.004000,AlphaTauri,GAS,1,True,2022-03-20 16:12:34.021
1121,0 days 02:14:50.811000,10,98.478,43,3,NaT,NaT,0 days 00:00:31.468000,0 days 00:00:42.816000,0 days 00:00:24.194000,...,False,HARD,12.0,True,0 days 02:13:12.333000,AlphaTauri,GAS,1,True,2022-03-20 16:14:12.350


In [125]:
driver = 'HAM'
rep_laps = laps.pick_driver(driver)
softs = rep_laps[rep_laps['Compound'] == SOFT]
softs.LapTime.std()

15.364992588255754

In [None]:
rep_laps.LapTime.std()

In [119]:
rep_laps

<pandas.core.groupby.generic.SeriesGroupBy object at 0x00000254C24FB8B0>

In [None]:
rep_laps.groupby('Compound').size()['SOFT']

In [None]:
rep_laps.groupby('Compound').sum()

In [None]:
rep_laps.groupby('Compound').sum().loc[HARD, 'LapTime']

In [126]:
rep_laps.Compound.unique()

array(['SOFT', 'HARD', 'MEDIUM'], dtype=object)

In [34]:
# DO NOT DELETE
# DO NOT DELETE
# DO NOT DELETE
# DO NOT DELETE

def get_stint_lengths(stints_list: list):
    if stints_list == []:
        return []
    
    stints_tuples = []

    for i in range(len(stints_list)):
        curr = stints_list[i]
        if i == 0:
            stint_laps = 1
        else:
            stint_laps += 1
            prev = stints_list[i - 1]
            if curr != prev:
                stints_tuples.append((prev, stint_laps - 1))
                stint_laps = 1

    stints_tuples.append((curr, stint_laps))
    return stints_tuples


# DO NOT DELETE
# DO NOT DELETE
# DO NOT DELETE
# DO NOT DELETE