In [1]:
import fastf1 as ff1
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import matplotlib.patches as mpatches
from race import *
from qualifying import *
from constants import *
from practice import *
from race_sim import *
from scipy import stats
from scipy.optimize import curve_fit

# Enable the cache by providing the name of the cache folder
ff1.Cache.enable_cache('cache')

Get the race data for each driver

We want to get each driver's average stint and laptime standard deviation for each tyre and in general

In [3]:
driver_stats_df = pd.DataFrame(
    columns=['Driver', 
             'SoftTimeAll', 'SoftLapsAll', 'SoftSdAll', 'SoftStintsAll', 'SoftAvgTimeAll', 
             'SoftTimeRep', 'SoftLapsRep', 'SoftSdRep', 'SoftStintsRep', 'SoftAvgTimeRep',
             'MediumTimeAll', 'MediumLapsAll', 'MediumSdAll', 'MediumStintsAll', 'MediumAvgTimeAll', 
             'MediumTimeRep', 'MediumLapsRep', 'MediumSdRep', 'MediumStintsRep', 'MediumAvgTimeRep', 
             'HardTimeAll', 'HardLapsAll', 'HardSdAll', 'HardStintsAll', 'HardAvgTimeAll', 
             'HardTimeRep', 'HardLapsRep', 'HardSdRep', 'HardStintsRep', 'HardAvgTimeRep',
             'TotalTimeAll', 'TotalLapsAll', 'TotalSdAll', 'TotalStintsAll', 'TotalAvgTimeAll',
            'TotalTimeRep', 'TotalLapsRep', 'TotalSdRep', 'TotalStintsRep', 'TotalAvgTimeRep'])
driver_stats_df.Driver = ['LEC', 'SAI', 'HAM', 'RUS', 'MAG', 'BOT', 'OCO', 'TSU', 'ALO', 'ZHO', 'MSC', 'STR', 'ALB', 'RIC', 'NOR', 'LAT', 'HUL', 'PER', 'VER', 'GAS', 'VET', 'DEV']
driver_stats_df = driver_stats_df.fillna(0)

for col in driver_stats_df.columns:
    if 'Sd' in col:
        driver_stats_df[col] = driver_stats_df[col].astype('object')



# driver_stats_df.dtypes

In [4]:
for grand_prix_number in range(1, 11):
    gp = ff1.get_session(2022, grand_prix_number, 'R')
    gp.load()
    laps = gp.laps
    convert_laptime_to_seconds(laps)
    
    for driver in get_all_driver_names(laps):
        # We will divide the laps into two (overlapping) groups, one containing all laps recorded and another with representative laps only.
        
        # All laps
        all_laps = laps.pick_driver(driver)
        
        # Calculate the stint counts
        stint_tuples_all = get_stint_lengths(all_laps.Compound.tolist())
        for stint in stint_tuples_all:
            if stint[0] not in DRY_TYRES:
                continue
            
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, stint[0].title() + 'StintsAll'] += 1
        
        
        for compound in all_laps.Compound.unique():
            if compound not in DRY_TYRES:
                continue
        
            # Calculate the total lap time and laps done for each compound
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, compound.title() + 'TimeAll'] += all_laps.groupby('Compound').sum().loc[compound, 'LapTime']
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, compound.title() + 'LapsAll'] += all_laps.groupby('Compound').size()[compound]
                
            # Calculate the standard deviation for each tyre compound
            laptimes = all_laps[all_laps['Compound'] == compound].LapTime.dropna().tolist()
            
            driver_index_list = driver_stats_df.index[driver_stats_df['Driver'] == driver].tolist()
            if len(driver_index_list) > 0:
                driver_index = driver_stats_df.index[driver_stats_df['Driver'] == driver].tolist()[0]
            
            if driver_stats_df.at[driver_index, compound.title() + 'SdAll'] == 0:
                driver_stats_df.at[driver_index, compound.title() + 'SdAll'] = laptimes
            else:
                driver_stats_df.at[driver_index, compound.title() + 'SdAll'] += laptimes
            if driver_stats_df.at[driver_index, 'TotalSdAll'] == 0:
                driver_stats_df.at[driver_index, 'TotalSdAll'] = laptimes
            else:
                driver_stats_df.at[driver_index, 'TotalSdAll'] += laptimes
            
        
        # Representative laps
        rep_laps = laps.pick_driver(driver).pick_accurate().pick_wo_box().pick_track_status('1')
        
        # Calculate the stint counts
        stint_tuples_rep = get_stint_lengths(rep_laps.Compound.tolist())
        for stint in stint_tuples_rep:
            if stint[0] not in DRY_TYRES:
                continue
                
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, stint[0].title() + 'StintsRep'] += 1
        
        
        for compound in rep_laps.Compound.unique():
            if compound not in DRY_TYRES:
                continue
            
            # Calculate the total lap time and laps done for each compound
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, compound.title() + 'TimeRep'] += rep_laps.groupby('Compound').sum().loc[compound, 'LapTime']
            driver_stats_df.loc[driver_stats_df['Driver'] == driver, compound.title() + 'LapsRep'] += rep_laps.groupby('Compound').size()[compound]
        
            # Calculate the standard deviation for each tyre compound
            laptimes = rep_laps[rep_laps['Compound'] == compound].LapTime.dropna().tolist()
            
            driver_index_list = driver_stats_df.index[driver_stats_df['Driver'] == driver].tolist()
            if len(driver_index_list) > 0:
                driver_index = driver_stats_df.index[driver_stats_df['Driver'] == driver].tolist()[0]
            
            if driver_stats_df.at[driver_index, compound.title() + 'SdRep'] == 0:
                driver_stats_df.at[driver_index, compound.title() + 'SdRep'] = laptimes
            else:
                driver_stats_df.at[driver_index, compound.title() + 'SdRep'] += laptimes
            if driver_stats_df.at[driver_index, 'TotalSdRep'] == 0:
                driver_stats_df.at[driver_index, 'TotalSdRep'] = laptimes
            else:
                driver_stats_df.at[driver_index, 'TotalSdRep'] += laptimes
                
print('Done!')

core           INFO 	Loading data for Bahrain Grand Prix - Race [v2.2.9]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for weather_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '44', '63', '20', '77', '31', '22', '14', '24', '47', '18', '23', '3', '4', '6', '27', '11', '1', '10']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v2.2.9]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timi

api            INFO 	Using cached data for weather_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '11', '44', '16', '14', '4', '1', '47', '5', '20', '18', '6', '3', '22', '31', '10', '77', '63', '24', '23']


Done!


In [5]:
# Calculate the average time for each compound
for compound in DRY_TYRES:
    for type in {'All', 'Rep'}:
        time = compound.title() + 'Time' + type
        laps = compound.title() + 'Laps' + type
        avg = compound.title() + 'AvgTime' + type
        
        driver_stats_df[avg] = driver_stats_df[time] / driver_stats_df[laps]

In [7]:
# Calculate the total time statistics
for type in {'All', 'Rep'}:
    time = 'Total' + 'Time' + type
    laps = 'Total' + 'Laps' + type
    avg = 'TotalAvgTime' + type
    stints = 'TotalStints' + type
    
    total_time = 0
    total_laps = 0
    total_stints = 0
    for compound in DRY_TYRES:
        total_time  += driver_stats_df[compound.title() + 'Time' + type] 
        total_laps += driver_stats_df[compound.title() + 'Laps' + type]
        total_stints += driver_stats_df[compound.title() + 'Stints' + type]
        
    driver_stats_df[time] = total_time
    driver_stats_df[laps] = total_laps
    driver_stats_df[stints] = total_stints
        
    driver_stats_df[avg] =  driver_stats_df[time] / driver_stats_df[laps]
    
    


In [9]:
# Calculate the total SD by changing the SD columns
for col in driver_stats_df.columns:
    if 'Sd' in col:
        all_driver_laptime_list = list(driver_stats_df[col])
        all_driver_sd_list = []
        for driver in all_driver_laptime_list:
            if driver != 0:
                all_driver_sd_list.append(np.array(driver).std())
            else:
                all_driver_sd_list.append(0)
        
        driver_stats_df[col] = all_driver_sd_list

In [None]:
# driver_stats_df[['Driver', 'TotalAvgTimeAll']].sort_values(by=['TotalAvgTimeAll'])

%store driver_stats_df

In [10]:
pd.options.display.max_columns = None
driver_stats_df

Unnamed: 0,Driver,SoftTimeAll,SoftLapsAll,SoftSdAll,SoftStintsAll,SoftAvgTimeAll,SoftTimeRep,SoftLapsRep,SoftSdRep,SoftStintsRep,SoftAvgTimeRep,MediumTimeAll,MediumLapsAll,MediumSdAll,MediumStintsAll,MediumAvgTimeAll,MediumTimeRep,MediumLapsRep,MediumSdRep,MediumStintsRep,MediumAvgTimeRep,HardTimeAll,HardLapsAll,HardSdAll,HardStintsAll,HardAvgTimeAll,HardTimeRep,HardLapsRep,HardSdRep,HardStintsRep,HardAvgTimeRep,TotalTimeAll,TotalLapsAll,TotalSdAll,TotalStintsAll,TotalAvgTimeAll,TotalTimeRep,TotalLapsRep,TotalSdRep,TotalStintsRep,TotalAvgTimeRep
0,LEC,6891.365,77,13.197623,4,89.498247,5603.156,61,8.858488,4,91.855016,15634.008,176,13.02183,9,88.829591,12609.408,141,9.73698,9,89.428426,19020.996,226,14.221124,7,84.163699,14577.26,169,8.469168,7,86.255976,41546.369,479,13.197623,20,86.735635,32789.824,371,8.858488,20,88.382275
1,SAI,8245.663,90,12.458556,5,91.618478,6533.003,70,7.851677,5,93.328614,12049.019,137,8.11071,7,87.949044,10465.681,114,7.119336,7,91.804219,15155.595,177,15.527383,6,85.624831,11359.436,134,8.339356,5,84.77191,35450.277,404,12.458556,18,87.74821,28358.12,318,7.851677,17,89.176478
2,HAM,6871.052,76,13.490353,5,90.408579,5299.546,58,9.356686,5,91.371483,19897.045,227,11.765721,11,87.652181,16840.236,190,8.891284,10,88.632821,22428.886,244,15.784318,8,91.921664,16880.108,185,11.461186,8,91.243827,49196.983,547,13.490353,24,89.93964,39019.89,433,9.356686,23,90.115219
3,RUS,4985.681,55,13.677114,4,90.648745,3793.623,41,9.618249,4,92.52739,18543.932,213,13.321088,9,87.060714,15118.777,175,8.22767,9,86.393011,21001.228,230,14.542828,8,91.309687,16937.355,184,11.066026,7,92.050842,44530.841,498,13.677114,21,89.419359,35849.755,400,9.618249,20,89.624388
4,MAG,4515.444,47,13.116337,4,96.073277,3579.003,36,8.664348,2,99.41675,18990.768,210,12.164474,10,90.432229,15294.052,169,7.78171,10,90.497349,20695.998,226,14.017629,6,91.575212,16509.453,183,9.389118,6,90.21559,44202.21,483,13.116337,20,91.515963,35382.508,388,8.664348,18,91.192031
5,BOT,3783.486,40,13.15827,3,94.58715,2687.359,28,9.97117,3,95.977107,22692.24,251,11.542505,10,90.407331,19572.289,215,8.852874,9,91.033902,18511.118,212,15.029795,6,87.316594,14638.984,165,11.670255,6,88.721115,44986.844,503,13.15827,19,89.437066,36898.632,408,9.97117,18,90.437824
6,OCO,10179.01,109,13.326389,6,93.385413,7546.63,81,9.399819,6,93.168272,18266.867,208,13.336033,10,87.821476,15236.337,174,9.994544,9,87.565155,20073.794,215,13.861399,8,93.366484,16165.361,174,10.194797,8,92.904374,48519.671,532,13.326389,24,91.202389,38948.328,429,9.399819,23,90.788643
7,TSU,14405.741,153,12.750127,9,94.15517,10704.473,113,9.524112,9,94.72985,13497.994,155,12.256385,9,87.083832,11270.45,129,8.551848,8,87.367829,15075.818,164,13.694545,6,91.92572,12218.727,134,10.702178,6,91.18453,42979.553,472,12.750127,24,91.058375,34193.65,376,9.524112,23,90.940559
8,ALO,7678.733,83,13.595725,5,92.514855,5682.825,62,9.368206,5,91.658468,19410.012,218,12.615739,10,89.036752,16119.188,180,10.556616,10,89.551044,17309.825,185,15.151263,8,93.566622,12785.545,136,10.022891,8,94.01136,44398.57,486,13.595725,23,91.355082,34587.558,378,9.368206,23,91.501476
9,ZHO,3387.665,37,13.70551,3,91.558514,2253.186,23,9.293828,3,97.964609,17456.781,198,12.335649,10,88.165561,14660.244,166,8.766296,9,88.314723,13153.293,146,16.17848,5,90.091048,9209.272,106,9.893148,5,86.879925,33997.739,381,13.70551,18,89.232911,26122.702,295,9.293828,17,88.551532


In [None]:
# ssd = driver_stats_df.SoftSdRep
# msd = driver_stats_df.MediumSdRep
# sno = driver_stats_df.SoftLapsRep
# mno = driver_stats_df.MediumLapsRep
# savg = driver_stats_df.SoftAvgTimeRep
# mavg = driver_stats_df.MediumAvgTimeRep

# sm_var = ((mno - 1) * msd ** 2 + (sno - 1) * ssd ** 2) / (mno + sno - 1) + mno * sno * (mavg - savg) ** 2 / ((sno + mno) * (sno + mno - 1))
# sm_no = sno + mno
# sm_avg = (sno * savg + mno* mavg) / sm_no


# hsd = driver_stats_df.HardSdRep
# hno = driver_stats_df.HardLapsRep
# havg = driver_stats_df.HardAvgTimeRep

# all_var = ((hno - 1) * hsd ** 2 + (sm_no - 1) * sm_var) / (hno + sm_no - 1) + hno * sm_no * (havg - sm_avg) ** 2 / ((sm_no + hno) * (sm_no + hno - 1))
# all_var

In [None]:
gp = ff1.get_session(2022, 1, 'R')
gp.load()

In [None]:
laps = gp.laps
convert_laptime_to_seconds(laps)
laps

In [None]:
driver = 'HAM'
rep_laps = laps.pick_driver(driver)
softs = rep_laps[rep_laps['Compound'] == SOFT]
softs.LapTime.std()

In [None]:
rep_laps.LapTime.std()

In [None]:
rep_laps

In [None]:
rep_laps.groupby('Compound').size()['SOFT']

In [None]:
rep_laps.groupby('Compound').sum()

In [None]:
rep_laps.groupby('Compound').sum().loc[HARD, 'LapTime']

In [None]:
rep_laps.Compound.unique()

In [2]:
# DO NOT DELETE
# DO NOT DELETE
# DO NOT DELETE
# DO NOT DELETE

def get_stint_lengths(stints_list: list):
    if stints_list == []:
        return []
    
    stints_tuples = []
    stint_laps = 0
    curr = None
    
    for i in range(len(stints_list)):
        curr = stints_list[i]
        if i == 0:
            stint_laps = 1
        else:
            stint_laps += 1
            prev = stints_list[i - 1]
            if curr != prev:
                stints_tuples.append((prev, stint_laps - 1))
                stint_laps = 1

    stints_tuples.append((curr, stint_laps))
    return stints_tuples


# DO NOT DELETE
# DO NOT DELETE
# DO NOT DELETE
# DO NOT DELETE