In [65]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import kagglehub

In [66]:
# Fetch the Kaggle dataset; the returned value is used below as the directory
# prefix of every CSV file that gets read.
fpath = kagglehub.dataset_download("rohanrao/formula-1-world-championship-1950-2020")


def _read_table(filename, **read_csv_kwargs):
    """Read one CSV file of the downloaded dataset with ``pd.read_csv``.

    ``filename`` is appended to ``fpath``; any keyword arguments are passed
    through to ``pd.read_csv`` unchanged.
    """
    return pd.read_csv(f'{fpath}/{filename}', **read_csv_kwargs)


# Tables read with their first CSV column as the index and the literal \N
# parsed as NaN.
circuits = _read_table('circuits.csv', index_col=0, na_values=r'\N')
constructorResults = _read_table('constructor_results.csv', index_col=0, na_values=r'\N')
constructors = _read_table('constructors.csv', index_col=0, na_values=r'\N')
constructorStandings = _read_table('constructor_standings.csv', index_col=0, na_values=r'\N')
drivers = _read_table('drivers.csv', index_col=0, na_values=r'\N')
driverStandings = _read_table('driver_standings.csv', index_col=0, na_values=r'\N')
qualifying = _read_table('qualifying.csv', index_col=0, na_values=r'\N')
results = _read_table('results.csv', index_col=0, na_values=r'\N')
seasons = _read_table('seasons.csv', index_col=0, na_values=r'\N')
status = _read_table('status.csv', index_col=0, na_values=r'\N')

# Tables read with pandas defaults (default integer index, no extra NaN marker).
lapTimes = _read_table('lap_times.csv')
pitStops = _read_table('pit_stops.csv')

# races keeps every CSV column as a regular column at this point (no index_col);
# only the \N marker is parsed as NaN.
races = _read_table('races.csv', na_values=r'\N')

In [67]:
# Sanity check: print how many levels the index of `races` has at this point.
# races.csv was read without index_col, so a single-level default index is expected.
print(races.index.nlevels)


1


In [68]:
# Post-reading formatting

# Give the generic lookup-table columns (name, url, nationality, ...) a
# table-specific prefix so they remain distinguishable once the tables are merged.
circuits = circuits.rename(columns={
    'name': 'circuitName',
    'location': 'circuitLocation',
    'country': 'circuitCountry',
    'url': 'circuitUrl',
})
drivers = drivers.rename(columns={'nationality': 'driverNationality', 'url': 'driverUrl'})
drivers['driverName'] = drivers['forename'] + ' ' + drivers['surname']
constructors = constructors.rename(columns={
    'name': 'constructorName',
    'nationality': 'constructorNationality',
    'url': 'constructorUrl',
})

# Keep only the core race columns, rename the generic ones and index by raceId.
# Selecting the columns directly avoids routing them through a temporary
# MultiIndex and no longer requires the fp1_date / fp1_time columns to exist.
races = (
    races
    .rename(columns={'name': 'raceName', 'url': 'raceUrl'})
    .loc[:, ['raceId', 'year', 'round', 'circuitId', 'raceName', 'date', 'time', 'raceUrl']]
    .set_index('raceId')
)
# Vectorised parse of the YYYY-MM-DD strings; missing values become NaT instead
# of raising as a per-row strptime call would.
races['date'] = pd.to_datetime(races['date'], format='%Y-%m-%d')

# 'time' in pit_stops is renamed so it cannot be confused with the races 'time' column.
pitStops = pitStops.rename(columns={'time': 'pitTime'})

# Durations in seconds, derived from the millisecond columns (vectorised division).
pitStops['seconds'] = pitStops['milliseconds'] / 1000
results['seconds'] = results['milliseconds'] / 1000

In [69]:
# Constructor color mapping
# Keys are constructorName values, values are hex colour strings; the mapping is
# handed to plotly through color_discrete_map. Some names deliberately share a
# colour ('Lotus' / 'Lotus F1', 'Marussia' / 'Manor Marussia').
# NOTE(review): constructors missing from this mapping get no fixed colour here;
# presumably plotly falls back to its default colour sequence - confirm.
constructor_color_map = {
    'Toro Rosso': '#0000FF',
    'Mercedes': '#6CD3BF',
    'Red Bull': '#1E5BC6',
    'Ferrari': '#ED1C24',
    'Williams': '#37BEDD',
    'Force India': '#FF80C7',
    'Virgin': '#c82e37',
    'Renault': '#FFD800',
    'McLaren': '#F58020',
    'Sauber': '#006EFF',
    'Lotus': '#FFB800',
    'HRT': '#b2945e',
    'Caterham': '#0b361f',
    'Lotus F1': '#FFB800',
    'Marussia': '#6E0000',
    'Manor Marussia': '#6E0000',
    'Haas F1 Team': '#B6BABD',
    'Racing Point': '#F596C8',
    'Aston Martin': '#2D826D',
    'Alfa Romeo': '#B12039',
    'AlphaTauri': '#4E7C9B',
    'Alpine F1 Team': '#2293D1',
}

In [70]:
# Summary statistics of the pit-stop table, transposed so that every described
# column becomes one row (easier to read than the default wide layout).
pitStops.describe().transpose()


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
raceId,10990.0,975.731665,89.041843,841.0,893.0,967.0,1055.0,1132.0
driverId,10990.0,542.601274,385.555797,1.0,20.0,817.0,832.0,860.0
stop,10990.0,1.797179,1.540691,1.0,1.0,2.0,2.0,70.0
lap,10990.0,25.314741,14.896984,1.0,13.0,25.0,36.0,78.0
milliseconds,10990.0,85304.309554,311489.432628,12897.0,21951.25,23629.0,26503.5,3069017.0
seconds,10990.0,85.30431,311.489433,12.897,21.95125,23.629,26.5035,3069.017


In [71]:
# Enrich every result row with the attributes of its race, circuit, constructor
# and driver. Each step is a left join from an id column of the running frame
# onto the index of the lookup table, so no result row is dropped.
# Columns present on both sides receive pandas' default _x / _y suffixes
# (the displayed frame shows number_x and number_y).
newResults = (
    results
    .merge(races, left_on='raceId', right_index=True, how='left')
    .merge(circuits, left_on='circuitId', right_index=True, how='left')
    .merge(constructors, left_on='constructorId', right_index=True, how='left')
    .merge(drivers, left_on='driverId', right_index=True, how='left')
)
newResults

Unnamed: 0_level_0,raceId,driverId,constructorId,number_x,grid,position,positionText,positionOrder,points,laps,...,constructorUrl,driverRef,number_y,code,forename,surname,dob,driverNationality,driverUrl,driverName
resultId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,18,1,1,22.0,1,1.0,1,1,10.0,58,...,http://en.wikipedia.org/wiki/McLaren,hamilton,44.0,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton,Lewis Hamilton
2,18,2,2,3.0,5,2.0,2,2,8.0,58,...,http://en.wikipedia.org/wiki/BMW_Sauber,heidfeld,,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld,Nick Heidfeld
3,18,3,3,7.0,7,3.0,3,3,6.0,58,...,http://en.wikipedia.org/wiki/Williams_Grand_Pr...,rosberg,6.0,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg,Nico Rosberg
4,18,4,4,5.0,11,4.0,4,4,5.0,58,...,http://en.wikipedia.org/wiki/Renault_in_Formul...,alonso,14.0,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso,Fernando Alonso
5,18,5,1,23.0,3,5.0,5,5,4.0,58,...,http://en.wikipedia.org/wiki/McLaren,kovalainen,,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen,Heikki Kovalainen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26520,1132,839,214,31.0,18,16.0,16,16,0.0,50,...,http://en.wikipedia.org/wiki/Alpine_F1_Team,ocon,31.0,OCO,Esteban,Ocon,1996-09-17,French,http://en.wikipedia.org/wiki/Esteban_Ocon,Esteban Ocon
26521,1132,815,9,11.0,0,17.0,17,17,0.0,50,...,http://en.wikipedia.org/wiki/Red_Bull_Racing,perez,11.0,PER,Sergio,Pérez,1990-01-26,Mexican,http://en.wikipedia.org/wiki/Sergio_P%C3%A9rez,Sergio Pérez
26522,1132,855,15,24.0,14,18.0,18,18,0.0,50,...,http://en.wikipedia.org/wiki/Sauber_Motorsport,zhou,24.0,ZHO,Guanyu,Zhou,1999-05-30,Chinese,http://en.wikipedia.org/wiki/Zhou_Guanyu,Guanyu Zhou
26523,1132,847,131,63.0,1,,R,19,0.0,33,...,http://en.wikipedia.org/wiki/Mercedes-Benz_in_...,russell,63.0,RUS,George,Russell,1998-02-15,British,http://en.wikipedia.org/wiki/George_Russell_(r...,George Russell


In [72]:
# Attach race and circuit attributes to every pit stop (left joins on the
# lookup-table indexes), then add the driver and constructor names taken from
# the matching (raceId, driverId) row of newResults.
# The last join is an inner join (the pandas default when `how` is not given),
# so pit stops without a matching result row are dropped.
newPitStops = (
    pitStops
    .merge(races, left_on='raceId', right_index=True, how='left')
    .merge(circuits, left_on='circuitId', right_index=True, how='left')
    .merge(
        newResults[['raceId', 'driverId', 'driverName', 'constructorId', 'constructorName']],
        on=['raceId', 'driverId'],
        how='inner',
    )
)
newPitStops

Unnamed: 0,raceId,driverId,stop,lap,pitTime,duration,milliseconds,seconds,year,round,...,circuitName,circuitLocation,circuitCountry,lat,lng,alt,circuitUrl,driverName,constructorId,constructorName
0,841,153,1,1,17:05:23,26.898,26898,26.898,2011,1,...,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.96800,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Jaime Alguersuari,5,Toro Rosso
1,841,30,1,1,17:05:52,25.021,25021,25.021,2011,1,...,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.96800,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Michael Schumacher,131,Mercedes
2,841,17,1,11,17:20:48,23.426,23426,23.426,2011,1,...,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.96800,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Mark Webber,9,Red Bull
3,841,4,1,12,17:22:34,23.251,23251,23.251,2011,1,...,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.96800,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Fernando Alonso,6,Ferrari
4,841,13,1,13,17:24:10,23.842,23842,23.842,2011,1,...,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.96800,10,http://en.wikipedia.org/wiki/Melbourne_Grand_P...,Felipe Massa,6,Ferrari
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10985,1132,807,2,39,16:06:28,30.265,30265,30.265,2024,12,...,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153,http://en.wikipedia.org/wiki/Silverstone_Circuit,Nico Hülkenberg,210,Haas F1 Team
10986,1132,840,2,39,16:06:33,29.469,29469,29.469,2024,12,...,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153,http://en.wikipedia.org/wiki/Silverstone_Circuit,Lance Stroll,117,Aston Martin
10987,1132,839,4,38,16:06:52,29.086,29086,29.086,2024,12,...,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153,http://en.wikipedia.org/wiki/Silverstone_Circuit,Esteban Ocon,214,Alpine F1 Team
10988,1132,815,4,47,16:20:38,28.871,28871,28.871,2024,12,...,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153,http://en.wikipedia.org/wiki/Silverstone_Circuit,Sergio Pérez,9,Red Bull


In [77]:
# Keep the pit stops whose 'seconds' value is at most 50; anything longer is excluded.
filteredPitStops = newPitStops.loc[newPitStops['seconds'].le(50)]

# Mean duration of the retained stops for every (constructorId, year) pair.
filteredPitStopsMean = (
    filteredPitStops
    .groupby(['constructorId', 'year'])['seconds']
    .mean()
)

# Display the resulting Series (indexed by constructorId and year).
filteredPitStopsMean


constructorId  year
1              2011    22.582109
               2012    22.594636
               2013    22.994317
               2014    24.454975
               2015    24.665944
                         ...    
214            2021    24.983246
               2022    23.999500
               2023    24.895107
               2024    24.764050
215            2024    23.783647
Name: seconds, Length: 147, dtype: float64

In [79]:
# Per-race mean pit stop duration for each constructor.
# The aggregation starts from filteredPitStops (stops of at most 50 seconds,
# built in the filtering cell) instead of re-filtering newPitStops with a
# slightly different cut-off, so the plot and the per-constructor/year means
# are based on exactly the same rows.
pitStopRaceMeans = (
    filteredPitStops
    .groupby(by=['raceId', 'raceName', 'date', 'constructorName'])['seconds']
    .mean()
    .reset_index()
    # NOTE(review): the ascending sort presumably determines the order in which
    # constructors appear on the x axis (order of first occurrence) - confirm.
    .sort_values(by='seconds', ascending=True)
)

# One box per constructor; every observation is one race's mean stop duration.
fig = px.box(
    pitStopRaceMeans,
    x='constructorName',
    y='seconds',
    color='constructorName',
    color_discrete_map=constructor_color_map,
    # Readable axis/legend titles that state what is actually plotted.
    labels={
        'constructorName': 'Constructor',
        'seconds': 'Mean pit stop duration per race (s)',
    },
)
fig.update_layout(
    title_text='Pit Stop Durations by Constructor from 2011 to date',
)
fig.show()



