In [45]:
from common.utils.utils import load_dataset
import pandas as pd
import plotly.express as px

In [2]:
# Load the 'lap_times' dataset through the project's load_dataset helper
# and preview its first five rows.
lap_times_df = load_dataset('lap_times')
lap_times_df.head(n=5)

Unnamed: 0,raceId,driverId,lap,position,time,milliseconds
0,841,20,1,1,1:38.109,98109
1,841,20,2,1,1:33.006,93006
2,841,20,3,1,1:32.713,92713
3,841,20,4,1,1:32.803,92803
4,841,20,5,1,1:32.342,92342


In [7]:
# Load the 'races' dataset through the project's load_dataset helper
# and preview its first five rows.
races_df = load_dataset('races')
races_df.head(n=5)

Unnamed: 0,raceId,year,round,circuitId,name,date,time,url,fp1_date,fp1_time,fp2_date,fp2_time,fp3_date,fp3_time,quali_date,quali_time,sprint_date,sprint_time
0,1,2009,1,1,Australian Grand Prix,2009-03-29,06:00:00,http://en.wikipedia.org/wiki/2009_Australian_G...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
1,2,2009,2,2,Malaysian Grand Prix,2009-04-05,09:00:00,http://en.wikipedia.org/wiki/2009_Malaysian_Gr...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
2,3,2009,3,17,Chinese Grand Prix,2009-04-19,07:00:00,http://en.wikipedia.org/wiki/2009_Chinese_Gran...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
3,4,2009,4,3,Bahrain Grand Prix,2009-04-26,12:00:00,http://en.wikipedia.org/wiki/2009_Bahrain_Gran...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
4,5,2009,5,4,Spanish Grand Prix,2009-05-10,12:00:00,http://en.wikipedia.org/wiki/2009_Spanish_Gran...,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N


In [5]:
# Load the 'drivers' dataset through the project's load_dataset helper
# and preview its first five rows.
drivers_df = load_dataset('drivers')
drivers_df.head(n=5)

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url
0,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton
1,2,heidfeld,\N,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld
2,3,rosberg,6,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg
3,4,alonso,14,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso
4,5,kovalainen,\N,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen


In [15]:
# Analysis parameters: the season and the Grand Prix to inspect.
year = 2020
race_name = 'Turkish Grand Prix'

# Keep only the rows of races_df whose 'year' equals the chosen season.
races_for_season = races_df.loc[races_df['year'] == year]
# Display the race names available in that season
# (useful for picking a valid race_name above).
races_for_season['name']

1018            Austrian Grand Prix
1019             Styrian Grand Prix
1020           Hungarian Grand Prix
1021             British Grand Prix
1022    70th Anniversary Grand Prix
1023             Spanish Grand Prix
1024             Belgian Grand Prix
1025             Italian Grand Prix
1026              Tuscan Grand Prix
1027             Russian Grand Prix
1028               Eifel Grand Prix
1029          Portuguese Grand Prix
1030      Emilia Romagna Grand Prix
1031             Turkish Grand Prix
1032             Bahrain Grand Prix
1033              Sakhir Grand Prix
1034           Abu Dhabi Grand Prix
Name: name, dtype: object

In [39]:
# Narrow the season's races down to the one whose name matches race_name.
selected_race = races_for_season[races_for_season['name'] == race_name]
# Exactly one row must match. Report the actual count so that a typo in
# race_name (zero matches) is not misreported as "multiple races".
assert len(selected_race) == 1, (
    f'Expected exactly one race named {race_name!r} in year {year}, '
    f'found {len(selected_race)}'
)
# Take the raceId of the single matching row; it is used to filter lap times.
selected_race_id = selected_race['raceId'].iloc[0]
selected_race_id

1044

In [53]:
# Get lap times data for selected race
lap_times_for_race = lap_times_df[lap_times_df['raceId'] == selected_race_id]
# Merge driver info
lap_times_for_race = pd.merge(lap_times_for_race, drivers_df, on='driverId')
# Select only necessary data
lap_times_for_race = lap_times_for_race[['lap', 'position', 'time', 'milliseconds', 'number', 'code', 'forename', 'surname']]
# Combine driver name and surname
lap_times_for_race['driver_full_name'] = lap_times_for_race['forename'] + ' ' + lap_times_for_race['surname']
lap_times_for_race.drop(['forename', 'surname'], axis=1, inplace=True)
# Rename columns for clarity
lap_times_for_race.rename(columns={'number': 'driver_number', 'code': 'driver_code'}, inplace=True) 
lap_times_for_race.head()

Unnamed: 0,lap,position,time,milliseconds,driver_number,driver_code,driver_full_name
0,1,1,1:59.973,119973,18,STR,Lance Stroll
1,2,1,1:55.125,115125,18,STR,Lance Stroll
2,3,1,1:54.247,114247,18,STR,Lance Stroll
3,4,1,1:54.170,114170,18,STR,Lance Stroll
4,5,1,1:53.516,113516,18,STR,Lance Stroll


In [54]:
# Plot lap time (milliseconds) against lap number with Plotly, one line per
# driver. The x-axis is the lap number taken from the 'lap' column.
fig = px.line(
    lap_times_for_race,
    x='lap',
    y='milliseconds',
    color='driver_full_name',
    # Show the driver's position and the formatted lap time on hover, so the
    # 'position' label below is actually used.
    hover_data=['position', 'time'],
    labels={
        'lap': 'Lap',
        'milliseconds': 'Lap Time (ms)',
        'position': 'Position',
        'time': 'Lap Time',
        'driver_full_name': 'Full Name',
    },
    # A title lets the figure stand alone when the notebook is skimmed.
    title=f'Lap times - {race_name} {year}',
    height=700,
)
fig.show()
