In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Read the four CSV tables from the F1KaggleData directory; the paths are
# relative to the notebook's working directory.
results_df, races_df, drivers_df, laptimes_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'F1KaggleData/results.csv',
        'F1KaggleData/races.csv',
        'F1KaggleData/drivers.csv',
        'F1KaggleData/laptimes.csv',
    )
)


In [3]:
# Show 20 randomly chosen result rows (a different draw on every run).
display(results_df.sample(n=20))

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId
4485,4486,228,49,15,15.0,10,,R,18,0.0,32,,,,,,,23
23567,23572,978,838,1,2.0,8,11.0,11,11,0.0,50,,,43.0,13.0,01:33.5,226.906,11
677,678,50,5,4,4.0,11,2.0,2,2,8.0,67,8.377,7242956.0,34.0,9.0,01:29.7,183.222,1
4221,4222,216,64,21,2.0,16,,R,18,0.0,8,,,,,,,3
3555,3556,186,21,22,9.0,13,11.0,11,11,0.0,44,+1:32.195,5235252.0,,,,,1
14964,14965,605,304,1,14.0,8,5.0,5,5,2.0,95,,,,,,,11
20581,20584,347,29,164,20.0,22,,R,22,0.0,19,,,16.0,22.0,01:23.5,197.105,5
13767,13768,561,172,27,26.0,8,,R,19,0.0,49,,,,,,,5
13556,13557,554,238,25,3.0,15,5.0,5,5,2.0,54,72.09,5908580.0,,,,,1
17237,17238,711,397,172,24.0,4,9.0,9,9,0.0,28,,,,,,,25


In [4]:
# Find the fastest lap for a given race. 

def enrich_lap_times(races_df, laptimes_df, drivers_df):
    """Attach race and driver details to every lap-time row.

    Parameters
    ----------
    races_df : DataFrame
        Must contain the 'raceId', 'year' and 'name' columns.
    laptimes_df : DataFrame
        Must contain the 'raceId' and 'driverId' join columns.
    drivers_df : DataFrame
        Must contain the 'driverId' and 'driverRef' columns.

    Returns
    -------
    DataFrame
        The lap-time columns followed by 'year', 'name' and 'driverRef'.
        Both joins use pandas' default inner merge, so lap rows whose
        raceId or driverId has no match are dropped.
    """
    # Only carry over the columns that are actually needed from each lookup.
    race_lookup = races_df[['raceId', 'year', 'name']]
    driver_lookup = drivers_df[['driverId', 'driverRef']]

    return (
        laptimes_df
        .merge(race_lookup, on='raceId')
        .merge(driver_lookup, on='driverId')
    )


# Build the enriched lap-time table once, then preview ten random rows.
enriched_laptimes_df = enrich_lap_times(
    races_df=races_df,
    laptimes_df=laptimes_df,
    drivers_df=drivers_df,
)

display(enriched_laptimes_df.sample(n=10))

Unnamed: 0,raceId,driverId,lap,position,time,milliseconds,year,name,driverRef
194829,130,37,22,16,1:23.606,83606,2002,Monaco Grand Prix,rosa
359135,229,69,40,11,1:34.370,94370,1996,Monaco Grand Prix,badoer
65134,11,18,36,8,1:40.135,100135,2009,European Grand Prix,button
161979,135,22,61,4,1:19.019,79019,2002,German Grand Prix,barrichello
5915,22,20,36,17,1:28.947,88947,2008,Turkish Grand Prix,vettel
114302,100,2,18,13,1:26.465,86465,2004,British Grand Prix,heidfeld
240369,951,821,35,18,1:43.145,103145,2016,Russian Grand Prix,gutierrez
328231,191,49,35,3,1:36.108,96108,1998,Australian Grand Prix,frentzen
132050,16,5,61,9,1:14.562,74562,2009,Brazilian Grand Prix,kovalainen
290235,236,71,27,6,1:55.079,115079,1996,Belgian Grand Prix,damon_hill


In [5]:
# Let pandas render up to 100 rows before it truncates the output.
pd.set_option('display.max_rows', 100)

# For each (year, race name) group, find the index label of the row with the
# smallest 'milliseconds' value, then pull those rows out of the full table.
fastest_lap_labels = enriched_laptimes_df.groupby(['year', 'name'])['milliseconds'].idxmin()
fastest_laps = enriched_laptimes_df.loc[fastest_lap_labels]

# Narrow the result to the columns of interest.
fastest_laps_minimal = fastest_laps[['year', 'name', 'driverRef', 'milliseconds']]

# Spot-check a single season.
display(fastest_laps_minimal[fastest_laps_minimal['year'] == 1996])

Unnamed: 0,year,name,driverRef,milliseconds
283232,1996,Argentine Grand Prix,alesi,89413
292542,1996,Australian Grand Prix,villeneuve,93421
288602,1996,Belgian Grand Prix,berger,113067
289661,1996,Brazilian Grand Prix,damon_hill,81547
293088,1996,British Grand Prix,villeneuve,89288
292993,1996,Canadian Grand Prix,villeneuve,81916
289794,1996,European Grand Prix,damon_hill,81363
293043,1996,French Grand Prix,villeneuve,78610
290112,1996,German Grand Prix,damon_hill,106504
290198,1996,Hungarian Grand Prix,damon_hill,80093


In [10]:
import copy

# set_index already returns a new DataFrame, so fastest_laps_minimal is left
# untouched without an explicit copy.  sort_index keeps the (year, name)
# MultiIndex lexsorted, which pandas requires for slice-based lookups such as
# .loc[pd.IndexSlice[:, name], :].
multi_index_fast_laps = fastest_laps_minimal.set_index(['year', 'name']).sort_index()


In [30]:
# Selecting a year on the first index level leaves that season's rows keyed
# by race name.
season_1998_laps = multi_index_fast_laps.loc[1998]
display(season_1998_laps)

Unnamed: 0_level_0,driverRef,milliseconds
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Argentine Grand Prix,wurz,88179
Australian Grand Prix,hakkinen,91649
Austrian Grand Prix,coulthard,72878
Belgian Grand Prix,michael_schumacher,123766
Brazilian Grand Prix,hakkinen,79337
British Grand Prix,michael_schumacher,95704
Canadian Grand Prix,michael_schumacher,79379
French Grand Prix,coulthard,77523
German Grand Prix,coulthard,106116
Hungarian Grand Prix,michael_schumacher,79286


In [44]:
from ipywidgets import interact, interact_manual

@interact
def display_fastest_laps_for_year(year=(1996, 2014, 1)):
    """Count how many fastest laps each driver set in the given season.

    The tuple default is the ipywidgets (min, max, step) abbreviation that
    @interact turns into an integer slider; direct callers pass a plain year.

    Parameters
    ----------
    year : int
        Season to look up on the first level of multi_index_fast_laps.

    Returns
    -------
    DataFrame or str
        A one-column frame ('FastestLaps') indexed by driverRef, as produced
        by value_counts (most frequent first); or a message string when the
        year is not present in the index.
    """
    if year not in multi_index_fast_laps.index:
        return 'No fastest laps data available for: {}'.format(year)

    laps_per_driver = multi_index_fast_laps.loc[year]['driverRef'].value_counts()
    # Name the column explicitly instead of renaming 'driverRef' afterwards:
    # since pandas 2.0 value_counts() names its result 'count', so a
    # rename keyed on 'driverRef' would silently leave the column mislabelled.
    return laps_per_driver.to_frame(name='FastestLaps')
    
    


interactive(children=(IntSlider(value=2005, description='year', max=2014, min=1996), Output()), _dom_classes=(…

In [45]:
# Call the widget-backed function directly for a fixed season.
display(display_fastest_laps_for_year(year=1998))

Unnamed: 0,FastestLaps
michael_schumacher,6
hakkinen,6
coulthard,3
wurz,1


In [21]:
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets



In [59]:
# display(multi_index_fast_laps.sample(20))
def display_fastest_laps_for_circuit(name):
    """Return the fastest-lap rows, across all years, for one race name.

    `name` is matched against the second ('name') level of
    multi_index_fast_laps; every value of the first ('year') level is kept.
    """
    every_year = slice(None)
    return multi_index_fast_laps.loc[(every_year, name), :]

# Distinct race names, in index order, used as the dropdown options.
race_name_level = multi_index_fast_laps.index.get_level_values('name')
circuits = race_name_level.unique().tolist()

# Render a dropdown that re-runs the lookup whenever the selection changes.
interact(display_fastest_laps_for_circuit, name=circuits)

interactive(children=(Dropdown(description='name', options=('Argentine Grand Prix', 'Australian Grand Prix', '…

<function __main__.display_fastest_laps_for_circuit(name)>

ideas

Dominance: take teams such as Mercedes, Red Bull, McLaren, etc. and show their wins per season over the last 20 years

Driver wins and fastest laps, coloured by team

Show podiums for teams outside the top 3, i.e. basically none

downfall of Williams

mid table... see how that seems more competitive

back marker teams... who picked up what points... 

points allocated to the non Top3 teams... 