In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import urllib.request
import zipfile

# Download

In [2]:
# Fetch the Ergast F1 database dump (a zip archive of CSV files) and store it
# on local disk. urlretrieve returns a (local_path, headers) pair, which the
# notebook echoes as this cell's output.
url = 'https://ergast.com/downloads/f1db_csv.zip'
urllib.request.urlretrieve(
    url,
    filename='/home/julian/Downloads/f1db_csv.zip',
)

('/home/julian/Downloads/f1db_csv.zip',
 <http.client.HTTPMessage at 0x7f4bd61f77f0>)

In [3]:
# Unpack the downloaded archive into the relative ``data/`` folder. Every
# ``pd.read_csv('data/...')`` call in this notebook resolves against the
# notebook's working directory, so extracting to a user-specific absolute
# path only worked on one machine.
with zipfile.ZipFile('/home/julian/Downloads/f1db_csv.zip', 'r') as archive:
    archive.extractall('data')

# Data

In [4]:
# Load the raw tables. Column names are supplied through ``names`` and only
# the positional columns listed in ``usecols`` are kept, so the files are
# read as header-less CSVs.

# One row per pit stop: which race, which driver, stop number, duration.
pit_times = pd.read_csv(
    'data/pit_stops.csv',
    usecols=(0, 1, 2, 6),
    names=['raceId', 'driverId', 'stop', 'milliseconds'],
)

# One row per driver per lap.
lap_times = pd.read_csv(
    'data/lap_times.csv',
    usecols=(0, 1, 2, 5),
    names=['raceId', 'driverId', 'lap', 'milliseconds'],
)

# Race calendar: id -> season year and race name.
races = pd.read_csv(
    'data/races.csv',
    usecols=(0, 1, 4),
    names=['raceId', 'year', 'name'],
)

# Driver id -> short reference name.
drivers = pd.read_csv(
    'data/driver.csv',
    usecols=[0, 1],
    names=['driverId', 'driverRef'],
)

# Per-driver race results (grid slot, finishing order, finishing status).
positions = pd.read_csv(
    'data/results.csv',
    usecols=[1, 2, 3, 5, 8, 17],
    names=['raceId', 'driverId', 'constructorId', 'grid', 'positionOrder', 'statusId'],
)

# Status id -> human readable status text.
status = pd.read_csv(
    'data/status.csv',
    names=['statusId', 'status'],
)

# Constructors table. NOTE: the name ``teams`` is rebound later in the
# notebook to an array of constructor references.
teams = pd.read_csv(
    'data/constructors.csv',
    names=['constructorId', 'constructorRef', 'name', 'nationality', 'url'],
)

In [5]:
# Points scored by each constructor in each race.
df = pd.read_csv(
    'data/constructor_results.csv',
    usecols=(1, 2, 3),
    names=['raceId', 'constructorId', 'points'],
)

# A second view of results.csv, this time keyed by constructor and carrying
# the classified position and fastest-lap speed.
times = pd.read_csv(
    'data/results.csv',
    usecols=(1, 3, 5, 6, 16),
    names=['raceId', 'constructorId', 'grid', 'position', 'fastestLapSpeed'],
)

# Dictionaries

In [6]:
def _lookup(frame, key_col, value_col):
    """Return a dict pairing two positional columns of ``frame`` row by row."""
    cells = frame.values
    return dict(zip(cells[:, key_col], cells[:, value_col]))


# id -> label dictionaries used with ``Series.map`` in the cells below.
dict_drivers = _lookup(drivers, 0, 1)   # driverId -> driverRef
dict_year = _lookup(races, 0, 1)        # raceId -> year
dict_team = _lookup(teams, 0, 1)        # constructorId -> constructorRef
dict_circuit = _lookup(races, 0, 2)     # raceId -> race name
dict_status = _lookup(status, 0, 1)     # statusId -> status text

In [7]:
# Plot colour for each constructor, keyed by constructor reference. Values are
# Matplotlib colour specs (hex strings or named/short colours).
# ``dict_year`` is already built together with the other lookup dictionaries,
# so it is not rebuilt a second time here.
color_dict_teams = {
    'red_bull': '#1E41FF',
    'mclaren': '#FF8700',
    'renault': '#FFF500',
    'ferrari': '#DC0000',
    'toro_rosso': '#469BFF',
    'force_india': '#F596C8',
    'lotus_racing': 'g',
    'virgin': 'lime',
    'williams': '#FFFFFF',
    'mercedes': '#00D2BE',
    'sauber': '#9B0000',
    'hrt': 'purple',
    'lotus_f1': '#FFF500',
    'marussia': 'k',
    'caterham': 'g',
    'manor': 'k',
    'haas': '#F0D787',
    'racing_point': '#F596C8',
    'alfa': '#9B0000',
}

# Pit Times

In [8]:
# Replace the numeric ids with readable labels: season year and race name
# come from the race id, the driver reference from the driver id.
pit_times['year'] = pit_times['raceId'].map(dict_year)
pit_times['circuit'] = pit_times['raceId'].map(dict_circuit)
pit_times['driver'] = pit_times['driverId'].map(dict_drivers)

# The id columns are no longer needed once the labels are attached.
pit_times.drop(columns=['raceId', 'driverId'], inplace=True)
pit_times.head()

Unnamed: 0,stop,milliseconds,year,circuit,driver
0,1,26898,2011,Australian Grand Prix,alguersuari
1,1,25021,2011,Australian Grand Prix,michael_schumacher
2,1,23426,2011,Australian Grand Prix,webber
3,1,23251,2011,Australian Grand Prix,alonso
4,1,23842,2011,Australian Grand Prix,massa


# Lap Times

In [9]:
# Attach season, driver reference and race name to every lap.
lap_times['year'] = lap_times['raceId'].map(dict_year)
lap_times['driver'] = lap_times['driverId'].map(dict_drivers)
lap_times['circuit'] = lap_times['raceId'].map(dict_circuit)

# Drop the raw ids, then keep only laps from the 2011 season onwards.
lap_times.drop(columns=['raceId', 'driverId'], inplace=True)
lap_times = lap_times.loc[lap_times['year'] >= 2011]
lap_times.head()

Unnamed: 0,lap,milliseconds,year,driver,circuit
0,1,98109,2011,vettel,Australian Grand Prix
1,2,93006,2011,vettel,Australian Grand Prix
2,3,92713,2011,vettel,Australian Grand Prix
3,4,92803,2011,vettel,Australian Grand Prix
4,5,92342,2011,vettel,Australian Grand Prix


# Results

In [10]:
# Attach the season first so the results can be restricted to 2011 onwards.
positions['year'] = positions['raceId'].map(dict_year)

# Take an explicit copy of the filtered rows: the columns added below are then
# written to an independent frame instead of to a slice of the original
# (which triggers SettingWithCopyWarning / is unreliable under copy-on-write).
positions = positions[positions['year'] >= 2011].copy()

positions['driver'] = positions['driverId'].map(dict_drivers)
positions['circuit'] = positions['raceId'].map(dict_circuit)
positions['team'] = positions['constructorId'].map(dict_team)

# Hand-entered per-race values (26 entries each). They are paired with the
# race names purely by position, in order of first appearance in ``positions``.
# zip() stops at the shorter input, so any race name beyond the first 26 gets
# NaN for both columns.
# NOTE(review): the unit of ``best_time`` is not stated anywhere in this
# notebook - confirm before relying on it.
number_laps = [58,56,56,58,66,78,70,53,52,67,70,44,53,61,53,55,60,55,71,57,56,71,53,71,51,53]
best_time = [1.4,1.57,1.54,1.5,1.3,1.24,1.23,1.57,1.51,1.23,1.32,1.77,1.35,1.70,1.53,1.69,1.46,1.67,1.18,1.52,1.62,1.11,1.6,1.31,1.72,1.32]
best_time_dict = dict(zip(positions['circuit'].unique(), best_time))
number_laps_dict = dict(zip(positions['circuit'].unique(), number_laps))

# The column order created here matters: it is the order shown in the
# ``head()`` output below (grid, positionOrder, statusId, year, driver,
# circuit, team, color, best_lap, number_laps).
positions['color'] = positions['team'].map(color_dict_teams)
positions['statusId'] = positions['statusId'].map(dict_status)  # id -> status text
positions['best_lap'] = positions['circuit'].map(best_time_dict)
positions['number_laps'] = positions['circuit'].map(number_laps_dict)

# The raw ids have all been translated; remove them.
positions = positions.drop(columns=['raceId', 'driverId', 'constructorId'])
positions.head()

Unnamed: 0,grid,positionOrder,statusId,year,driver,circuit,team,color,best_lap,number_laps
20776,1,1,Finished,2011,vettel,Australian Grand Prix,red_bull,#1E41FF,1.4,58
20777,2,2,Finished,2011,hamilton,Australian Grand Prix,mclaren,#FF8700,1.4,58
20778,6,3,Finished,2011,petrov,Australian Grand Prix,renault,#FFF500,1.4,58
20779,5,4,Finished,2011,alonso,Australian Grand Prix,ferrari,#DC0000,1.4,58
20780,3,5,Finished,2011,webber,Australian Grand Prix,red_bull,#1E41FF,1.4,58


In [11]:
# Constructor points for 2011 onwards, with ids translated to labels.
# The translation is done on a derived frame rather than by overwriting the
# columns of ``df``: overwriting made the cell non-repeatable, because a second
# run would push the already-translated years / team names through the
# id lookups again and turn them into NaN.
# Note that the ``raceId`` column of ``above_2010`` holds the season year.
above_2010 = df.assign(
    raceId=df['raceId'].map(dict_year),
    constructorId=df['constructorId'].map(dict_team),
)
above_2010 = above_2010[above_2010['raceId'] >= 2011].copy()
above_2010['color'] = above_2010['constructorId'].map(color_dict_teams)
above_2010.head()

Unnamed: 0,raceId,constructorId,points,color
9631,2011,red_bull,35.0,#1E41FF
9632,2011,mclaren,26.0,#FF8700
9633,2011,renault,15.0,#FFF500
9634,2011,ferrari,18.0,#DC0000
9635,2011,sauber,0.0,#9B0000


In [12]:
# Per-result speed/grid data for 2011 onwards, keyed by constructor reference.
# ``times`` itself is left untouched so the cell can be re-run safely.
# Note that the ``raceId`` column of ``above_2010_s`` holds the season year.
above_2010_s = times.assign(raceId=times['raceId'].map(dict_year))
above_2010_s = above_2010_s[above_2010_s['raceId'] >= 2011].copy()
above_2010_s['constructorId'] = above_2010_s['constructorId'].map(dict_team)

# The dump marks missing values with the literal text \N.
# Assign the replaced column back instead of calling
# ``frame[col].replace(..., inplace=True)``: the in-place form acts on a
# temporary Series and does not update the frame under copy-on-write.
above_2010_s['position'] = above_2010_s['position'].replace(r'\N', 'R')  # 'R' = no classified position
above_2010_s['fastestLapSpeed'] = (
    above_2010_s['fastestLapSpeed'].replace(r'\N', np.nan).astype(float)
)
above_2010_s.head()

Unnamed: 0,raceId,constructorId,grid,position,fastestLapSpeed
20776,2011,red_bull,1,1,212.488
20777,2011,mclaren,2,2,211.382
20778,2011,renault,6,3,211.969
20779,2011,ferrari,5,4,213.336
20780,2011,red_bull,3,5,213.066


# Seasons and Circuits

In [13]:
# Distinct values offered by the interactive dropdowns further down.
sea = pd.unique(pit_times['year'])        # seasons that have pit-stop data
circ = pd.unique(pit_times['circuit'])    # race names that have pit-stop data
# NOTE: this rebinds ``teams`` (previously the constructors table) to an
# array of constructor references; ``graph`` iterates over this array.
teams = pd.unique(above_2010['constructorId'])

# Main Code

In [14]:
def plot(Season, Circuit, Incidents):
    """Draw a six-panel dashboard for one race.

    The panels show, per driver: finishing position, grid position, total time
    spent in the pits, average lap time (pit time excluded) and either the
    number of pit stops or the non-'Finished' result status. A sixth panel
    shows the circuit image annotated with the number of laps and the
    reference lap value stored for that race.

    Parameters
    ----------
    Season : int
        Season year to select, compared against the ``year`` columns.
    Circuit : str
        Race name to select, compared against the ``circuit`` columns; also
        used to locate the picture ``images/{Circuit}.png``.
    Incidents : bool
        If true, the lower-left panel shows the status of every driver whose
        status is not 'Finished'; otherwise it shows the pit-stop count.

    Notes
    -----
    Reads the module-level frames ``pit_times``, ``lap_times`` and
    ``positions``. Only drivers that have at least one pit stop and at least
    one lap time recorded for the race are drawn. If the selected
    season/race combination has no usable data a short message is printed
    and nothing is drawn.

    Raises
    ------
    FileNotFoundError
        If ``images/{Circuit}.png`` does not exist.
    """
    fallback_color = 'grey'  # used for teams without an entry in the colour table

    race_pits = pit_times[(pit_times['year'] == Season) & (pit_times['circuit'] == Circuit)]
    race_laps = lap_times[(lap_times['year'] == Season) & (lap_times['circuit'] == Circuit)]
    race_results = positions[(positions['year'] == Season) & (positions['circuit'] == Circuit)]

    # The dropdowns offer every season x race combination, many of which never
    # took place; bail out early instead of failing further down.
    if race_results.empty:
        print(f'No results found for the {Season} {Circuit}.')
        return

    # ---- collect one entry per plotted driver --------------------------------
    drivers = []
    lap_times_list = []         # average lap time in minutes, pit time removed
    pit_times_list = []         # total pit time in seconds
    position_list = []          # finishing order
    grid_position_list = []
    number_stops_list = []
    colors = []
    team_names = []
    status_list = []            # status text of drivers that did not 'Finish'
    driver_incident = []
    color_incident = []

    for driver in race_results['driver'].unique():
        driver_pits = race_pits[race_pits['driver'] == driver]
        driver_laps = race_laps[race_laps['driver'] == driver]
        # Without pit stops the driver is not plotted; without laps the
        # average lap time would be a division by zero.
        if driver_pits.empty or driver_laps.empty:
            continue
        result = race_results[race_results['driver'] == driver].iloc[0]

        pit_ms = driver_pits['milliseconds'].sum()
        lap_ms = driver_laps['milliseconds'].sum()
        color = result['color'] if pd.notna(result['color']) else fallback_color

        drivers.append(driver)
        lap_times_list.append((lap_ms - pit_ms) / (60000 * len(driver_laps)))
        pit_times_list.append(pit_ms / 1000)  # milliseconds -> seconds
        position_list.append(result['positionOrder'])
        grid_position_list.append(result['grid'])
        number_stops_list.append(int(driver_pits['stop'].max()))
        colors.append(color)
        team_names.append(result['team'])
        if result['statusId'] != 'Finished':
            status_list.append(result['statusId'])
            driver_incident.append(driver)
            color_incident.append(color)

    if not drivers:
        print(f'No pit-stop or lap-time data for the {Season} {Circuit}.')
        return

    # ---- figure layout -------------------------------------------------------
    circuit_image = plt.imread(f'images/{Circuit}.png')
    plt.figure(figsize=(18, 8))
    ax = plt.subplot(3, 4, (1, 6))      # circuit picture
    ax1 = plt.subplot(3, 4, (3, 4))     # finishing position
    ax2 = plt.subplot(3, 4, (11, 12))   # average lap time
    ax3 = plt.subplot(3, 4, 9)          # pit stops / incidents
    ax4 = plt.subplot(3, 4, 10)         # total pit time
    ax5 = plt.subplot(3, 4, (7, 8))     # grid position

    # Finishing position: one scatter call per driver so that each team gets
    # exactly one legend entry (attached to its first driver).
    labelled_teams = set()
    for driver, position, team, color in zip(drivers, position_list, team_names, colors):
        label_kwargs = {} if team in labelled_teams else {'label': team}
        labelled_teams.add(team)
        ax1.scatter(driver, position, color=color, edgecolor='k', **label_kwargs)
    ax1.legend(bbox_to_anchor=(-0.01, 1.3), loc='upper center', borderaxespad=0.,
               ncol=len(labelled_teams))
    ax1.invert_yaxis()  # P1 at the top
    ax1.set_ylabel('Position')
    ax1.tick_params(axis='x', labelbottom=False)

    # Circuit picture with the per-race lap count and reference lap value.
    # Both values are NaN for races missing from the hand-entered tables.
    first_row = race_results.iloc[0]
    laps = first_row['number_laps']
    best_time = first_row['best_lap']
    ax.imshow(circuit_image, aspect='auto')
    ax.text(0, 30, f'{int(laps)}' if pd.notna(laps) else 'n/a',
            fontsize=20, weight="bold")
    ax.text(700, 30, f'{best_time}' if pd.notna(best_time) else 'n/a',
            color='r', fontsize=20, weight="bold")
    ax.axis('off')

    # Total pit time.
    ax4.scatter(drivers, pit_times_list, c=colors, edgecolor='k')
    ax4.set_ylabel('Total Pit Time in Seconds')
    ax4.tick_params(axis='x', labelrotation=90)

    # Average lap time, fastest at the top.
    ax2.scatter(drivers, lap_times_list, c=colors, edgecolor='k')
    ax2.set_ylabel('Average Lap Time in minutes')
    ax2.set_ylim(np.min(lap_times_list), np.max(lap_times_list))
    ax2.invert_yaxis()
    ax2.tick_params(axis='x', labelrotation=90)

    # Grid position, pole at the top; driver names are shown on the panel below.
    ax5.scatter(drivers, grid_position_list, c=colors, edgecolor='k')
    ax5.set_ylabel('Grid Position')
    ax5.invert_yaxis()
    ax5.tick_params(axis='x', labelbottom=False)

    # Lower-left panel: incidents or pit-stop count.
    if Incidents:
        ax3.scatter(driver_incident, status_list, c=color_incident, edgecolor='k')
        ax3.set_ylabel('Incident')
    else:
        ax3.scatter(drivers, number_stops_list, c=colors, edgecolor='k')
        ax3.set_ylabel('# of Pit Stops')
    ax3.tick_params(axis='x', labelrotation=90)

    plt.show()

In [15]:
def graph(Season):
    """Plot four per-constructor summaries for one season.

    Panels: total constructor points, mean fastest-lap speed, mean grid
    position and number of results without a classified position ('R').

    Parameters
    ----------
    Season : int
        Season year, compared against the ``raceId`` column of
        ``above_2010`` and ``above_2010_s`` (which holds the year).

    Notes
    -----
    Reads the module-level ``above_2010``, ``above_2010_s`` and ``teams``.
    A constructor is shown when it has at least one result row in the
    season. Missing fastest-lap speeds are ignored only when averaging the
    speed; the grid average and the retirement count use every result row,
    because rows without a fastest lap would otherwise be silently excluded
    from the retirement count.
    Constructors without a colour get a neutral fallback colour instead of
    being dropped.
    """
    fallback_color = 'grey'

    season_points = above_2010[above_2010['raceId'] == Season]
    season_results = above_2010_s[above_2010_s['raceId'] == Season]

    team_list = []
    points = []
    avg_speed = []
    avg_grid = []
    acc_list = []
    colors = []
    for team in teams:
        team_results = season_results[season_results['constructorId'] == team]
        if team_results.empty:
            continue  # constructor did not take part in this season
        team_points = season_points[season_points['constructorId'] == team]
        known_colors = team_points['color'].dropna()

        team_list.append(team)
        points.append(team_points['points'].sum())
        colors.append(known_colors.iloc[0] if not known_colors.empty else fallback_color)
        avg_speed.append(team_results['fastestLapSpeed'].mean())  # NaN speeds are skipped
        avg_grid.append(team_results['grid'].mean())
        acc_list.append(int((team_results['position'] == 'R').sum()))

    plt.figure(figsize=(15, 7))
    ax1 = plt.subplot(221)
    ax2 = plt.subplot(222)
    ax3 = plt.subplot(223)
    ax4 = plt.subplot(224)

    ax1.scatter(team_list, points, color=colors, edgecolors='k', s=100)
    ax1.set_ylabel('Constructors Points')
    ax1.tick_params(axis='x', labelbottom=False)

    ax2.scatter(team_list, avg_speed, color=colors, edgecolors='k', s=100)
    ax2.set_ylabel('Average Speed')
    ax2.tick_params(axis='x', labelbottom=False)

    ax3.scatter(team_list, avg_grid, color=colors, edgecolors='k', s=100)
    ax3.set_ylabel('Average Grid Position')
    ax3.tick_params(axis='x', labelrotation=60)
    ax3.invert_yaxis()  # front of the grid at the top

    ax4.scatter(team_list, acc_list, color=colors, edgecolors='k', s=100)
    ax4.set_ylabel('# of Retirements')
    ax4.tick_params(axis='x', labelrotation=60)
    ax4.invert_yaxis()  # fewest retirements at the top
    plt.show()

In [16]:
# Race dashboard: dropdowns for season and race, checkbox for the incident
# view. The return value is bound to ``_`` so it is not echoed as cell output.
_ = interact(
    plot,
    Season=sea,
    Circuit=circ,
    Incidents=False,
)

interactive(children=(Dropdown(description='Season', options=(2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, …

In [17]:
# Season overview per constructor, driven by a season dropdown. The return
# value is bound to ``_`` so it is not echoed as cell output.
_ = interact(
    graph,
    Season=sea,
)

interactive(children=(Dropdown(description='Season', options=(2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, …