In [43]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px

In [44]:
#Read data from multiple CSV
circuits = pd.read_csv('../data/circuits.csv', index_col=0, na_values=r'\N')
constructorResults = pd.read_csv('../data/constructor_results.csv', index_col=0, na_values=r'\N')
constructors = pd.read_csv('../data/constructors.csv', index_col=0, na_values=r'\N')
constructorStandings = pd.read_csv('../data/constructor_standings.csv', index_col=0, na_values=r'\N')
drivers = pd.read_csv('../data/drivers.csv', index_col=0, na_values=r'\N')
driverStandings = pd.read_csv('../data/driver_standings.csv', index_col=0, na_values=r'\N')
lapTimes = pd.read_csv('../data/lap_times.csv')
pitStops = pd.read_csv('../data/pit_stops.csv')
qualifying = pd.read_csv('../data/qualifying.csv', index_col=0, na_values=r'\N')
races = pd.read_csv('../data/races.csv', na_values=r'\N')
results = pd.read_csv('../data/results.csv', index_col=0, na_values=r'\N')
seasons = pd.read_csv('../data/seasons.csv', index_col=0, na_values=r'\N')
status = pd.read_csv('../data/status.csv', index_col=0, na_values=r'\N')

In [45]:
constructor_color_map = {
    'Toro Rosso': '#0000FF',
    'Mercedes': '#6CD3BF',
    'Red Bull': '#1E5BC6',
    'Ferrari': '#ED1C24',
    'Williams': '#37BEDD',
    'Force India': '#FF80C7',
    'Virgin': '#c82e37',
    'Renault': '#FFD800',
    'McLaren': '#F58020',
    'Sauber': '#006EFF',
    'Lotus': '#FFB800',
    'HRT': '#b2945e',
    'Caterham': '#0b361f',
    'Lotus F1': '#FFB800',
    'Marussia': '#6E0000',
    'Manor Marussia': '#6E0000',
    'Haas F1 Team': '#B6BABD',
    'Racing Point': '#F596C8',
    'Aston Martin': '#2D826D',
    'Alfa Romeo': '#B12039',
    'AlphaTauri': '#4E7C9B',
    'Alpine F1 Team': '#2293D1'
}

In [None]:
drivers_color_map = {
    'Lewis Hamilton': '#6CD3BF',
    'George Russel': '#6CD3BF',
    'Max Verstappen': '#1E5BC6',
    'Checo Perez': '#1E5BC6',
    'Charles Leclerc': '#ED1C24',
    'Ferrari': '#ED1C24',
    'McLaren': '#F58020',
    'McLaren': '#F58020',
    'Alpine F1 Team': '#2293D1',
    'Alpine F1 Team': '#2293D1'
}

In [46]:
# Post-reading formatting
drivers = drivers.rename(columns={'nationality': 'driverNationality', 'url': 'driverUrl'})
drivers['driverName'] = drivers['forename'] + ' ' + drivers['surname']

constructors = constructors.rename(
    columns={'name': 'constructorName', 'nationality': 'constructorNationality', 'url': 'constructorUrl'})

races.set_index('raceId', inplace=True)
races['date'] = races['date'].apply(lambda x: dt.datetime.strptime(x, '%Y-%m-%d'))

pitStops = pitStops.rename(columns={'time': 'pitTime'})
pitStops['seconds'] = pitStops['milliseconds'].apply(lambda x: x / 1000)

results['seconds'] = results['milliseconds'].apply(lambda x: x / 1000)

circuits = circuits.rename(
    columns={'name': 'circuitName', 'location': 'circuitLocation', 'country': 'circuitCountry', 'url': 'circuitUrl'})

In [47]:
newResults = pd.merge(results, races, left_on='raceId', right_index=True, how='left')
newResults = pd.merge(newResults, constructors, left_on='constructorId', right_index=True, how='left')
newResults = pd.merge(newResults, drivers, left_on='driverId', right_index=True, how='left')
newResults = pd.merge(newResults,circuits,left_on='circuitId',right_index=True,how='left')

In [48]:
newPitStops = pd.merge(pitStops, races, left_on='raceId', right_index=True, how='left')
newPitStops = pd.merge(newPitStops, newResults[['raceId', 'driverId', 'driverName', 'constructorId', 'constructorName']], left_on=['raceId', 'driverId'], right_on=['raceId', 'driverId'])

In [49]:
df_drivers_and_constructors = newResults[['driverName', 'constructorName']]
df_drivers_and_constructors = df_drivers_and_constructors.drop_duplicates(subset='driverName')
df_drivers_and_constructors


Unnamed: 0_level_0,driverName,constructorName
resultId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Lewis Hamilton,McLaren
2,Nick Heidfeld,BMW Sauber
3,Nico Rosberg,Williams
4,Fernando Alonso,Renault
5,Heikki Kovalainen,McLaren
...,...,...
24974,Yuki Tsunoda,AlphaTauri
24981,Mick Schumacher,Haas F1 Team
24985,Nikita Mazepin,Haas F1 Team
25415,Guanyu Zhou,Alfa Romeo


In [50]:
year = 2022
champion = "Max Verstappen"
focusedRace = 'French Grand Prix'

In [51]:
fig = px.box(newPitStops[(newPitStops['seconds'] < 50) & (newPitStops['year'] == year)].groupby(
    by=['raceId', 'name', 'date', 'constructorName']).mean().reset_index().sort_values(by='seconds', ascending=True),
             x='constructorName',
             y='seconds',
             color='constructorName',
             color_discrete_map=constructor_color_map,
             width=1000,
             height=500
             )
fig.update_layout(
    title_text=f'Pit Stop Durations by Constructor for {year} Season',
)
fig.show()



The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [52]:
df_driver_focus = newResults[(newResults['driverName'] == champion) & (newResults['year'] == year)]
df_pos_max = newResults[(newResults['year'] == year)]
df_driver_focus["position"] = df_driver_focus["position"].fillna(df_pos_max["position"].max())



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [53]:
fig = px.line(df_driver_focus, x='name', y='position', color='constructorName', color_discrete_map=constructor_color_map, width=1000, height=500)
fig.update_yaxes(nticks=int(df_pos_max['position'].max()))
fig.update_yaxes(range=[df_pos_max['position'].max(), 1])
fig.update_xaxes(nticks=int(df_driver_focus['round'].max()))
fig.update_layout(
    title_text=f'Positions per race for the {year} champion during the year of his championship victory',
)
fig.show()

In [54]:
df_focus_on_race = pd.merge(lapTimes, newResults[['raceId', 'driverId', 'driverName', 'constructorId', 'constructorName', 'name', 'year']], left_on=['raceId', 'driverId'], right_on=['raceId', 'driverId'])
df_focus_on_race = df_focus_on_race[(df_focus_on_race['name'] == focusedRace) & (df_focus_on_race['driverName'] == champion) & (df_focus_on_race['year'] == year)]

In [55]:
fig = px.line(df_focus_on_race, x='lap', y='milliseconds', markers=True, color='constructorName', color_discrete_map=constructor_color_map)
fig.show()

In [56]:
df_points_focus = newResults[(newResults['year'] == year)]
df_points_focus = df_points_focus[['driverName', 'constructorName', 'points']]
df_points_focus = df_points_focus.groupby("driverName").agg(constructorName = ('constructorName', 'first'), mean_points = ('points', 'mean')).reset_index()
df_points_focus = df_points_focus.sort_values("mean_points", ascending=True)
df_points_focus

Unnamed: 0,driverName,constructorName,mean_points
15,Nico Hülkenberg,Aston Martin,0.0
14,Nicholas Latifi,Williams,0.090909
0,Alexander Albon,Williams,0.190476
7,Guanyu Zhou,Alfa Romeo,0.272727
13,Mick Schumacher,Haas F1 Team,0.545455
21,Yuki Tsunoda,AlphaTauri,0.545455
9,Lance Stroll,Aston Martin,0.818182
8,Kevin Magnussen,Haas F1 Team,0.954545
17,Pierre Gasly,AlphaTauri,1.045455
3,Daniel Ricciardo,McLaren,1.545455


In [57]:
fig = px.bar(df_points_focus,x='driverName', y='mean_points', color='constructorName', color_discrete_map=constructor_color_map)
fig.show()

In [58]:
df_nb_accident = newResults[(newResults['year'] == year) & (newResults['position'].isnull())]
df_nb_accident = df_nb_accident[['driverName', 'constructorName']]
df_nb_accident

Unnamed: 0_level_0,driverName,constructorName
resultId,Unnamed: 1_level_1,Unnamed: 2_level_1
25425,Pierre Gasly,AlphaTauri
25440,Valtteri Bottas,Alfa Romeo
25441,Fernando Alonso,Alpine F1 Team
25442,Daniel Ricciardo,McLaren
25443,Nicholas Latifi,Williams
...,...,...
25805,Yuki Tsunoda,AlphaTauri
25823,Lando Norris,McLaren
25824,Kevin Magnussen,Haas F1 Team
25825,Daniel Ricciardo,McLaren


In [59]:
grouped_counts = df_nb_accident.groupby('driverName').agg(constructorName = ('constructorName', 'first'), count = ('driverName', 'size')).reset_index()
grouped_counts = grouped_counts.sort_values('count', ascending=True)
grouped_counts
# Create the pie chart


Unnamed: 0,driverName,constructorName,count
6,George Russell,Mercedes,1
11,Lewis Hamilton,Mercedes,1
12,Max Verstappen,Red Bull,1
9,Lance Stroll,Aston Martin,2
17,Sergio Pérez,Red Bull,2
4,Esteban Ocon,Alpine F1 Team,2
16,Sebastian Vettel,Aston Martin,2
10,Lando Norris,McLaren,2
2,Charles Leclerc,Ferrari,3
3,Daniel Ricciardo,McLaren,3


In [60]:
fig = px.bar(grouped_counts, x='driverName', y='count', color='constructorName', color_discrete_map=constructor_color_map)
fig.show()

In [61]:
df_focus_on_quali = pd.merge(newResults, qualifying, left_on='raceId', right_index=True, how='left')
df_focus_on_quali = df_focus_on_quali[(df_focus_on_quali['year'] == year)]
df_focus_on_quali = df_focus_on_quali[['driverName','q1', 'q2', 'q3']]
df_focus_on_quali

Unnamed: 0_level_0,driverName,q1,q2,q3
resultId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
25406,Charles Leclerc,1:44.378,1:44.336,1:44.992
25407,Carlos Sainz,1:44.378,1:44.336,1:44.992
25408,Lewis Hamilton,1:44.378,1:44.336,1:44.992
25409,George Russell,1:44.378,1:44.336,1:44.992
25410,Kevin Magnussen,1:44.378,1:44.336,1:44.992
...,...,...,...,...
25841,Mick Schumacher,1:31.279,1:28.954,1:29.711
25842,Kevin Magnussen,1:31.279,1:28.954,1:29.711
25843,Lewis Hamilton,1:31.279,1:28.954,1:29.711
25844,Nicholas Latifi,1:31.279,1:28.954,1:29.711


In [62]:
nb_q1 = df_focus_on_quali.groupby('driverName').count()
nb_q1

Unnamed: 0_level_0,q1,q2,q3
driverName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alexander Albon,21,17,11
Carlos Sainz,22,17,11
Charles Leclerc,22,17,11
Daniel Ricciardo,22,17,11
Esteban Ocon,22,17,11
Fernando Alonso,22,17,11
George Russell,22,17,11
Guanyu Zhou,22,17,11
Kevin Magnussen,22,17,11
Lance Stroll,22,17,11
