# EDA of Formula 1 World Championship (1950 - 2022)<br>
The dataset is <a href="https://www.kaggle.com/datasets/rohanrao/formula-1-world-championship-1950-2020">Formula 1 World Championship (1950 - 2022)</a>, posted on Kaggle by user Vopani.


In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np 
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)

In [3]:
import plotly.graph_objects as go
from plotly.offline import iplot
import folium
from  folium  import  plugins

In [4]:
# Load the five CSV tables used by this notebook from the Kaggle input folder.
_input_dir = '/kaggle/input/formula-1-world-championship-1950-2020/'
resultsDF, circuitsDF, driversDF, racesDF, constructorDF = (
    pd.read_csv(_input_dir + csv_name)
    for csv_name in (
        'results.csv',
        'circuits.csv',
        'drivers.csv',
        'races.csv',
        'constructors.csv',
    )
)


In [5]:
# Preview the first rows of resultsDF (loaded from results.csv).
resultsDF.head()



In [6]:
# Preview the first rows of circuitsDF (loaded from circuits.csv).
circuitsDF.head()

In [7]:
# Preview the first rows of driversDF (loaded from drivers.csv).
driversDF.head()

In [8]:
# Preview the first rows of racesDF (loaded from races.csv).
racesDF.head()

In [9]:
# Preview the first rows of constructorDF (loaded from constructors.csv).
constructorDF.head()

In [10]:
# Build one wide frame by chaining three inner joins. Columns that exist on
# both sides of a join receive pandas' default '_x' / '_y' suffixes.

# Step 1: results + drivers, matched on 'driverId'.
dfresul = resultsDF.merge(driversDF, how='inner', on='driverId')
# Step 2: add the race information, matched on 'raceId'.
dfresulcons = dfresul.merge(racesDF, how='inner', on='raceId')
# Step 3: add the constructor information, matched on 'constructorId'.
dfresulrac = dfresulcons.merge(constructorDF, how='inner', on='constructorId')



In [11]:
# Preview the first rows of the merged frame.
dfresulrac.head()

In [12]:
# Count the missing (null) values in each column of the merged frame.
dfresulrac.isnull().sum()

In [13]:
# Drop the listed URL / name / nationality / time columns from the merged
# frame, then preview the result.
_columns_to_drop = ['url_x', 'url_y', 'name_y', 'nationality_y', 'url', 'time_y']
dfresulrac = dfresulrac.drop(_columns_to_drop, axis=1)
dfresulrac.head()

In [15]:
# Collect one [latitude, longitude] pair per row of circuitsDF.
coordinates = [
    [latitude, longitude]
    for latitude, longitude in zip(circuitsDF['lat'], circuitsDF['lng'])
]

In [16]:
# Base world map for the circuit heat map.
# 'Stamen Toner' is no longer a built-in tileset in folium 0.15+ (passing it
# there fails because folium treats it as a custom tileset without an
# attribution), so a built-in CartoDB tileset is used instead.
maps = folium.Map(
    location=[-15.788497, -47.879873],
    zoom_start=2,
    tiles='CartoDB positron',
)

In [17]:
# Overlay a heat map built from the circuit coordinates, then display the map.
circuit_heat_layer = plugins.HeatMap(coordinates)
maps.add_child(circuit_heat_layer)
maps

In [45]:
#Driver's Championship Ranking by Season
def championship_year(year):
    """Plot a bar chart of the total points each driver scored in one season.

    Rows of the module-level ``dfresulrac`` frame whose ``year`` column equals
    *year* are grouped by ``driverRef``; their ``points`` are summed and the
    totals are drawn in descending order.

    Args:
        year: Season to plot, compared with ``==`` against the ``year`` column.

    Returns:
        The value returned by ``iplot`` for the figure.
    """
    season_rows = dfresulrac[dfresulrac['year'] == year]
    classi = season_rows.groupby('driverRef')['points'].sum().reset_index()
    classi = classi.sort_values(by=['points'], ascending=False)

    fig = go.Figure(data=[go.Bar(x=classi['driverRef'], y=classi['points'])])
    fig.update_layout(
        title={
            'text': f"Driver´s Championship Rankings of {year}",
            'y': 0.85,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
        },
        # The axis title font is set through the nested title.font property;
        # the older `titlefont_size` shortcut is deprecated and is rejected
        # by recent plotly releases.
        yaxis=dict(
            title=dict(text='Total Points', font=dict(size=14)),
            tickfont=dict(size=12),
        ),
        xaxis=dict(
            title=dict(text='Drivers', font=dict(size=14)),
            tickfont=dict(size=12),
        ),
        template='seaborn',
    )
    return iplot(fig)

In [46]:
# Choose the season whose driver ranking should be plotted
# (the original note lists 1950 - 2021 as the valid range).
championship_year(2018)

In [47]:
# Constructors' Championship Ranking by Season
def championship_cons(year):
    """Plot a bar chart of the total points each constructor scored in one season.

    Rows of the module-level ``dfresulrac`` frame whose ``year`` column equals
    *year* are grouped by ``constructorRef``; their ``points`` are summed and
    the totals are drawn in descending order.

    Args:
        year: Season to plot, compared with ``==`` against the ``year`` column.

    Returns:
        The value returned by ``iplot`` for the figure.
    """
    season_rows = dfresulrac[dfresulrac['year'] == year]
    classi_equp = season_rows.groupby('constructorRef')['points'].sum().reset_index()
    classi_equp = classi_equp.sort_values(by=['points'], ascending=False)

    fig = go.Figure(data=[go.Bar(
        x=classi_equp['constructorRef'],
        y=classi_equp['points'],
    )])
    fig.update_layout(
        title={
            'text': f"Constructors' Championship Rankings of {year}",
            'y': 0.85,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
        },
        # The axis title font is set through the nested title.font property;
        # the older `titlefont_size` shortcut is deprecated and is rejected
        # by recent plotly releases.
        yaxis=dict(
            title=dict(text='Points', font=dict(size=14)),
            tickfont=dict(size=12),
        ),
        xaxis=dict(
            title=dict(text='Constructor', font=dict(size=14)),
            tickfont=dict(size=12),
        ),
        template="seaborn",
    )
    return iplot(fig)

In [48]:
# Choose the season whose constructor ranking should be plotted
# (the original note lists 1950 - 2021 as the valid range).
championship_cons(2021)

In [59]:
# Total Wins for a Driver
# 'position' is compared as a string here, so a win is the value '1'.
# Counting the winning rows per driver with size() gives the same totals as
# summing a helper column of ones, and avoids assigning a new column onto a
# filtered slice (which triggers pandas' SettingWithCopyWarning).
num_wins = (
    dfresulrac[dfresulrac['position'] == '1']
    .groupby(['driverRef', 'nationality_x'])
    .size()
    .reset_index(name='position_mod')  # keep the column name later cells read
    .sort_values(by=['position_mod'], ascending=False)
)

In [60]:
# Bar chart of win counts per driver; hovering a bar shows the driver's nationality.
fig = go.Figure(data=[go.Bar(
    x=num_wins['driverRef'],
    y=num_wins['position_mod'],
    hovertext=num_wins['nationality_x'],
)])
fig.update_layout(
    title={
        'text': "Wins Count Per Driver",
        'y': 0.85,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    # The axis title font is set through the nested title.font property; the
    # older `titlefont_size` shortcut is deprecated and is rejected by recent
    # plotly releases.
    yaxis=dict(
        title=dict(text='No of Wins', font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    xaxis=dict(
        title=dict(text='Drivers', font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    template="seaborn",
)
iplot(fig)

Note: the result below treats the driver who scored the most points in a season as that season's champion. That is why Prost appears with 5 titles when he actually has 4, and Senna with 2 when he actually has 3: under the 1988 championship regulations, drivers discarded their worst results. Hamilton appears with 8 titles because he was leading the championship at the time the data was last updated.

In [61]:
# Championships Wins Per Driver
# Total points scored by each driver in each season.
sum_driver = dfresulrac.groupby(['year', 'driverRef'])['points'].sum().reset_index()
# For every season keep the row with the highest points total. This is the
# season's top scorer, which can differ from the official champion.
champions = sum_driver.loc[sum_driver.groupby('year')['points'].idxmax()]
# Count how many seasons each driver topped. The output columns are named
# explicitly because the default names produced by
# value_counts().reset_index() differ between pandas 1.x and 2.x, which
# broke the previous rename keyed on 'index'.
champions = (
    champions['driverRef']
    .value_counts()
    .rename_axis('driver')
    .reset_index(name='titles')
)

In [64]:
# Bar chart of the number of seasons each driver finished as top scorer.
fig = go.Figure(data=[go.Bar(
    x=champions['driver'],
    y=champions['titles'],
)])
fig.update_layout(
    title={
        'text': "Championships Won Per Driver",
        'y': 0.85,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    # The axis title font is set through the nested title.font property; the
    # older `titlefont_size` shortcut is deprecated and is rejected by recent
    # plotly releases.
    yaxis=dict(
        title=dict(text='No.of Championships', font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    xaxis=dict(
        title=dict(text='Drivers', font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    template="seaborn",
)
iplot(fig)

In [65]:
# Pole positions for each driver.
# Only rows with grid == 1 are kept, so summing 'grid' within each
# (driver, nationality) group yields that driver's number of pole positions.
driver_pole = (
    dfresulrac[dfresulrac['grid'] == 1]
    .groupby(by=['driverRef', 'nationality_x'])['grid']
    .sum()
    .reset_index()
    .sort_values(by=['grid'], ascending=False)
)

In [68]:
# Bar chart of pole-position counts per driver; hovering a bar shows the
# driver's nationality.
fig = go.Figure(data=[go.Bar(
    x=driver_pole['driverRef'],
    y=driver_pole['grid'],
    hovertext=driver_pole['nationality_x'],
)])
fig.update_layout(
    title={
        # Chart title spelling corrected ("Postions" -> "Positions").
        'text': "Pole Positions Per Driver",
        'y': 0.85,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    # The axis title font is set through the nested title.font property; the
    # older `titlefont_size` shortcut is deprecated and is rejected by recent
    # plotly releases.
    yaxis=dict(
        title=dict(text='No. of Pole positions', font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    xaxis=dict(
        title=dict(text='Drivers', font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    template="seaborn",
)
iplot(fig)

In [69]:
# Most Podiums for a Driver
def _podium_counts(place):
    """Return (counts, drivers) for rows whose 'position' equals *place*.

    ``counts`` is the per-driver value count of 'position' and ``drivers`` is
    the array of unique driver references in sorted order, so the two line up
    element by element.
    """
    finishers = dfresulrac[dfresulrac['position'] == place]
    counts = finishers.groupby('driverRef')['position'].value_counts()
    drivers = finishers.sort_values(by=['driverRef'])['driverRef'].unique()
    return counts, drivers


# 'position' is compared as a string, hence the quoted place numbers.
p1, driver1 = _podium_counts('1')
p2, driver2 = _podium_counts('2')
p3, driver3 = _podium_counts('3')

In [71]:
# Stacked bar chart of 1st, 2nd and 3rd place finishes per driver.
fig = go.Figure(go.Bar(x=driver1, y=p1, name='1st Place'))
fig.add_trace(go.Bar(x=driver2, y=p2, name='2nd Place'))
fig.add_trace(go.Bar(x=driver3, y=p3, name='3rd Place'))

fig.update_layout(
    barmode='stack',
    title={
        'text': "Podiums Per Driver",
        'y': 0.85,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    # The axis title font is set through the nested title.font property; the
    # older `titlefont_size` shortcut is deprecated and is rejected by recent
    # plotly releases.
    yaxis=dict(
        title=dict(text='No. of Podiums', font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    xaxis=dict(
        title=dict(text='Drivers', font=dict(size=14)),
        tickfont=dict(size=12),
    ),
    template="seaborn",
)
iplot(fig)

In [73]:
# Wins Per Driver's Country
# Rows for race wins ('position' is compared as the string '1').
_winning_rows = dfresulrac[dfresulrac['position'] == '1']
# Sorted unique nationalities; the grouped counts below come out in the same
# sorted order, so labels and values line up.
_nationality_labels = (
    _winning_rows.sort_values(by=['nationality_x'])['nationality_x'].unique()
)
_nationality_wins = (
    _winning_rows.groupby('nationality_x')['position'].value_counts()
)

fig = go.Figure(data=[go.Pie(
    labels=_nationality_labels,
    values=_nationality_wins,
    hole=.3,
)])
fig.update_layout(
    title={
        'text': "Country with more wins",
        'y': 0.85,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    template="seaborn",
)
iplot(fig)