<a href="https://colab.research.google.com/github/Ngugisenior/data_analytics/blob/main/Formula_1_Data_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Libraries

In [1]:
import requests
import json 
import pandas as pd 

from bokeh.io import output_notebook, show
output_notebook()
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap

# Use 3 decimal places in output display
pd.set_option("display.precision", 3)

# Don't wrap repr(DataFrame) across additional lines
pd.set_option("display.expand_frame_repr", False)

# Set max rows displayed in output to 25
pd.set_option("display.max_rows", 2505)

Request Racing Data from API

In [4]:
def get_drivers_data(limit=None,series=None,offset=None,total=None,data=None):
  limit = '?limit={}'.format(limit) if limit else ''
  url = 'https://ergast.com/api/f1.json{}'.format(limit)
  req = requests.get(url)

  response = req.json()
  return response
racing_data = get_drivers_data()
pd.json_normalize(racing_data)
limit = racing_data['MRData']['total']

Circuits By Year

In [7]:
circuits = get_drivers_data(limit=2000)['MRData']['RaceTable']['Races']
circuits = pd.DataFrame(pd.json_normalize(circuits))
circuits.to_csv('Formula 1 Historical Data.csv')

Circuits in 1950

In [None]:
cr_1950 = circuits.loc[circuits['season']=='1950']
cr_1950.sort_values(by='date').reset_index(drop=True)

Unnamed: 0,season,round,url,raceName,date,Circuit.circuitId,Circuit.url,Circuit.circuitName,Circuit.Location.lat,Circuit.Location.long,Circuit.Location.locality,Circuit.Location.country,time
0,1950,1,http://en.wikipedia.org/wiki/1950_British_Gran...,British Grand Prix,1950-05-13,silverstone,http://en.wikipedia.org/wiki/Silverstone_Circuit,Silverstone Circuit,52.0786,-1.01694,Silverstone,UK,
1,1950,2,http://en.wikipedia.org/wiki/1950_Monaco_Grand...,Monaco Grand Prix,1950-05-21,monaco,http://en.wikipedia.org/wiki/Circuit_de_Monaco,Circuit de Monaco,43.7347,7.42056,Monte-Carlo,Monaco,
2,1950,3,http://en.wikipedia.org/wiki/1950_Indianapolis...,Indianapolis 500,1950-05-30,indianapolis,http://en.wikipedia.org/wiki/Indianapolis_Moto...,Indianapolis Motor Speedway,39.795,-86.2347,Indianapolis,USA,
3,1950,4,http://en.wikipedia.org/wiki/1950_Swiss_Grand_...,Swiss Grand Prix,1950-06-04,bremgarten,http://en.wikipedia.org/wiki/Circuit_Bremgarten,Circuit Bremgarten,46.9589,7.40194,Bern,Switzerland,
4,1950,5,http://en.wikipedia.org/wiki/1950_Belgian_Gran...,Belgian Grand Prix,1950-06-18,spa,http://en.wikipedia.org/wiki/Circuit_de_Spa-Fr...,Circuit de Spa-Francorchamps,50.4372,5.97139,Spa,Belgium,
5,1950,6,http://en.wikipedia.org/wiki/1950_French_Grand...,French Grand Prix,1950-07-02,reims,http://en.wikipedia.org/wiki/Reims-Gueux,Reims-Gueux,49.2542,3.93083,Reims,France,
6,1950,7,http://en.wikipedia.org/wiki/1950_Italian_Gran...,Italian Grand Prix,1950-09-03,monza,http://en.wikipedia.org/wiki/Autodromo_Naziona...,Autodromo Nazionale di Monza,45.6156,9.28111,Monza,Italy,


Most Used circuit in F1

In [None]:
# Group circults by circuit name and country name
cr_cname = circuits.groupby( [ 'Circuit.circuitName','Circuit.Location.country'] ).size()

# Transform Grouping into a DataFrame
cr_cname = pd.DataFrame({'count' : cr_cname})

# Sort the result set
cr_cname = cr_cname.sort_values(by='count', ascending=False)
# Reset the index
cr_cname.reset_index()

Unnamed: 0,Circuit.circuitName,Circuit.Location.country,count
0,Autodromo Nazionale di Monza,Italy,68
1,Circuit de Monaco,Monaco,65
2,Silverstone Circuit,UK,52
3,Circuit de Spa-Francorchamps,Belgium,51
4,Nürburgring,Germany,40
5,Circuit Gilles Villeneuve,Canada,39
6,Hockenheimring,Germany,36
7,Autódromo José Carlos Pace,Brazil,36
8,Hungaroring,Hungary,33
9,Suzuka Circuit,Japan,30


Country That has hosted most F1 Races

In [None]:
# Group circults by country name
cr_country = circuits.groupby( [ 'Circuit.Location.country'] ).size()

# Transform Grouping into a DataFrame
cr_country = pd.DataFrame({'# Races' : cr_country})

# Sort the result set
cr_country = cr_country.sort_values(by='# Races', ascending=False)
# Reset the index
cr_country = cr_country.reset_index()
cr_country

Unnamed: 0,Circuit.Location.country,# Races
0,Italy,96
1,Germany,77
2,UK,72
3,USA,69
4,Monaco,65
5,Belgium,63
6,France,60
7,Spain,55
8,Canada,49
9,Brazil,46


In [None]:
c_list = cr_country['Circuit.Location.country'].tolist()
c_count = cr_country['# Races'].tolist()

In [None]:

p = figure(plot_width=1200, plot_height=500, title="Formula 1",
           x_range=c_list, toolbar_location=None, tools="")

p.vbar(x=c_list, top=c_count, width=0.9)

p.xgrid.grid_line_color = None
p.y_range.start = 0

show(p)

In [None]:
driver_table = response['MRData']['DriverTable']
driver_table

{'Drivers': [{'dateOfBirth': '1932-07-10',
   'driverId': 'abate',
   'familyName': 'Abate',
   'givenName': 'Carlo',
   'nationality': 'Italian',
   'url': 'http://en.wikipedia.org/wiki/Carlo_Mario_Abate'},
  {'dateOfBirth': '1913-03-21',
   'driverId': 'abecassis',
   'familyName': 'Abecassis',
   'givenName': 'George',
   'nationality': 'British',
   'url': 'http://en.wikipedia.org/wiki/George_Abecassis'},
  {'dateOfBirth': '1957-11-27',
   'driverId': 'acheson',
   'familyName': 'Acheson',
   'givenName': 'Kenny',
   'nationality': 'British',
   'url': 'http://en.wikipedia.org/wiki/Kenny_Acheson'},
  {'dateOfBirth': '1969-11-19',
   'driverId': 'adams',
   'familyName': 'Adams',
   'givenName': 'Philippe',
   'nationality': 'Belgian',
   'url': 'http://en.wikipedia.org/wiki/Philippe_Adams'},
  {'dateOfBirth': '1913-12-15',
   'driverId': 'ader',
   'familyName': 'Ader',
   'givenName': 'Walt',
   'nationality': 'American',
   'url': 'http://en.wikipedia.org/wiki/Walt_Ader'},
  {'da

In [None]:
#set columns
cols = driver_table['Drivers'][0].keys()

#Empty DataFrame
train = pd.DataFrame(columns=cols)

#Create a Drivers DataFrame
for i in driver_table['Drivers']:
  df = pd.DataFrame([i])[cols]
  if train.empty:
    train = df
  else:
    train = train.append(df)

train

Unnamed: 0,driverId,url,givenName,familyName,dateOfBirth,nationality
0,abate,http://en.wikipedia.org/wiki/Carlo_Mario_Abate,Carlo,Abate,1932-07-10,Italian
0,abecassis,http://en.wikipedia.org/wiki/George_Abecassis,George,Abecassis,1913-03-21,British
0,acheson,http://en.wikipedia.org/wiki/Kenny_Acheson,Kenny,Acheson,1957-11-27,British
0,adams,http://en.wikipedia.org/wiki/Philippe_Adams,Philippe,Adams,1969-11-19,Belgian
0,ader,http://en.wikipedia.org/wiki/Walt_Ader,Walt,Ader,1913-12-15,American
0,adolff,http://en.wikipedia.org/wiki/Kurt_Adolff,Kurt,Adolff,1921-11-05,German
0,agabashian,http://en.wikipedia.org/wiki/Fred_Agabashian,Fred,Agabashian,1913-08-21,American
0,ahrens,"http://en.wikipedia.org/wiki/Kurt_Ahrens,_Jr.",Kurt,Ahrens,1940-04-19,German
0,aitken,http://en.wikipedia.org/wiki/Jack_Aitken,Jack,Aitken,1995-09-23,British
0,albers,http://en.wikipedia.org/wiki/Christijan_Albers,Christijan,Albers,1979-04-16,Dutch
