Imports

In [84]:
import pandas as pd
import requests
import numpy as np

In [3]:
def null_check(df):
    """Print the shape of ``df`` and, beneath it, the missing-value count of every column."""
    dimensions = df.shape
    print('Shape: ', dimensions)

    # isna().sum() yields one count per column, indexed by column name.
    missing_per_column = df.isna().sum()
    print('\n', missing_per_column)

Drivers

In [6]:
# Download the driver list from the Ergast API. limit=1000 presumably raises
# the page size so every driver arrives in one response - TODO confirm.
url = 'http://ergast.com/api/f1/drivers.json?limit=1000'
# The timeout keeps the cell from hanging forever on an unresponsive server;
# raise_for_status() reports an HTTP error here rather than as a confusing
# JSON/KeyError a few lines later.
r = requests.get(url, timeout=30)
r.raise_for_status()
drivers = r.json()
# The driver records are nested under MRData -> DriverTable -> Drivers.
df_drivers = pd.DataFrame(drivers['MRData']['DriverTable']['Drivers'])

In [14]:
# Print the shape of the drivers table and its per-column missing-value counts.
null_check(df_drivers)

Shape:  (854, 8)

 driverId             0
url                  0
givenName            0
familyName           0
dateOfBirth          0
nationality          0
permanentNumber    803
code               757
dtype: int64


In [19]:
# Show the first 10 drivers that have a permanentNumber. The rows displayed
# all carry a code as well, which suggests both fields are populated together
# for recent drivers.
df_drivers[df_drivers['permanentNumber'].notna()].head(10)

Unnamed: 0,driverId,url,givenName,familyName,dateOfBirth,nationality,permanentNumber,code
8,aitken,http://en.wikipedia.org/wiki/Jack_Aitken,Jack,Aitken,1995-09-23,British,89,AIT
10,albon,http://en.wikipedia.org/wiki/Alexander_Albon,Alexander,Albon,1996-03-23,Thai,23,ALB
16,alonso,http://en.wikipedia.org/wiki/Fernando_Alonso,Fernando,Alonso,1981-07-29,Spanish,14,ALO
75,jules_bianchi,http://en.wikipedia.org/wiki/Jules_Bianchi,Jules,Bianchi,1989-08-03,French,17,BIA
96,bottas,http://en.wikipedia.org/wiki/Valtteri_Bottas,Valtteri,Bottas,1989-08-28,Finnish,77,BOT
131,button,http://en.wikipedia.org/wiki/Jenson_Button,Jenson,Button,1980-01-19,British,22,BUT
158,chilton,http://en.wikipedia.org/wiki/Max_Chilton,Max,Chilton,1991-04-21,British,4,CHI
245,ericsson,http://en.wikipedia.org/wiki/Marcus_Ericsson,Marcus,Ericsson,1990-09-02,Swedish,9,ERI
267,pietro_fittipaldi,http://en.wikipedia.org/wiki/Pietro_Fittipaldi,Pietro,Fittipaldi,1996-06-25,Brazilian,51,FIT
302,gasly,http://en.wikipedia.org/wiki/Pierre_Gasly,Pierre,Gasly,1996-02-07,French,10,GAS


Seasons

In [20]:
# Request the list of seasons from the Ergast API.
url = 'http://ergast.com/api/f1/seasons.json?limit=1000'
# Bound the wait with a timeout and fail fast on an HTTP error status, so a
# bad response is reported here instead of when the body is parsed later.
r = requests.get(url, timeout=30)
r.raise_for_status()

In [25]:
# Decode the JSON body of the response held in `r`.
seasons = r.json()
# The season records are nested under MRData -> SeasonTable -> Seasons.
df_seasons = pd.DataFrame(seasons['MRData']['SeasonTable']['Seasons'])

In [28]:
# Inspect the last season record in the decoded response.
seasons['MRData']['SeasonTable']['Seasons'][-1]

{'season': '2022',
 'url': 'http://en.wikipedia.org/wiki/2022_Formula_One_World_Championship'}

In [26]:
# Preview the first rows of the seasons table.
df_seasons.head()

Unnamed: 0,season,url
0,1950,http://en.wikipedia.org/wiki/1950_Formula_One_...
1,1951,http://en.wikipedia.org/wiki/1951_Formula_One_...
2,1952,http://en.wikipedia.org/wiki/1952_Formula_One_...
3,1953,http://en.wikipedia.org/wiki/1953_Formula_One_...
4,1954,http://en.wikipedia.org/wiki/1954_Formula_One_...


Qualifying

In [109]:

def process_yearly_quali(root):
    """Flatten a list of race records into one qualifying-results DataFrame.

    Parameters
    ----------
    root : iterable of dict
        Race records. Each is expected to hold a ``'QualifyingResults'`` list
        whose entries contain nested ``'Driver'`` and ``'Constructor'`` dicts
        with ``'driverId'`` / ``'constructorId'`` keys.

    Returns
    -------
    pandas.DataFrame
        One row per qualifying result. The nested ``Driver`` and
        ``Constructor`` columns are replaced by ``driverId`` and
        ``constructorId``, and the race-level ``season``, ``round``,
        ``raceName``, ``date`` and ``time`` values are repeated on every row
        (NaN when the race record lacks the key). Row labels restart at 0 for
        every race. An empty DataFrame is returned when no race contributes
        any rows.

    Notes
    -----
    Prints one progress line per race: the race name on success, or the name
    followed by a short reason when the race is skipped.
    """
    fields = ['season', 'round', 'raceName', 'date', 'time']

    # Collect the per-race frames and concatenate once at the end; growing a
    # DataFrame with concat inside the loop re-copies all earlier rows each time.
    race_frames = []

    for race in root:
        # The name is only used for progress output, so a missing key must not
        # abort the whole season.
        race_name = race.get('raceName', '<unnamed race>')

        quali_results = pd.DataFrame(race.get('QualifyingResults') or [])
        if quali_results.empty:
            # Without rows there are no 'Driver'/'Constructor' columns to unpack.
            print(race_name, ' skipped (no qualifying results)')
            continue

        # Keep only the identifiers from the nested driver/constructor dicts.
        drivers = pd.json_normalize(quali_results['Driver'])[['driverId']]
        cons = pd.json_normalize(quali_results['Constructor'])[['constructorId']]

        quali_results = pd.concat(
            [quali_results.drop(columns=['Driver', 'Constructor']), drivers, cons],
            axis=1,
        )

        # Broadcast the race-level values onto every row of this race.
        for field in fields:
            quali_results[field] = race.get(field, np.nan)

        # Frames with duplicate column labels cannot be aligned against the
        # other races (pandas raises InvalidIndexError when it has to reindex
        # them), so report the race and move on instead of aborting.
        if not quali_results.columns.is_unique:
            print(race_name, ' failed')
            continue

        race_frames.append(quali_results)
        print(race_name)

    if not race_frames:
        # pd.concat rejects an empty list; keep returning an empty frame.
        return pd.DataFrame()

    return pd.concat(race_frames, axis=0)

In [110]:
# Download the qualifying results of every season from 2003 through 2022 and
# stack them into one table.
yearly_frames = []
for year in range(2003, 2023):

    print(year)

    url = f'http://ergast.com/api/f1/{year}/qualifying.json?limit=1000'
    # Bound the wait on each request and stop on an HTTP error instead of
    # trying to parse an error page as JSON.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    quali = r.json()

    # The race records are nested under MRData -> RaceTable -> Races.
    root = quali['MRData']['RaceTable']['Races']

    df_temp = process_yearly_quali(root=root)

    if df_temp.empty:
        # Nothing to add for this season.
        continue

    if not df_temp.columns.is_unique:
        # Duplicate column labels cannot be aligned with the other seasons
        # (pandas raises InvalidIndexError when reindexing them); say which
        # season is dropped rather than failing silently.
        print(f'{year}: duplicate column labels, season skipped')
        continue

    yearly_frames.append(df_temp)

# Concatenate once instead of re-copying the accumulated rows on every iteration.
df_quali_total = pd.concat(yearly_frames, axis=0) if yearly_frames else pd.DataFrame()

2003
Australian Grand Prix
Malaysian Grand Prix
Brazilian Grand Prix
San Marino Grand Prix
Spanish Grand Prix
Austrian Grand Prix
Monaco Grand Prix
Canadian Grand Prix
European Grand Prix
French Grand Prix
British Grand Prix
German Grand Prix
Hungarian Grand Prix
Italian Grand Prix
United States Grand Prix
Japanese Grand Prix
2004
Australian Grand Prix
Malaysian Grand Prix
Bahrain Grand Prix
San Marino Grand Prix
Spanish Grand Prix
Monaco Grand Prix
European Grand Prix
Canadian Grand Prix
United States Grand Prix
French Grand Prix
British Grand Prix
German Grand Prix
Hungarian Grand Prix
Belgian Grand Prix
Italian Grand Prix
Chinese Grand Prix
Japanese Grand Prix
Brazilian Grand Prix
2005
Australian Grand Prix
Malaysian Grand Prix
Bahrain Grand Prix
San Marino Grand Prix
Spanish Grand Prix
Monaco Grand Prix
European Grand Prix
Canadian Grand Prix
United States Grand Prix
French Grand Prix
British Grand Prix
German Grand Prix
Hungarian Grand Prix
Turkish Grand Prix
Italian Grand Prix
Be

In [114]:
# Write the combined qualifying table to CSV; index=False leaves the row
# labels out of the file.
df_quali_total.to_csv('03_s3/quali_res.csv', index=False)