In [None]:
import pandas as pd
import numpy as np
from pprint import pprint
import requests

In [None]:
# I will use this function later to calculate points and wins prior to the race

def lookup(df, team, points):
    """Add the value of ``points`` as it stood *before* each race.

    For every row, the value recorded for the same ``season`` and the same
    ``team`` entity in the previous round (``round - 1``) is looked up.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``season``, ``round``, ``team`` and ``points``.
        It is not modified.
    team : str
        Name of the column identifying the entity (e.g. ``'driver'``).
    points : str
        Name of the column holding the value recorded after the race.

    Returns
    -------
    pandas.DataFrame
        A new frame in which the original ``points`` column is renamed to
        ``points + '_after_race'`` and a new ``points`` column (appended last)
        holds the previous round's value, or 0 when there is no previous round.
    """
    keys = ['season', team, 'round']

    # The value recorded after round r is the value held before round r + 1,
    # so shifting the round number lets an ordinary key merge line the rows up.
    # Merging on the real columns (rather than one concatenated string) keeps
    # e.g. team 'a' / round 12 distinct from team 'a1' / round 2.
    prior = df[keys + [points]].copy()
    prior['round'] = prior['round'] + 1

    new_df = (
        df.rename(columns={points: points + '_after_race'})
          .merge(prior, how='left', on=keys)
    )

    # Rows without a previous round (first race of a season) start from zero.
    new_df[points] = new_df[points].fillna(0)
    return new_df
   

## Races

In [None]:
# Column-oriented accumulator for the race calendar: one independent, initially
# empty list per attribute that is collected for a race.
races = {
    column: []
    for column in (
        'season',
        'round',
        'circuit_id',
        'lat',
        'long',
        'country',
        'date',
        'url',
    )
}

In [None]:
# Display the accumulator before it is filled (every list is still empty here).
races

{'season': [],
 'round': [],
 'circuit_id': [],
 'lat': [],
 'long': [],
 'country': [],
 'date': [],
 'url': []}

In [30]:


# Download the race calendar of each season from the Ergast API and flatten it
# into one row per race.

# column name -> (path of keys inside one race record, converter or None)
race_fields = {
    'season': (('season',), int),
    'round': (('round',), int),
    'circuit_id': (('Circuit', 'circuitId'), None),
    'lat': (('Circuit', 'Location', 'lat'), float),
    'long': (('Circuit', 'Location', 'long'), float),
    'country': (('Circuit', 'Location', 'country'), None),
    'date': (('date',), None),
    'url': (('url',), None),
}

# Start from fresh lists on every run so re-executing this cell neither
# duplicates rows nor fails because `races` has already become a DataFrame.
race_rows = {column: [] for column in race_fields}

for year in range(2020, 2022):

    url = 'https://ergast.com/api/f1/{}.json'
    r = requests.get(url.format(year), timeout=30)
    r.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
    payload = r.json()

    for item in payload['MRData']['RaceTable']['Races']:
        for column, (path, convert) in race_fields.items():
            try:
                value = item
                for key in path:
                    value = value[key]
                if convert is not None:
                    value = convert(value)
            except (KeyError, TypeError, ValueError):
                # Field absent or not convertible: keep the row, record None.
                value = None
            race_rows[column].append(value)

races = pd.DataFrame(race_rows)
print(races.shape)

(39, 8)


In [31]:
# Preview the first rows of the race calendar.
races.head()

Unnamed: 0,season,round,circuit_id,lat,long,country,date,url
0,2020,1,red_bull_ring,47.2197,14.7647,Austria,2020-07-05,http://en.wikipedia.org/wiki/2020_Austrian_Gra...
1,2020,2,red_bull_ring,47.2197,14.7647,Austria,2020-07-12,http://en.wikipedia.org/wiki/2020_Styrian_Gran...
2,2020,3,hungaroring,47.5789,19.2486,Hungary,2020-07-19,http://en.wikipedia.org/wiki/2020_Hungarian_Gr...
3,2020,4,silverstone,52.0786,-1.01694,UK,2020-08-02,http://en.wikipedia.org/wiki/2020_British_Gran...
4,2020,5,silverstone,52.0786,-1.01694,UK,2020-08-09,http://en.wikipedia.org/wiki/70th_Anniversary_...


In [32]:
# Preview the last rows of the race calendar.
races.tail()

Unnamed: 0,season,round,circuit_id,lat,long,country,date,url
34,2021,18,rodriguez,19.4042,-99.0907,Mexico,2021-11-07,http://en.wikipedia.org/wiki/2021_Mexican_Gran...
35,2021,19,interlagos,-23.7036,-46.6997,Brazil,2021-11-14,http://en.wikipedia.org/wiki/2021_S%C3%A3o_Pau...
36,2021,20,losail,25.49,51.4542,Qatar,2021-11-21,http://en.wikipedia.org/wiki/2021_Qatar_Grand_...
37,2021,21,jeddah,21.6319,39.1044,Saudi Arabia,2021-12-05,http://en.wikipedia.org/wiki/2021_Saudi_Arabia...
38,2021,22,yas_marina,24.4672,54.6031,UAE,2021-12-12,http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...


In [33]:
# Save the race calendar to races.csv; index=False leaves the row index out of the file.
races.to_csv('races.csv', index = False)

## Rounds

In [35]:
# Reload the race calendar from the file this notebook writes in the Races
# section ('races.csv'), so a fresh Restart & Run All finds its input.
race = pd.read_csv('races.csv')

In [64]:
# Build [[season, [round, round, ...]], ...] with one entry per season, in the
# order the seasons first appear in `race`.
rounds = []
for year in race.season.unique():
    rounds_in_season = race.loc[race.season == year, 'round']
    rounds.append([year, list(rounds_in_season)])

In [65]:
# Show up to the first five [season, rounds] entries.
rounds[:5]

[[2020, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]],
 [2021,
  [1,
   2,
   3,
   4,
   5,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   14,
   15,
   16,
   17,
   18,
   19,
   20,
   21,
   22]]]

## Results

In [66]:
# Download the classified result of every race listed in `rounds` from the
# Ergast API and flatten it into one row per driver per race.

# column -> (record holding the value, path of keys inside it, converter or None)
#   'race'   = the race-level record of the response
#   'result' = one driver's entry in that race's 'Results' list
result_fields = {
    'season': ('race', ('season',), int),
    'round': ('race', ('round',), int),
    'circuit_id': ('race', ('Circuit', 'circuitId'), None),
    'driver': ('result', ('Driver', 'driverId'), None),
    'date_of_birth': ('result', ('Driver', 'dateOfBirth'), None),
    'nationality': ('result', ('Driver', 'nationality'), None),
    'constructor': ('result', ('Constructor', 'constructorId'), None),
    'grid': ('result', ('grid',), int),
    'time': ('result', ('Time', 'millis'), int),
    'status': ('result', ('status',), None),
    # float, not int: points can be fractional (half points were awarded at the
    # 2021 Belgian GP); int() raises on a value such as '12.5' and the field
    # would then be lost as None.
    'points': ('result', ('points',), float),
    'podium': ('result', ('position',), int),
    'url': ('race', ('url',), None),
}

# Fresh column lists on every run, so the cell can be re-executed safely.
results = {column: [] for column in result_fields}

for season, season_rounds in rounds:
    for i in season_rounds:

        # https, like the other Ergast requests in this notebook
        url = 'https://ergast.com/api/f1/{}/{}/results.json'
        r = requests.get(url.format(season, i), timeout=30)
        r.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
        race_record = r.json()['MRData']['RaceTable']['Races'][0]

        for item in race_record['Results']:
            records = {'race': race_record, 'result': item}
            for column, (record, path, convert) in result_fields.items():
                try:
                    value = records[record]
                    for key in path:
                        value = value[key]
                    if convert is not None:
                        value = convert(value)
                except (KeyError, TypeError, ValueError):
                    # Field absent or not convertible (e.g. no finishing time
                    # for a car that retired): keep the row, record None.
                    value = None
                results[column].append(value)

results = pd.DataFrame(results)
print(results.shape)

(780, 13)


In [39]:
# Preview the first rows of the race results table.
results.head()

Unnamed: 0,season,round,circuit_id,driver,date_of_birth,nationality,constructor,grid,time,status,points,podium,url
0,2020,1,red_bull_ring,bottas,1989-08-28,Finnish,mercedes,1,5455739.0,Finished,25.0,1,http://en.wikipedia.org/wiki/2020_Austrian_Gra...
1,2020,1,red_bull_ring,leclerc,1997-10-16,Monegasque,ferrari,7,5458439.0,Finished,18.0,2,http://en.wikipedia.org/wiki/2020_Austrian_Gra...
2,2020,1,red_bull_ring,norris,1999-11-13,British,mclaren,3,5461230.0,Finished,16.0,3,http://en.wikipedia.org/wiki/2020_Austrian_Gra...
3,2020,1,red_bull_ring,hamilton,1985-01-07,British,mercedes,5,5461428.0,Finished,12.0,4,http://en.wikipedia.org/wiki/2020_Austrian_Gra...
4,2020,1,red_bull_ring,sainz,1994-09-01,Spanish,mclaren,8,5464642.0,Finished,10.0,5,http://en.wikipedia.org/wiki/2020_Austrian_Gra...


In [40]:
# Preview the last rows of the race results table.
results.tail()

Unnamed: 0,season,round,circuit_id,driver,date_of_birth,nationality,constructor,grid,time,status,points,podium,url
775,2021,22,yas_marina,latifi,1995-06-29,Canadian,williams,16,,Accident,0.0,16,http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...
776,2021,22,yas_marina,giovinazzi,1993-12-14,Italian,alfa,14,,Gearbox,0.0,17,http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...
777,2021,22,yas_marina,russell,1998-02-15,British,williams,17,,Gearbox,0.0,18,http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...
778,2021,22,yas_marina,raikkonen,1979-10-17,Finnish,alfa,18,,Brakes,0.0,19,http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...
779,2021,22,yas_marina,mazepin,1999-03-02,Russian,haas,20,,Illness,0.0,20,http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...


In [41]:
# Save the race results to results2.csv; index=False leaves the row index out of the file.
results.to_csv('results2.csv', index = False)

## Driver Standings

In [42]:
# Download the driver championship standings after every race listed in
# `rounds` from the Ergast API: one row per driver per round.

# column -> (record holding the value, path of keys inside it, converter or None)
#   'list'  = the standings list of the response (carries season and round)
#   'entry' = one driver's entry in its 'DriverStandings' list
driver_standing_fields = {
    'season': ('list', ('season',), int),
    'round': ('list', ('round',), int),
    'driver': ('entry', ('Driver', 'driverId'), None),
    # float, not int: championship totals can be fractional once half points
    # have been awarded (2021 Belgian GP); int() raises on e.g. '224.5' and the
    # total would be lost as None, which lookup() later turns into 0.
    'driver_points': ('entry', ('points',), float),
    'driver_wins': ('entry', ('wins',), int),
    'driver_standings_pos': ('entry', ('position',), int),
}

# Fresh column lists on every run, so the cell can be re-executed safely.
driver_standings = {column: [] for column in driver_standing_fields}

for season, season_rounds in rounds:
    for i in season_rounds:

        url = 'https://ergast.com/api/f1/{}/{}/driverStandings.json'
        r = requests.get(url.format(season, i), timeout=30)
        r.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
        standings_list = r.json()['MRData']['StandingsTable']['StandingsLists'][0]

        for item in standings_list['DriverStandings']:
            records = {'list': standings_list, 'entry': item}
            for column, (record, path, convert) in driver_standing_fields.items():
                try:
                    value = records[record]
                    for key in path:
                        value = value[key]
                    if convert is not None:
                        value = convert(value)
                except (KeyError, TypeError, ValueError):
                    # Field absent or not convertible: keep the row, record None.
                    value = None
                driver_standings[column].append(value)

driver_standings = pd.DataFrame(driver_standings)
print(driver_standings.shape)


(808, 6)


In [43]:
# Rename driver_points to driver_points_after_race and add a driver_points column
# with the driver's value from the previous round of the same season (0 if none).
# NOTE: the result overwrites driver_standings, so run this cell once per fresh frame.
driver_standings = lookup(driver_standings, 'driver', 'driver_points')

In [44]:
# Same treatment for wins: driver_wins_after_race keeps the recorded value,
# driver_wins becomes the value from the previous round (0 if none).
driver_standings = lookup(driver_standings, 'driver', 'driver_wins')

In [45]:
# Same treatment for the championship position. A driver with no previous round
# gets position 0 here, because lookup() fills missing values with 0.
driver_standings = lookup(driver_standings, 'driver', 'driver_standings_pos')

In [46]:
# Preview the first rows of the driver standings table.
driver_standings.head()

Unnamed: 0,season,round,driver,driver_points_after_race,driver_wins_after_race,driver_standings_pos_after_race,driver_points,driver_wins,driver_standings_pos
0,2020,1,bottas,25.0,1,1,0.0,0.0,0.0
1,2020,1,leclerc,18.0,0,2,0.0,0.0,0.0
2,2020,1,norris,16.0,0,3,0.0,0.0,0.0
3,2020,1,hamilton,12.0,0,4,0.0,0.0,0.0
4,2020,1,sainz,10.0,0,5,0.0,0.0,0.0


In [47]:
# Preview the last rows of the driver standings table.
driver_standings.tail()

Unnamed: 0,season,round,driver,driver_points_after_race,driver_wins_after_race,driver_standings_pos_after_race,driver_points,driver_wins,driver_standings_pos
803,2021,22,latifi,7.0,0,17,7.0,0.0,17.0
804,2021,22,giovinazzi,3.0,0,18,3.0,0.0,18.0
805,2021,22,mick_schumacher,0.0,0,19,0.0,0.0,19.0
806,2021,22,kubica,0.0,0,20,0.0,0.0,20.0
807,2021,22,mazepin,0.0,0,21,0.0,0.0,21.0


In [55]:
# Save the driver standings to driver_standings2.csv; index=False leaves the row index out of the file.
driver_standings.to_csv('driver_standings2.csv', index = False)

## Constructor Standings

In [74]:
# Display `rounds`, which the next cell iterates over.
# NOTE(review): the saved output of this cell shows a method repr rather than the
# [season, rounds] list and the execution counts are out of order - presumably a
# stale output from an earlier edit; re-run from a fresh kernel to confirm.
rounds

<function list.index(value, start=0, stop=9223372036854775807, /)>

In [75]:
# Download the constructor championship standings after every race listed in
# `rounds` from the Ergast API: one row per constructor per round.
constructor_rounds = rounds

# column -> (record holding the value, path of keys inside it, converter or None)
#   'list'  = the standings list of the response (carries season and round)
#   'entry' = one constructor's entry in its 'ConstructorStandings' list
constructor_standing_fields = {
    'season': ('list', ('season',), int),
    'round': ('list', ('round',), int),
    'constructor': ('entry', ('Constructor', 'constructorId'), None),
    # float, not int: championship totals can be fractional once half points
    # have been awarded (2021 Belgian GP); int() raises on e.g. '585.5' and the
    # total would be lost as None, which lookup() later turns into 0.
    'constructor_points': ('entry', ('points',), float),
    'constructor_wins': ('entry', ('wins',), int),
    'constructor_standings_pos': ('entry', ('position',), int),
}

# Fresh column lists on every run, so the cell can be re-executed safely.
constructor_standings = {column: [] for column in constructor_standing_fields}

for season, season_rounds in constructor_rounds:
    for i in season_rounds:

        url = 'https://ergast.com/api/f1/{}/{}/constructorStandings.json'
        r = requests.get(url.format(season, i), timeout=30)
        r.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
        standings_list = r.json()['MRData']['StandingsTable']['StandingsLists'][0]

        for item in standings_list['ConstructorStandings']:
            records = {'list': standings_list, 'entry': item}
            for column, (record, path, convert) in constructor_standing_fields.items():
                try:
                    value = records[record]
                    for key in path:
                        value = value[key]
                    if convert is not None:
                        value = convert(value)
                except (KeyError, TypeError, ValueError):
                    # Field absent or not convertible: keep the row, record None.
                    value = None
                constructor_standings[column].append(value)

constructor_standings = pd.DataFrame(constructor_standings)
print(constructor_standings.shape)


(390, 6)


In [76]:
# Rename constructor_points to constructor_points_after_race and add a
# constructor_points column with the value from the previous round of the same
# season (0 if none).
# NOTE: the result overwrites constructor_standings, so run this cell once per fresh frame.
constructor_standings = lookup(constructor_standings, 'constructor', 'constructor_points')

In [77]:
# Same treatment for wins: constructor_wins_after_race keeps the recorded value,
# constructor_wins becomes the value from the previous round (0 if none).
constructor_standings = lookup(constructor_standings, 'constructor', 'constructor_wins')

In [78]:
# Same treatment for the championship position. A constructor with no previous
# round gets position 0 here, because lookup() fills missing values with 0.
constructor_standings = lookup(constructor_standings, 'constructor', 'constructor_standings_pos')

In [79]:
# Preview the first rows of the constructor standings table.
constructor_standings.head()

Unnamed: 0,season,round,constructor,constructor_points_after_race,constructor_wins_after_race,constructor_standings_pos_after_race,constructor_points,constructor_wins,constructor_standings_pos
0,2020,1,mercedes,37.0,1,1,0.0,0.0,0.0
1,2020,1,mclaren,26.0,0,2,0.0,0.0,0.0
2,2020,1,ferrari,19.0,0,3,0.0,0.0,0.0
3,2020,1,racing_point,8.0,0,4,0.0,0.0,0.0
4,2020,1,alphatauri,6.0,0,5,0.0,0.0,0.0


In [80]:
# Preview the last rows of the constructor standings table.
constructor_standings.tail()

Unnamed: 0,season,round,constructor,constructor_points_after_race,constructor_wins_after_race,constructor_standings_pos_after_race,constructor_points,constructor_wins,constructor_standings_pos
385,2021,22,alphatauri,142.0,0,6,120.0,0.0,6.0
386,2021,22,aston_martin,77.0,0,7,77.0,0.0,7.0
387,2021,22,williams,23.0,0,8,23.0,0.0,8.0
388,2021,22,alfa,13.0,0,9,13.0,0.0,9.0
389,2021,22,haas,0.0,0,10,0.0,0.0,10.0


In [62]:
# Save the constructor standings to constructor_standings2.csv; index=False leaves the row index out of the file.
constructor_standings.to_csv('constructor_standings2.csv', index = False)