In [23]:
import requests
import pandas as pd
from pandas.io.json import json_normalize
# Directory prefix for the CSV exports written by this notebook. File names are
# appended with plain string concatenation, so the trailing slash must stay.
csv_files = '../csv_files/'

To list the results for a specific race, use the following URL with the required year and round number:
http://ergast.com/api/f1/2008/5/results
Results can also be filtered by driver. For example, to list all the results for a specific driver in a particular season:
http://ergast.com/api/f1/2008/drivers/alonso/results

In [24]:
# Pieces used to assemble Ergast API URLs by string concatenation.
site = "http://ergast.com/api/f1/"

# Resource path segments appended to the base URL.
drivers, circuits, constructors = "drivers", "circuits", "constructors"

# URL suffix selecting the JSON format and passing limit=1000 to the API.
# NOTE(review): this name would shadow the stdlib `json` module if it were ever imported.
json = ".json?limit=1000"

# Seasons 1950 through 2020 (the range end is exclusive).
years = range(1950, 2021)

In [25]:
# NOTE(review): not referenced anywhere in the visible part of the notebook.
list_drivers = []


def season_drivers(from_year, to_year):
    """Fetch the drivers of every season in an inclusive range of years.

    Returns a dict whose keys are the season years (as passed through
    ``range``) and whose values are the lists of driver dicts found under
    ``MRData -> DriverTable -> Drivers`` in the API response for that season.
    """
    def fetch_season(year):
        # One request per season: <site><year>/<drivers><json>
        response = requests.get(f'{site}{year}/{drivers}{json}')
        return response.json()['MRData']['DriverTable']['Drivers']

    return {year: fetch_season(year) for year in range(from_year, to_year + 1)}

In [26]:
def drivers_info():
    """Fetch the driver listing and index the records by driver id.

    Returns a dict mapping each record's ``driverId`` to the full driver dict
    taken from ``MRData -> DriverTable -> Drivers`` in the API response.
    """
    response = requests.get(f'{site}{drivers}{json}')
    records = response.json()['MRData']['DriverTable']['Drivers']
    return {record['driverId']: record for record in records}
# Rebind the name from the fetch function to a DataFrame of its result (built
# from a dict of dicts, so each driver id becomes a column), then export it.
drivers_info = pd.DataFrame(drivers_info())
drivers_info.to_csv(f'{csv_files}drivers_info')

In [27]:
def unique_drivers(drivers_dict):
    """Collect the distinct driver ids found in a per-season driver mapping.

    ``drivers_dict`` maps a season to a list of driver dicts (the shape that
    ``season_drivers`` returns). The result is the set of all ``'driverId'``
    values across every season.
    """
    return {
        entry['driverId']
        for season_entries in drivers_dict.values()
        for entry in season_entries
    }

def all_drivers(from_year, to_year):
    """Return the set of ids of every driver listed in the given seasons.

    Combines ``season_drivers`` (per-season driver lists for the inclusive
    year range) with ``unique_drivers`` (de-duplication of the driver ids).
    """
    # The previous version called `season_drivers_info`, which is not defined
    # in the visible notebook and would raise NameError. `season_drivers` is the
    # function that returns the {year: [driver, ...]} mapping expected here.
    return unique_drivers(season_drivers(from_year, to_year))

In [28]:
def circuits_info():
    """Fetch the circuit listing and index the records by circuit id.

    Returns a dict whose keys are the ``circuitId`` values and whose values
    are the circuit dicts exactly as returned by the API under
    ``MRData -> CircuitTable -> Circuits``.
    """
    response = requests.get(f'{site}{circuits}{json}')
    records = response.json()['MRData']['CircuitTable']['Circuits']
    return {record['circuitId']: record for record in records}
# Rebind the name from the fetch function to a DataFrame of its result (built
# from a dict of dicts, so each circuit id becomes a column), then export it.
circuits_info = pd.DataFrame(circuits_info())
circuits_info.to_csv(f'{csv_files}circuits_info')

In [42]:
def constructors_info():
    """Fetch the constructor listing and index the records by constructor id.

    Returns a dict mapping each record's ``constructorId`` to the full
    constructor dict taken from ``MRData -> ConstructorTable -> Constructors``
    in the API response.
    """
    response = requests.get(f'{site}{constructors}{json}')
    records = response.json()['MRData']['ConstructorTable']['Constructors']
    return {record['constructorId']: record for record in records}
# Rebind the name from the fetch function to a DataFrame of its result (built
# from a dict of dicts, so each constructor id becomes a column), then export it.
constructors_info = pd.DataFrame(constructors_info())
constructors_info.to_csv(f'{csv_files}constructors_info')

In [18]:
def all_races(from_year, to_year):
    """Build one result row per driver per race for an inclusive year range.

    The drivers of each season come from ``season_drivers``; for every driver
    the season's results are requested from the API and flattened into rows.

    Returns a list of rows (lists). Each row holds, in order: driver id, year,
    round, circuit id, constructor id, status, position, points, laps, race
    time, fastest-lap time, fastest-lap lap, fastest-lap rank and fastest-lap
    average speed. When a result has no 'Time' or no 'FastestLap' entry, the
    corresponding fields are filled with the string 'NaN'.
    """
    result_fields = ['position', 'points', 'laps']
    total_races = []

    # Iterate the mapping that was already downloaded. The previous version
    # called season_drivers(y, y+1) inside the loop, which fetched the same
    # season (and the following one) again for every year.
    for y, season in season_drivers(from_year, to_year).items():
        for driver in season:
            driver_id = driver['driverId']
            races = requests.get(
                site + str(y) + '/drivers/' + driver_id + '/results.json'
            ).json()['MRData']['RaceTable']['Races']

            for race in races:
                # Only the first entry of the race's result list is read.
                result = race['Results'][0]

                current_race = [
                    driver_id,
                    y,
                    race['round'],
                    race['Circuit']['circuitId'],
                    result['Constructor']['constructorId'],
                    result['status'],
                ]
                current_race += [result[field] for field in result_fields]

                # Race time is optional in the payload.
                if 'Time' in result:
                    current_race.append(result['Time']['time'])
                else:
                    current_race.append('NaN')

                # Fastest-lap details are optional in the payload.
                if 'FastestLap' in result:
                    fastest = result['FastestLap']
                    current_race += [
                        fastest['Time']['time'],
                        fastest['lap'],
                        fastest['rank'],
                        fastest['AverageSpeed']['speed'],
                    ]
                else:
                    current_race += ['NaN', 'NaN', 'NaN', 'NaN']

                total_races.append(current_race)
    return total_races

In [22]:
# Inspect the raw results payload for one driver and season (alonso, 2009).
driver_req = requests.get(f'{site}2009/drivers/alonso/results.json').json()
driver_req

{'MRData': {'xmlns': 'http://ergast.com/mrd/1.4',
  'series': 'f1',
  'url': 'http://ergast.com/api/f1/2009/drivers/alonso/results.json',
  'limit': '30',
  'offset': '0',
  'total': '17',
  'RaceTable': {'season': '2009',
   'driverId': 'alonso',
   'Races': [{'season': '2009',
     'round': '1',
     'url': 'http://en.wikipedia.org/wiki/2009_Australian_Grand_Prix',
     'raceName': 'Australian Grand Prix',
     'Circuit': {'circuitId': 'albert_park',
      'url': 'http://en.wikipedia.org/wiki/Melbourne_Grand_Prix_Circuit',
      'circuitName': 'Albert Park Grand Prix Circuit',
      'Location': {'lat': '-37.8497',
       'long': '144.968',
       'locality': 'Melbourne',
       'country': 'Australia'}},
     'date': '2009-03-29',
     'time': '06:00:00Z',
     'Results': [{'number': '7',
       'position': '5',
       'positionText': '5',
       'points': '4',
       'Driver': {'driverId': 'alonso',
        'permanentNumber': '14',
        'code': 'ALO',
        'url': 'http://en.wik

In [21]:
# Flatten every race result from 1950 to 2020 into a DataFrame and export it.
df_races = pd.DataFrame(
    all_races(1950, 2020),
    columns=[
        'driver', 'year', 'round', 'circuit', 'constructor', 'status',
        'position', 'points', 'laps', 'race_time',
        'fastest_lap_time', 'lap', 'fastest_lap_rank', 'average_speed',
    ],
)
# Not the last expression of the cell, so this line does not render the frame.
df_races
df_races.to_csv('Data_F1_3')

In [26]:
# Show one randomly chosen row as a spot check.
# NOTE(review): the recorded output has an "Unnamed: 0" column, which suggests
# df_races had been re-read from a CSV when this ran — confirm which frame this cell expects.
df_races.sample()

Unnamed: 0,driver,year,round,circuit,constructor,status,position,points,laps,race_time,fastest_lap_time,lap,fastest_lap_rank,average_speed
13391,tarquini,1991,9,hockenheimring,ags,Did not qualify,29,0,0,,,,,


In [25]:
def attempts_fastestlap(from_year, to_year):
    """Build a reduced result row per driver per race for an inclusive year range.

    The drivers of each season come from ``season_drivers``; for every driver
    the season's results are requested from the API.

    Returns a list of rows (lists). Each row holds, in order: driver id, year,
    round, circuit id, constructor id, position, points and laps.
    """
    result_fields = ['position', 'points', 'laps']
    total_races = []

    # Iterate the mapping that was already downloaded. The previous version
    # called season_drivers(y, y+1) inside the loop, which fetched the same
    # season (and the following one) again for every year.
    for y, season in season_drivers(from_year, to_year).items():
        for driver in season:
            driver_id = driver['driverId']
            races = requests.get(
                site + str(y) + '/drivers/' + driver_id + '/results.json'
            ).json()['MRData']['RaceTable']['Races']

            for race in races:
                # Only the first entry of the race's result list is read.
                result = race['Results'][0]
                total_races.append(
                    [
                        driver_id,
                        y,
                        race['round'],
                        race['Circuit']['circuitId'],
                        result['Constructor']['constructorId'],
                    ]
                    + [result[field] for field in result_fields]
                )
    return total_races
# Trial run over the 2005 and 2006 seasons.
attempt = attempts_fastestlap(2005, 2006)

In [12]:
# Preview the per-season driver mapping for 2005 and 2006.
# NOTE(review): this binds `df_races` to a dict, while another cell uses the same
# name for the race-results DataFrame; which one is live depends on execution order.
df_races = season_drivers(2005,2006)
df_races

{2005: [{'driverId': 'albers',
   'code': 'ALB',
   'url': 'http://en.wikipedia.org/wiki/Christijan_Albers',
   'givenName': 'Christijan',
   'familyName': 'Albers',
   'dateOfBirth': '1979-04-16',
   'nationality': 'Dutch'},
  {'driverId': 'alonso',
   'permanentNumber': '14',
   'code': 'ALO',
   'url': 'http://en.wikipedia.org/wiki/Fernando_Alonso',
   'givenName': 'Fernando',
   'familyName': 'Alonso',
   'dateOfBirth': '1981-07-29',
   'nationality': 'Spanish'},
  {'driverId': 'barrichello',
   'code': 'BAR',
   'url': 'http://en.wikipedia.org/wiki/Rubens_Barrichello',
   'givenName': 'Rubens',
   'familyName': 'Barrichello',
   'dateOfBirth': '1972-05-23',
   'nationality': 'Brazilian'},
  {'driverId': 'button',
   'permanentNumber': '22',
   'code': 'BUT',
   'url': 'http://en.wikipedia.org/wiki/Jenson_Button',
   'givenName': 'Jenson',
   'familyName': 'Button',
   'dateOfBirth': '1980-01-19',
   'nationality': 'British'},
  {'driverId': 'coulthard',
   'code': 'COU',
   'url':

In [8]:
# Inspect the list of races returned for one driver and season (alonso, 2006).
driver_req = (
    requests.get(f'{site}2006/drivers/alonso/results.json')
    .json()['MRData']['RaceTable']['Races']
)
driver_req

[{'season': '2006',
  'round': '1',
  'url': 'http://en.wikipedia.org/wiki/2006_Bahrain_Grand_Prix',
  'raceName': 'Bahrain Grand Prix',
  'Circuit': {'circuitId': 'bahrain',
   'url': 'http://en.wikipedia.org/wiki/Bahrain_International_Circuit',
   'circuitName': 'Bahrain International Circuit',
   'Location': {'lat': '26.0325',
    'long': '50.5106',
    'locality': 'Sakhir',
    'country': 'Bahrain'}},
  'date': '2006-03-12',
  'time': '14:30:00Z',
  'Results': [{'number': '1',
    'position': '1',
    'positionText': '1',
    'points': '10',
    'Driver': {'driverId': 'alonso',
     'permanentNumber': '14',
     'code': 'ALO',
     'url': 'http://en.wikipedia.org/wiki/Fernando_Alonso',
     'givenName': 'Fernando',
     'familyName': 'Alonso',
     'dateOfBirth': '1981-07-29',
     'nationality': 'Spanish'},
    'Constructor': {'constructorId': 'renault',
     'url': 'http://en.wikipedia.org/wiki/Renault_in_Formula_One',
     'name': 'Renault',
     'nationality': 'French'},
    'g