**Data Collection**
---

---
**Import necessary libraries**


In [2]:
import pandas as pd
from bs4 import BeautifulSoup
import requests, concurrent.futures
import re
import numpy as np

---
**Circuits dataframe**

In [5]:
'''
In this part of the code we extract all the circuits’ names, the season that a race 
took place there, the round in each season, and its country. The data extraction is 
achieved through querying the http://ergast.com/mrd/, an experimental web service which
provides a historical record of motor racing data. Furthermore, additional data regarding
circuits such as configuration, drs_zones, number of corners and length, was manually acquired through 
their respective wikipedia page, https://www.wikipedia.org/.
'''


races = {'season': [],
        'round': [],
        'circuit_id': [],
        'country': []
        }
    
for year in list(range(2010,2023)):

    url = 'https://ergast.com/api/f1/{}.json'
    r = requests.get(url.format(year))
    json = r.json()

    for item in json['MRData']['RaceTable']['Races']:
        try:
            races['season'].append(int(item['season']))
        except:
            races['season'].append(None)
        try:
            races['round'].append(int(item['round']))
        except:
            races['round'].append(None)
        try:
            races['circuit_id'].append(item['Circuit']['circuitId'])
        except:
            races['circuit_id'].append(None)
        try:
            races['country'].append(item['Circuit']['Location']['country'])
        except:
            races['country'].append(None)
        
circuits = pd.DataFrame(races)


circuit_type = {
    'monaco': 'Street',
    'marina_bay': 'Street',
    'baku': 'Street',
    'ricard': 'Race',
    'mugello': 'Race',
    'jeddah': 'Street',
    'bahrain': 'Race',
    'albert_park': 'Street',
    'sepang': 'Race',
    'shanghai': 'Race',
    'catalunya': 'Race',
    'istanbul': 'Race',
    'villeneuve': 'Street',
    'valencia': 'Street',
    'silverstone': 'Race',
    'hockenheimring': 'Race',
    'hungaroring': 'Race',
    'spa': 'Race',
    'monza': 'Race',
    'suzuka': 'Race',
    'yeongam': 'Race',
    'interlagos': 'Race',
    'yas_marina': 'Race',
    'nurburgring': 'Race',
    'buddh': 'Race',
    'americas': 'Race',
    'red_bull_ring': 'Race',
    'sochi': 'Race',
    'rodriguez': 'Race',
    'portimao': 'Race',
    'imola': 'Race',
    'zandvoort': 'Race',
    'losail': 'Race',
    'miami': 'Street'
}

circuits['configuration'] = circuits['circuit_id'].map(circuit_type)

circuit_length = {
        'monaco': 3.337 ,
    'marina_bay': 4.928,
    'baku': 6.003,
    'ricard': 5.842,
    'mugello': 5.245,
    'jeddah': 6.174,
    'bahrain': 5.412,
    'albert_park': 5.303,
    'sepang':  5.543,
    'shanghai': 5.451,
    'catalunya': 4.657,
    'istanbul': 5.338,
    'villeneuve': 4.361,
    'valencia': 5.419,
    'silverstone': 5.891,
    'hockenheimring': 4.574,
    'hungaroring': 4.381,
    'spa': 7.004,
    'monza': 5.793,
    'suzuka': 5.807,
    'yeongam': 5.615,
    'interlagos': 4.309,
    'yas_marina': 5.554,
    'nurburgring': 5.148,
    'buddh': 5.125,
    'americas': 5.514,
    'red_bull_ring': 4.318,
    'sochi': 5.848,
    'rodriguez': 4.304,
    'portimao': 4.653,
    'imola': 4.909,
    'zandvoort': 4.259,
    'losail': 5.418,
    'miami': 5.412
}

circuits['length (km)'] = circuits['circuit_id'].map(circuit_length)

circuit_corners = {
    'monaco': 19,
    'marina_bay': 19,
    'baku': 20,
    'ricard': 15,
    'mugello': 15,
    'jeddah': 27,
    'bahrain': 15,
    'albert_park': 14,
    'sepang':  15,
    'shanghai': 16,
    'catalunya': 14,
    'istanbul': 14,
    'villeneuve': 14,
    'valencia': 25,
    'silverstone': 18,
    'hockenheimring': 16,
    'hungaroring': 14,
    'spa': 20,
    'monza': 11,
    'suzuka': 18,
    'yeongam': 18,
    'interlagos': 15,
    'yas_marina': 16,
    'nurburgring': 15,
    'buddh': 16,
    'americas': 20,
    'red_bull_ring': 10,
    'sochi': 19,
    'rodriguez': 17,
    'portimao': 15,
    'imola': 22,
    'zandvoort': 14,
    'losail': 16,
    'miami': 19
}

circuits['number_of_corners'] = circuits['circuit_id'].map(circuit_corners)

drs_zones = {
    'monaco': 1,
    'marina_bay': 3,
    'baku': 2,
    'ricard': 2,
    'mugello': 1,
    'jeddah': 3,
    'bahrain': 3,
    'albert_park': 4,
    'sepang':  1,
    'shanghai': 2,
    'catalunya': 2,
    'istanbul': 2,
    'villeneuve': 2,
    'valencia': 1,
    'silverstone': 2,
    'hockenheimring': 2,
    'hungaroring': 2,
    'spa': 2,
    'monza': 2,
    'suzuka': 1,
    'yeongam': 1,
    'interlagos': 2,
    'yas_marina': 2,
    'nurburgring': 2,
    'buddh': 2,
    'americas': 2,
    'red_bull_ring': 2,
    'sochi': 2,
    'rodriguez': 1,
    'portimao': 1,
    'imola': 1,
    'zandvoort': 2,
    'losail': 1,
    'miami': 3
}

circuits['drs_zones'] = circuits['circuit_id'].map(drs_zones)
circuits.drop(['season', 'round'], axis = 1, inplace =True)
circuits.drop_duplicates(inplace = True)
circuits.reset_index(drop =True)

Unnamed: 0,circuit_id,country,configuration,length (km),number_of_corners,drs_zones
0,bahrain,Bahrain,Race,5.412,15,3
1,albert_park,Australia,Street,5.303,14,4
2,sepang,Malaysia,Race,5.543,15,1
3,shanghai,China,Race,5.451,16,2
4,catalunya,Spain,Race,4.657,14,2
5,monaco,Monaco,Street,3.337,19,1
6,istanbul,Turkey,Race,5.338,14,2
7,villeneuve,Canada,Street,4.361,14,2
8,valencia,Spain,Street,5.419,25,1
9,silverstone,UK,Race,5.891,18,2


---
**Drivers dataframe**


In [14]:
'''
In this part of the code we extract all the drivers that took part in at least one
race from 2010 to 2022. We acquired data such as their names, date of births, and nationality 
through querying the http://ergast.com/mrd/ again. Furthermore, additional data regarding
driver career statistics, such as wins, pole positions, championships and career points were extracted
manually through https://gpracingstats.com/. Physical measurements such as weight and height were taken 
from https://www.eurosport.com/.
'''
drivers = {'driver_id': [],
        'nationality': [],
        'date_of_birth': []
        }
    
for year in list(range(2010,2023)):
    

    url = 'https://ergast.com/api/f1/{}/drivers.json'
    r = requests.get(url.format(year))
    json = r.json()
    
    
    for item in json['MRData']['DriverTable']['Drivers']:
        try:
            drivers['driver_id'].append(item['driverId'])
        except:
            drivers['driver_id'].append(None)
        try:
            drivers['date_of_birth'].append(item['dateOfBirth'])
        except:
            drivers['date_of_birth'].append(None)
        try:
            drivers['nationality'].append(item['nationality'])
        except:
            drivers['nationality'].append(None)

drivers = pd.DataFrame(drivers)
    

drivers['date_of_birth'] = pd.to_datetime(drivers['date_of_birth'])

drivers['year_of_birth'] = drivers['date_of_birth'].dt.year

drivers = drivers.drop(columns=['date_of_birth'])

drivers.drop_duplicates(inplace = True)

drivers = drivers.reset_index(drop=True)


drivers_wins = { 'alguersuari': 0,
 'alonso': 21,
 'barrichello': 11,
 'buemi': 0,
 'button': 7,
 'chandhok': 0,
 'rosa': 0,
 'grassi': 0,
 'glock': 0,
 'hamilton': 11,
 'heidfeld': 0,
 'hulkenberg': 0,
 'klien': 0,
 'kobayashi': 0,
 'kovalainen': 1,
 'kubica': 1,
 'liuzzi': 0,
 'massa': 11,
 'petrov': 0,
 'rosberg': 0,
 'michael_schumacher': 91,
 'bruno_senna': 0,
 'sutil': 0,
 'trulli': 1,
 'vettel': 5,
 'webber': 2,
 'yamamoto': 0,
 'ambrosio': 0,
 'resta': 0,
 'karthikeyan': 0,
 'maldonado': 0,
 'perez': 0,
 'ricciardo': 0,
 'grosjean': 0,
 'pic': 0,
 'raikkonen': 19,
 'vergne': 0,
 'jules_bianchi': 0,
 'bottas': 0,
 'chilton': 0,
 'gutierrez': 0,
 'garde': 0,
 'ericsson': 0,
 'kvyat': 0,
 'lotterer': 0,
 'kevin_magnussen': 0,
 'stevens': 0,
 'merhi': 0,
 'nasr': 0,
 'rossi': 0,
 'sainz': 0,
 'max_verstappen': 0,
 'haryanto': 0,
 'ocon': 0,
 'jolyon_palmer': 0,
 'vandoorne': 0,
 'wehrlein': 0,
 'gasly': 0,
 'giovinazzi': 0,
 'brendon_hartley': 0,
 'stroll': 0,
 'leclerc': 0,
 'sirotkin': 0,
 'albon': 0,
 'norris': 0,
 'russell': 0,
 'aitken': 0,
 'pietro_fittipaldi': 0,
 'latifi': 0,
 'mazepin': 0,
 'mick_schumacher': 0,
 'tsunoda': 0,
 'de_vries':0,
 'zhou': 0
               }
drivers['wins'] = drivers['driver_id'].map(drivers_wins)

drivers_poles = { 'alguersuari': 0,
 'alonso': 18,
 'barrichello': 14,
 'buemi': 0,
 'button': 7,
 'chandhok': 0,
 'rosa': 0,
 'grassi': 0,
 'glock': 0,
 'hamilton': 17,
 'heidfeld': 1,
 'hulkenberg': 0,
 'klien': 0,
 'kobayashi': 0,
 'kovalainen': 1,
 'kubica': 1,
 'liuzzi': 0,
 'massa': 15,
 'petrov': 0,
 'rosberg': 0,
 'michael_schumacher': 68,
 'bruno_senna': 0,
 'sutil': 0,
 'trulli': 4,
 'vettel': 5,
 'webber': 1,
 'yamamoto': 0,
 'ambrosio': 0,
 'resta': 0,
 'karthikeyan': 0,
 'maldonado': 0,
 'perez': 0,
 'ricciardo': 0,
 'grosjean': 0,
 'pic': 0,
 'raikkonen': 16,
 'vergne': 0,
 'jules_bianchi': 0,
 'bottas': 0,
 'chilton': 0,
 'gutierrez': 0,
 'garde': 0,
 'ericsson': 0,
 'kvyat': 0,
 'lotterer': 0,
 'kevin_magnussen': 0,
 'stevens': 0,
 'merhi': 0,
 'nasr': 0,
 'rossi': 0,
 'sainz': 0,
 'max_verstappen': 0,
 'haryanto': 0,
 'ocon': 0,
 'jolyon_palmer': 0,
 'vandoorne': 0,
 'wehrlein': 0,
 'gasly': 0,
 'giovinazzi': 0,
 'brendon_hartley': 0,
 'stroll': 0,
 'leclerc': 0,
 'sirotkin': 0,
 'albon': 0,
 'norris': 0,
 'russell': 0,
 'aitken': 0,
 'pietro_fittipaldi': 0,
 'latifi': 0,
 'mazepin': 0,
 'mick_schumacher': 0,
 'tsunoda': 0,
 'de_vries':0,
 'zhou': 0
               }
drivers['poles'] = drivers['driver_id'].map(drivers_poles)

drivers_championships = { 'alguersuari': 0,
 'alonso': 2,
 'barrichello': 0,
 'buemi': 0,
 'button': 1,
 'chandhok': 0,
 'rosa': 0,
 'grassi': 0,
 'glock': 0,
 'hamilton': 1,
 'heidfeld': 0,
 'hulkenberg': 0,
 'klien': 0,
 'kobayashi': 0,
 'kovalainen': 0,
 'kubica': 0,
 'liuzzi': 0,
 'massa': 0,
 'petrov': 0,
 'rosberg': 0,
 'michael_schumacher': 7,
 'bruno_senna': 0,
 'sutil': 0,
 'trulli': 0,
 'vettel': 0,
 'webber': 0,
 'yamamoto': 0,
 'ambrosio': 0,
 'resta': 0,
 'karthikeyan': 0,
 'maldonado': 0,
 'perez': 0,
 'ricciardo': 0,
 'grosjean': 0,
 'pic': 0,
 'raikkonen': 1,
 'vergne': 0,
 'jules_bianchi': 0,
 'bottas': 0,
 'chilton': 0,
 'gutierrez': 0,
 'garde': 0,
 'ericsson': 0,
 'kvyat': 0,
 'lotterer': 0,
 'kevin_magnussen': 0,
 'stevens': 0,
 'merhi': 0,
 'nasr': 0,
 'rossi': 0,
 'sainz': 0,
 'max_verstappen': 0,
 'haryanto': 0,
 'ocon': 0,
 'jolyon_palmer': 0,
 'vandoorne': 0,
 'wehrlein': 0,
 'gasly': 0,
 'giovinazzi': 0,
 'brendon_hartley': 0,
 'stroll': 0,
 'leclerc': 0,
 'sirotkin': 0,
 'albon': 0,
 'norris': 0,
 'russell': 0,
 'aitken': 0,
 'pietro_fittipaldi': 0,
 'latifi': 0,
 'mazepin': 0,
 'mick_schumacher': 0,
 'tsunoda': 0,
 'de_vries':0,
 'zhou': 0
               }
drivers['championships'] = drivers['driver_id'].map(drivers_championships)


career_points = { 'alguersuari': 0,
 'alonso': 577,
 'barrichello': 607,
 'buemi': 6,
 'button': 327,
 'chandhok': 0,
 'rosa': 29,
 'grassi': 0,
 'glock': 51,
 'hamilton': 256,
 'heidfeld': 219,
 'hulkenberg': 0,
 'klien': 14,
 'kobayashi': 3,
 'kovalainen': 105,
 'kubica': 137,
 'liuzzi': 5,
 'massa': 320,
 'petrov': 0,
 'rosberg': 75.5,
 'michael_schumacher': 1369,
 'bruno_senna': 0,
 'sutil': 6,
 'trulli': 246.5,
 'vettel': 125,
 'webber': 169.5,
 'yamamoto': 0,
 'ambrosio': 0,
 'resta': 0,
 'karthikeyan': 5,
 'maldonado': 0,
 'perez': 0,
 'ricciardo': 0,
 'grosjean': 0,
 'pic': 0,
 'raikkonen': 579,
 'vergne': 0,
 'jules_bianchi': 0,
 'bottas': 0,
 'chilton': 0,
 'gutierrez': 0,
 'garde': 0,
 'ericsson': 0,
 'kvyat': 0,
 'lotterer': 0,
 'kevin_magnussen': 0,
 'stevens': 0,
 'merhi': 0,
 'nasr': 0,
 'rossi': 0,
 'sainz': 0,
 'max_verstappen': 0,
 'haryanto': 0,
 'ocon': 0,
 'jolyon_palmer': 0,
 'vandoorne': 0,
 'wehrlein': 0,
 'gasly': 0,
 'giovinazzi': 0,
 'brendon_hartley': 0,
 'stroll': 0,
 'leclerc': 0,
 'sirotkin': 0,
 'albon': 0,
 'norris': 0,
 'russell': 0,
 'aitken': 0,
 'pietro_fittipaldi': 0,
 'latifi': 0,
 'mazepin': 0,
 'mick_schumacher': 0,
 'tsunoda': 0,
 'de_vries':0,
 'zhou': 0
               }

drivers['career_points'] = drivers['driver_id'].map(career_points)

height_cm = { 'alguersuari': 181,
 'alonso': 171,
 'barrichello': 172,
 'buemi': 172,
 'button': 182,
 'chandhok': 173,
 'rosa': 177,
 'grassi': 179,
 'glock': 169,
 'hamilton': 175,
 'heidfeld': 165,
 'hulkenberg': 184,
 'klien': 169,
 'kobayashi': 170,
 'kovalainen': 172,
 'kubica': 183,
 'liuzzi': 169,
 'massa': 166,
 'petrov': 185,
 'rosberg': 178,
 'michael_schumacher': 174,
 'bruno_senna': 180,
 'sutil': 183,
 'trulli': 173,
 'vettel': 176,
 'webber': 185,
 'yamamoto': 172,
 'ambrosio': 172,
 'resta': 185,
 'karthikeyan': 167,
 'maldonado': 173,
 'perez': 173,
 'ricciardo': 175,
 'grosjean': 180,
 'pic': 178,
 'raikkonen': 175,
 'vergne': 182,
 'jules_bianchi': 174,
 'bottas': 173,
 'chilton': 181,
 'gutierrez': 180,
 'garde': 182,
 'ericsson': 180,
 'kvyat': 175,
 'lotterer': np.nan,
 'kevin_magnussen': 174,
 'stevens': 173,
 'merhi': 179,
 'nasr': 174,
 'rossi': 187,
 'sainz': 177,
 'max_verstappen': 180,
 'haryanto': 170,
 'ocon': 186,
 'jolyon_palmer': 183,
 'vandoorne': 177,
 'wehrlein': 175,
 'gasly': 177,
 'giovinazzi': 185,
 'brendon_hartley': 184,
 'stroll': 182,
 'leclerc': 180,
 'sirotkin': 184,
 'albon': 186,
 'norris': 170,
 'russell': 185,
 'aitken': np.nan,
 'pietro_fittipaldi': np.nan,
 'latifi': 185,
 'mazepin': 176,
 'mick_schumacher': 170,
 'tsunoda': 158,
 'de_vries': 167,
 'zhou': np.nan
               }
drivers['height (cm)'] = drivers['driver_id'].map(height_cm)


weight_kg = { 'alguersuari': 65,
 'alonso': 68,
 'barrichello': 77,
 'buemi': 55,
 'button': 72,
 'chandhok': 67,
 'rosa': 74,
 'grassi': 73,
 'glock': 64,
 'hamilton': 66,
 'heidfeld': 59,
 'hulkenberg': 70,
 'klien': 68,
 'kobayashi': 57,
 'kovalainen': 66,
 'kubica': 72,
 'liuzzi': 68,
 'massa': 59,
 'petrov': 75,
 'rosberg': 71,
 'michael_schumacher': 75,
 'bruno_senna': 69,
 'sutil': 75,
 'trulli': 60,
 'vettel': 58,
 'webber': 75,
 'yamamoto': 62,
 'ambrosio': 62,
 'resta': 74,
 'karthikeyan': 60,
 'maldonado': 63,
 'perez': 63,
 'ricciardo': 64,
 'grosjean': 71,
 'pic': 60,
 'raikkonen': 62,
 'vergne': 69,
 'jules_bianchi': 65,
 'bottas': 70,
 'chilton': 64,
 'gutierrez': 61,
 'garde': 73,
 'ericsson': 64,
 'kvyat': 58,
 'lotterer': np.nan,
 'kevin_magnussen': 68,
 'stevens': 64,
 'merhi': 79,
 'nasr': 71,
 'rossi': 72,
 'sainz': 66,
 'max_verstappen': 67,
 'haryanto': 58,
 'ocon': 66,
 'jolyon_palmer': 78,
 'vandoorne': 65,
 'wehrlein': 61,
 'gasly': 68,
 'giovinazzi': 70,
 'brendon_hartley': 67,
 'stroll': 70,
 'leclerc': 65,
 'sirotkin': 71,
 'albon': 74,
 'norris': 64,
 'russell': 70,
 'aitken': np.nan,
 'pietro_fittipaldi': np.nan,
 'latifi': 74,
 'mazepin': 68,
 'mick_schumacher': 60,
 'tsunoda': 61,
 'de_vries': 67,
 'zhou': np.nan
               }
drivers['weight (kg)'] = drivers['driver_id'].map(weight_kg)
drivers

Unnamed: 0,driver_id,nationality,year_of_birth,wins,poles,championships,career_points,height (cm),weight (kg)
0,alguersuari,Spanish,1990,0,0,0,0.0,181.0,65.0
1,alonso,Spanish,1981,21,18,2,577.0,171.0,68.0
2,barrichello,Brazilian,1972,11,14,0,607.0,172.0,77.0
3,buemi,Swiss,1988,0,0,0,6.0,172.0,55.0
4,button,British,1980,7,7,1,327.0,182.0,72.0
...,...,...,...,...,...,...,...,...,...
69,mazepin,Russian,1999,0,0,0,0.0,176.0,68.0
70,mick_schumacher,German,1999,0,0,0,0.0,170.0,60.0
71,tsunoda,Japanese,2000,0,0,0,0.0,158.0,61.0
72,de_vries,Dutch,1995,0,0,0,0.0,167.0,67.0


---
**Constructors technical specifications dataframe**

In [3]:
'''
In this part of the code we extract all the constructors technical specs, that took part in at least one
season from 2010 to 2022. We acquired data such as their names, and nationality through querying 
the http://ergast.com/mrd/. Furthermore, additional data regarding engine names and manufacturers, 
number of valves, displacement and number of cylinders for each season,were extracted manually through 
https://www.f1technical.net/ and https://www.statsf1.com/en/default.aspx. 
'''

constructors = {'constructor_id': [],
        'nationality': [],
        }
    
for year in list(range(2010,2023)):
    

    url = 'https://ergast.com/api/f1/{}/constructors.json'
    r = requests.get(url.format(year))
    json = r.json()
    
    
    for item in json['MRData']['ConstructorTable']['Constructors']:
        try:
            constructors['constructor_id'].append(item['constructorId'])
        except:
            constructors['constructor_id'].append(None)
        try:
            constructors['nationality'].append(item['nationality'])
        except:
            constructors['nationality'].append(None)
    
constructors = pd.DataFrame(constructors)

season = [2010, 2010,2010,2010,2010,2010,2010,2010,2010,2010,2010,2010,2011,2011,2011,2011,2011,2011,
         2011,2011,2011,2011,2011,2011,2012,2012,2012,2012,2012,2012,2012,2012,2012,2012,2012,2012,
         2013,2013,2013,2013,2013,2013,2013,2013,2013,2013,2013,2014,2014,2014,2014,2014,2014,2014,
         2014,2014,2014,2014,2015,2015,2015,2015,2015,2015,2015,2015,2015,2015,2016,2016,2016,2016,
         2016,2016,2016,2016,2016,2016,2016,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2018,2018,2018,
         2018,2018,2018,2018,2018,2018,2018,2019,2019,2019,2019,2019,2019,2019,2019,2019,2019,2020,2020,2020,
         2020,2020,2020,2020,2020,2020,2020,2021,2021,2021,2021,2021,2021,2021,2021,2021,2021,2022,2022,
         2022,2022,2022,2022,2022,2022,2022,2022,]

constructors.insert(0, 'season', season)


teams = {2010: {'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 056', 
                            'Displacement': 2398,'Cylinders': 8, 'Valves': 32}, 
                'force_india': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes FO 108X',
                                'Displacement': 2398,'Cylinders': 8,'Valves': 32},
                'hrt': {'Engine Manufacturer': 'Cosworth', 'Engine': 'Cosworth CA2010',
                        'Displacement': 2400,'Cylinders': 8, 'Valves': np.nan},
                'lotus_racing': {'Engine Manufacturer': 'Cosworth', 'Engine': 'Cosworth CA2010',
                        'Displacement': 2400,'Cylinders': 8, 'Valves': np.nan},
                'mclaren':{'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes FO 108X',
                                'Displacement': 2398,'Cylinders': 8,'Valves': 32},
                'mercedes':{'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes FO 108X',
                                'Displacement': 2398,'Cylinders': 8,'Valves': 32},
                'red_bull':{'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2010',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32},
                'renault':{'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2010',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32},
                'sauber':{'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 056' ,
                            'Displacement': 2398,'Cylinders': 8, 'Valves': 32 }, 
                'toro_rosso': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 056', 
                            'Displacement': 2398,'Cylinders': 8, 'Valves': 32 }, 
                'virgin': {'Engine Manufacturer': 'Cosworth', 'Engine': 'Cosworth CA2010',
                        'Displacement': 2400,'Cylinders': 8, 'Valves': np.nan},
                'williams': {'Engine Manufacturer': 'Cosworth', 'Engine': 'Cosworth CA2010',
                        'Displacement': 2400,'Cylinders': 8, 'Valves': np.nan}},
        2011: {'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 056', 
                            'Displacement': 2398,'Cylinders': 8, 'Valves': 32 }, 
                'force_india': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes FO 108Y',
                                'Displacement': 2398,'Cylinders': 8,'Valves': 32},
                'hrt': {'Engine Manufacturer': 'Cosworth', 'Engine': 'Cosworth CA2011',
                        'Displacement': 2400,'Cylinders': 8, 'Valves': np.nan},
                'lotus_racing': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2011',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32},
                'mclaren':{'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes FO 108Y',
                                'Displacement': 2398,'Cylinders': 8,'Valves': 32},
               'mercedes':{'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes FO 108Y',
                                'Displacement': 2398,'Cylinders': 8,'Valves': 32},
                'red_bull':{'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2011',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32},
                'renault':{'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2011',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32},
                'sauber':{'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 056', 
                            'Displacement': 2398,'Cylinders': 8, 'Valves': 32}, 
                'toro_rosso': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 056', 
                            'Displacement': 2398,'Cylinders': 8, 'Valves': 32 }, 
                'virgin': {'Engine Manufacturer': 'Cosworth', 'Engine': 'Cosworth CA2011',
                        'Displacement': 2400,'Cylinders': 8, 'Valves': np.nan},
                'williams': {'Engine Manufacturer': 'Cosworth', 'Engine': 'Cosworth CA2011',
                        'Displacement': 2400,'Cylinders': 8, 'Valves': np.nan}},
        2012: {'caterham': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2012',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32},
               'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 056', 
                            'Displacement': 2398,'Cylinders': 8, 'Valves': 32 },
               'force_india': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes FO 108Z',
                                'Displacement': 2398,'Cylinders': 8,'Valves': 32},
               'hrt': {'Engine Manufacturer': 'Cosworth', 'Engine': 'Cosworth CA2012',
                        'Displacement': 2400,'Cylinders': 8, 'Valves': np.nan},
               'lotus_f1': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2012',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32},
               'marussia': {'Engine Manufacturer': 'Cosworth', 'Engine': 'Cosworth CA2012',
                        'Displacement': 2400,'Cylinders': 8, 'Valves': np.nan},
               'mclaren': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes FO 108Z',
                                'Displacement': 2398,'Cylinders': 8,'Valves': 32},
               'mercedes': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes FO 108Z',
                                'Displacement': 2398,'Cylinders': 8,'Valves': 32},
               'red_bull': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2012',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32},
               'sauber': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 056', 
                            'Displacement': 2398,'Cylinders': 8, 'Valves': 32},
               'toro_rosso': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 056', 
                            'Displacement': 2398,'Cylinders': 8, 'Valves': 32},
                'williams': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2012',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32}},
        2013: {'caterham': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2013',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32},
               'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 056', 
                            'Displacement': 2398,'Cylinders': 8, 'Valves': 32 },
               'force_india': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes FO 108F',
                                'Displacement': 2398,'Cylinders': 8,'Valves': 32},
                'lotus_f1': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2013',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32},
                'marussia': {'Engine Manufacturer': 'Cosworth', 'Engine': 'Cosworth CA2013',
                        'Displacement': 2400,'Cylinders': 8, 'Valves': np.nan},
               'mclaren':{'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes FO 108F',
                                'Displacement': 2398,'Cylinders': 8,'Valves': 32},
               'mercedes':{'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes FO 108F',
                                'Displacement': 2398,'Cylinders': 8,'Valves': 32},
               'red_bull': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2013',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32},
               'sauber': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 056' ,
                            'Displacement': 2398,'Cylinders': 8, 'Valves': 32 },
               'toro_rosso': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 056' ,
                            'Displacement': 2398,'Cylinders': 8, 'Valves': 32 },
               'williams': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault RS27-2013',
                                'Displacement': 2400, 'Cylinders': 8,'Valves': 32}},
         2014: {'caterham': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault Energy F1 2014',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 059/3' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'force_india': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes PU106A',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'lotus_f1':  {'Engine Manufacturer': 'Renault', 'Engine': 'Renault Energy F1 2014',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'marussia': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 059/3' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'mclaren': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes PU106A',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'mercedes': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes PU106A',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'red_bull': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault Energy F1 2014',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'sauber': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 059/3' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'toro_rosso': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault Energy F1 2014',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'williams': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes PU106A',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24}},
        2015: {'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 059/4' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'force_india': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes PU106B',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'lotus_f1': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes PU106B',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'manor': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 059/3' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 }, 
               'mclaren': {'Engine Manufacturer': 'Honda', 'Engine': 'Honda RA615H' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 }, 
               'mercedes': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes PU106B',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'red_bull': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault Energy F1 2015',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24}, 
               'sauber': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 059/4' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'toro_rosso': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault Energy F1 2015',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24}, 
               'williams': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes PU106B',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24}},
        2016: {'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 059/5' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'force_india': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes PU106C',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'haas': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 059/5', 
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'manor':{'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes PU106C',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'mclaren': {'Engine Manufacturer': 'Honda', 'Engine': 'Honda RA616H' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },  
               'mercedes': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes PU106C',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'red_bull': {'Engine Manufacturer': 'Renault', 'Engine': 'TAG Heuer (Renault)',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'renault': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault R.E.16',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'sauber': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 059/5' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'toro_rosso': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 059/4' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'williams': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes PU106B',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24}},
        2017: {'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 062', 
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'force_india': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M08 EQ Power+',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'haas': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 062' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'mclaren': {'Engine Manufacturer': 'Honda', 'Engine': 'Honda RA617H' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 }, 
               'mercedes': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M08 EQ Power+',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'red_bull': {'Engine Manufacturer': 'Renault', 'Engine': 'TAG Heuer (Renault)',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'renault': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault R.E.17',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'sauber': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 059/5' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'toro_rosso': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault R.E.17',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'williams': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M08 EQ Power+',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24}},
         2018: {'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 062 EVO' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'force_india': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M09 EQ Power+',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'haas': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 062 EVO' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'mclaren': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault R.E.18',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'mercedes': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M09 EQ Power+',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'red_bull': {'Engine Manufacturer': 'Renault', 'Engine': 'TAG Heuer (Renault)',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'renault': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault R.E.18',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'sauber': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 062 EVO', 
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'toro_rosso': {'Engine Manufacturer': 'Honda', 'Engine': 'Honda RA618H' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 }, 
               'williams': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M09 EQ Power+',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24}},
         2019: {'alfa': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 064' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 064', 
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'haas': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 064', 
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'mclaren': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault E Tech 19',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'mercedes': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M10 EQ Power+',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'racing_point': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M10 EQ Power+',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'red_bull': {'Engine Manufacturer': 'Honda', 'Engine': 'Honda RA619H' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 }, 
               'renault': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault E Tech 19',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'toro_rosso': {'Engine Manufacturer': 'Honda', 'Engine': 'Honda RA619H' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 }, 
               'williams': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M10 EQ Power+',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24}},
         2020: {'alfa': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 065' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
                'alphatauri':{'Engine Manufacturer': 'Honda', 'Engine': 'Honda RA620H' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 }, 
                'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 065', 
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'haas': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 065', 
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
               'mclaren': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault E Tech 20',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
               'mercedes': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M10 EQ Performance',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'racing_point': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M10 EQ Performance',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
               'red_bull': {'Engine Manufacturer': 'Honda', 'Engine': 'Honda RA620H' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 }, 
               'renault': {'Engine Manufacturer': 'Renault', 'Engine': 'Renault E Tech 20',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
                'williams': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M10 EQ Performance',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24}},
         2021: {'alfa': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 066' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
                'alphatauri':{'Engine Manufacturer': 'Honda', 'Engine': 'Honda RA621H' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 }, 
                'alpine':{'Engine Manufacturer': 'Renault', 'Engine': 'Renault E Tech 21',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
                'aston_martin':{'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M12 E Performance',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24}, 
                'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 066', 
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
                'haas': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 066', 
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
                'mclaren': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M12 E Performance',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
                'mercedes': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M12 E Performance',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
                'red_bull': {'Engine Manufacturer': 'Honda', 'Engine': 'Honda RA621H' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
                'williams': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M12 E Performance',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24}},
         2022: {'alfa': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 066/7' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
                'alphatauri':{'Engine Manufacturer': 'Honda RBPT', 'Engine': 'Honda Red Bull RBPTH001' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 }, 
                'alpine':{'Engine Manufacturer': 'Renault', 'Engine': 'Renault E Tech RE22',
                                'Displacement': 1600, 'Cylinders': 6,'Valves': 24},
                'aston_martin':{'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M13 E Performance',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24}, 
                'ferrari': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 066/7', 
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
                'haas': {'Engine Manufacturer': 'Ferrari', 'Engine': 'Ferrari Tipo 066/7', 
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 },
                'mclaren': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M13 E Performance',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
                'mercedes': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M13 E Performance',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24},
                'red_bull': {'Engine Manufacturer': 'Honda RBPT', 'Engine': 'Honda Red Bull RBPTH001' ,
                            'Displacement': 1600,'Cylinders': 6, 'Valves': 24 }, 
                'williams': {'Engine Manufacturer': 'Mercedes', 'Engine': 'Mercedes M13 E Performance',
                                'Displacement': 1600,'Cylinders': 6,'Valves': 24}}
                }
                
                
               
constructors = constructors.merge(constructors.apply(lambda row: pd.Series(teams[row['season']][row['constructor_id']]), axis=1), left_index=True, right_index=True)
constructors

Unnamed: 0,season,constructor_id,nationality,Engine Manufacturer,Engine,Displacement,Cylinders,Valves
0,2010,ferrari,Italian,Ferrari,Ferrari Tipo 056,2398,8,32.0
1,2010,force_india,Indian,Mercedes,Mercedes FO 108X,2398,8,32.0
2,2010,hrt,Spanish,Cosworth,Cosworth CA2010,2400,8,
3,2010,lotus_racing,Malaysian,Cosworth,Cosworth CA2010,2400,8,
4,2010,mclaren,British,Mercedes,Mercedes FO 108X,2398,8,32.0
...,...,...,...,...,...,...,...,...
134,2022,haas,American,Ferrari,Ferrari Tipo 066/7,1600,6,24.0
135,2022,mclaren,British,Mercedes,Mercedes M13 E Performance,1600,6,24.0
136,2022,mercedes,German,Mercedes,Mercedes M13 E Performance,1600,6,24.0
137,2022,red_bull,Austrian,Honda RBPT,Honda Red Bull RBPTH001,1600,6,24.0


---
**Constructors statistics**

In [8]:
'''
In this part of the code we extract the constructors' all time statistics, that took part in at least one
race from 2010 to 2022. We acquired data such as their names, and nationality 
through querying the http://ergast.com/mrd/. Furthermore, additional data regarding
constructors' all time statistics, such as wins, pole positions, championships and points were extracted
manually through https://gpracingstats.com/. 
'''
constructors_stats = {'constructor_id': [],
        'nationality': [],
        }
    
for year in list(range(2010,2023)):
    

    url = 'https://ergast.com/api/f1/{}/constructors.json'
    r = requests.get(url.format(year))
    json = r.json()
    
    
    for item in json['MRData']['ConstructorTable']['Constructors']:
        try:
            constructors_stats['constructor_id'].append(item['constructorId'])
        except:
            constructors_stats['constructor_id'].append(None)
        try:
            constructors_stats['nationality'].append(item['nationality'])
        except:
            constructors_stats['nationality'].append(None)
    
constructors_stats = pd.DataFrame(constructors_stats)
constructors_stats.drop_duplicates(inplace = True)
constructors_stats.reset_index(drop = True)


stats = {'ferrari': {'wins': 230, 'pole_positions': 193,  'Points': 4093.5,  
                     'Constructor_Championships': 16, 'Driver_Championships': 15},
         'force_india': {'wins': 0, 'pole_positions': 1 ,'Points': 13,
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'hrt': {'wins': 0, 'pole_positions': 0, 'Points': 0, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'lotus_racing': {'wins': 0, 'pole_positions': 0, 'Points': 0, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'mclaren': {'wins': 164, 'pole_positions': 145, 'Points': 3381.5, 
                     'Constructor_Championships': 8, 'Driver_Championships': 12},
         'mercedes': {'wins': 9, 'pole_positions': 8, 'Points':0 , 
                     'Constructor_Championships': 0, 'Driver_Championships': 2}, 
         'red_bull': {'wins': 6, 'pole_positions': 5, 'Points': 256.5, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'renault': {'wins': 35, 'pole_positions': 51, 'Points': 1082, 
                     'Constructor_Championships': 2, 'Driver_Championships': 2}, 
         'sauber': {'wins': 1, 'pole_positions': 1, 'Points': 544, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'toro_rosso': {'wins': 1, 'pole_positions': 1, 'Points': 56, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0},
         'virgin': {'wins': 0, 'pole_positions': 0, 'Points': 0, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'williams': {'wins': 113, 'pole_positions': 125, 'Points': 2600, 
                     'Constructor_Championships': 9, 'Driver_Championships': 7}, 
         'caterham': {'wins': 0, 'pole_positions': 0, 'Points': 0, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'lotus_f1': {'wins': 0, 'pole_positions': 0, 'Points': 0, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'marussia': {'wins': 0, 'pole_positions': 0, 'Points': 0, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'manor': {'wins': 0, 'pole_positions': 0, 'Points': 0, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'haas': {'wins': 0, 'pole_positions': 0, 'Points': 0, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'alfa': {'wins': 10, 'pole_positions': 12, 'Points': 50, 
                     'Constructor_Championships': 0, 'Driver_Championships': 2}, 
         'racing_point': {'wins': 0, 'pole_positions': 0, 'Points': 0, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'alphatauri': {'wins': 0, 'pole_positions': 0, 'Points': 0, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'alpine': {'wins': 0, 'pole_positions': 0, 'Points': 0, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}, 
         'aston_martin': {'wins': 0, 'pole_positions': 0, 'Points': 0, 
                     'Constructor_Championships': 0, 'Driver_Championships': 0}}

constructors_stats = constructors_stats.merge(constructors_stats.apply(lambda row: pd.Series(stats[row['constructor_id']]), axis=1), left_index=True, right_index=True)
constructors_stats.reset_index(drop = True)



Unnamed: 0,constructor_id,nationality,wins,pole_positions,Points,Constructor_Championships,Driver_Championships
0,ferrari,Italian,230.0,193.0,4093.5,16.0,15.0
1,force_india,Indian,0.0,1.0,13.0,0.0,0.0
2,hrt,Spanish,0.0,0.0,0.0,0.0,0.0
3,lotus_racing,Malaysian,0.0,0.0,0.0,0.0,0.0
4,mclaren,British,164.0,145.0,3381.5,8.0,12.0
5,mercedes,German,9.0,8.0,0.0,0.0,2.0
6,red_bull,Austrian,6.0,5.0,256.5,0.0,0.0
7,renault,French,35.0,51.0,1082.0,2.0,2.0
8,sauber,Swiss,1.0,1.0,544.0,0.0,0.0
9,toro_rosso,Italian,1.0,1.0,56.0,0.0,0.0


---
**Qualifying data**

In [4]:
'''
In this part of the code we extract data related to all qualifying sessions that took
part from 2010 to 2022. We acquired data such as the circuit, drivers and conctructors participating,
best lap times of each driver and the grid for the respective race and the respective season. This was achieved
through querying  https://https://www.formula1.com/en/results.html/{season}/races/{unique_identifier}/
{circuit_id}/starting-grid.html. Necessary changes were made to circuits' names, drivers' names and constructors' 
names for consistency through all dataframes.
'''
import requests
from bs4 import BeautifulSoup
import pandas as pd

base_url = "https://www.formula1.com"
starting_grid = pd.DataFrame()

for year in range(2010, 2023):
    year_url = f"{base_url}/en/results.html/{year}/races.html"
    year_page = requests.get(year_url)
    year_soup = BeautifulSoup(year_page.content, 'html.parser')
    
    races = year_soup.find_all('a', class_='ArchiveLink')

    for race in races:
        race_url = base_url + race['href'].replace('race-result.html', 'starting-grid.html')
        race_page = requests.get(race_url)
        race_soup = BeautifulSoup(race_page.content, 'html.parser')
        
        table = race_soup.find('table', class_='resultsarchive-table')
        
        if table is not None:
            df_race = pd.read_html(str(table))[0]
            df_race['Year'] = year
            df_race['Race'] = race.get_text()
            
            starting_grid = pd.concat([starting_grid, df_race], ignore_index=True)
    for col in starting_grid:
            if 'Unnamed' in col:
                starting_grid.drop(col, axis = 1, inplace = True)

name_changes = { '\n                        Bahrain\n                    ': 'bahrain',
       '\n                        Australia\n                    ': 'albert_park',
       '\n                        Malaysia\n                    ': 'sepang',
       '\n                        China\n                    ': 'shanghai',
       '\n                        Spain\n                    ': 'catalunya',
       '\n                        Monaco\n                    ': 'monaco',
       '\n                        Turkey\n                    ':  'istanbul',
       '\n                        Canada\n                    ': 'villeneuve',
       '\n                        Europe\n                    ': 'valencia',
       '\n                        Great Britain\n                    ': 'silverstone',
       '\n                        Germany\n                    ': 'hockenheimring',
       '\n                        Hungary\n                    ': 'hungaroring',
       '\n                        Belgium\n                    ': 'spa',
       '\n                        Italy\n                    ': 'monza',
       '\n                        Singapore\n                    ': 'marina_bay',
       '\n                        Japan\n                    ': 'suzuka',
       '\n                        South Korea\n                    ': 'yeongam',
       '\n                        Brazil\n                    ': 'interlagos',
       '\n                        Abu Dhabi\n                    ': 'yas_marina',
       '\n                        India\n                    ': 'buddh',
       '\n                        United States  \n                    ': 'americas',
       '\n                        United States \n                    ': 'americas',
       '\n                        Austria\n                    ': 'red_bull_ring',
       '\n                        Russia\n                    ': 'sochi',
       '\n                        Mexico\n                    ': 'rodriquez',
       '\n                        Azerbaijan\n                    ': 'baku',
       '\n                        France\n                    ': 'ricard',
       '\n                        Styria\n                    ': 'red_bull_ring',
       '\n                        70th Anniversary\n                    ': 'silverstone',
       '\n                        Tuscany\n                    ': 'mugello',
       '\n                        Eifel\n                    ': 'nurburgring',
       '\n                        Portugal\n                    ':'portimao',
       '\n                        Emilia Romagna\n                    ': 'imola',
       '\n                        Sakhir\n                    ': 'bahrain',
       '\n                        Netherlands\n                    ': 'zandvoort',
       '\n                        Qatar\n                    ': 'losail',
       '\n                        Saudi Arabia\n                    ': 'jeddah',
       '\n                        Miami \n                    ': 'miami'
     
    
}

driver_names_changes = { 'Sebastian Vettel VET': 'vettel', 'Felipe Massa MAS': 'massa', 
                        'Fernando Alonso ALO': 'alonso', 'Lewis Hamilton HAM': 'hamilton', 
                        'Nico Rosberg ROS': 'rosberg', 'Mark Webber WEB': 'webber',
       'Michael Schumacher MSC': 'michael_schumacher', 'Jenson Button BUT':  'button', 'Robert Kubica KUB': 'kubica',
       'Adrian Sutil SUT': 'sutil', 'Rubens Barrichello BAR': 'barrichello',
       'Vitantonio Liuzzi LIU': 'liuzzi', 'Nico Hulkenberg HUL': 'hulkenberg',
       'Pedro de la Rosa DLR': 'rosa', 'Sebastien Buemi BUE': 'buemi',
       'Kamui Kobayashi KOB': 'kobayashi', 'Vitaly Petrov PET': 'petrov',
       'Jaime Alguersuari ALG': 'alguersuari', 'Timo Glock GLO': 'glock', 'Jarno Trulli TRU': 'trulli',
       'Heikki Kovalainen KOV': 'kovalainen', 'Lucas di Grassi DIG': 'grassi', 'Bruno Senna SEN': 'bruno_senna',
       'Karun Chandhok CHD':'chandhok', 'Sakon Yamamoto YAM': 'yamamoto', 'Nick Heidfeld HEI': 'heidfeld',
       'Christian Klien KLI': 'klien', 'Sergio Perez PER': 'perez', 'Paul di Resta DIR':'resta',
       'Pastor Maldonado MAL': 'maldonado', "Jerome d'Ambrosio DAM": 'ambrosio',
       'Narain Karthikeyan KAR': 'karthikeyan', 'Daniel Ricciardo RIC': 'ricciardo',
       'Romain Grosjean GRO': 'grosjean', 'Jean-Eric Vergne VER': 'vergne',
       'Kimi Räikkönen RAI': 'raikkonen', 'Charles Pic PIC': 'pic', 'Valtteri Bottas BOT': 'bottas',
       'Esteban Gutierrez GUT': 'gutierrez', 'Jules Bianchi BIA': 'jules_bianchi', 'Max Chilton CHI': 'chilton',
       'Giedo van der Garde VDG': 'garde', 'Kevin Magnussen MAG': 'kevin_magnussen',
       'Daniil Kvyat KVY': 'kvyat', 'Marcus Ericsson ERI': 'ericsson', 'Andre Lotterer LOT': 'lotterer',
       'Will Stevens STE': 'stevens', 'Carlos Sainz SAI': 'sainz', 'Felipe Nasr NAS': 'nasr',
       'Max Verstappen VER': 'max_verstappen', 'Roberto Merhi MER': 'merhi', 'Alexander Rossi RSI': 'rossi',
       'Jolyon Palmer PAL': 'jolyon_palmer', 'Pascal Wehrlein WEH': 'wehrlein', 'Rio Haryanto HAR': 'haryanto',
       'Stoffel Vandoorne VAN': 'vandoorne', 'Esteban Ocon OCO': 'ocon',
       'Antonio Giovinazzi GIO': 'giovinazzi', 'Lance Stroll STR': 'stroll', 'Pierre Gasly GAS': 'gasly',
       'Brendon Hartley HAR': 'brendon_hartley', 'Charles Leclerc LEC': 'leclerc',
       'Sergey Sirotkin SIR': 'sirotkin', 'Lando Norris NOR': 'norris', 'Alexander Albon ALB': 'albon',
       'George Russell RUS': 'russell', 'Nicholas Latifi LAT': 'latifi', 'Jack Aitken AIT': 'aitken',
       'Pietro Fittipaldi FIT': 'pietro_fittipaldi', 'Yuki Tsunoda TSU':'tsunoda', 
       'Mick Schumacher MSC': 'mick_schumacher','Nikita Mazepin MAZ': 'mazepin', 
       'Zhou Guanyu ZHO': 'zhou', 'Nyck De Vries DEV': 'de_vries'
}
 
constructors_name_changes = {
    'RBR Renault': 'renault', 'Ferrari': 'ferrari', 'McLaren Mercedes': 'mclaren', 'Mercedes': 'mercedes',
       'Renault': 'renault', 'Force India Mercedes': 'force_india', 'Williams Cosworth': 'williams',
       'Sauber Ferrari': 'sauber', 'STR Ferrari': 'toro_rosso', 'Virgin Cosworth': 'virgin',
       'Lotus Cosworth': 'lotus_racing', 'HRT Cosworth': 'hrt', 'Red Bull Racing Renault': 'red_bull',
       'Lotus Renault': 'lotus_f1', 'Williams Renault': 'williams', 'Caterham Renault': 'caterham',
       'Marussia Cosworth': 'marussia', 'STR Renault': 'toro_rosso', 'Williams Mercedes': 'williams',
       'Marussia Ferrari': 'marussia', 'Lotus Mercedes': 'lotus_f1', 'McLaren Honda': 'mclaren',
       'Toro Rosso-Ferrari' :'toro_rosso', 'Red Bull Racing TAG Heuer': 'red_bull', 'Haas Ferrari': 'haas',
       'MRT-Mercedes': 'manor', 'Toro Rosso Ferrari': 'toro_rosso', 'MRT Mercedes': 'manor', 'Toro Rosso': 'toro_rosso',
       'McLaren Renault': 'mclaren', 'Scuderia Toro Rosso Honda': 'toro_rosso',
       'Red Bull Racing Honda': 'red_bull', 'Alfa Romeo Racing Ferrari': 'alfa',
       'Racing Point BWT Mercedes': 'racing_point', 'AlphaTauri Honda': 'alphatauri', 'Alpine Renault': 'alpine',
       'Aston Martin Mercedes': 'aston_martin', 'Red Bull Racing RBPT': 'red_bull',
       'Alfa Romeo Ferrari': 'alfa', 'AlphaTauri RBPT': 'alphatauri',
       'Aston Martin Aramco Mercedes': 'aston_martin'
}

starting_grid.drop('No', axis = 1, inplace = True)
starting_grid['Car'] = starting_grid['Car'].map(constructors_name_changes)
starting_grid['Driver'] = starting_grid['Driver'].map(driver_names_changes)
starting_grid['Race'] = starting_grid['Race'].map(name_changes)
starting_grid = starting_grid[['Year', 'Race', 'Car', 'Driver', 'Time', 'Pos']]
starting_grid = starting_grid.rename(columns={
    'Year': 'Season',
    'Race': 'circuit_id',
    'Car': 'constructor_id',
    'Driver': 'driver_id',
    'Time': 'time',
    'Pos': 'grid'
})
starting_grid

Unnamed: 0,Season,circuit_id,constructor_id,driver_id,time,grid
0,2010,bahrain,renault,vettel,1:54.101,1
1,2010,bahrain,ferrari,massa,1:54.242,2
2,2010,bahrain,ferrari,alonso,1:54.608,3
3,2010,bahrain,mclaren,hamilton,1:55.217,4
4,2010,bahrain,mercedes,rosberg,1:55.241,5
...,...,...,...,...,...,...
5455,2022,yas_marina,haas,kevin_magnussen,1:25.834,16
5456,2022,yas_marina,alphatauri,gasly,1:25.859,17
5457,2022,yas_marina,alfa,bottas,1:25.892,18
5458,2022,yas_marina,williams,albon,1:26.028,19


---
**Race Results dataframe**

In [5]:
'''
In this part of the code we extract data related to all races that happened
between 2010 to 2022. We acquired data such as the circuit, drivers and conctructors participating,
race time of each driver, the points each driver gained, the laps needed to complete the race,
the rank of the drivr (Our analysis target variable), and the respective season. This was achieved
through querying  https://https://www.formula1.com/en/results.html/{season}/races/{unique_identifier}/
{circuit_id}/race-result.html. Necessary changes were made to circuits' names, drivers' names and constructors' 
names for consistency through all dataframes.
'''
import requests
from bs4 import BeautifulSoup
import pandas as pd

base_url = "https://www.formula1.com"
race_results = pd.DataFrame()

for year in range(2010, 2023):
    year_url = f"{base_url}/en/results.html/{year}/races.html"
    year_page = requests.get(year_url)
    year_soup = BeautifulSoup(year_page.content, 'html.parser')
    
    races = year_soup.find_all('a', class_='ArchiveLink')

    for race in races:
        race_url = base_url + race['href']
        race_page = requests.get(race_url)
        race_soup = BeautifulSoup(race_page.content, 'html.parser')
        
        table = race_soup.find('table', class_='resultsarchive-table')
        
        if table is not None:
            df_race = pd.read_html(str(table))[0]
            df_race['Year'] = year
            df_race['Race'] = race.get_text()
            
            race_results = pd.concat([race_results, df_race], ignore_index=True)
    for col in race_results:
            if 'Unnamed' in col:
                race_results.drop(col, axis = 1, inplace = True)
                
race_results.drop('No', axis = 1, inplace = True)
race_results['Car'] = race_results['Car'].map(constructors_name_changes)
race_results['Driver'] = race_results['Driver'].map(driver_names_changes)
race_results['Race'] = race_results['Race'].map(name_changes)
race_results = race_results[['Year', 'Race', 'Car', 'Driver', 'Laps', 'Time/Retired', 'PTS', 'Pos']]
race_results = race_results.rename(columns={
    'Year': 'Season',
    'Race': 'circuit_id',
    'Car': 'constructor_id',
    'Driver': 'driver_id',
    'Time/Retired': 'Race time',
    'PTS': 'Points',
    'Pos': 'Rank'
    
})
race_results

Unnamed: 0,Season,circuit_id,constructor_id,driver_id,Laps,Race time,Points,Rank
0,2010,bahrain,ferrari,alonso,49.0,1:39:20.396,25.0,1
1,2010,bahrain,ferrari,massa,49.0,+16.099s,18.0,2
2,2010,bahrain,mclaren,hamilton,49.0,+23.182s,15.0,3
3,2010,bahrain,renault,vettel,49.0,+38.799s,12.0,4
4,2010,bahrain,mercedes,rosberg,49.0,+40.213s,10.0,5
...,...,...,...,...,...,...,...,...
5504,2022,yas_marina,haas,mick_schumacher,57.0,+1 lap,0.0,16
5505,2022,yas_marina,haas,kevin_magnussen,57.0,+1 lap,0.0,17
5506,2022,yas_marina,mercedes,hamilton,55.0,DNF,0.0,18
5507,2022,yas_marina,williams,latifi,55.0,DNF,0.0,19


---
**Weather dataframe**

In [6]:
'''
In this part of the code we extract data related to the weather conditions in each race that happened
between 2010 to 2022. We acquired data such as the circuit, the respective season, and the grand prix's 
respective wikipedia url through querying the http://ergast.com/mrd/. Then through quering the  respective
wikipedia url, we extracted information regarding the weather conditions. We then narrowed down the possible 
weather conditions that were the most generic to Clear, Cloudy, Rainy, Warm and Windy, to reduce dimensionality.
'''
weather = {'season': [],
        'circuit_id': [],
        'url': []
        }
    
for year in list(range(2010,2023)):

    url = 'https://ergast.com/api/f1/{}.json'
    r = requests.get(url.format(year))
    json = r.json()

    for item in json['MRData']['RaceTable']['Races']:
        try:
            weather['season'].append(int(item['season']))
        except:
            weather['season'].append(None)
        try:
            weather['circuit_id'].append(item['Circuit']['circuitId'])
        except:
            weather['circuit_id'].append(None)
        try:
            weather['url'].append(item['url'])
        except:
            weather['url'].append(None)
        
weather = pd.DataFrame(weather)

weather_info = []

for url in weather.url:
    for i in range(0,4):
        try:
            weather_df = pd.read_html(url)[i]
            if 'Weather' in list(weather_df.iloc[:,0]):
                n = list(weather_df.iloc[:,0]).index('Weather')
                weather_info.append(weather_df.iloc[n,1])                   
        except:
            weather_info.append('not found')
            
weather['weather'] = weather_info
weather.drop('url', axis = 1, inplace = True)
weather_df1 =weather

weather_df1['weather_category'] = weather_df1['weather'].str.lower()

weather_df1.loc[weather_df1['weather_category'].str.contains('sunny'), 'weather_category'] = 'Clear'
weather_df1.loc[weather_df1['weather_category'].str.contains('clear'), 'weather_category'] = 'Clear'
weather_df1.loc[weather_df1['weather_category'].str.contains('fine'), 'weather_category'] = 'Clear'
weather_df1.loc[weather_df1['weather_category'].str.contains('overcast'), 'weather_category'] = 'Cloudy'
weather_df1.loc[weather_df1['weather_category'].str.contains('cloudy|clouds|cloud', case=False), 'weather_category'] = 'Cloudy'
weather_df1.loc[weather_df1['weather_category'].str.contains('rain'), 'weather_category'] = 'Rainy'
weather_df1.loc[weather_df1['weather_category'].str.contains('dry'), 'weather_category'] = 'Clear'
weather_df1.loc[weather_df1['weather_category'].str.contains('wet'), 'weather_category'] = 'Rainy'
weather_df1.loc[weather_df1['weather_category'].str.contains('hot'), 'weather_category'] = 'Warm'
weather_df1.loc[weather_df1['weather_category'].str.contains('cold'), 'weather_category'] = 'Cloudy'
weather_df1.loc[weather_df1['weather_category'].str.contains('humid'), 'weather_category'] = 'Warm'
weather_df1.loc[weather_df1['weather_category'].str.contains('haze|hazy', case =False), 'weather_category'] = 'Warm'
weather_df1.loc[weather_df1['weather_category'].str.contains('fog'), 'weather_category'] = 'Cloudy'
weather_df1.loc[weather_df1['weather_category'].str.contains('wind'), 'weather_category'] = 'Windy'
weather_df1.loc[weather_df1['weather_category'].str.contains('82 to 84'), 'weather_category'] = 'Clear'

unique_weather_categories = weather_df1['weather_category'].unique()
weather_df1.drop('weather', axis =1, inplace = True)
df_encoded = pd.get_dummies(weather_df1, columns=['weather_category'])
weather

Unnamed: 0,season,circuit_id,weather_category
0,2010,bahrain,Clear
1,2010,albert_park,Cloudy
2,2010,sepang,Cloudy
3,2010,shanghai,Cloudy
4,2010,catalunya,Cloudy
...,...,...,...
254,2022,suzuka,Rainy
255,2022,americas,Cloudy
256,2022,rodriguez,Cloudy
257,2022,interlagos,Cloudy


---
**Salaries dataframe**

In [7]:
'''
In this part of the code we firstly extract all the drivers’ names given a year from 
https://www.formula1.com/en/results.html/{year}/drivers.html. 
By doing this, we have a full list of names that participated in our year range.
Then, for each of these names, we create a link with that name for 
https://www.spotrac.com/search/results/{process_name(name)}/ , 
a website that provides salary information. The names have to undergo some processing
to be converted to url format, hence process_name(name).In spotrac.com there is a chance that 
multiple people exist with the same name in different sports. For these drivers, we have to visit 
all the possible links of their name and determine which of those links is related to the F1 driver, 
and discard the links that point to the same name under a different sport (Nascar for example).
After filtering the data and having links to a page for each driver’s salaries, we parse the page 
for each driver and extract their yearly salaries.
'''
from utils import *
import requests

driver_names = set()

for year in range(2010, 2023):
    url = f"https://www.formula1.com/en/results.html/{year}/drivers.html"
    response = requests.get(url)
    html = response.text
    driver_names.update(get_names_from_table(html))
    print(f"Done with {year}, found {len(driver_names)} unique drivers")

links = {}
for name in driver_names:
    links[name] = f"https://www.spotrac.com/search/results/{process_name(name)}/"

print("\nFinding driver links")
for name in links:
    response = requests.get(links[name])
    current_url = response.url
    if not current_url.__contains__("formula1"):
        print("MULTIPLE PLAYERS WITH SAME NAME - " + name + " " + current_url)
        new_link = separate_formula1_driver_link(current_url)
        links[name] = new_link
        print(f"Replaced with {new_link}")
    else:
        links[name] = current_url
        print(name + " " + current_url)

links[
    "Kimi Raikkonen"
] = "https://www.spotrac.com/formula1/alfa-romeo/kimi-r%C3%A4ikk%C3%B6nen-47374/"

links = {key: value for key, value in links.items() if value is not None}

salaries = {}
print(f"\n\nGetting driver salaries for {len(links)} drivers")
for name in links:
    salaries[name] = get_yearly_salaries(links[name])
    print(name, salaries[name])
    


Done with 2010, found 27 unique drivers
Done with 2011, found 33 unique drivers
Done with 2012, found 37 unique drivers
Done with 2013, found 42 unique drivers
Done with 2014, found 46 unique drivers
Done with 2015, found 51 unique drivers
Done with 2016, found 56 unique drivers
Done with 2017, found 60 unique drivers
Done with 2018, found 63 unique drivers
Done with 2019, found 66 unique drivers
Done with 2020, found 69 unique drivers
Done with 2021, found 72 unique drivers
Done with 2022, found 74 unique drivers

Finding driver links
Nicholas Latifi https://www.spotrac.com/formula1/williams/nicholas-latifi-47384/
MULTIPLE PLAYERS WITH SAME NAME - Jack Aitken https://www.spotrac.com/search/results/jack-aitken/
Replaced with None
Sebastian Vettel https://www.spotrac.com/formula1/aston-martin/sebastian-vettel-47370/
Romain Grosjean https://www.spotrac.com/formula1/haas/romain-grosjean-47378/
MULTIPLE PLAYERS WITH SAME NAME - Lucas di Grassi https://www.spotrac.com/search/results/lucas-d

Romain Grosjean {'2012': 1280000, '2013': 1330000, '2014': 4300000, '2015': 4400000, '2016': 4500000, '2017': 1500000, '2018': 4450000, '2019': 1800000, '2020': 3200000}
Lucas di Grassi {'2010': 266000}
Sergio Perez {'2011': 278000, '2012': 640000, '2013': 2000000, '2014': 4300000, '2015': 4360000, '2016': 2000000, '2017': 2500000, '2018': 5000000, '2019': 3500000, '2020': 6000000, '2021': 5000000, '2022': 8000000, '2023': 10000000}
Max Verstappen {'2015': 277500, '2016': 650000, '2017': 3000000, '2018': 10000000, '2019': 13500000, '2020': 26000000, '2021': 25000000, '2022': 25000000, '2023': 55000000}
Charles Leclerc {'2018': 150000, '2019': 3500000, '2020': 15000000, '2021': 12000000, '2022': 12000000, '2023': 24000000}
Nyck De Vries {'2023': 2000000}
Pastor Maldonado {'2011': 278000, '2012': 512000, '2013': 1330000, '2014': 4300000, '2015': 4400000}
Giedo van der Garde {'2013': 199500}
Lance Stroll {'2018': 1800000, '2019': 1200000, '2020': 2400000, '2021': 1915000, '2022': 10000000

In [9]:
'''
Necessary changes were made to drivers' names, and duplicates were dropped, for consistency through all dataframes.
'''
df = pd.DataFrame(salaries)
df = df.transpose()
df.reset_index(inplace=True)
df.rename(columns={'index': 'Driver'}, inplace=True)
df = df.melt(id_vars='Driver', var_name='Season', value_name='Salary')
unique_drivers = df['Driver'].unique()
first_list = list(unique_drivers)
second_list = ['alonso', 'massa', 'hamilton', 'vettel', 'rosberg',
       'michael_schumacher', 'button', 'webber', 'liuzzi', 'barrichello',
       'kubica', 'sutil', 'alguersuari', 'hulkenberg', 'kovalainen',
       'buemi', 'trulli', 'bruno_senna', 'chandhok', 'glock', 'grassi',
       'kobayashi', 'petrov', 'rosa', 'yamamoto', 'heidfeld', 'klien',
       'resta', 'ambrosio', 'maldonado', 'perez', 'karthikeyan',
       'ricciardo', 'raikkonen', 'vergne', 'pic', 'grosjean', 'gutierrez',
       'bottas', 'jules_bianchi', 'chilton', 'garde', 'kevin_magnussen',
       'kvyat', 'ericsson', 'lotterer', 'stevens', 'nasr', 'sainz',
       'max_verstappen', 'merhi', 'rossi', 'jolyon_palmer', 'wehrlein',
       'haryanto', 'vandoorne', 'ocon', 'giovinazzi', 'stroll', 'gasly',
       'brendon_hartley', 'leclerc', 'sirotkin', 'norris', 'albon',
       'russell', 'latifi', 'aitken', 'pietro_fittipaldi', 'tsunoda',
       'mick_schumacher', 'mazepin', 'zhou', 'de_vries']
new_rows = pd.DataFrame({
    'Driver': ['Nath Klien', 'Ser Lotterer', 'Vale Rossi', 'Kotsio Aitken', 'Fittipaldi'],
    'Season': [2010, 2014, 2015, 2020, 2020],
    'Salary': ['None', 'None', 'None', 'None', 'None']  
})

df = df.append(new_rows, ignore_index=True)
names_changes = {'Jenson Button':'button', 'Jules Bianchi': 'jules_bianchi', 'Jolyon Palmer':'jolyon_palmer', 
                 'Lance Stroll': 'stroll','Pastor Maldonado': 'maldonado', 'Giedo van der Garde': 'garde', 
                 'Paul di Resta': 'resta','Alexander Albon': 'albon', 'Vitantonio Liuzzi': 'liuzzi', 
                 'Sebastien Buemi': 'buemi', 'Michael Schumacher': 'michael_schumacher', 
                 'Charles Leclerc': 'leclerc', 'Sergey Sirotkin': 'sirotkin', 'Jean-Eric Vergne': 'vergne',
                 'Rubens Barrichello': 'barrichello', 'Pascal Wehrlein': 'wehrlein','Pierre Gasly': 'gasly', 
                 'Daniil Kvyat': 'kvyat', 'Vitaly Petrov': 'petrov', 'Fernando Alonso': 'alonso',
                 'Nico Rosberg': 'rosberg', 'Lucas di Grassi': 'grassi', 'Will Stevens': 'stevens', 
                 'Adrian Sutil': 'sutil', 'Daniel Ricciardo': 'ricciardo', 'Marcus Ericsson': 'ericsson', 
                 'Sergio Perez': 'perez','Max Verstappen': 'max_verstappen', 'Valtteri Bottas': 'bottas', 
                 'Charles Pic': 'pic', 'Nick Heidfeld': 'heidfeld', 'Mark Webber': 'webber', 
                 'Rio Haryanto': 'haryanto', 'George Russell': 'russell','Robert Kubica': 'kubica', 
                 'Esteban Gutierrez ': 'gutierrez', 'Brendon Hartley': 'brendon_hartley', 'Carlos Sainz': 'sainz', 
                 'Karun Chandhok': 'chandhok', "Jerome d'Ambrosio": 'ambrosio', 'Heikki Kovalainen': 'kovalainen', 
                 'Bruno Senna': 'bruno_senna', 'Felipe Nasr': 'nasr', 'Jaime Alguersuari': 'alguersuari', 
                 'Romain Grosjean': 'grosjean', 'Timo Glock': 'glock', 'Esteban Ocon': 'ocon', 
                 'Zhou Guanyu': 'zhou', 'Roberto Merhi': 'merhi', 'Lando Norris': 'norris',
                 'Nicholas Latifi': 'latifi', 'Max Chilton': 'chilton', 'Stoffel Vandoorne': 'vandoorne',
                 'Pedro de la Rosa': 'rosa', 'Sakon Yamamoto': 'yamamoto', 'Lewis Hamilton': 'hamilton',
                 'Felipe Massa': 'massa', 'Valtteri  Bottas': 'bottas', 'Antonio Giovinazzi': 'giovinazzi',
                 'Yuki Tsunoda': 'tsunoda', 'Mick Schumacher': 'mick_schumacher', 
                 'Jarno Trulli': 'trulli', 'Nyck De Vries': 'de_vries',
                 'Nico Hulkenberg': 'hulkenberg', 'Kamui Kobayashi': 'kobayashi', 'Nikita Mazepin': 'mazepin',
                 'Kevin Magnussen': 'kevin_magnussen', 'Sebastian Vettel': 'vettel', 
                 'Narain Karthikeyan': 'karthikeyan', 'Kimi Raikkonen': 'raikkonen', 'Nath Klien': 'klien',
                 'Ser Lotterer': 'lotterer', 'Vale Rossi': 'Rossi', 'Kotsio Aitken': 'aitken', 'Fittipaldi': 'pietro_fittipaldi'}

df['Driver'] = df['Driver'].map(names_changes)
df['Season'] = pd.to_numeric(df['Season'], errors='coerce')

df = df[df['Season'] > 2009]
df.reset_index(drop=True)
df = df.sort_values(by=['Season', 'Driver'])
df['Salary'] = df['Salary'].apply(lambda x: 'None' if x is None else x)
df.dropna()
tuples_to_drop = [('brendon_hartley', 2010),('brendon_hartley', 2011),('brendon_hartley', 2012),
                  ('brendon_hartley', 2013),('brendon_hartley', 2014),('brendon_hartley', 2015),
                  ('brendon_hartley', 2016),('brendon_hartley', 2019),('brendon_hartley', 2020),
                  ('brendon_hartley', 2021),('brendon_hartley', 2022),
                  ('chandhok', 2012), ('chandhok', 2013), ('chandhok', 2014),('chandhok', 2015),
                  ('chandhok', 2016),('chandhok', 2017),('chandhok', 2018),('chandhok', 2019),
                  ('chandhok', 2020),('chandhok', 2021),('chandhok', 2022),
                  ('haryanto', 2010),('haryanto', 2011),('haryanto', 2012),('haryanto', 2013),
                  ('haryanto', 2014),('haryanto', 2015),('haryanto', 2017),('haryanto', 2018),
                  ('haryanto', 2019), ('haryanto', 2020), ('haryanto', 2021), ('haryanto', 2022),
                  ('jolyon_palmer', 2010),('jolyon_palmer', 2011),('jolyon_palmer', 2012),('jolyon_palmer', 2013),
                  ('jolyon_palmer', 2014),('jolyon_palmer', 2015),('jolyon_palmer', 2018),('jolyon_palmer', 2019),
                  ('jolyon_palmer', 2020),('jolyon_palmer', 2021),('jolyon_palmer', 2022),
                  ('yamamoto', 2011), ('yamamoto', 2012),('yamamoto', 2013),('yamamoto', 2014),('yamamoto', 2015),
                  ('yamamoto', 2016),('yamamoto', 2017),('yamamoto', 2018),('yamamoto', 2019),('yamamoto', 2020),
                  ('yamamoto', 2021),('yamamoto', 2022)
                  ]
df2 = df.loc[~df[['Driver', 'Season']].apply(tuple, axis=1).isin(tuples_to_drop)]
df2 = df2.drop(df2[df2['Season'] == 2023].index)
df2.dropna(inplace = True)
df2.reset_index(drop = True)
df2 = df2.rename(columns={'Driver': 'driver_id', 'Season': 'season', 'Salary': 'salary'})
df2 = df2[['season', 'driver_id', 'salary']]
df2

  df = df.append(new_rows, ignore_index=True)


Unnamed: 0,season,driver_id,salary
334,2010,alguersuari,532000.0
328,2010,alonso,40000000.0
297,2010,barrichello,7315000.0
296,2010,bruno_senna,199500.0
347,2010,buemi,532000.0
...,...,...,...
182,2022,sainz,10000000.0
150,2022,stroll,10000000.0
169,2022,tsunoda,750000.0
141,2022,vettel,15000000.0


In [None]:
circuits.to_csv('circuits_pre.csv', index = False)
drivers.to_csv('drivers_pre.csv', index = False)
constructors.to_csv('constructors_specs_pre.csv', index = False)
constructors_stats.to_csv('constructors_stats_pre.csv', index = False)
starting_grid.to_csv('quali_results_pre.csv', index = False)
race_results.to_csv('race_results_pre.csv', index = False)
df_encoded.to_csv('weather_pre.csv', index = False)
df2.to_csv('salaries.csv', index = False)
