---
# Import from the API (ergast)


In [11]:
import requests

def get_drivers_standings(year='current'):
    """Fetch the driver standings for a season and print one line per driver.

    Args:
        year: Value interpolated into the request URL as the season segment
            (for example ``2023``). Defaults to ``'current'``.

    Returns:
        None. Each driver is printed as
        ``"<position>. <given name> <family name> - <points> points"``, where
        the position is the 1-based rank of the entry in the returned list.
        A short message is printed instead when the request fails, the server
        does not answer with HTTP 200, or no standings are returned.
    """
    url = f'http://ergast.com/api/f1/{year}/driverStandings.json'

    try:
        # Without a timeout an unresponsive server would block the cell forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to retrieve data: {exc}")
        return

    if response.status_code != 200:
        print("Failed to retrieve data")
        return

    data = response.json()
    standings_lists = data['MRData']['StandingsTable']['StandingsLists']
    if not standings_lists:
        # An empty list would otherwise surface as an IndexError on [0].
        print(f"No driver standings available for season '{year}'")
        return

    standings = standings_lists[0]['DriverStandings']
    for position, driver_info in enumerate(standings, start=1):
        driver = driver_info['Driver']
        name = f"{driver['givenName']} {driver['familyName']}"
        points = driver_info['points']
        print(f"{position}. {name} - {points} points")

# Print the standings using the function's default year='current'.
get_drivers_standings()

1. Max Verstappen - 575 points
2. Sergio Pérez - 285 points
3. Lewis Hamilton - 234 points
4. Fernando Alonso - 206 points
5. Charles Leclerc - 206 points
6. Lando Norris - 205 points
7. Carlos Sainz - 200 points
8. George Russell - 175 points
9. Oscar Piastri - 97 points
10. Lance Stroll - 74 points
11. Pierre Gasly - 62 points
12. Esteban Ocon - 58 points
13. Alexander Albon - 27 points
14. Yuki Tsunoda - 17 points
15. Valtteri Bottas - 10 points
16. Nico Hülkenberg - 9 points
17. Daniel Ricciardo - 6 points
18. Guanyu Zhou - 6 points
19. Kevin Magnussen - 3 points
20. Liam Lawson - 2 points
21. Logan Sargeant - 1 points
22. Nyck de Vries - 0 points


---
# Import dataset

## *This dataset covers seasons 1950 through 2023*

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning category for the rest of the session.
warnings.simplefilter("ignore")
# Do not truncate columns when displaying wide DataFrames (None = no limit).
pd.set_option('display.max_columns', None)
# Read the option back to confirm it; get_option takes only the option name.
pd.get_option('display.max_columns')

In [30]:
path = "../data/Formula 1 World Championship (1950 - 2023) Kaggle"


def _load_table(stem):
    """Read ``<path>/<stem>.csv`` into a DataFrame with pandas' default settings."""
    return pd.read_csv(f"{path}/{stem}.csv")


# Load every table of the dataset, one DataFrame per CSV file.
# NOTE(review): the previews further down show the literal string "\N" in some
# cells; with default read_csv settings it is kept as text, not parsed as NaN.
df_circuits = _load_table("circuits")
df_constructor_results = _load_table("constructor_results")
df_constructor_standings = _load_table("constructor_standings")
df_constructors = _load_table("constructors")
df_driver_standings = _load_table("driver_standings")
df_drivers = _load_table("drivers")
df_lap_times = _load_table("lap_times")
df_pit_stops = _load_table("pit_stops")
df_qualifying = _load_table("qualifying")
df_races = _load_table("races")
df_results = _load_table("results")
df_seasons = _load_table("seasons")
df_sprint_results = _load_table("sprint_results")
df_status = _load_table("status")

In [36]:
# Preview the first 8 rows of the status table.
df_status.head(8)

Unnamed: 0,statusId,status
0,1,Finished
1,2,Disqualified
2,3,Accident
3,4,Collision
4,5,Engine
5,6,Gearbox
6,7,Transmission
7,8,Clutch


### Limit data to last X seasons

In [32]:
# Earliest season (inclusive) to keep when df_races is filtered by year.
min_year = 2019

In [33]:
# Keep only the races whose season is `min_year` or later, and report how
# many rows the filter removes.
race_count_before = len(df_races)
print("Len before filter:", race_count_before)

is_recent_season = df_races['year'] >= min_year
df_races = df_races[is_recent_season]

race_count_after = len(df_races)
print("Len after filter:", race_count_after)

Len before filter: 1101
Len after filter: 104


In [37]:
# Restrict every race-keyed table to the races currently present in df_races,
# printing the row count of each table before and after the filter.
def _keep_selected_races(frame, race_ids):
    """Return the rows of ``frame`` whose ``raceId`` is in ``race_ids``.

    Prints the number of rows before and after filtering.
    """
    print("Len before filter:", len(frame))
    kept = frame[frame['raceId'].isin(race_ids)]
    print("Len after filter:", len(kept))
    return kept


selected_race_ids = df_races['raceId']

print("> df_constructor_results")
df_constructor_results = _keep_selected_races(df_constructor_results, selected_race_ids)

print("\n> df_constructor_standings")
df_constructor_standings = _keep_selected_races(df_constructor_standings, selected_race_ids)

print("\n> df_driver_standings")
df_driver_standings = _keep_selected_races(df_driver_standings, selected_race_ids)

print("\n> df_lap_times")
df_lap_times = _keep_selected_races(df_lap_times, selected_race_ids)

print("\n> df_pit_stops")
df_pit_stops = _keep_selected_races(df_pit_stops, selected_race_ids)

print("\n> df_qualifying")
df_qualifying = _keep_selected_races(df_qualifying, selected_race_ids)

print("\n> df_results")
df_results = _keep_selected_races(df_results, selected_race_ids)

print("\n> df_sprint_results")
df_sprint_results = _keep_selected_races(df_sprint_results, selected_race_ids)

> df_constructor_results
Len before filter: 12290
Len after filter: 940

> df_constructor_standings
Len before filter: 13051
Len after filter: 940

> df_driver_standings
Len before filter: 34124
Len after filter: 1937

> df_lap_times
Len before filter: 551742
Len after filter: 102863

> df_pit_stops
Len before filter: 10089
Len after filter: 3302

> df_qualifying
Len before filter: 9815
Len after filter: 1877

> df_results
Len before filter: 26080
Len after filter: 1880

> df_sprint_results
Len before filter: 180
Len after filter: 180


---
# Data Exploration

> ## Circuits

In [24]:
# Distinct country values found in the circuits table.
countries = df_circuits['country'].unique()
print("List of countries: \n", countries)

# Distinct location values found in the circuits table.
locations = df_circuits['location'].unique()
print("\nList of locations: \n", locations)

List of countries: 
 ['Australia' 'Malaysia' 'Bahrain' 'Spain' 'Turkey' 'Monaco' 'Canada'
 'France' 'UK' 'Germany' 'Hungary' 'Belgium' 'Italy' 'Singapore' 'Japan'
 'China' 'Brazil' 'USA' 'United States' 'UAE' 'Argentina' 'Portugal'
 'South Africa' 'Mexico' 'Korea' 'Netherlands' 'Sweden' 'Austria'
 'Morocco' 'Switzerland' 'India' 'Russia' 'Azerbaijan' 'Saudi Arabia'
 'Qatar']

List of locations: 
 ['Melbourne' 'Kuala Lumpur' 'Sakhir' 'Montmeló' 'Istanbul' 'Monte-Carlo'
 'Montreal' 'Magny Cours' 'Silverstone' 'Hockenheim' 'Budapest' 'Valencia'
 'Spa' 'Monza' 'Marina Bay' 'Oyama' 'Shanghai' 'São Paulo' 'Indianapolis'
 'Nürburg' 'Imola' 'Suzuka' 'Las Vegas' 'Abu Dhabi' 'Buenos Aires'
 'Jerez de la Frontera' 'Estoril' 'Okayama' 'Adelaide' 'Midrand'
 'Castle Donington' 'Mexico City' 'Phoenix' 'Le Castellet'
 'Yeongam County' 'Rio de Janeiro' 'Detroit' 'Kent' 'Zandvoort'
 'Heusden-Zolder' 'Dijon' 'Dallas' 'California' 'Nevada' 'Madrid'
 'New York State' 'Anderstorp' 'Ontario' 'Barcelona' 'Bru

> ## Constructors

In [25]:
# Preview the first five rows of the constructors table.
df_constructors.head(5)

Unnamed: 0,constructorId,constructorRef,name,nationality,url
0,1,mclaren,McLaren,British,http://en.wikipedia.org/wiki/McLaren
1,2,bmw_sauber,BMW Sauber,German,http://en.wikipedia.org/wiki/BMW_Sauber
2,3,williams,Williams,British,http://en.wikipedia.org/wiki/Williams_Grand_Pr...
3,4,renault,Renault,French,http://en.wikipedia.org/wiki/Renault_in_Formul...
4,5,toro_rosso,Toro Rosso,Italian,http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso


> ## Drivers

In [26]:
# Preview the first five rows of the drivers table.
df_drivers.head(5)

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url
0,1,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton
1,2,heidfeld,\N,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld
2,3,rosberg,6,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg
3,4,alonso,14,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso
4,5,kovalainen,\N,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen


> ## Races

In [37]:
# Show the last five rows of the races table.
df_races.tail(5)

Unnamed: 0,raceId,year,round,circuitId,name,date,time,url,fp1_date,fp1_time,fp2_date,fp2_time,fp3_date,fp3_time,quali_date,quali_time,sprint_date,sprint_time
1096,1116,2023,18,69,United States Grand Prix,2023-10-22,19:00:00,https://en.wikipedia.org/wiki/2023_United_Stat...,2023-10-20,17:30:00,2023-10-21,18:00:00,\N,\N,2023-10-20,21:00:00,2023-10-21,22:00:00
1097,1117,2023,19,32,Mexico City Grand Prix,2023-10-29,20:00:00,https://en.wikipedia.org/wiki/2023_Mexico_City...,2023-10-27,18:30:00,2023-10-27,22:00:00,2023-10-28,17:30:00,2023-10-28,21:00:00,\N,\N
1098,1118,2023,20,18,São Paulo Grand Prix,2023-11-05,17:00:00,https://en.wikipedia.org/wiki/2023_S%C3%A3o_Pa...,2023-11-03,14:30:00,2023-11-04,14:30:00,\N,\N,2023-11-03,18:00:00,2023-11-04,18:30:00
1099,1119,2023,21,80,Las Vegas Grand Prix,2023-11-19,06:00:00,https://en.wikipedia.org/wiki/2023_Las_Vegas_G...,2023-11-17,04:30:00,2023-11-17,08:00:00,2023-11-18,04:30:00,2023-11-18,08:00:00,\N,\N
1100,1120,2023,22,24,Abu Dhabi Grand Prix,2023-11-26,13:00:00,https://en.wikipedia.org/wiki/2023_Abu_Dhabi_G...,2023-11-24,09:30:00,2023-11-24,13:00:00,2023-11-25,10:30:00,2023-11-25,14:00:00,\N,\N


In [42]:
# Count the races in each season and list the seasons in descending order of
# race count.
print("Number of races by year (order from most to less): \n")
races_per_year = df_races.groupby('year')['raceId'].count()
print(races_per_year.sort_values(ascending=False))

Number of races by year (order from most to less): 

year
2023    22
2022    22
2021    22
2019    21
2018    21
        ..
1956     8
1957     8
1961     8
1955     7
1950     7
Name: raceId, Length: 74, dtype: int64


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Compute pairwise correlations over the numeric columns only. Since pandas 2.0
# DataFrame.corr() defaults to numeric_only=False, so any text column would
# make a bare .corr() call raise; selecting numeric dtypes first works on every
# pandas version.
numeric_qualifying = df_qualifying.select_dtypes(include='number')
correlation_matrix = numeric_qualifying.corr()

# Draw the correlation matrix as an annotated heatmap.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()
