### Import all the required Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os

### Load all required datasets

In [3]:
import pandas as pd
import os

path = "overall/"

circuits = pd.read_csv(os.path.join(path, "circuits.csv"))
constructor_results = pd.read_csv(os.path.join(path, "constructor_results.csv"))
constructor_standings = pd.read_csv(os.path.join(path, "constructor_standings.csv"))
constructors = pd.read_csv(os.path.join(path, "constructors.csv"))
driver_standings = pd.read_csv(os.path.join(path, "driver_standings.csv"))
drivers = pd.read_csv(os.path.join(path, "drivers.csv"))
lap_times = pd.read_csv(os.path.join(path, "lap_times.csv"))
pit_stops = pd.read_csv(os.path.join(path, "pit_stops.csv"))
qualifying = pd.read_csv(os.path.join(path, "qualifying.csv"))
races = pd.read_csv(os.path.join(path, "races.csv"))
results = pd.read_csv(os.path.join(path, "results.csv"))
seasons = pd.read_csv(os.path.join(path, "seasons.csv"))
sprint_results = pd.read_csv(os.path.join(path, "sprint_results.csv"))
status = pd.read_csv(os.path.join(path, "status.csv"))

### Replace unwanted values

In [4]:
drivers = drivers.replace(r"\N", np.nan, regex=False)
races = races.replace(r"\N", np.nan, regex=False)
results = results.replace(r"\N", np.nan, regex=False)

### Convert columns wherever necessary

In [5]:
# driver table
drivers["number"] = pd.to_numeric(drivers["number"])
drivers["dob"] = pd.to_datetime(drivers["dob"], errors='coerce')


# races table
races_date_cols = ["date", "fp1_date", "fp2_date", "fp3_date", "quali_date", "sprint_date"]
races_time_cols = ["time", "fp1_time", "fp2_time", "fp3_time", "quali_time", "sprint_time"]

# Convert date columns
for col in races_date_cols:
    races[col] = pd.to_datetime(races[col], errors="coerce")

# Combine date + time into full datetime columns
for d_col, t_col in zip(races_date_cols, races_time_cols):
    new_col = d_col.replace("_date", "") + "_datetime"
    races[new_col] = pd.to_datetime(
        races[d_col].astype(str) + " " + races[t_col].astype(str),
        errors="coerce"
    )

# Drop original columns
races.drop(columns=races_time_cols, inplace=True)
races.drop(columns=races_date_cols, inplace=True)

# Rename main race datetime column
races.rename(columns={"date_datetime": "race_datetime"}, inplace=True)


# Results table
# Numeric with missing values → nullable integers
int_cols = ["number", "position", "milliseconds", "fastestLap", "rank"]
for col in int_cols:
    results[col] = pd.to_numeric(results[col], errors="coerce").astype("Int64")

# Float columns
results["fastestLapSpeed"] = pd.to_numeric(results["fastestLapSpeed"], errors="coerce")

# Convert fastest lap time to timedelta
def parse_lap_time(t):
    if pd.isna(t):
        return pd.NaT
    try:
        m, s = t.split(":")
        total_seconds = int(m) * 60 + float(s)
        return pd.to_timedelta(total_seconds, unit="s")
    except:
        return pd.NaT

results["fastestLapTime"] = results["fastestLapTime"].apply(parse_lap_time)

# Optional: convert positionText to category
results["positionText"] = results["positionText"].astype("category")

  races[new_col] = pd.to_datetime(
  races[new_col] = pd.to_datetime(
  races[new_col] = pd.to_datetime(
  races[new_col] = pd.to_datetime(
  races[new_col] = pd.to_datetime(


### Convert columns wherever reqired

In [6]:
drivers

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url
0,1,hamilton,44.0,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton
1,2,heidfeld,,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld
2,3,rosberg,6.0,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg
3,4,alonso,14.0,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso
4,5,kovalainen,,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen
...,...,...,...,...,...,...,...,...,...
859,861,colapinto,43.0,COL,Franco,Colapinto,2003-05-27,Argentinian,http://en.wikipedia.org/wiki/Franco_Colapinto
860,862,doohan,61.0,DOO,Jack,Doohan,2003-01-20,Australian,http://en.wikipedia.org/wiki/Jack_Doohan
861,863,antonelli,12.0,ANT,Andrea Kimi,Antonelli,2006-08-25,Italian,https://en.wikipedia.org/wiki/Andrea_Kimi_Anto...
862,864,bortoleto,5.0,BOR,Gabriel,Bortoleto,2004-10-14,Brazilian,https://en.wikipedia.org/wiki/Gabriel_Bortoleto


In [8]:
drivers["Driver Name"] = drivers["forename"] + " " + drivers["surname"]
drivers

Unnamed: 0,driverId,driverRef,number,code,forename,surname,dob,nationality,url,Driver Name
0,1,hamilton,44.0,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton,Lewis Hamilton
1,2,heidfeld,,HEI,Nick,Heidfeld,1977-05-10,German,http://en.wikipedia.org/wiki/Nick_Heidfeld,Nick Heidfeld
2,3,rosberg,6.0,ROS,Nico,Rosberg,1985-06-27,German,http://en.wikipedia.org/wiki/Nico_Rosberg,Nico Rosberg
3,4,alonso,14.0,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso,Fernando Alonso
4,5,kovalainen,,KOV,Heikki,Kovalainen,1981-10-19,Finnish,http://en.wikipedia.org/wiki/Heikki_Kovalainen,Heikki Kovalainen
...,...,...,...,...,...,...,...,...,...,...
859,861,colapinto,43.0,COL,Franco,Colapinto,2003-05-27,Argentinian,http://en.wikipedia.org/wiki/Franco_Colapinto,Franco Colapinto
860,862,doohan,61.0,DOO,Jack,Doohan,2003-01-20,Australian,http://en.wikipedia.org/wiki/Jack_Doohan,Jack Doohan
861,863,antonelli,12.0,ANT,Andrea Kimi,Antonelli,2006-08-25,Italian,https://en.wikipedia.org/wiki/Andrea_Kimi_Anto...,Andrea Kimi Antonelli
862,864,bortoleto,5.0,BOR,Gabriel,Bortoleto,2004-10-14,Brazilian,https://en.wikipedia.org/wiki/Gabriel_Bortoleto,Gabriel Bortoleto


In [9]:
drivers["Driver Name"]

0             Lewis Hamilton
1              Nick Heidfeld
2               Nico Rosberg
3            Fernando Alonso
4          Heikki Kovalainen
               ...          
859         Franco Colapinto
860              Jack Doohan
861    Andrea Kimi Antonelli
862        Gabriel Bortoleto
863             Isack Hadjar
Name: Driver Name, Length: 864, dtype: object