# Setup

## Imports

In [28]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
tf.random.set_seed(100)

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

path = r"../../../data/raw/formula-1-world-championship-1950-2020/versions/24/"

## Reader Files

In [30]:
def create_dataframes_csv(path):
    frame = pd.read_csv(path)
    
    return frame

# Predicted Race Results

In [151]:
def prefix_columns(df, table_name, exclude=None):
    if exclude is None:
        exclude = []
    return df.rename(columns={
        col: f"{table_name}__{col}" if col not in exclude else col
        for col in df.columns
    })

drivers = prefix_columns(create_dataframes_csv(f"{path}drivers.csv"), "drivers")
constructors = prefix_columns(create_dataframes_csv(f"{path}constructors.csv"), "constructors")
constructor_standings = prefix_columns(create_dataframes_csv(f"{path}constructor_standings.csv"), "constructor_standings")
qualifying = prefix_columns(create_dataframes_csv(f"{path}qualifying.csv"), "qualifying")
circuits = prefix_columns(create_dataframes_csv(f"{path}circuits.csv"), "circuits")
results = prefix_columns(create_dataframes_csv(f"{path}results.csv"), "results")
pit_stops = prefix_columns(create_dataframes_csv(f"{path}pit_stops.csv"), "pit_stops")
races = prefix_columns(create_dataframes_csv(f"{path}races.csv"), "races")

drivers = drivers.drop(columns=[
    "drivers__url", "drivers__dob"
])

constructors = constructors.drop(columns=[
    "constructors__url"
])

constructor_standings = constructor_standings.drop(columns=[
    "constructor_standings__positionText"
])

circuits = circuits.drop(columns=[
    "circuits__url", "circuits__alt", "circuits__lng", "circuits__lng"
])

results = results.drop(columns=[
    "results__positionText"
])

races = races.drop(columns=[
    "races__url"
])

# Merging with clear suffix tracking
total = pd.merge(drivers, results, left_on="drivers__driverId", right_on="results__driverId")
total = pd.merge(total, races, left_on="results__raceId", right_on="races__raceId")
total = pd.merge(total, circuits, left_on="races__circuitId", right_on="circuits__circuitId")
total = pd.merge(total, qualifying, left_on=["races__raceId", "drivers__driverId"], right_on=["qualifying__raceId", "qualifying__driverId"])
total = pd.merge(total, pit_stops, left_on=["qualifying__raceId", "qualifying__driverId"], right_on=["pit_stops__raceId", "pit_stops__driverId"])
total = pd.merge(total, constructors, left_on="qualifying__constructorId", right_on="constructors__constructorId")
total = pd.merge(total, constructor_standings, left_on="constructors__constructorId", right_on="constructor_standings__constructorId")

total = total.drop(columns=[
    "drivers__driverId", "drivers__number",
    "results__driverId", "results__raceId", "results__constructorId", "results__statusId", "results__number", "results__driverId",
    "races__raceId", "races__circuitId",
    "circuits__circuitId",
    "qualifying__qualifyId", "qualifying__raceId", "qualifying__driverId", "qualifying__constructorId", "qualifying__number",
    "pit_stops__driverId",
    "constructors__constructorId",
    "constructor_standings__constructorStandingsId", "constructor_standings__raceId", "constructor_standings__constructorId"
])

total = total.drop_duplicates()

total


Unnamed: 0,drivers__driverRef,drivers__code,drivers__forename,drivers__surname,drivers__nationality,results__resultId,results__grid,results__position,results__positionOrder,results__points,...,pit_stops__lap,pit_stops__time,pit_stops__duration,pit_stops__milliseconds,constructors__constructorRef,constructors__name,constructors__nationality,constructor_standings__points,constructor_standings__position,constructor_standings__wins
0,hamilton,HAM,Lewis,Hamilton,British,20780,2,2,2,18.0,...,16,17:28:24,23.227,23227,mclaren,McLaren,British,14.0,1,1
1,hamilton,HAM,Lewis,Hamilton,British,20780,2,2,2,18.0,...,16,17:28:24,23.227,23227,mclaren,McLaren,British,24.0,1,1
2,hamilton,HAM,Lewis,Hamilton,British,20780,2,2,2,18.0,...,16,17:28:24,23.227,23227,mclaren,McLaren,British,28.0,3,1
3,hamilton,HAM,Lewis,Hamilton,British,20780,2,2,2,18.0,...,16,17:28:24,23.227,23227,mclaren,McLaren,British,34.0,3,1
4,hamilton,HAM,Lewis,Hamilton,British,20780,2,2,2,18.0,...,16,17:28:24,23.227,23227,mclaren,McLaren,British,42.0,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5020547,doohan,DOO,Jack,Doohan,Australian,26759,17,15,15,0.0,...,22,17:37:42,22.080,22080,alpine,Alpine F1 Team,French,14.0,9,0
5020548,doohan,DOO,Jack,Doohan,Australian,26759,17,15,15,0.0,...,22,17:37:42,22.080,22080,alpine,Alpine F1 Team,French,49.0,6,0
5020549,doohan,DOO,Jack,Doohan,Australian,26759,17,15,15,0.0,...,22,17:37:42,22.080,22080,alpine,Alpine F1 Team,French,49.0,7,0
5020550,doohan,DOO,Jack,Doohan,Australian,26759,17,15,15,0.0,...,22,17:37:42,22.080,22080,alpine,Alpine F1 Team,French,59.0,6,0


1746367
