In [5]:
import fastf1
import fastf1.plotting
from fastf1.ergast import Ergast
import pandas as pd
import numpy as np
import seaborn as sns
from datetime import timedelta

year = 2024
all_year_practice_data = pd.DataFrame()
for race_number in range(1, 25):
    practice_data_per_race = pd.DataFrame()
    total_diff_medium = 0
    total_diff_hard = 0
    soft_i = 0
    medium_i = 0
    hard_i = 0
    race_stints_list = []
    for free_practice_session in ('fp1', 'fp2', 'fp3'):
        try:
            practice_session = fastf1.get_session(year, race_number, free_practice_session)
            practice_session.load(weather=True)
            
            drivers = pd.unique(practice_session.laps['Driver'])

            QUAL_LAP_PARAM = 1.03
            RACE_LAP_PARAM = 1.13

            laps = practice_session.laps
            stints = laps[["Driver", "Stint", "Compound", "LapNumber"]]
            stints = stints.groupby(["Driver", "Stint", "Compound"]).agg(
                LapCount=('LapNumber', 'count'),
                FirstLap=('LapNumber', 'min'),
                LastLap=('LapNumber', 'max')
            ).reset_index()

            laps_soft = practice_session.laps.pick_quicklaps(QUAL_LAP_PARAM)

            transformed_laps_soft = laps_soft.copy()
            transformed_laps_soft.loc[:, "LapTime (s)"] = laps_soft["LapTime"].dt.total_seconds()

            soft_laptime = transformed_laps_soft[transformed_laps_soft['Compound'] == 'SOFT']["LapTime (s)"].median()
            medium_laptime = transformed_laps_soft[transformed_laps_soft['Compound'] == 'MEDIUM']["LapTime (s)"].median()

            if pd.notna(soft_laptime) and pd.notna(medium_laptime):
                diff_soft_medium = medium_laptime - soft_laptime
                total_diff_medium += diff_soft_medium
                soft_i +=1
                medium_i +=1
            
            #hard
            laps_hard = practice_session.laps.pick_quicklaps(RACE_LAP_PARAM)

            transformed_laps_hard = laps_hard.copy()
            transformed_laps_hard.loc[:, "LapTime (s)"] = laps_hard["LapTime"].dt.total_seconds()

            medium_laptime = transformed_laps_hard[transformed_laps_hard['Compound'] == 'MEDIUM']["LapTime (s)"].median()
            hard_laptime = transformed_laps_hard[transformed_laps_hard['Compound'] == 'HARD']["LapTime (s)"].median()

        
            if pd.notna(medium_laptime) and pd.notna(hard_laptime):
                diff_medium_hard = hard_laptime - medium_laptime
                total_diff_hard +=diff_medium_hard
                medium_i +=1
                hard_i +=1

            fastest_lap_session = practice_session.laps.pick_fastest()['LapTime']
            
            all_stints_list = []
            for driver in drivers:
                try:
                    driver_stints = stints[stints['Driver'] == driver].copy()
                    for index, row in driver_stints.iterrows():
                        driver_lap = practice_session.laps.pick_drivers(driver).pick_laps(range(int(row['FirstLap']), int(row['LastLap'])+1))
                        driver_lap.loc[:, 'LapTime'] = driver_lap['LapTime'].fillna(driver_lap['Sector1Time'] + driver_lap['Sector2Time'] + driver_lap['Sector3Time'])
                        driver_lap.loc[:, 'LapTime'] = driver_lap['LapTime'].fillna(driver_lap['Sector1Time'] + driver_lap['Sector2Time'] + driver_lap['Sector3Time'])
                        driver_lap.loc[:, 'LapTime'] = driver_lap['LapTime'].fillna(driver_lap['Time'] - driver_lap['PitOutTime'])

                        try:
                            fastest_lap_per_driver = timedelta(seconds = driver_lap['LapTime'].dt.total_seconds().min())
                        except:
                            fastest_lap_per_driver = fastest_lap_session * QUAL_LAP_PARAM
                        
                        quick_laps = driver_lap[driver_lap['LapTime'] < fastest_lap_per_driver * QUAL_LAP_PARAM]
                        avg_quick_laptime = quick_laps['LapTime'].median()
                        number_quick_laptime = len(quick_laps['LapTime'])

                        driver_stints.loc[index, 'race_number'] = race_number
                        driver_stints.loc[index, 'fastest_laptime'] = fastest_lap_per_driver
                        driver_stints.loc[index, 'number_quick_laptime'] = number_quick_laptime
                        driver_stints.loc[index, 'avg_quick_laptime'] = avg_quick_laptime
                        driver_stints.loc[index, 'fp_session'] = free_practice_session
                        driver_stints.loc[index, 'SpeedTotal'] = driver_lap['SpeedI1'].median() + driver_lap['SpeedI2'].median() +  driver_lap['SpeedFL'].median() + driver_lap['SpeedST'].median()

                        driver_stints['high_engine_mode'] = np.where(driver_stints['SpeedTotal'] > driver_stints['SpeedTotal'].quantile(q = [0.75]).values[0], 1, 0)
                        driver_stints['low_engine_mode'] = np.where(driver_stints['SpeedTotal'] < driver_stints['SpeedTotal'].quantile(q = [0.25]).values[0], 1, 0)
                
                    if not driver_stints.empty:
                        driver_stints['quali_sim'] = np.where((driver_stints['avg_quick_laptime'] < driver_stints['fastest_laptime'].min()*QUAL_LAP_PARAM) & (driver_stints['avg_quick_laptime'] <= fastest_lap_session * QUAL_LAP_PARAM) & (driver_stints['Compound'] != 'HARD'), 1, 0)
                        driver_stints['race_sim'] = np.where((driver_stints['avg_quick_laptime'] > driver_stints['fastest_laptime'].min()*QUAL_LAP_PARAM) & (driver_stints['avg_quick_laptime'] < driver_stints['fastest_laptime'].min()*RACE_LAP_PARAM), 1, 0)
                        all_stints_list.append(driver_stints)
                    
                    driver_stints = driver_stints.dropna()
                except:
                    print(f'No data for {driver}')

            if all_stints_list:
                drivers_stints = pd.concat(all_stints_list, ignore_index=True)
                practice_data_per_race = pd.concat([practice_data_per_race, drivers_stints], ignore_index=True)
            else:
                drivers_stints = pd.DataFrame()

            drivers_stints = drivers_stints.drop_duplicates()
            race_stints_list.append(practice_data_per_race)
        except:
            print('DataError: No practice')
            continue
    try:
        session = fastf1.get_session(year, race_number, 'Q')
        session.load(weather=True)

        q1, q2, q3 = session.laps.split_qualifying_sessions()
        is_nat = np.isnat(q1['LapTime'])
        q1 = q1[~is_nat]
        is_nat = np.isnat(q2['LapTime'])
        q2 = q2[~is_nat]
        is_nat = np.isnat(q3['LapTime'])
        q3 = q3[~is_nat]

        drivers_quali_result = []
            
        drivers = pd.unique(session.laps['Driver'])
        for driver in drivers:
            driver_quali_result = []
            driver_quali_result.append(year)
            driver_quali_result.append(race_number)
            driver_quali_result.append(session.event.EventFormat)
            driver_quali_result.append(driver)

            for subsession in [q1, q2, q3]:
                fastest_lap = subsession.pick_drivers(driver).pick_fastest()
                try:
                    driver_quali_result.append(fastest_lap['LapTime'])
                except TypeError:
                    driver_quali_result.append(np.nan)
            drivers_quali_result.append(driver_quali_result)
        df_quali_result = pd.DataFrame(drivers_quali_result, columns=['year', 'race_number', 'event_format', 'Driver', 'q1', 'q2', 'q3'])
        df_quali_result['Driver'] = df_quali_result['Driver'].astype(str)
        
        if race_stints_list:
            practice_data_per_race = pd.concat(race_stints_list, ignore_index=True)
        practice_data_per_race['Driver'] = practice_data_per_race['Driver'].astype(str)

        combined_df = practice_data_per_race.merge(df_quali_result, on=['Driver', 'race_number'])
        combined_df = combined_df[(combined_df['quali_sim'] == 1) | (combined_df['race_sim'] == 1)]
        #combined_df = pd.concat(race_stints_list, ignore_index=True)
        
        all_year_practice_data = pd.concat([all_year_practice_data, combined_df], ignore_index=True)
    except:
            print('DataError: No qualifying')
            continue
all_year_practice_data

core           INFO 	Loading data for Bahrain Grand Prix - Practice 1 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '2', '3', '4', '10', '11', '14', '16', '18', '20', '22', '23', '24', '27', '31', '44', '55', '63', '77', '81']
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return np.nanmean(a, axis, out=out, keepdims=keepd

DataError: No practice
DataError: No practice


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '4', '81', '16', '55', '63', '27', '77', '18', '3', '31', '23', '10', '24', '20', '44', '22', '2']
core           INFO 	Loading data for Miami Grand Prix - Practice 1 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
re

DataError: No practice
DataError: No practice


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '55', '11', '4', '81', '63', '44', '27', '22', '18', '10', '31', '23', '14', '77', '2', '3', '20', '24']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Practice 1 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for positio

DataError: No practice
DataError: No practice


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '63', '55', '44', '16', '81', '11', '27', '31', '3', '20', '10', '22', '14', '23', '18', '77', '2', '24']
core           INFO 	Loading data for British Grand Prix - Practice 1 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data


DataError: No practice
DataError: No practice


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '55', '16', '81', '63', '10', '14', '20', '11', '22', '27', '31', '18', '30', '23', '43', '77', '44', '24']
core           INFO 	Loading data for Mexico City Grand Prix - Practice 1 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position

DataError: No practice
DataError: No practice


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '63', '22', '31', '30', '16', '23', '81', '14', '18', '77', '1', '11', '55', '10', '44', '50', '43', '27', '24']
core           INFO 	Loading data for Las Vegas Grand Prix - Practice 1 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_d

DataError: No practice
DataError: No practice


req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '63', '4', '81', '16', '44', '55', '14', '11', '20', '10', '24', '77', '22', '18', '23', '30', '27', '43', '31']
core           INFO 	Loading data for Abu Dhabi Grand Prix - Practice 1 [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_d

Unnamed: 0,Driver,Stint,Compound,LapCount,FirstLap,LastLap,race_number,fastest_laptime,number_quick_laptime,avg_quick_laptime,...,SpeedTotal,high_engine_mode,low_engine_mode,quali_sim,race_sim,year,event_format,q1,q2,q3
0,VER,1.0,MEDIUM,3,1.0,3.0,1.0,0 days 00:01:33.900000,1.0,0 days 00:01:33.900000,...,849.5,0,1,1,0,2024,conventional,0 days 00:01:30.031000,0 days 00:01:29.374000,0 days 00:01:29.179000
1,VER,2.0,MEDIUM,3,4.0,6.0,1.0,0 days 00:01:33.535000,1.0,0 days 00:01:33.535000,...,994.5,0,0,1,0,2024,conventional,0 days 00:01:30.031000,0 days 00:01:29.374000,0 days 00:01:29.179000
2,VER,3.0,MEDIUM,6,7.0,12.0,1.0,0 days 00:01:33.238000,1.0,0 days 00:01:33.238000,...,888.5,0,0,1,0,2024,conventional,0 days 00:01:30.031000,0 days 00:01:29.374000,0 days 00:01:29.179000
3,VER,4.0,MEDIUM,9,13.0,21.0,1.0,0 days 00:01:37.576000,6.0,0 days 00:01:37.767500,...,1033.0,1,0,0,1,2024,conventional,0 days 00:01:30.031000,0 days 00:01:29.374000,0 days 00:01:29.179000
4,SAR,2.0,MEDIUM,8,9.0,16.0,1.0,0 days 00:01:34.213000,3.0,0 days 00:01:34.717000,...,1027.0,0,0,1,0,2024,conventional,0 days 00:01:30.770000,NaT,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5825,BOT,5.0,SOFT,6,11.0,16.0,24.0,0 days 00:01:24.751000,1.0,0 days 00:01:24.751000,...,1012.0,1,0,1,0,2024,conventional,0 days 00:01:23.481000,0 days 00:01:23.341000,0 days 00:01:23.204000
5826,PIA,1.0,SOFT,3,1.0,3.0,24.0,0 days 00:01:24.945000,1.0,0 days 00:01:24.945000,...,997.5,0,0,1,0,2024,conventional,0 days 00:01:23.640000,0 days 00:01:23.199000,0 days 00:01:22.804000
5827,PIA,2.0,SOFT,4,4.0,7.0,24.0,0 days 00:01:25.196000,1.0,0 days 00:01:25.196000,...,963.0,0,0,1,0,2024,conventional,0 days 00:01:23.640000,0 days 00:01:23.199000,0 days 00:01:22.804000
5828,PIA,3.0,SOFT,3,8.0,10.0,24.0,0 days 00:01:23.433000,1.0,0 days 00:01:23.433000,...,1081.0,1,0,1,0,2024,conventional,0 days 00:01:23.640000,0 days 00:01:23.199000,0 days 00:01:22.804000


In [None]:
all_year_practice_data.to_csv(f'practice_data_{year}.csv', index_col = False)

In [1]:
import os 
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import math
import re
import datetime as dt

from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error as rmse
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.impute import SimpleImputer

matplotlib.rcParams['font.size'] = 14
matplotlib.rcParams['figure.figsize'] = (10, 6)
matplotlib.rcParams['figure.facecolor'] = "#949494"

In [46]:
def test_params(ModelClass, **params):
    model = ModelClass(**params).fit(X_train, Y_train)
    train_rmse = rmse(model.predict(X_train), Y_train)
    val_rmse = rmse(model.predict(X_val), Y_val)
    return train_rmse, val_rmse

def test_param_and_plot(ModelClass, param_name, param_values, **other_params):
    train_errors, val_errors = [], [] 
    for value in param_values:
        params = dict(other_params)
        params[param_name] = value
        train_rmse, val_rmse = test_params(ModelClass, **params)
        train_errors.append(train_rmse)
        val_errors.append(val_rmse)
    plt.figure(figsize=(10,6))
    plt.title('Overfitting curve: ' + param_name)
    plt.plot(param_values, train_errors, 'b-o')
    plt.plot(param_values, val_errors, 'r-o')
    plt.xlabel(param_name)
    plt.ylabel('RMSE')
    plt.legend(['Training', 'Validation'])

In [2]:
df_2019 = pd.read_csv('practice_data_2019.csv')
df_2020 = pd.read_csv('practice_data_2020.csv')
df_2021 = pd.read_csv('practice_data_2021.csv')
df_2022 = pd.read_csv('practice_data_2022.csv')
df_2023 = pd.read_csv('practice_data_2023.csv')
df_2024 = pd.read_csv('practice_data_2024.csv')

train = pd.concat([df_2019, df_2020, df_2021, df_2022, df_2023])
train_df = train.drop('Unnamed: 0', axis=1)
val_df = df_2024.drop('Unnamed: 0', axis=1).copy()

In [3]:
train_df.loc[:,'fastest_laptime']= pd.to_timedelta(train_df['fastest_laptime']).dt.total_seconds()
train_df.loc[:,'avg_quick_laptime']= pd.to_timedelta(train_df['avg_quick_laptime']).dt.total_seconds()
train_df.loc[:,'q1']= pd.to_timedelta(train_df['q1']).dt.total_seconds()
train_df.loc[:,'q2']= pd.to_timedelta(train_df['q2']).dt.total_seconds()
train_df.loc[:,'q3']= pd.to_timedelta(train_df['q3']).dt.total_seconds()

val_df.loc[:,'fastest_laptime']= pd.to_timedelta(val_df['fastest_laptime']).dt.total_seconds()
val_df.loc[:,'avg_quick_laptime']= pd.to_timedelta(val_df['avg_quick_laptime']).dt.total_seconds()
val_df.loc[:,'q1']= pd.to_timedelta(val_df['q1']).dt.total_seconds()
val_df.loc[:,'q2']= pd.to_timedelta(val_df['q2']).dt.total_seconds()
val_df.loc[:,'q3']= pd.to_timedelta(val_df['q3']).dt.total_seconds()

train_df['fastest_laptime'] = train_df['fastest_laptime'].astype(float)
train_df['avg_quick_laptime'] = train_df['avg_quick_laptime'].astype(float)
train_df['q1'] = train_df['q1'].astype(float)
train_df['q2'] = train_df['q2'].astype(float)
train_df['q3'] = train_df['q3'].astype(float)

val_df['fastest_laptime'] = val_df['fastest_laptime'].astype(float)
val_df['avg_quick_laptime'] = val_df['avg_quick_laptime'].astype(float)
val_df['q1'] = val_df['q1'].astype(float)
val_df['q2'] = val_df['q2'].astype(float)
val_df['q3'] = val_df['q3'].astype(float)

In [4]:
input_cols = ['Stint', 'Compound', 'LapCount', 'FirstLap', 'LastLap',
       'race_number', 'fastest_laptime', 'number_quick_laptime',
       'avg_quick_laptime', 'fp_session', 'SpeedTotal', 'high_engine_mode',
       'low_engine_mode', 'quali_sim', 'race_sim']

target_col = 'q1'

X_train = train_df[input_cols].copy()
Y_train = train_df[target_col].copy()

X_val = val_df[input_cols].copy()
Y_val = val_df[target_col].copy()

Y_train = pd.DataFrame(Y_train)
Y_val = pd.DataFrame(Y_val)

numeric_cols = X_train.select_dtypes(include=np.number).columns.tolist()
categorical_cols = X_train.select_dtypes('object').columns.tolist()

In [5]:
imputer = SimpleImputer(strategy='constant', fill_value=120).fit(X_train[numeric_cols])

X_train[numeric_cols] = imputer.transform(X_train[numeric_cols])
X_val[numeric_cols] = imputer.transform(X_val[numeric_cols])

imputer = SimpleImputer(strategy='constant', fill_value=120).fit(Y_train)

Y_train = imputer.transform(Y_train)
Y_val = imputer.transform(Y_val)

scaler = MinMaxScaler().fit(X_train[numeric_cols])

X_train[numeric_cols] = scaler.transform(X_train[numeric_cols])
X_val[numeric_cols] = scaler.transform(X_val[numeric_cols])

encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore').fit(X_train[categorical_cols])
encoded_cols = list(encoder.get_feature_names_out(categorical_cols))

X_train[encoded_cols] = encoder.transform(X_train[categorical_cols])
X_val[encoded_cols] = encoder.transform(X_val[categorical_cols])

train_df = X_train[numeric_cols + encoded_cols]
val_df = X_val[numeric_cols + encoded_cols]

numeric_cols_df = train_df.select_dtypes(include=np.number).columns.tolist()
categorical_cols_df = train_df.select_dtypes('object').columns.tolist()

In [70]:
best_params ={
    'random_state': 42,
    'n_jobs': -1,
    'n_estimators': 10000,
    'criterion' : 'squared_error',
    'max_depth' : 10,
    'min_samples_split' : 16,
    'min_samples_leaf' : 8,
    'max_features' : 'sqrt',
    }

In [71]:
X_train, Y_train = train_df, Y_train
X_val, Y_val = val_df, Y_val

model = RandomForestRegressor(**best_params)
model.fit(X_train, Y_train)
train_preds = model.predict(X_train)
val_preds = model.predict(X_val)

train_rmse = rmse(Y_train, train_preds)
val_rmse = rmse(Y_val, val_preds)

print(train_rmse)
print(val_rmse)

  return fit_method(estimator, *args, **kwargs)


4.700972889754322
4.703543182780879


In [80]:
normalized_val_rmse = val_rmse/(val_preds.max()-val_preds.min())
normalized_val_rmse

np.float64(0.09988659725562207)

In [81]:
val_preds

array([91.35088375, 91.13684888, 90.94589945, ..., 84.42933382,
       83.47278541, 83.14885113], shape=(5830,))

In [82]:
Y_val

array([[90.031],
       [90.031],
       [90.031],
       ...,
       [83.64 ],
       [83.64 ],
       [83.64 ]], shape=(5830, 1))

In [84]:
train

Unnamed: 0.1,Unnamed: 0,Driver,Stint,Compound,LapCount,FirstLap,LastLap,race_number,fastest_laptime,number_quick_laptime,...,SpeedTotal,high_engine_mode,low_engine_mode,quali_sim,race_sim,year,event_format,q1,q2,q3
0,0,RIC,4.0,SOFT,5,7.0,11.0,1.0,0 days 00:01:25.634000,2.0,...,1081.5,0,0,1,0,2019,conventional,0 days 00:01:22.921000,0 days 00:01:22.570000,
1,1,NOR,6.0,SOFT,10,22.0,31.0,1.0,0 days 00:01:30.238000,7.0,...,1113.0,0,0,0,1,2019,conventional,0 days 00:01:22.702000,0 days 00:01:22.423000,0 days 00:01:22.304000
2,2,VET,2.0,SOFT,6,2.0,7.0,1.0,0 days 00:01:26.067000,2.0,...,1068.0,0,0,0,1,2019,conventional,0 days 00:01:22.885000,0 days 00:01:21.912000,0 days 00:01:21.190000
3,3,VET,4.0,SOFT,3,9.0,11.0,1.0,0 days 00:01:23.637000,1.0,...,1038.0,0,1,1,0,2019,conventional,0 days 00:01:22.885000,0 days 00:01:21.912000,0 days 00:01:21.190000
4,4,VET,6.0,SOFT,6,13.0,18.0,1.0,0 days 00:01:28.596000,3.0,...,1088.0,1,0,0,1,2019,conventional,0 days 00:01:22.885000,0 days 00:01:21.912000,0 days 00:01:21.190000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4880,4880,BOT,1.0,SOFT,6,1.0,6.0,22.0,0 days 00:01:26.044000,2.0,...,904.5,0,1,1,0,2023,conventional,0 days 00:01:24.788000,,
4881,4881,BOT,2.0,SOFT,7,7.0,13.0,22.0,0 days 00:01:25.420000,2.0,...,940.5,1,0,1,0,2023,conventional,0 days 00:01:24.788000,,
4882,4882,PIA,1.0,SOFT,3,1.0,3.0,22.0,0 days 00:01:25.675000,1.0,...,1012.5,1,0,1,0,2023,conventional,0 days 00:01:24.487000,0 days 00:01:24.278000,0 days 00:01:23.782000
4883,4883,PIA,2.0,SOFT,3,4.0,6.0,22.0,0 days 00:01:25.395000,1.0,...,1001.5,0,0,1,0,2023,conventional,0 days 00:01:24.487000,0 days 00:01:24.278000,0 days 00:01:23.782000
