In [19]:
import joblib
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import confusion_matrix, precision_score
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Show NumPy floating-point output with 4 digits of precision.
np.set_printoptions(precision=4)

In [None]:
# Load the prepared dataset from the CSV next to the notebook.
data = pd.read_csv(filepath_or_buffer='final_df.csv')

In [21]:
# Peek at the last five rows to sanity-check the load.
data.tail(5)

Unnamed: 0,season,round,weather_warm,weather_cold,weather_dry,weather_wet,weather_cloudy,driver,grid,podium,...,constructor_minardi,constructor_prost,constructor_red_bull,constructor_renault,constructor_sauber,constructor_team_lotus,constructor_toro_rosso,constructor_toyota,constructor_tyrrell,constructor_williams
16274,2023,22,True,False,False,False,False,ricciardo,15,11,...,False,False,False,False,False,False,False,False,False,False
16275,2023,22,True,False,False,False,False,sainz,16,18,...,False,False,False,False,False,False,False,False,False,False
16276,2023,22,True,False,False,False,False,kevin_magnussen,17,20,...,False,False,False,False,False,False,False,False,False,False
16277,2023,22,True,False,False,False,False,bottas,18,19,...,False,False,False,False,False,False,False,False,False,False
16278,2023,22,True,False,False,False,False,zhou,19,17,...,False,False,False,False,False,False,False,False,False,False


In [23]:
# Work on a copy so the frame loaded from disk stays untouched.
df = data.copy()

# Every season before 2023 is training data; 2023 is held out for scoring.
train = df.loc[df.season < 2023]
y_train = train.podium
# 'driver' is an identifier and 'podium' is the target, so neither is a feature.
# NOTE(review): 'Unnamed: 0' (shown in the tail() output, looks like a saved
# CSV index) is still part of the features here -- consider dropping it.
X_train = train.drop(columns = ['driver', 'podium'])

# Fit the scaler on the training features only, then standardise them.
# The rebuilt frame keeps the column names but gets a fresh 0..n-1 index.
scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), columns = X_train.columns)

In [None]:
def score_regression(model):
    """Return the share of 2023 rounds in which `model` picks the race winner.

    For each 2023 round found in the notebook-level ``df``, the round's rows
    are scaled with the notebook-level fitted ``scaler`` and passed to
    ``model.predict``. The row with the lowest predicted value is flagged as
    the predicted winner; rows whose ``podium`` equals 1 are the actual
    winners. The per-round precision of that single pick is averaged over
    all scored rounds.

    Parameters
    ----------
    model : fitted estimator exposing ``predict(X)`` with one value per row.

    Returns
    -------
    float
        Mean per-round precision, between 0 and 1.

    Raises
    ------
    ValueError
        If ``df`` contains no rows for the 2023 season.
    """
    season_rows = df[df.season == 2023]
    rounds = season_rows['round'].unique()
    if len(rounds) == 0:
        raise ValueError("df contains no 2023 rounds; nothing to score")

    score = 0
    for circuit in rounds:

        test = season_rows[season_rows['round'] == circuit]
        X_test = test.drop(['driver', 'podium'], axis = 1)
        y_test = test.podium

        # Scale with the scaler fitted on the training data.
        X_test = pd.DataFrame(scaler.transform(X_test), columns = X_test.columns)

        # Predictions are positional, so align the targets by position too.
        prediction_df = pd.DataFrame(model.predict(X_test), columns = ['results'])
        prediction_df['podium'] = y_test.reset_index(drop = True)
        prediction_df['actual'] = (prediction_df.podium == 1).astype(int)

        # Lowest predicted value first: row 0 after sorting is the predicted winner.
        prediction_df = (prediction_df
                         .sort_values('results', ascending = True)
                         .reset_index(drop = True))
        prediction_df['predicted'] = (prediction_df.index == 0).astype(int)

        score += precision_score(prediction_df.actual, prediction_df.predicted)

    # Average over the number of rounds actually scored. Dividing by the
    # largest round number is only right when rounds are labelled 1..N
    # with no gaps.
    return score / len(rounds)

In [25]:
# Accumulator for model comparison: one independent list per column,
# appended to after every fitted configuration.
comparison_dict = {column: [] for column in ('model', 'params', 'score')}

In [None]:
# Exhaustive search over MLPRegressor hyper-parameters. Every configuration
# is fitted on the training data, scored with score_regression, logged in
# comparison_dict, and the best-scoring fitted model is saved to disk.
# NOTE(review): score_regression evaluates on the 2023 rows, so the best
# score reported here is also the selection criterion and is optimistic.
best_score = -np.inf
best_model = None

params={'hidden_layer_sizes': [(80,20,40,5), (75,30,50,10,3)],
        'activation': ['relu'],
        'solver': ['adam'],
        'alpha': np.logspace(-4,1,20)}

# Flatten the grid into (hidden_layer_sizes, activation, solver, alpha)
# tuples. The last factor varies fastest, matching a nested-loop order.
param_grid = [
    (hidden_layer_sizes, activation, solver, alpha)
    for hidden_layer_sizes in params['hidden_layer_sizes']
    for activation in params['activation']
    for solver in params['solver']
    for alpha in params['alpha']
]

for model_params in param_grid:
    hidden_layer_sizes, activation, solver, alpha = model_params
    # Fixed random_state keeps weight initialisation reproducible across runs.
    model = MLPRegressor(hidden_layer_sizes = hidden_layer_sizes,
                         activation = activation,
                         solver = solver,
                         alpha = alpha,
                         random_state = 1)
    model.fit(X_train, y_train)

    model_score = score_regression(model)

    comparison_dict['model'].append('nn_regressor')
    comparison_dict['params'].append(model_params)
    comparison_dict['score'].append(model_score)

    # Strict '>' keeps the earliest configuration when scores tie.
    if model_score > best_score:
        best_score = model_score
        best_model = model


# Check for None explicitly: an estimator's truthiness says nothing about
# whether a model was selected.
if best_model is not None:
    joblib.dump(best_model, 'nn_regressor_weight.pkl')
    print(f"Best model saved with score: {best_score}")
else:
    print("No model was saved.")



Best model saved with score: 0.7727272727272727


