In [1]:
import numpy as np
import pandas as pd
import datetime
from sklearn.metrics import confusion_matrix, precision_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn import svm
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor

In [2]:
# Print NumPy floats with 4 digits of precision.
np.set_printoptions(precision=4)

In [3]:
# Load the merged dataset; the path is relative to the working directory.
data = pd.read_csv('final_df_merged.csv')

In [4]:
# Work on a copy so the frame loaded from disk (`data`) is left untouched.
df = data.copy()
# Binary target: 1 where the original podium value equals 1, 0 everywhere else.
df['podium'] = df['podium'].eq(1).astype('int64')

# Every season before 2020 is used for training.
train = df[df['season'] < 2020]
y_train = train['podium']
# NOTE(review): `points` stays among the features; if it holds the points
# scored in the same race it would leak the target -- confirm.
X_train = train.drop(columns=['driver', 'podium'])

# Fit the scaler on the training features only, then standardise them while
# keeping the column names.
scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)

In [5]:
def score_classification(model):
    """Score a fitted classifier on the 2020 season, one round at a time.

    For each 2020 round, the rows of that round are scaled with the
    notebook-level ``scaler`` and ranked by the model's second
    ``predict_proba`` column (labelled ``proba_1``). Only the top-ranked row
    is marked as the predicted positive, so the per-round precision is 1.0
    when that row's actual ``podium`` label is 1 and 0.0 otherwise. The
    returned score is the mean of these per-round precisions.

    Relies on the notebook-level ``df`` and on ``scaler`` having been fitted.

    Parameters
    ----------
    model : fitted estimator
        Must expose ``predict_proba`` returning two columns.

    Returns
    -------
    float
        Mean per-round precision over the 2020 rounds present in ``df``.

    Raises
    ------
    ValueError
        If ``df`` contains no 2020 rows.
    """
    # The 2020 slice does not change between iterations, so filter it once.
    season_rows = df[df.season == 2020]
    rounds = season_rows['round'].unique()
    if len(rounds) == 0:
        raise ValueError('df contains no 2020 rows to score the model on')

    score = 0
    for circuit in rounds:
        test = season_rows[season_rows['round'] == circuit]
        X_test = test.drop(['driver', 'podium'], axis=1)
        y_test = test.podium

        # scaling (reuses the scaler fitted on the training features)
        X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

        # make predictions
        prediction_df = pd.DataFrame(model.predict_proba(X_test),
                                     columns=['proba_0', 'proba_1'])
        # y_test keeps the index of df; reset it so it aligns positionally
        # with the freshly built prediction frame.
        prediction_df['actual'] = y_test.reset_index(drop=True)
        prediction_df = (prediction_df
                         .sort_values('proba_1', ascending=False)
                         .reset_index(drop=True))
        # After sorting, position 0 is the most likely positive: flag only it.
        prediction_df['predicted'] = (prediction_df.index == 0).astype(int)

        score += precision_score(prediction_df.actual, prediction_df.predicted)

    # Average over the number of rounds actually scored. Dividing by the
    # highest round number would deflate the score whenever round numbers are
    # not contiguous or do not start at 1.
    model_score = score / len(rounds)
    return model_score

In [6]:
# Accumulator for model comparison: one independent, initially empty list per
# field; each evaluated configuration appends one entry to every list.
comparison_dict = {field: [] for field in ('model', 'params', 'score')}

## SVM Classifier

This cell is fairly slow to run: the grid below fits 20 × 20 × 4 = 1,600 SVM models.

In [8]:
# Hyper-parameter grid: 20 gamma values x 20 C values x 4 kernels = 1,600 fits.
params = {'gamma': np.logspace(-4, -1, 20),
          'C': np.logspace(-2, 1, 20),
          'kernel': ['linear', 'poly', 'rbf', 'sigmoid']}

# Fixed seed so repeated runs give the same scores: with probability=True,
# scikit-learn's SVC uses its random_state when producing probability estimates.
SVM_RANDOM_STATE = 42

# NOTE(review): per the scikit-learn docs gamma only affects the rbf, poly and
# sigmoid kernels, so the linear fits look repeated for every gamma -- confirm
# before pruning the grid.
# NOTE(review): re-running this cell appends a second copy of every entry to
# comparison_dict; re-run the cell that creates comparison_dict first.
for gamma in params['gamma']:
    for c in params['C']:
        for kernel in params['kernel']:
            model_params = (gamma, c, kernel)
            model = svm.SVC(probability=True, gamma=gamma, C=c, kernel=kernel,
                            random_state=SVM_RANDOM_STATE)
            model.fit(X_train, y_train)

            model_score = score_classification(model)

            comparison_dict['model'].append('svm_classifier')
            comparison_dict['params'].append(model_params)
            comparison_dict['score'].append(model_score)

In [8]:
# Display the working DataFrame as the cell output.
df

Unnamed: 0,season,round,driver,grid,points,podium,constructor_points,constructor_wins,constructor_standings_pos,driver_points,...,constructor_minardi,constructor_prost,constructor_red_bull,constructor_renault,constructor_sauber,constructor_team_lotus,constructor_toro_rosso,constructor_toyota,constructor_tyrrell,constructor_williams
0,1990,1,berger,1,0.0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1990,1,martini,2,0.0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,1990,1,alesi,4,6.0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,1990,1,senna,5,9.0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1990,1,piquet,6,3.0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9979,2020,17,leclerc,12,0.0,0,131,0,6,98,...,0,0,0,0,0,0,0,0,0,0
9980,2020,17,vettel,13,0.0,0,131,0,6,33,...,0,0,0,0,0,0,0,0,0,0
9981,2020,17,giovinazzi,14,0.0,0,8,0,8,4,...,0,0,0,0,0,0,0,0,0,0
9982,2020,17,russell,16,0.0,0,0,0,10,3,...,0,0,0,0,0,0,0,0,0,1


In [9]:
# Dump every collected model / params / score entry as plain text.
print(comparison_dict)

{'model': ['svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier', 'svm_classifier',