In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC, SVR
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.ensemble import BaggingRegressor, BaggingClassifier, AdaBoostRegressor, AdaBoostClassifier

In [2]:
df_train = pd.read_csv('../data_new/qualifying_train_pre.csv').drop_duplicates()
df_test = pd.read_csv('../data_new/qualifying_test_pre.csv').drop_duplicates()

In [3]:
df_train.columns

Index(['raceId', 'year', 'circuitId', 'weather_warm', 'weather_cold',
       'weather_dry', 'weather_wet', 'weather_cloudy', 'driverId',
       'constructorId', 'grid', 'results_positionOrder', 'circuit_country',
       'constructor_wins', 'constructor_nationality', 'driver_nationality',
       'driver_wins', 'driver_age', 'Unnamed: 0', 'qualifying_position',
       'q1_time_ms', 'q2_time_ms', 'q3_time_ms', 'results_points'],
      dtype='object')

## Regression

In [4]:
def get_score(actual, pred, margin):
    index = np.argmin(pred)
    if actual[index] <= margin:
        return 1
    return 0

In [5]:
def get_arrays(prediction, actual):
    temp_actual, temp_predicton = [], []

    for i in prediction:
        if i<=3:
            temp_predicton.append(1)
        else:
            temp_predicton.append(0)
    
    for i in actual:
        if(i<=3):
            temp_actual.append(1)
        else:
            temp_actual.append(0)
    

    return temp_actual, temp_predicton

In [6]:
def get_podium(prediced_scores):
    d = {}
    for i in range(len(prediced_scores)):
        d[prediced_scores[i]] = i

    prediced_scores[::-1].sort()

    podium = np.zeros(len(prediced_scores))

    for i in range(len(prediced_scores)):
        podium[d[prediced_scores[i]]] = i + 1
    
    return podium

### Ridge Regression

In [7]:
def ridgeRegression(X_train, Y_train):
    model = Ridge(alpha = 1000, solver = 'svd')
    model.fit(X_train, Y_train)        

    return model

In [8]:
def get_Ridge_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = ridgeRegression(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Lasso Regression

In [9]:
def lassoRegression(X_train, Y_train):
    model = Lasso(alpha = 0.1, selection = 'cyclic')
    model.fit(X_train, Y_train)        

    return model

In [10]:
def get_Lasso_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = lassoRegression(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Decision Tree regressor

In [11]:
def DTregressor(X_train, Y_train):
    model = DecisionTreeRegressor()
    model.fit(X_train, Y_train)

    return model

In [12]:
def get_DT_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = DTregressor(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Random Forest regressor

In [13]:
def RFregressor(X_train, Y_train):
    model = RandomForestRegressor(max_depth = 10, min_samples_split = 10, n_estimators = 100)
    model.fit(X_train, Y_train)

    return model

In [14]:
def get_RF_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = RFregressor(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Support Vector regressor

In [15]:
def SVregressor(X_train, Y_train):
    model = SVR()
    model.fit(X_train, Y_train)

    return model

In [16]:
def get_SV_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = SVregressor(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Multilayer Perceptron regressor

In [17]:
def MLPregressor(X_train, Y_train):
    model = MLPRegressor(activation = 'logistic', batch_size = 64, hidden_layer_sizes = (256, 32), learning_rate_init = 0.001)
    model.fit(X_train, Y_train)

    return model

In [18]:
def get_MLP_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = MLPregressor(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Bagging regressor

In [19]:
def Baggingregressor(X_train, Y_train):
    model = BaggingRegressor(base_estimator = RandomForestRegressor(max_depth = 10, min_samples_split = 10, n_estimators = 100))
    model.fit(X_train, Y_train)

    return model

In [20]:
def get_Bagging_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = Baggingregressor(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### AdaBoost regressor

In [21]:
def AdaBoostingregressor(X_train, Y_train):
    model = AdaBoostRegressor(base_estimator = RandomForestRegressor(max_depth = 10, min_samples_split = 10, n_estimators = 100))
    model.fit(X_train, Y_train)

    return model

In [22]:
def get_Ada_Boosting_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = AdaBoostingregressor(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

## Compiling all regressions

In [23]:
regression_scores = {}
regression_scores['Ridge Regression'] = get_Ridge_Regression_Score()
regression_scores['Lasso Regression'] = get_Lasso_Regression_Score()
regression_scores['DT Regression'] = get_DT_Regression_Score()
regression_scores['RF Regression'] = get_RF_Regression_Score()
regression_scores['SV Regression'] = get_SV_Regression_Score()
regression_scores['MLP Regression'] = get_MLP_Regression_Score()
regression_scores['Bagging Regression'] = get_Bagging_Regression_Score()
regression_scores['Adaboost Regression'] = get_Ada_Boosting_Regression_Score()

In [24]:
pd.DataFrame(regression_scores, index = ['Precision', 'Accuracy', 'Recall', 'F1', 'winner_score', 'podium_score'])

Unnamed: 0,Ridge Regression,Lasso Regression,DT Regression,RF Regression,SV Regression,MLP Regression,Bagging Regression,Adaboost Regression
Precision,0.579235,0.584699,0.144311,0.566081,0.393443,0.129505,0.617486,0.599727
Accuracy,0.885471,0.886838,0.332445,0.867723,0.832936,0.31651,0.895971,0.890675
Recall,0.579235,0.584699,0.770492,0.612022,0.393443,0.726776,0.617486,0.601093
F1,0.579235,0.584699,0.242682,0.582227,0.393443,0.219252,0.617486,0.600312
winner_score,0.508197,0.52459,0.655738,0.622951,0.180328,0.672131,0.639344,0.672131
podium_score,0.836066,0.836066,0.868852,0.918033,0.377049,0.688525,0.918033,0.934426


## Classification

### Logistic Regression

In [25]:
def logisticRegression(X_train, Y_train):
    model = LogisticRegression(max_iter=2500, multi_class = 'ovr')
    model.fit(X_train, Y_train)

    return model

In [26]:
def get_Logistic_Regression_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = logisticRegression(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Naive Bayes Classifier

In [27]:
def gaussianNB(X_train, Y_train):
    model = GaussianNB()
    model.fit(X_train, Y_train)

    return model

In [28]:
def get_Gaussian_NB_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = gaussianNB(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Decision Tree Classifier

In [29]:
def DT_classifier(X_train, Y_train):
    model = DecisionTreeClassifier()
    model.fit(X_train, Y_train)

    return model

In [30]:
def get_DT_classifier_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = DT_classifier(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Random Forest Classifier

In [31]:
def RF_classifier(X_train, Y_train):
    model = RandomForestClassifier(max_depth=20, min_samples_split=10, n_estimators=100)
    model.fit(X_train, Y_train)

    return model

In [32]:
def get_RF_classifier_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = RF_classifier(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Support Vector Classifier

In [33]:
def SV_classifier(X_train, Y_train):
    model = SVC()
    model.fit(X_train, Y_train)

    return model

In [34]:
def get_SV_classifier_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = SV_classifier(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Multilayer Perceptron Classifier

In [35]:
def MLP_classifier(X_train, Y_train):
    model = MLPClassifier(activation = 'identity', alpha = 0.001, hidden_layer_sizes = (256, 32), solver = 'adam')
    model.fit(X_train, Y_train)

    return model

In [36]:
def get_MLP_classifier_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = MLP_classifier(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Bagging Classifier

In [37]:
def Bagging_classifier(X_train, Y_train):
    model = BaggingClassifier(base_estimator = RandomForestClassifier(max_depth=20, min_samples_split=10, n_estimators=100))
    model.fit(X_train, Y_train)

    return model

In [38]:
def get_Bagging_classifier_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = Bagging_classifier(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Ada Boosting Classifier

In [39]:
def Ada_Boosting_classifier(X_train, Y_train):
    model = AdaBoostClassifier(base_estimator = RandomForestClassifier(max_depth=20, min_samples_split=10, n_estimators=100))
    model.fit(X_train, Y_train)

    return model

In [40]:
def get_Ada_Boosting_classifier_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = Ada_Boosting_classifier(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

## Compiling all classifications

In [41]:
classification_scores = {}
classification_scores['Logistic Regression'] = get_Logistic_Regression_Score()
classification_scores['NB classification'] = get_Gaussian_NB_Score()
classification_scores['DT classification'] = get_DT_classifier_Score()
classification_scores['RF classification'] = get_RF_classifier_Score()
classification_scores['SV classification'] = get_SV_classifier_Score()
classification_scores['MLP classification'] = get_MLP_classifier_Score()
classification_scores['Bagging classification'] = get_Bagging_classifier_Score()
classification_scores['Ada Boosting classification'] = get_Ada_Boosting_classifier_Score()



In [42]:
pd.DataFrame(classification_scores, index = ['precision', 'accuracy', 'recall', 'f1', 'winner_score', 'podium_score'])

Unnamed: 0,Logistic Regression,NB classification,DT classification,RF classification,SV classification,MLP classification,Bagging classification,Ada Boosting classification
precision,0.47057,0.626503,0.600976,0.564754,0.295869,0.544575,0.554372,0.601366
accuracy,0.846893,0.893347,0.871394,0.880611,0.67881,0.873894,0.879153,0.891117
recall,0.781421,0.639344,0.568306,0.726776,0.945355,0.765027,0.721311,0.73224
f1,0.583893,0.615379,0.555438,0.625332,0.450147,0.62502,0.620336,0.645498
winner_score,0.819672,0.901639,0.442623,0.770492,0.983607,0.131148,0.803279,0.344262
podium_score,0.934426,0.918033,0.754098,0.95082,0.983607,0.934426,0.95082,0.95082
