In [356]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC, SVR
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.ensemble import BaggingRegressor, BaggingClassifier, AdaBoostRegressor, AdaBoostClassifier

In [357]:
df_train = pd.read_csv('../data_new/train_pre.csv').drop_duplicates()
df_test = pd.read_csv('../data_new/test_pre.csv').drop_duplicates()

In [358]:
df_train.columns

Index(['raceId', 'year', 'circuitId', 'weather_warm', 'weather_cold',
       'weather_dry', 'weather_wet', 'weather_cloudy', 'driverId',
       'constructorId', 'grid', 'results_positionOrder', 'circuit_country',
       'constructor_wins', 'constructor_nationality', 'driver_nationality',
       'driver_wins', 'driver_age', 'results_points'],
      dtype='object')

## Regression

In [359]:
def get_score(actual, pred, margin):
    index = np.argmin(pred)
    if actual[index] <= margin:
        return 1
    return 0

In [360]:
def get_arrays(prediction, actual):
    temp_actual, temp_predicton = [], []

    for i in prediction:
        if i<=3:
            temp_predicton.append(1)
        else:
            temp_predicton.append(0)
    
    for i in actual:
        if(i<=3):
            temp_actual.append(1)
        else:
            temp_actual.append(0)
    

    return temp_actual, temp_predicton

In [361]:
def get_podium(prediced_scores):
    d = {}
    for i in range(len(prediced_scores)):
        d[prediced_scores[i]] = i

    prediced_scores[::-1].sort()

    podium = np.zeros(len(prediced_scores))

    for i in range(len(prediced_scores)):
        podium[d[prediced_scores[i]]] = i + 1
    
    return podium

### Ridge Regression

In [362]:
def ridgeRegression(X_train, Y_train):
    model = Ridge(alpha = 1000, solver = 'svd')
    model.fit(X_train, Y_train)        

    return model

In [363]:
def get_Ridge_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = ridgeRegression(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Lasso Regression

In [364]:
def lassoRegression(X_train, Y_train):
    model = Lasso(alpha = 0.1, selection = 'cyclic')
    model.fit(X_train, Y_train)        

    return model

In [365]:
def get_Lasso_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = lassoRegression(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Decision Tree regressor

In [366]:
def DTregressor(X_train, Y_train):
    model = DecisionTreeRegressor()
    model.fit(X_train, Y_train)

    return model

In [367]:
def get_DT_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = DTregressor(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Random Forest regressor

In [368]:
def RFregressor(X_train, Y_train):
    model = RandomForestRegressor(max_depth = 10, min_samples_split = 10, n_estimators = 100)
    model.fit(X_train, Y_train)

    return model

In [369]:
def get_RF_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = RFregressor(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Support Vector regressor

In [370]:
def SVregressor(X_train, Y_train):
    model = SVR()
    model.fit(X_train, Y_train)

    return model

In [371]:
def get_SV_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = SVregressor(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Multilayer Perceptron regressor

In [372]:
def MLPregressor(X_train, Y_train):
    model = MLPRegressor(activation = 'logistic', batch_size = 64, hidden_layer_sizes = (256, 32), learning_rate_init = 0.001)
    model.fit(X_train, Y_train)

    return model

In [373]:
def get_MLP_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = MLPregressor(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Bagging regressor

In [374]:
def Baggingregressor(X_train, Y_train):
    model = BaggingRegressor(base_estimator = RandomForestRegressor(max_depth = 10, min_samples_split = 10, n_estimators = 100))
    model.fit(X_train, Y_train)

    return model

In [375]:
def get_Bagging_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = Baggingregressor(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### AdaBoost regressor

In [376]:
def AdaBoostingregressor(X_train, Y_train):
    model = AdaBoostRegressor(base_estimator = RandomForestRegressor(max_depth = 10, min_samples_split = 10, n_estimators = 100))
    model.fit(X_train, Y_train)

    return model

In [377]:
def get_Ada_Boosting_Regression_Score():
    X_train = df_train.drop(columns = ['results_points'])
    Y_train = df_train['results_points']
    model = AdaBoostingregressor(X_train.drop(columns = ['results_positionOrder']), Y_train)

    precision = 0
    accuracy = 0
    recall = 0
    f1 = 0
    score1 = 0
    score2 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i].drop(columns = ['results_points'])
        Y_test = df_test[df_test['raceId'] == i]['results_points'].to_numpy()

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder']))
        prediction = get_podium(prediction)
        actual = X_test['results_positionOrder'].to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

## Compiling all regressions

In [378]:
regression_scores = {}
regression_scores['Ridge Regression'] = get_Ridge_Regression_Score()
regression_scores['Lasso Regression'] = get_Lasso_Regression_Score()
regression_scores['DT Regression'] = get_DT_Regression_Score()
regression_scores['RF Regression'] = get_RF_Regression_Score()
regression_scores['SV Regression'] = get_SV_Regression_Score()
regression_scores['MLP Regression'] = get_MLP_Regression_Score()
regression_scores['Bagging Regression'] = get_Bagging_Regression_Score()
regression_scores['Adaboost Regression'] = get_Ada_Boosting_Regression_Score()

In [379]:
pd.DataFrame(regression_scores, index = ['Precision', 'Accuracy', 'Recall', 'F1', 'winner_score', 'podium_score'])

Unnamed: 0,Ridge Regression,Lasso Regression,DT Regression,RF Regression,SV Regression,MLP Regression,Bagging Regression,Adaboost Regression
Precision,0.563333,0.565833,0.147795,0.587452,0.553333,0.586667,0.60769,0.59869
Accuracy,0.886854,0.887604,0.308191,0.88827,0.884037,0.893967,0.895775,0.892358
Recall,0.563333,0.566667,0.86,0.6325,0.553333,0.586667,0.629167,0.6325
F1,0.563333,0.56619,0.250445,0.60367,0.553333,0.586667,0.614873,0.609963
winner_score,0.46,0.45,0.59,0.53,0.49,0.48,0.52,0.55
podium_score,0.8,0.8,0.88,0.89,0.83,0.86,0.91,0.91


## Classification

### Logistic Regression

In [380]:
def logisticRegression(X_train, Y_train):
    model = LogisticRegression(max_iter=2500, multi_class = 'ovr')
    model.fit(X_train, Y_train)

    return model

In [381]:
def get_Logistic_Regression_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = logisticRegression(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Naive Bayes Classifier

In [382]:
def gaussianNB(X_train, Y_train):
    model = GaussianNB()
    model.fit(X_train, Y_train)

    return model

In [383]:
def get_Gaussian_NB_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = gaussianNB(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Decision Tree Classifier

In [384]:
def DT_classifier(X_train, Y_train):
    model = DecisionTreeClassifier()
    model.fit(X_train, Y_train)

    return model

In [385]:
def get_DT_classifier_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = DT_classifier(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Random Forest Classifier

In [386]:
def RF_classifier(X_train, Y_train):
    model = RandomForestClassifier(max_depth=20, min_samples_split=10, n_estimators=100)
    model.fit(X_train, Y_train)

    return model

In [387]:
def get_RF_classifier_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = RF_classifier(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Support Vector Classifier

In [388]:
def SV_classifier(X_train, Y_train):
    model = SVC()
    model.fit(X_train, Y_train)

    return model

In [389]:
def get_SV_classifier_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = SV_classifier(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Multilayer Perceptron Classifier

In [390]:
def MLP_classifier(X_train, Y_train):
    model = MLPClassifier(activation = 'identity', alpha = 0.001, hidden_layer_sizes = (256, 32), solver = 'adam')
    model.fit(X_train, Y_train)

    return model

In [391]:
def get_MLP_classifier_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = MLP_classifier(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Bagging Classifier

In [392]:
def Bagging_classifier(X_train, Y_train):
    model = BaggingClassifier(base_estimator = RandomForestClassifier(max_depth=20, min_samples_split=10, n_estimators=100))
    model.fit(X_train, Y_train)

    return model

In [393]:
def get_Bagging_classifier_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = Bagging_classifier(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

### Ada Boosting Classifier

In [394]:
def Ada_Boosting_classifier(X_train, Y_train):
    model = AdaBoostClassifier(base_estimator = RandomForestClassifier(max_depth=20, min_samples_split=10, n_estimators=100))
    model.fit(X_train, Y_train)

    return model

In [395]:
def get_Ada_Boosting_classifier_Score():
    X_train = df_train
    Y_train = df_train['results_positionOrder']
    model = Ada_Boosting_classifier(X_train.drop(columns = ['results_positionOrder', 'results_points']), Y_train)

    score1 = 0
    score2 = 0
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    for i in df_test['raceId'].unique():
        X_test = df_test[df_test['raceId'] == i]
        Y_test = df_test[df_test['raceId'] == i]['results_positionOrder']

        prediction = model.predict(X_test.drop(columns = ['results_positionOrder', 'results_points']))
        actual = Y_test.to_numpy()
        temp_actual, temp_prediction = get_arrays(prediction, actual)
        score1 += get_score(prediction, actual, 1)
        score2 += get_score(prediction, actual, 3)
        
        
        accuracy += accuracy_score(temp_actual, temp_prediction)
        precision += precision_score(temp_actual, temp_prediction)
        recall += recall_score(temp_actual, temp_prediction)
        f1 += f1_score(temp_actual, temp_prediction)

    return precision/len(df_test['raceId'].unique()), accuracy/len(df_test['raceId'].unique()), recall/len(df_test['raceId'].unique()), f1/len(df_test['raceId'].unique()), score1/len(df_test['raceId'].unique()), score2/len(df_test['raceId'].unique())

## Compiling all classifications

In [396]:
classification_scores = {}
classification_scores['Logistic Regression'] = get_Logistic_Regression_Score()
classification_scores['NB classification'] = get_Gaussian_NB_Score()
classification_scores['DT classification'] = get_DT_classifier_Score()
classification_scores['RF classification'] = get_RF_classifier_Score()
classification_scores['SV classification'] = get_SV_classifier_Score()
classification_scores['MLP classification'] = get_MLP_classifier_Score()
classification_scores['Bagging classification'] = get_Bagging_classifier_Score()
classification_scores['Ada Boosting classification'] = get_Ada_Boosting_classifier_Score()

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [397]:
pd.DataFrame(classification_scores, index = ['precision', 'accuracy', 'recall', 'f1', 'winner_score', 'podium_score'])

Unnamed: 0,Logistic Regression,NB classification,DT classification,RF classification,SV classification,MLP classification,Bagging classification,Ada Boosting classification
precision,0.518321,0.518619,0.559667,0.559143,0.0,0.540952,0.577262,0.529357
accuracy,0.887179,0.883279,0.870949,0.882999,0.866636,0.894698,0.888123,0.877971
recall,0.566667,0.596667,0.4825,0.741667,0.0,0.536667,0.758333,0.735
f1,0.502431,0.516849,0.483516,0.62181,0.0,0.499238,0.639452,0.605913
winner_score,0.55,0.61,0.41,0.86,0.0,0.52,0.88,0.77
podium_score,0.72,0.78,0.69,0.94,0.0,0.72,0.95,0.94
