In [56]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC, SVR
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.ensemble import BaggingRegressor, BaggingClassifier, GradientBoostingClassifier, GradientBoostingRegressor

In [57]:
# Locations of the pre-processed train/test splits.
TRAIN_CSV = './data/train_pre.csv'
TEST_CSV = './data/test_pre.csv'

# Read both splits and discard exact duplicate rows.
df_train = pd.read_csv(TRAIN_CSV).drop_duplicates()
df_test = pd.read_csv(TEST_CSV).drop_duplicates()

# Display the column names of the training frame.
df_train.columns

Index(['raceId', 'year', 'circuitId', 'weather_warm', 'weather_cold',
       'weather_dry', 'weather_wet', 'weather_cloudy', 'driverId',
       'constructorId', 'grid', 'results_positionOrder', 'circuit_country',
       'constructor_wins', 'constructor_nationality', 'driver_nationality',
       'driver_wins', 'driver_age', 'results_points'],
      dtype='object')

In [58]:
def RF_classifier(X_train, Y_train, random_state=42):
    """Fit a random-forest classifier on the given training data.

    Args:
        X_train: Feature matrix passed to ``RandomForestClassifier.fit``.
        Y_train: Target labels passed to ``RandomForestClassifier.fit``.
        random_state: Seed forwarded to ``RandomForestClassifier`` so that
            repeated runs (e.g. Restart & Run All) build the same forest and
            therefore report the same score. Pass ``None`` to get the
            unseeded, run-to-run varying behaviour.

    Returns:
        The fitted ``RandomForestClassifier`` instance.
    """
    model = RandomForestClassifier(random_state=random_state)
    model.fit(X_train, Y_train)

    return model

In [59]:
def get_score(pred, actual, margin):
    """Return the fraction of predictions that lie within ``margin`` of the truth.

    A prediction counts as a hit when it equals the actual value or when the
    absolute difference between the two is at most ``margin``.

    Args:
        pred: Sequence of predicted values (list, NumPy array, pandas Series...).
        actual: Sequence of true values, the same length as ``pred``.
        margin: Largest absolute error that still counts as a hit.

    Returns:
        Number of hits divided by the number of predictions, as a float.
        Returns ``0.0`` when there are no predictions.

    Raises:
        ValueError: If ``pred`` and ``actual`` do not have the same shape.
    """
    # Convert to arrays so the comparison is positional; indexing a pandas
    # Series with ``series[i]`` is label-based and fails on a non-default index.
    pred_arr = np.asarray(pred)
    actual_arr = np.asarray(actual)

    if pred_arr.shape != actual_arr.shape:
        raise ValueError(
            "pred and actual must have the same shape, got "
            + str(pred_arr.shape) + " and " + str(actual_arr.shape)
        )

    # Nothing to score: avoid dividing by zero.
    if pred_arr.size == 0:
        return 0.0

    # Exact matches are kept as an explicit term so they still count when a
    # negative margin is supplied.
    hits = (pred_arr == actual_arr) | (np.abs(pred_arr - actual_arr) <= margin)
    return int(np.count_nonzero(hits)) / hits.size

In [71]:
def get_RF_classifier_Score(margin=0, verbose=True):
    """Train the random-forest classifier and score it race by race.

    The model is fitted on ``df_train`` with ``results_positionOrder`` as the
    target and every other column as a feature. For each distinct ``raceId``
    in ``df_test`` the predicted positions are compared with the actual ones
    via ``get_score`` and the per-race scores are averaged.

    Args:
        margin: Largest absolute position error that still counts as a hit
            (forwarded to ``get_score``). The default ``0`` requires an exact
            match.
        verbose: When true, print the predicted and actual position arrays of
            every race.

    Returns:
        Mean of the per-race scores, or ``0.0`` if ``df_test`` has no races.
    """
    target = 'results_positionOrder'

    # NOTE(review): every non-target column is used as a feature, including
    # `raceId` and `results_points`. If `results_points` is derived from the
    # finishing position this leaks the target into the features - confirm.
    model = RF_classifier(df_train.drop(columns=[target]), df_train[target])

    race_scores = []
    # A single groupby pass replaces re-filtering df_test twice per race;
    # sort=False keeps races in order of first appearance.
    for _, race_rows in df_test.groupby('raceId', sort=False):
        prediction = model.predict(race_rows.drop(columns=[target]))
        actual = race_rows[target].to_numpy()

        if verbose:
            print(str(prediction) + " " + str(actual))

        race_scores.append(get_score(prediction, actual, margin))

    # No races to evaluate: avoid dividing by zero.
    if not race_scores:
        return 0.0
    return sum(race_scores) / len(race_scores)

In [72]:
# Fit the random forest and display its mean per-race score.
get_RF_classifier_Score()

[ 2  8 18  4 11 11 16  5  1  7 10 13  3  5  9 15 13 20 11 13] [ 2  8 17  4 13 19 18  6  1  7 10 20  3  5  9 16 12 11 14 15]
[ 1  9 14  3  8  7 11 13  2 12 17 12  4  5 13 11 15  6 10 17] [ 1  9 17  3  8  7 11 13  2 20 19 14  4  5 12 16 18  6 10 15]
[ 1  7 17  5 10 12 17 17  2 19 12 11  4  3 14 14  6  8  9 16] [ 1  7 16  5 10 18 20 13  2 17 12 19  4  3 14 15  6  8  9 11]
[ 1 14 17  4 20 11 10  7  2 20  9  8  3  5 14 17 13  6 20 16] [ 1 14 18  4 15 12 10  7  2 13  9  8  3  5 19 17 20  6 11 16]
[ 1 11 18  2 12  9 10 11  3 20  7  6  4 18 12 14 12  5  8 15] [ 1 17 18  2 12  9 10 14  3 13  7  6  4 20 16 15 11  5  8 19]
[ 1  8 18 12 20  7 16 15  2 10  9  6  5  3 12 12 19  4 13 14] [ 1  8 15 16 17  7 19 20  2 10  9  6  5  3 13 14 11  4 12 18]
[ 1  7 18  3 14 14 16 20  8 17 11  5  2  4 14 18  9  6 10 17] [ 1  7 19  3 11 14 20 13  8 12 15  5  2  4 17 16  9  6 10 18]
[ 2 17 20  4  6 19 11 13  3  8  7 12 19  1 10 19 16  9  5 14] [ 2 16 17  4  6 14 13 12  3  8  7 19 20  1 10 15 11  9  5 18]
[ 3 20 1

0.5428571428571429

In [73]:
def logisticRegression(X_train, Y_train):
    """Fit a one-vs-rest logistic-regression classifier and return it.

    Args:
        X_train: Feature matrix handed to ``LogisticRegression.fit``.
        Y_train: Target labels handed to ``LogisticRegression.fit``.

    Returns:
        The fitted ``LogisticRegression`` estimator (solver iterations capped
        at 2500).
    """
    # NOTE(review): recent scikit-learn releases deprecate the `multi_class`
    # argument - verify against the installed version.
    # ``fit`` returns the estimator itself, so the fitted model can be
    # returned directly.
    return LogisticRegression(multi_class='ovr', max_iter=2500).fit(X_train, Y_train)

In [74]:
def get_Logistic_Regression_Score(margin=3, verbose=True):
    """Train the logistic-regression model and score it race by race.

    The model is fitted on ``df_train`` with ``results_positionOrder`` as the
    target and every other column as a feature. For each distinct ``raceId``
    in ``df_test`` the predicted positions are compared with the actual ones
    via ``get_score`` and the per-race scores are averaged.

    Args:
        margin: Largest absolute position error that still counts as a hit
            (forwarded to ``get_score``). Defaults to ``3``.
        verbose: When true, print for every race the row offset of the
            predicted winner (lowest predicted position) together with that
            row's ``driverId``, and the row offset of the actual winner.

    Returns:
        Mean of the per-race scores, or ``0.0`` if ``df_test`` has no races.
    """
    target = 'results_positionOrder'

    # NOTE(review): every non-target column is used as a feature, including
    # `raceId` and `results_points`. If `results_points` is derived from the
    # finishing position this leaks the target into the features - confirm.
    model = logisticRegression(df_train.drop(columns=[target]), df_train[target])

    race_scores = []
    # A single groupby pass replaces re-filtering df_test twice per race;
    # sort=False keeps races in order of first appearance.
    for race_id, race_rows in df_test.groupby('raceId', sort=False):
        prediction = model.predict(race_rows.drop(columns=[target]))
        actual = race_rows[target].to_numpy()

        if verbose:
            # argmin gives the position *within this race's rows* of the
            # lowest (best) finishing position.
            predicted_winner = np.argmin(prediction)
            actual_winner = np.argmin(actual)

            print("Race No: " + str(race_id))
            print("Predicted: " + str(predicted_winner) + ' ' + str(race_rows['driverId'].iloc[predicted_winner]))
            print("Actual: " + str(actual_winner))
            print()

        race_scores.append(get_score(prediction, actual, margin))

    # No races to evaluate: avoid dividing by zero.
    if not race_scores:
        return 0.0
    return sum(race_scores) / len(race_scores)

In [75]:
# Fit the logistic-regression model and display its mean per-race score.
get_Logistic_Regression_Score()

Race No: 1010
Predicted: 8 2.7150975121010124
Actual: 8

Race No: 1012
Predicted: 0 5.030494044842969
Actual: 0

Race No: 1011
Predicted: 0 5.030494044842969
Actual: 0

Race No: 1014
Predicted: 0 5.030494044842969
Actual: 0

Race No: 1015
Predicted: 0 5.030494044842969
Actual: 0

Race No: 1019
Predicted: 0 5.030494044842969
Actual: 0

Race No: 1021
Predicted: 0 5.030494044842969
Actual: 0

Race No: 1022
Predicted: 0 5.030494044842969
Actual: 13

Race No: 1023
Predicted: 0 5.030494044842969
Actual: 13

Race No: 1024
Predicted: 3 3.6352554824694607
Actual: 3

Race No: 1026
Predicted: 0 5.030494044842969
Actual: 8

Race No: 1029
Predicted: 12 4.0099426190107215
Actual: 12

Race No: 1030
Predicted: 0 5.030494044842969
Actual: 0

Race No: 1016
Predicted: 0 5.030494044842969
Actual: 0

Race No: 1020
Predicted: 10 -0.06787930668749181
Actual: 12

Race No: 1018
Predicted: 12 4.0099426190107215
Actual: 12

Race No: 1027
Predicted: 0 5.030494044842969
Actual: 0

Race No: 1017
Predicted: 0 5.0304

0.7190476190476189