In [61]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [62]:
try:
    df = pd.read_csv('../data/complete_user_data.csv', index_col=0)
except FileNotFoundError as fnfError:
    print(fnfError)

In [63]:
def addFeatures(df):
    df['win_ratio'] = df['wins']/df['battles']
    df['lose_ratio'] = df['losses']/df['battles']
    df['accuracy'] = df['hits']/df['shots']
    df['efficiency'] = df['piercings']/df['hits']
    df['frags_per_battle'] = df['frags']/df['battles']
    df['cap_points_per_battle'] = df['capture_points']/df['battles']
    df['damage_dealt_per_battle'] = df['damage_dealt']/df['battles']
    df['survival_rate'] = df['survived_battles']/df['battles']
    df['spots_per_battle'] = df['spotted']/df['battles']
    
    return df

df = addFeatures(df)
df.columns

Index(['spotted', 'battles_on_stunning_vehicles', 'hits', 'battle_avg_xp',
       'draws', 'max_xp', 'survived_battles', 'wins', 'losses',
       'capture_points', 'battles', 'damage_dealt', 'damage_received',
       'max_frags', 'shots', 'frags', 'max_damage', 'xp',
       'avg_damage_assisted', 'piercings', 'user_id', 'nickname', 'wn8',
       'win_ratio', 'lose_ratio', 'accuracy', 'efficiency', 'frags_per_battle',
       'cap_points_per_battle', 'damage_dealt_per_battle', 'survival_rate',
       'spots_per_battle'],
      dtype='object')

In [64]:
from sklearn.model_selection import train_test_split
df_train, df_test = train_test_split(df, test_size=0.2)

In [65]:
def dropIrrelevantCols(df, cols):
    return df.drop(columns=cols)
df_train = dropIrrelevantCols(df_train, ['user_id', 'nickname', 'battles_on_stunning_vehicles', 'hits', 'draws', 'wins', 'losses', 'capture_points', 'piercings', 'shots', 'max_frags', 'max_damage', 'max_xp', 'spotted', 'damage_dealt', 'damage_received'])
df_test = dropIrrelevantCols(df_test, ['user_id', 'nickname', 'battles_on_stunning_vehicles', 'hits', 'draws', 'wins', 'losses', 'capture_points', 'piercings', 'shots', 'max_frags', 'max_xp', 'max_damage', 'spotted', 'damage_dealt', 'damage_received'])

In [66]:
def convertColsToFloat(df, cols):
    for column in cols:
        try:
            df[column] = df[column].astype(float)
        except:
            df[column] = df[column].str.replace(',', '.')
            df[column] = df[column].astype(float)
            
    return df
df_train = convertColsToFloat(df_train, list(df_train.columns))
df_test = convertColsToFloat(df_test, list(df_test.columns))

In [67]:
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.compose import make_column_transformer

X_train, y_train = df_train.drop(columns=['wn8']), df_train['wn8']
X_test, y_test = df_test.drop(columns=['wn8']), df_test['wn8']
numericalAttribs = X_train.select_dtypes(np.number)
numericalAttribsNames = list(numericalAttribs.columns)
numericalPipeline = make_pipeline(SimpleImputer(strategy='median'),
                                  StandardScaler())

preprocessing = make_column_transformer((numericalPipeline, numericalAttribsNames))

In [68]:
df_train.columns

Index(['battle_avg_xp', 'survived_battles', 'battles', 'frags', 'xp',
       'avg_damage_assisted', 'wn8', 'win_ratio', 'lose_ratio', 'accuracy',
       'efficiency', 'frags_per_battle', 'cap_points_per_battle',
       'damage_dealt_per_battle', 'survival_rate', 'spots_per_battle'],
      dtype='object')

In [69]:
numericalAttribsNames

['battle_avg_xp',
 'survived_battles',
 'battles',
 'frags',
 'xp',
 'avg_damage_assisted',
 'win_ratio',
 'lose_ratio',
 'accuracy',
 'efficiency',
 'frags_per_battle',
 'cap_points_per_battle',
 'damage_dealt_per_battle',
 'survival_rate',
 'spots_per_battle']

In [70]:
from sklearn.metrics import mean_absolute_error, max_error
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Lasso, ElasticNet, Ridge, LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

def trainAndScore(models, X_train, X_test, y_train, y_test):
    modelsScore = {}
    for name, model in models.items():
        metricsScore = {}
        model.fit(X_train, y_train)
        
        meanAbsErr = abs(cross_val_score(model, X_test, y_test, scoring='neg_mean_absolute_error', cv=30)).mean()
        maxErr = abs(cross_val_score(model, X_test, y_test, scoring='max_error', cv=30)).mean()
        metricsScore.update({'Mean Absolute Error':meanAbsErr})
        metricsScore.update({'Max Error':maxErr})
        
        modelsScore.update({name:metricsScore})
    
    return modelsScore

def applyPreprocessing(model, preprocessing):
    modelWithPreprocessing = make_pipeline(preprocessing,
                                           model)
    return modelWithPreprocessing

candidateModels = {'Lasso':applyPreprocessing(Lasso(), preprocessing),
                   'Liner Regression':applyPreprocessing(LinearRegression(), preprocessing),
                   'Elastic Net':applyPreprocessing(ElasticNet(), preprocessing),
                   'Random Forest':applyPreprocessing(RandomForestRegressor(), preprocessing),
                   'Ridge':applyPreprocessing(Ridge(), preprocessing),
                   'SVR (Linear)':applyPreprocessing(SVR(kernel='linear'), preprocessing),
                   'SVR (rbf)':applyPreprocessing(SVR(kernel='rbf'), preprocessing)}

scores = trainAndScore(candidateModels, X_train, X_test, y_train, y_test)

In [71]:
scores

{'Lasso': {'Mean Absolute Error': 60.97934781798683,
  'Max Error': 182.90182349057434},
 'Liner Regression': {'Mean Absolute Error': 62.58050308145586,
  'Max Error': 193.1744171754146},
 'Elastic Net': {'Mean Absolute Error': 90.38558025079,
  'Max Error': 258.35142546035974},
 'Random Forest': {'Mean Absolute Error': 76.48059181481487,
  'Max Error': 233.59364666666684},
 'Ridge': {'Mean Absolute Error': 61.99712098973652,
  'Max Error': 190.06967722448292},
 'SVR (Linear)': {'Mean Absolute Error': 96.36069659274592,
  'Max Error': 277.2355699590953},
 'SVR (rbf)': {'Mean Absolute Error': 417.21717774103416,
  'Max Error': 1048.1515139928833}}

In [72]:
df_train.columns

Index(['battle_avg_xp', 'survived_battles', 'battles', 'frags', 'xp',
       'avg_damage_assisted', 'wn8', 'win_ratio', 'lose_ratio', 'accuracy',
       'efficiency', 'frags_per_battle', 'cap_points_per_battle',
       'damage_dealt_per_battle', 'survival_rate', 'spots_per_battle'],
      dtype='object')