# LIBRARIES

In [1]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

pd.set_option('max_columns', None)
pd.set_option('max_rows', 80)

In [2]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.multioutput import RegressorChain, MultiOutputRegressor

from sklearn.linear_model import RidgeCV, MultiTaskLasso, LinearRegression, ElasticNet, LassoCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

from sklearn.model_selection import cross_val_score, RepeatedKFold

from sklearn.metrics import mean_absolute_error, mean_squared_error

# DATA

In [3]:
df = pd.read_csv('Prepared Data/df.csv')
df = df.fillna(0)

In [4]:
year = 2018
idx_train = df['SEASON'] <= year
idx_test = df['SEASON'] > year

df_train = df.loc[idx_train, :]
df_test = df.loc[idx_test, :]

index_cols = ['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ABBREVIATION', 'SEASON']

df_train = df_train.set_index(index_cols)
df_test = df_test.set_index(index_cols)

In [5]:
label_cols = ['PTS', 'REB', 'AST']

X_train, X_test = df_train.drop(label_cols, axis=1), df_test.drop(label_cols, axis=1)
y_train, y_test = df_train[label_cols], df_test[label_cols]

# HELP METHODS

In [6]:
def evaluate(y, y_hat):
    outputs = []
    
    outputs.append(mean_absolute_error(y, y_hat)) # GENERAL MAE
    outputs.append(np.sqrt(mean_squared_error(y, y_hat))) # GENERAL RMSE
    
    for i in range(y.shape[1]):
        outputs.append(mean_absolute_error(y.iloc[:, i], y_hat[:, i])) #SPECIFIC OUTPUT MAE
        outputs.append(mean_squared_error(y.iloc[:, i], y_hat[:, i])) #SPECIFIC OUTPUT RMSE
        
    return outputs

# MODELS

In [7]:
# models = [LinearRegression(), LassoCV(), RidgeCV(), 
#           DecisionTreeRegressor(), RandomForestRegressor(), 
#           GradientBoostingRegressor(), MLPRegressor()]

models = [LinearRegression(), 
          DecisionTreeRegressor(max_depth=5), RandomForestRegressor(n_estimators=20), 
          GradientBoostingRegressor(n_estimators=20)]

In [9]:
results = []

for model in models:
    wrapper = MultiOutputRegressor(model)
    wrapper.fit(X_train, y_train)
    
    y_hat_train = wrapper.predict(X_train)
    y_hat_test = wrapper.predict(X_test)
    
    results.append(['TRAIN', model.__class__.__name__, *evaluate(y_train, y_hat_train)])
    results.append(['TEST', model.__class__.__name__, *evaluate(y_test, y_hat_test)])
    with open(f'{model.__class__.__name__}.pickle', 'wb') as f:
        pickle.dump(wrapper, f)

In [10]:
results = pd.DataFrame(results, columns=['DATASET', 'ALGORITHM',
                                         'GENERAL_MAE', 'GENERAL_RMSE', 
                                         'PTS_MAE', 'PTS_RMSE', 
                                         'REB_MAE', 'REB_RMSE', 
                                         'AST_MAE', 'AST_RMSE'])

results

Unnamed: 0,DATASET,ALGORITHM,GENERAL_MAE,GENERAL_RMSE,PTS_MAE,PTS_RMSE,REB_MAE,REB_RMSE,AST_MAE,AST_RMSE
0,TRAIN,LinearRegression,3.5766,5.086739,6.081095,58.828647,2.710256,12.262307,1.938448,6.533783
1,TEST,LinearRegression,3.652095,5.301737,6.37293,65.888077,2.600449,11.59003,1.982907,6.847135
2,TRAIN,DecisionTreeRegressor,3.588403,5.107114,6.107057,59.351787,2.721307,12.369616,1.936844,6.526441
3,TEST,DecisionTreeRegressor,3.663994,5.34604,6.402384,67.03578,2.608578,11.748095,1.981019,6.956568
4,TRAIN,RandomForestRegressor,1.421213,2.092533,2.40977,9.94699,1.080349,2.084286,0.77352,1.10481
5,TEST,RandomForestRegressor,3.833822,5.465946,6.60412,69.53133,2.765079,12.609399,2.132267,7.488959
6,TRAIN,GradientBoostingRegressor,3.591443,5.105641,6.111108,59.298196,2.724637,12.373824,1.938585,6.530704
7,TEST,GradientBoostingRegressor,3.663865,5.354123,6.409072,67.334332,2.609584,11.742501,1.97294,6.923078


In [11]:
results.to_csv('Results/MultiOutputRegressor_results.csv')