# LIBRARIES

In [1]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

pd.set_option('max_columns', None)
pd.set_option('max_rows', 80)

In [2]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.multioutput import RegressorChain, MultiOutputRegressor

from sklearn.linear_model import RidgeCV, MultiTaskLasso, LinearRegression, ElasticNet, LassoCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

from sklearn.model_selection import cross_val_score, RepeatedKFold

from sklearn.metrics import mean_absolute_error, mean_squared_error

# DATA

In [3]:
df = pd.read_csv('Prepared Data/df.csv')
df = df.fillna(0)

In [4]:
year = 2018
idx_train = df['SEASON'] <= year
idx_test = df['SEASON'] > year

df_train = df.loc[idx_train, :]
df_test = df.loc[idx_test, :]

index_cols = ['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ABBREVIATION', 'SEASON']

df_train = df_train.set_index(index_cols)
df_test = df_test.set_index(index_cols)

In [5]:
label_cols = ['PTS', 'REB', 'AST']

X_train, X_test = df_train.drop(label_cols, axis=1), df_test.drop(label_cols, axis=1)
y_train, y_test = df_train[label_cols], df_test[label_cols]

# HELP METHODS

In [6]:
def evaluate(y, y_hat):
    outputs = []
    
    outputs.append(mean_absolute_error(y, y_hat)) # GENERAL MAE
    outputs.append(np.sqrt(mean_squared_error(y, y_hat))) # GENERAL RMSE
    
    for i in range(y.shape[1]):
        outputs.append(mean_absolute_error(y.iloc[:, i], y_hat[:, i])) #SPECIFIC OUTPUT MAE
        outputs.append(mean_squared_error(y.iloc[:, i], y_hat[:, i])) #SPECIFIC OUTPUT RMSE
        
    return outputs

# MODELS

In [7]:
models = [LinearRegression(), 
          DecisionTreeRegressor(max_depth=5), RandomForestRegressor(n_estimators=20), 
          GradientBoostingRegressor(n_estimators=20)]
orders = [[0, 1, 2], [0, 2, 1], [1, 0, 2], [1, 2, 0], [2, 0, 1], [2, 1, 0]]

In [8]:
results = []

for model in models:
    for order in orders:
        wrapper = RegressorChain(model, order=order)
        wrapper.fit(X_train, y_train)
        
        y_hat_train = wrapper.predict(X_train)
        y_hat_test = wrapper.predict(X_test)
        
        results.append(['TRAIN', f'{order}', model.__class__.__name__, *evaluate(y_train, y_hat_train)])
        results.append(['TEST', f'{order}', model.__class__.__name__, *evaluate(y_test, y_hat_test)])
        with open(f'{model.__class__.__name__}-{order}.pickle', 'wb') as f:
            pickle.dump(wrapper, f)

In [9]:
results = pd.DataFrame(results, columns=['DATASET', 'ORDER','ALGORITHM',
                                         'GENERAL_MAE', 'GENERAL_RMSE', 
                                         'PTS_MAE', 'PTS_RMSE', 
                                         'REB_MAE', 'REB_RMSE', 
                                         'AST_MAE', 'AST_RMSE'])

results

Unnamed: 0,DATASET,ORDER,ALGORITHM,GENERAL_MAE,GENERAL_RMSE,PTS_MAE,PTS_RMSE,REB_MAE,REB_RMSE,AST_MAE,AST_RMSE
0,TRAIN,"[0, 1, 2]",LinearRegression,3.5766,5.086739,6.081095,58.828647,2.710256,12.262307,1.938448,6.533783
1,TEST,"[0, 1, 2]",LinearRegression,3.652095,5.301737,6.37293,65.888077,2.600449,11.59003,1.982907,6.847135
2,TRAIN,"[0, 2, 1]",LinearRegression,3.5766,5.086739,6.081095,58.828647,2.710256,12.262307,1.938448,6.533783
3,TEST,"[0, 2, 1]",LinearRegression,3.652095,5.301737,6.37293,65.888077,2.600449,11.59003,1.982907,6.847135
4,TRAIN,"[1, 0, 2]",LinearRegression,3.5766,5.086739,6.081095,58.828647,2.710256,12.262307,1.938448,6.533783
5,TEST,"[1, 0, 2]",LinearRegression,3.652095,5.301737,6.37293,65.888077,2.600449,11.59003,1.982907,6.847135
6,TRAIN,"[1, 2, 0]",LinearRegression,3.5766,5.086739,6.081095,58.828647,2.710256,12.262307,1.938448,6.533783
7,TEST,"[1, 2, 0]",LinearRegression,3.652095,5.301737,6.37293,65.888077,2.600449,11.59003,1.982907,6.847135
8,TRAIN,"[2, 0, 1]",LinearRegression,3.5766,5.086739,6.081095,58.828647,2.710256,12.262307,1.938448,6.533783
9,TEST,"[2, 0, 1]",LinearRegression,3.652095,5.301737,6.37293,65.888077,2.600449,11.59003,1.982907,6.847135


In [10]:
results.to_csv('Results/RegressionChain_results.csv')