# LIBRARIES

In [1]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

pd.set_option('max_columns', None)
pd.set_option('max_rows', 80)

In [2]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.multioutput import RegressorChain, MultiOutputRegressor

from sklearn.linear_model import RidgeCV, MultiTaskLasso, LinearRegression, ElasticNet, LassoCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

from sklearn.model_selection import cross_val_score, RepeatedKFold

from sklearn.metrics import mean_absolute_error, mean_squared_error

# DATA

In [3]:
df = pd.read_csv('Prepared Data/df.csv')
df = df.fillna(0)

In [4]:
year = 2018
idx_train = df['SEASON'] <= year
idx_test = df['SEASON'] > year

df_train = df.loc[idx_train, :]
df_test = df.loc[idx_test, :]

index_cols = ['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ABBREVIATION', 'SEASON']

df_train = df_train.set_index(index_cols)
df_test = df_test.set_index(index_cols)

In [5]:
label_cols = ['PTS', 'REB', 'AST']

X_train, X_test = df_train.drop(label_cols, axis=1), df_test.drop(label_cols, axis=1)
y_train, y_test = df_train[label_cols], df_test[label_cols]

# HELP METHODS

In [6]:
def evaluate(y, y_hat):
    outputs = []
    
    outputs.append(mean_absolute_error(y, y_hat)) # GENERAL MAE
    outputs.append(np.sqrt(mean_squared_error(y, y_hat))) # GENERAL RMSE
    
    for i in range(y.shape[1]):
        outputs.append(mean_absolute_error(y.iloc[:, i], y_hat[:, i])) #SPECIFIC OUTPUT MAE
        outputs.append(mean_squared_error(y.iloc[:, i], y_hat[:, i])) #SPECIFIC OUTPUT RMSE
        
    return outputs

# MODELS

In [7]:
model = MultiTaskLasso(alpha=0.1)

In [9]:
model.fit(X_train, y_train)
results = []

y_hat_train = model.predict(X_train)
y_hat_test = model.predict(X_test)

results.append(['TRAIN', model.__class__.__name__, *evaluate(y_train, y_hat_train)])
results.append(['TEST', model.__class__.__name__, *evaluate(y_test, y_hat_test)])
with open(f'{model.__class__.__name__}.pickle', 'wb') as f:
    pickle.dump(model, f)

In [10]:
results = pd.DataFrame(results, columns=['DATASET', 'ALGORITHM',
                                         'GENERAL_MAE', 'GENERAL_RMSE', 
                                         'PTS_MAE', 'PTS_RMSE', 
                                         'REB_MAE', 'REB_RMSE', 
                                         'AST_MAE', 'AST_RMSE'])

results

Unnamed: 0,DATASET,ALGORITHM,GENERAL_MAE,GENERAL_RMSE,PTS_MAE,PTS_RMSE,REB_MAE,REB_RMSE,AST_MAE,AST_RMSE
0,TRAIN,MultiTaskLasso,3.587445,5.100821,6.097992,59.145175,2.71948,12.339373,1.944864,6.57059
1,TEST,MultiTaskLasso,3.658488,5.322672,6.393761,66.439105,2.596172,11.650085,1.985532,6.903308
