# LIBRARIES

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [82]:
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import mean_absolute_error, mean_squared_error

In [2]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

pd.set_option('max_columns', None)
pd.set_option('max_rows', 80)



# DATA

In [84]:
df = pd.read_csv('Prepared Data/df.csv')
df = df.fillna(0)

In [85]:
year = 2018
idx_train = df['SEASON'] <= year
idx_test = df['SEASON'] > year

df_train = df.loc[idx_train, :]
df_test = df.loc[idx_test, :]

index_cols = ['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ABBREVIATION', 'SEASON']

df_train = df_train.set_index(index_cols)
df_test = df_test.set_index(index_cols)

In [86]:
label_cols = ['PTS', 'REB', 'AST']
X_train, X_test = df_train.drop(label_cols, axis=1), df_test.drop(label_cols, axis=1)
y_train, y_test = df_train[label_cols], df_test[label_cols]

print(X_train.shape)
print(y_train.shape)

(248306, 63)
(248306, 3)


In [87]:
scaler_X = StandardScaler().fit(X_train)
scaler_y = StandardScaler().fit(y_train)

X_train = pd.DataFrame(scaler_X.transform(X_train), columns=X_train.columns)
X_test = pd.DataFrame(scaler_X.transform(X_test), columns=X_test.columns)
y_train_norm = pd.DataFrame(scaler_y.transform(y_train), columns=y_train.columns)
y_test_norm = pd.DataFrame(scaler_y.transform(y_test), columns=y_test.columns)

In [88]:
X_train = torch.from_numpy(np.array(X_train).astype(np.float32))
X_test = torch.from_numpy(np.array(X_test).astype(np.float32))
y_train = torch.from_numpy(np.array(y_train_norm).astype(np.float32))
y_test = torch.from_numpy(np.array(y_test_norm).astype(np.float32))

In [89]:
X_train.shape[1]

63

# NEURAL NETWORK

In [90]:
class ANN(nn.Module):
    def __init__(self, input_size=63, output_size=3, layers=[120,84]):
        super().__init__()
        self.fc1 = nn.Linear(input_size,layers[0])
        self.fc2 = nn.Linear(layers[0],layers[1])
        self.fc3 = nn.Linear(layers[1],output_size)
    
    def forward(self, X):
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = self.fc3(X)
        return X

In [91]:
torch.manual_seed(35)

<torch._C.Generator at 0x21b6cc8d8a0>

In [92]:
model = ANN()
model

ANN(
  (fc1): Linear(in_features=63, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=3, bias=True)
)

In [93]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [94]:
epochs = 100
losses = []

for i in range(epochs):
    y_hat = model(X_train)
    loss = criterion(y_hat, y_train)
    losses.append(loss)
    
 
    if i % 10 == 0:
        print(f'Epoch: {i} Loss: {loss}')
 
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Epoch: 0 Loss: 1.01754629611969
Epoch: 10 Loss: 0.9360349178314209
Epoch: 20 Loss: 0.9271999597549438
Epoch: 30 Loss: 0.9235746264457703
Epoch: 40 Loss: 0.9207262396812439
Epoch: 50 Loss: 0.9181305766105652
Epoch: 60 Loss: 0.9155824780464172
Epoch: 70 Loss: 0.9148122072219849
Epoch: 80 Loss: 0.9119161367416382
Epoch: 90 Loss: 0.9097720980644226


In [95]:
def evaluate(y, y_hat):
    outputs = []
    
    outputs.append(mean_absolute_error(y, y_hat)) # GENERAL MAE
    outputs.append(np.sqrt(mean_squared_error(y, y_hat))) # GENERAL RMSE
    
    for i in range(y.shape[1]):
        outputs.append(mean_absolute_error(y.iloc[:, i], y_hat.iloc[:, i])) #SPECIFIC OUTPUT MAE
        outputs.append(mean_squared_error(y.iloc[:, i], y_hat.iloc[:, i])) #SPECIFIC OUTPUT RMSE
        
    return outputs

In [100]:
y_train_hat_pd = pd.DataFrame(scaler_y.inverse_transform(model(X_train).detach().numpy()))
y_train_pd = pd.DataFrame(scaler_y.inverse_transform(y_train.detach().numpy()))

y_train_hat_pd

Unnamed: 0,0,1,2
0,11.521053,5.423691,2.390883
1,11.165410,5.124151,2.656630
2,10.690843,5.372534,2.457751
3,11.302227,4.873906,3.105391
4,10.974297,4.875594,3.022850
...,...,...,...
248301,11.087193,4.258136,1.434515
248302,15.866229,4.832871,4.569745
248303,15.695960,6.103225,4.267400
248304,16.077885,6.715473,3.784178


In [101]:
res = pd.DataFrame(pd.DataFrame(evaluate(y_train_pd, y_train_hat_pd)).T)
res.columns = ['GENERAL_MAE', 'GENERAL_RMSE', 'PTS_MAE', 'PTS_RMSE', 'REB_MAE', 'REB_RMSE', 'AST_MAE', 'AST_RMSE']
res

Unnamed: 0,GENERAL_MAE,GENERAL_RMSE,PTS_MAE,PTS_RMSE,REB_MAE,REB_RMSE,AST_MAE,AST_RMSE
0,3.536658,5.033592,6.022465,57.68449,2.685231,11.993486,1.902313,6.337399


In [102]:
y_test_hat_pd = pd.DataFrame(scaler_y.inverse_transform(model(X_test).detach().numpy()))
y_test_pd = pd.DataFrame(scaler_y.inverse_transform(y_test.detach().numpy()))

In [103]:
res = pd.DataFrame(pd.DataFrame(evaluate(y_test_pd, y_test_hat_pd)).T)
res.columns = ['GENERAL_MAE', 'GENERAL_RMSE', 'PTS_MAE', 'PTS_RMSE', 'REB_MAE', 'REB_RMSE', 'AST_MAE', 'AST_RMSE']
res

Unnamed: 0,GENERAL_MAE,GENERAL_RMSE,PTS_MAE,PTS_RMSE,REB_MAE,REB_RMSE,AST_MAE,AST_RMSE
0,3.65979,5.295354,6.382733,65.749237,2.620887,11.557782,1.975768,6.81554
