# LIBRARIES

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import mean_absolute_error, mean_squared_error

In [3]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

pd.set_option('max_columns', None)
pd.set_option('max_rows', 80)

# DATA

In [4]:
df = pd.read_csv('Prepared Data/df.csv')
df = df.fillna(0)

In [5]:
year = 2018
idx_train = df['SEASON'] <= year
idx_test = df['SEASON'] > year

df_train = df.loc[idx_train, :]
df_test = df.loc[idx_test, :]

index_cols = ['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ABBREVIATION', 'SEASON']

df_train = df_train.set_index(index_cols)
df_test = df_test.set_index(index_cols)

In [6]:
label_cols = ['PTS', 'REB', 'AST']
X_train, X_test = df_train.drop(label_cols, axis=1), df_test.drop(label_cols, axis=1)
y_train, y_test = df_train[label_cols], df_test[label_cols]

print(X_train.shape)
print(y_train.shape)

(248306, 63)
(248306, 3)


In [7]:
scaler_X = StandardScaler().fit(X_train)
scaler_y = StandardScaler().fit(y_train)

X_train = pd.DataFrame(scaler_X.transform(X_train), columns=X_train.columns)
X_test = pd.DataFrame(scaler_X.transform(X_test), columns=X_test.columns)
y_train_norm = pd.DataFrame(scaler_y.transform(y_train), columns=y_train.columns)
y_test_norm = pd.DataFrame(scaler_y.transform(y_test), columns=y_test.columns)

In [8]:
X_train = torch.from_numpy(np.array(X_train).astype(np.float32))
X_test = torch.from_numpy(np.array(X_test).astype(np.float32))
y_train = torch.from_numpy(np.array(y_train_norm).astype(np.float32))
y_test = torch.from_numpy(np.array(y_test_norm).astype(np.float32))

In [9]:
X_train.shape[1]

63

# NEURAL NETWORK

In [10]:
class ANN(nn.Module):
    def __init__(self, input_size=63, output_size=3, layers=[120,84]):
        super().__init__()
        self.fc1 = nn.Linear(input_size,layers[0])
        self.fc2 = nn.Linear(layers[0],layers[1])
        self.fc3 = nn.Linear(layers[1],output_size)
        self.dropout = nn.Dropout(0.25)
    
    def forward(self, X):
        X = F.relu(self.fc1(X))
        X = self.dropout(X)
        X = F.relu(self.fc2(X))
        X = self.dropout(X)
        X = self.fc3(X)
        return X

In [11]:
torch.manual_seed(35)

<torch._C.Generator at 0x24b64b49410>

In [12]:
model = ANN()
model

ANN(
  (fc1): Linear(in_features=63, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=3, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
)

In [13]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [14]:
epochs = 100
losses = []

for i in range(epochs):
    y_hat = model(X_train)
    loss = criterion(y_hat, y_train)
    losses.append(loss)
    
 
    if i % 10 == 0:
        print(f'Epoch: {i} Loss: {loss}')
 
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
with open(f'{model.__class__.__name__}.pickle', 'wb') as f:
            pickle.dump(model, f)

Epoch: 0 Loss: 1.0252125263214111
Epoch: 10 Loss: 0.9461822509765625
Epoch: 20 Loss: 0.9370214343070984
Epoch: 30 Loss: 0.9317667484283447
Epoch: 40 Loss: 0.9283778667449951
Epoch: 50 Loss: 0.9264545440673828
Epoch: 60 Loss: 0.9244831204414368
Epoch: 70 Loss: 0.923098087310791
Epoch: 80 Loss: 0.9216331243515015
Epoch: 90 Loss: 0.9206502437591553


In [15]:
def evaluate(y, y_hat):
    outputs = []
    
    outputs.append(mean_absolute_error(y, y_hat)) # GENERAL MAE
    outputs.append(np.sqrt(mean_squared_error(y, y_hat))) # GENERAL RMSE
    
    for i in range(y.shape[1]):
        outputs.append(mean_absolute_error(y.iloc[:, i], y_hat.iloc[:, i])) #SPECIFIC OUTPUT MAE
        outputs.append(mean_squared_error(y.iloc[:, i], y_hat.iloc[:, i])) #SPECIFIC OUTPUT RMSE
        
    return outputs

In [16]:
y_train_hat_pd = pd.DataFrame(scaler_y.inverse_transform(model(X_train).detach().numpy()))
y_train_pd = pd.DataFrame(scaler_y.inverse_transform(y_train.detach().numpy()))

y_train_hat_pd

Unnamed: 0,0,1,2
0,12.146446,4.829453,2.609970
1,11.692315,4.584507,2.614982
2,12.504880,5.120720,2.514208
3,11.141133,4.707856,1.977403
4,12.608542,4.966882,2.692571
...,...,...,...
248301,9.659372,3.893003,1.573970
248302,17.508623,4.791647,5.175361
248303,15.605177,6.671285,3.027626
248304,17.734938,7.405539,3.941108


In [17]:
res = pd.DataFrame(pd.DataFrame(evaluate(y_train_pd, y_train_hat_pd)).T)
res.columns = ['GENERAL_MAE', 'GENERAL_RMSE', 'PTS_MAE', 'PTS_RMSE', 'REB_MAE', 'REB_RMSE', 'AST_MAE', 'AST_RMSE']
res

Unnamed: 0,GENERAL_MAE,GENERAL_RMSE,PTS_MAE,PTS_RMSE,REB_MAE,REB_RMSE,AST_MAE,AST_RMSE
0,3.559201,5.064713,6.062113,58.401894,2.692513,12.134678,1.923135,6.422238


In [18]:
y_test_hat_pd = pd.DataFrame(scaler_y.inverse_transform(model(X_test).detach().numpy()))
y_test_pd = pd.DataFrame(scaler_y.inverse_transform(y_test.detach().numpy()))

In [19]:
res = pd.DataFrame(pd.DataFrame(evaluate(y_test_pd, y_test_hat_pd)).T)
res.columns = ['GENERAL_MAE', 'GENERAL_RMSE', 'PTS_MAE', 'PTS_RMSE', 'REB_MAE', 'REB_RMSE', 'AST_MAE', 'AST_RMSE']
res

Unnamed: 0,GENERAL_MAE,GENERAL_RMSE,PTS_MAE,PTS_RMSE,REB_MAE,REB_RMSE,AST_MAE,AST_RMSE
0,3.659056,5.302969,6.386239,65.915871,2.609045,11.6076,1.981876,6.841067


In [20]:
res.to_csv('Results/PyTorch_results.csv')