In [246]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import r2_score

In [247]:
df = pd.read_csv('../NBA-Game-Wins-Prediction/kaggle/working/nba_data.csv')
df.dropna(inplace=True)
x = df[df.columns.difference(['NEXT_W'])]
y = df[['NEXT_W']]

In [248]:
print(x)

        AST    BLK    DREB    FG3A    FG3M  FG3_PCT     FGA     FGM   FG_PCT  \
0    1666.0  410.0  2543.0  1256.0   422.0  104.083  6609.0  2857.0  320.764   
1    1649.0  350.0  2387.0   997.0   309.0  114.160  6821.0  2997.0  357.705   
2    1759.0  417.0  2427.0  1205.0   450.0  138.741  6997.0  3196.0  358.522   
3    1728.0  480.0  2528.0  1161.0   385.0  127.166  6986.0  3105.0  369.110   
4    2084.0  536.0  2885.0  1250.0   436.0  122.879  7668.0  3457.0  382.692   
..      ...    ...     ...     ...     ...      ...     ...     ...      ...   
442  1833.0  492.0  3070.0  1736.0   551.0  154.944  7612.0  3191.0  370.551   
443  2027.0  485.0  3403.0  2745.0   987.0  266.812  8122.0  3528.0  425.323   
444  2024.0  425.0  3111.0  2526.0   866.0  188.367  7589.0  3318.0  387.285   
445  1869.0  393.0  3078.0  2379.0   872.0  212.096  7537.0  3386.0  390.534   
446  2173.0  386.0  2704.0  2976.0  1095.0  211.146  7117.0  3210.0  352.732   

        FTA  ...  NEXT_SEASON    OREB  

In [249]:
scaler_x = StandardScaler()
scaler_y = StandardScaler()
x = scaler_x.fit_transform(x)
y = scaler_y.fit_transform(y)

In [250]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)

In [251]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
seq_len = X_train[0].shape[0]

In [252]:
print(X_train)

tensor([[-0.4207,  0.4727, -0.5396,  ..., -1.3312, -0.0799, -1.3252],
        [ 0.3945,  0.3931, -0.3688,  ..., -1.0995, -1.4637, -1.1547],
        [ 2.5404,  0.0066,  1.5629,  ..., -1.5629,  0.7586, -1.5267],
        ...,
        [ 3.7580,  3.5876,  2.4139,  ..., -0.8679,  1.8766, -0.8215],
        [-1.1231, -0.7437, -0.5200,  ..., -1.3312, -0.3116, -1.3639],
        [ 1.9016,  1.7232,  1.4117,  ..., -0.4045,  1.6721, -0.3952]])


In [253]:
class NBA(Dataset):
    def __init__(self, X_train, y_train):
        super(Dataset, self).__init__()
        self.X_train = X_train
        self.y_train = y_train

    def __len__(self):
        return len(self.X_train)
    
    def __getitem__(self, idx):
        x = self.X_train[idx]
        y = self.y_train[idx]
        return x, y

In [254]:
hidden_size = 512
num_layers = 2
learning_rate = 0.001
batch_size = 64
epoch_size = 10

In [255]:
train_dataset = NBA(X_train, y_train)
test_dataset = NBA(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [256]:
class RNN(nn.Module):
    def __init__(self, input_feature_size, hidden_size, num_layers):
        super(RNN, self).__init__()
        self.lstm = nn.LSTM(input_feature_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [257]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
rnn = RNN(22, hidden_size, num_layers).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.0001)

In [258]:
rnn.train()
for epoch in range(epoch_size):
    loss = 0.0

    for batch_idx, data in enumerate(train_loader):
        inputs, targets = data
        inputs.to(device)
        targets.to(device)

        optimizer.zero_grad()

        outputs = rnn(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        loss += loss.item()
        if batch_idx % 100 == 99:
            print(f'[{epoch + 1}, {batch_idx + 1:5d}] loss: {loss / 100:.3f}')
            loss = 0.0

In [260]:
prediction = []
ground_truth = []

rnn.eval()
with torch.no_grad():
    for data in test_loader:
        inputs, targets = data
        inputs = inputs.to(device)

        ground_truth += targets.flatten().tolist()
        out = rnn(inputs).detach().cpu().flatten().tolist()
        prediction += out

In [261]:
# prediction = scaler_y.inverse_transform(prediction)
# ground_truth = scaler_y.inverse_transform(ground_truth)

In [262]:
r2score = r2_score(prediction, ground_truth)
print(r2score)

-16.997282714393172


In [263]:
for i in range(len(prediction)):
    print("Prediction: ", prediction[i], ", ground truth: ", ground_truth[i])
    print(prediction[i] - ground_truth[i])

Prediction:  -0.16808904707431793 , ground truth:  -1.3704775584469473
1.2023885113726294
Prediction:  -0.06454604864120483 , ground truth:  -0.7267329441022363
0.6621868954610315
Prediction:  -0.18768128752708435 , ground truth:  1.0435647453457184
-1.2312460328728028
Prediction:  -0.43723392486572266 , ground truth:  -0.9681371744815029
0.5309032496157803
Prediction:  -0.4160013496875763 , ground truth:  1.2045008989318962
-1.6205022486194725
Prediction:  -0.30876651406288147 , ground truth:  -0.08298832975752554
-0.22577818430535593
Prediction:  0.04021993279457092 , ground truth:  -1.3704775584469473
1.4106974912415182
Prediction:  -0.20161041617393494 , ground truth:  0.8826285917595407
-1.0842390079334756
Prediction:  -0.20611503720283508 , ground truth:  -0.6462648673091476
0.4401498301063125
Prediction:  -0.036537814885377884 , ground truth:  0.2388839774148299
-0.2754217923002078
Prediction:  0.24548177421092987 , ground truth:  0.5607562845871853
-0.3152745103762554
Predictio