In [191]:
import torch
import numpy as np
import pandas as pd

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T
import torch.nn.functional as F

In [192]:
df = pd.read_csv("nba_games.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,mp,mp.1,fg,fga,fg%,3p,3pa,3p%,ft,...,tov%_max_opp,usg%_max_opp,ortg_max_opp,drtg_max_opp,team_opp,total_opp,home_opp,season,date,won
0,0,240.0,240.0,39.0,81.0,0.481,6.0,20.0,0.3,14.0,...,22.8,29.0,178.0,111.0,DAL,95,1,2016,2015-12-09,True
1,1,240.0,240.0,36.0,100.0,0.36,7.0,31.0,0.226,16.0,...,50.0,32.6,152.0,111.0,ATL,98,0,2016,2015-12-09,False
2,2,240.0,240.0,37.0,85.0,0.435,8.0,19.0,0.421,17.0,...,20.0,30.9,148.0,116.0,SAS,107,1,2018,2017-10-18,False
3,3,240.0,240.0,41.0,89.0,0.461,8.0,21.0,0.381,17.0,...,28.6,30.9,138.0,118.0,MIN,99,0,2018,2017-10-18,True
4,4,240.0,240.0,27.0,86.0,0.314,6.0,26.0,0.231,15.0,...,16.8,30.9,157.0,90.0,MEM,92,1,2021,2021-04-30,False


In [193]:
print(list(df.columns))

['Unnamed: 0', 'mp', 'mp.1', 'fg', 'fga', 'fg%', '3p', '3pa', '3p%', 'ft', 'fta', 'ft%', 'orb', 'drb', 'trb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', '+/-', 'ts%', 'efg%', '3par', 'ftr', 'orb%', 'drb%', 'trb%', 'ast%', 'stl%', 'blk%', 'tov%', 'usg%', 'ortg', 'drtg', 'mp_max', 'mp_max.1', 'fg_max', 'fga_max', 'fg%_max', '3p_max', '3pa_max', '3p%_max', 'ft_max', 'fta_max', 'ft%_max', 'orb_max', 'drb_max', 'trb_max', 'ast_max', 'stl_max', 'blk_max', 'tov_max', 'pf_max', 'pts_max', '+/-_max', 'ts%_max', 'efg%_max', '3par_max', 'ftr_max', 'orb%_max', 'drb%_max', 'trb%_max', 'ast%_max', 'stl%_max', 'blk%_max', 'tov%_max', 'usg%_max', 'ortg_max', 'drtg_max', 'team', 'total', 'home', 'index_opp', 'mp_opp', 'mp_opp.1', 'fg_opp', 'fga_opp', 'fg%_opp', '3p_opp', '3pa_opp', '3p%_opp', 'ft_opp', 'fta_opp', 'ft%_opp', 'orb_opp', 'drb_opp', 'trb_opp', 'ast_opp', 'stl_opp', 'blk_opp', 'tov_opp', 'pf_opp', 'pts_opp', '+/-_opp', 'ts%_opp', 'efg%_opp', '3par_opp', 'ftr_opp', 'orb%_opp', 'drb%_opp', 'trb

In [194]:
relevant =  ['ts%', '+/-_max_opp', '+/-_max', 'orb%', 'drb%', 'ast%', 'stl%', \
             'blk%', 'tov%', '3par', 'ftr', 'ts%_opp', 'drb%_opp', 'orb%_opp', \
             'ast%_opp', 'stl%_opp', 'blk%_opp', 'tov%_opp', '3par_opp', 'home_opp', 'won']
features = ['ts%', '+/-_max_opp', '+/-_max', 'orb%', 'drb%', 'ast%', 'stl%', \
             'blk%', 'tov%', '3par', 'ftr', 'ts%_opp', 'drb%_opp', 'orb%_opp', \
             'ast%_opp', 'stl%_opp', 'blk%_opp', 'tov%_opp', '3par_opp', 'home_opp']
df_no_na = df[relevant].dropna()
y = torch.tensor(df_no_na['won'].values.astype(int))
y

tensor([1, 0, 0,  ..., 0, 1, 0])

In [195]:
feature_data = df_no_na[features]
feature_data.head()

Unnamed: 0,ts%,+/-_max_opp,+/-_max,orb%,drb%,ast%,stl%,blk%,tov%,3par,ftr,ts%_opp,drb%_opp,orb%_opp,ast%_opp,stl%_opp,blk%_opp,tov%_opp,3par_opp,home_opp
0,0.551,11.0,9.0,16.2,67.2,66.7,7.4,10.1,13.6,0.247,0.222,0.438,83.8,32.8,50.0,7.4,4.9,7.7,0.31,1
1,0.438,9.0,11.0,32.8,83.8,50.0,7.4,4.9,7.7,0.31,0.19,0.551,67.2,16.2,66.7,7.4,10.1,13.6,0.247,0
2,0.52,15.0,15.0,22.4,72.1,62.2,7.4,5.9,12.0,0.224,0.271,0.55,77.6,27.9,53.7,8.5,16.7,10.2,0.236,1
3,0.55,15.0,15.0,27.9,77.6,53.7,8.5,16.7,10.2,0.236,0.213,0.52,72.1,22.4,62.2,7.4,5.9,12.0,0.224,0
4,0.396,22.0,-2.0,22.2,74.6,59.3,4.1,10.0,17.4,0.302,0.233,0.434,77.8,25.4,73.5,10.2,13.3,7.0,0.394,1


In [196]:
X = torch.tensor(feature_data.values, dtype=torch.float)
X

tensor([[ 0.5510, 11.0000,  9.0000,  ...,  7.7000,  0.3100,  1.0000],
        [ 0.4380,  9.0000, 11.0000,  ..., 13.6000,  0.2470,  0.0000],
        [ 0.5200, 15.0000, 15.0000,  ..., 10.2000,  0.2360,  1.0000],
        ...,
        [ 0.5520,  6.0000,  4.0000,  ..., 10.9000,  0.3380,  0.0000],
        [ 0.5960,  4.0000, 23.0000,  ..., 11.5000,  0.5180,  1.0000],
        [ 0.5300, 23.0000,  4.0000,  ..., 12.5000,  0.3060,  0.0000]])

In [197]:
print(X.shape)
print(y.shape)

torch.Size([17772, 20])
torch.Size([17772])


In [198]:
train_size = int(0.8 * len(y))
val_size = len(y) - train_size
X_train = X[0:train_size]
X_val = X[train_size:]
y_train = y[0:train_size]
y_val = y[train_size:]

In [199]:
print_every = 100

def train_part34(model, optimizer, epochs=1):
#     model = model.to(device=device)  # move the model parameters to CPU/GPU
    for e in range(epochs):
        for t, (x, y) in enumerate(loader_train):
            model.train()  # put model to training mode
#             x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
#             y = y.to(device=device, dtype=torch.long)

            scores = model(x)
            loss = F.cross_entropy(scores, y)

            # Zero out all of the gradients for the variables which the optimizer
            # will update.
            optimizer.zero_grad()

            # This is the backwards pass: compute the gradient of the loss with
            # respect to each  parameter of the model.
            loss.backward()

            # Actually update the parameters of the model using the gradients
            # computed by the backwards pass.
            optimizer.step()

            if t % print_every == 0:
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy_part34(loader_val, model)
                print()


def check_accuracy_part34(loader, model):
#     if loader.dataset.train:
#         print('Checking accuracy on validation set')
#     else:
#         print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
#             x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
#             y = y.to(device=device, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
                
train_set = TensorDataset(X_train, y_train)
loader_train = DataLoader(train_set, batch_size=64, 
                          sampler=sampler.SubsetRandomSampler(range(train_size)))

val_set = TensorDataset(X_val, y_val)
loader_val = DataLoader(val_set, batch_size=64,
                      sampler=sampler.SubsetRandomSampler(range(val_size)))


In [200]:
channel_1 = 128
channel_2 = 64
learning_rate = 1e-3

model = nn.Sequential(
    nn.Linear(20, 2),
#     nn.ReLU()
)

# you can use Nesterov momentum in optim.SGD
optimizer = optim.SGD(model.parameters(), lr=learning_rate,
                      momentum=0.90, nesterov=True)
train_part34(model, optimizer, epochs=10)

Iteration 0, loss = 22.1281
Got 1196 / 3555 correct (33.64)

Iteration 100, loss = 0.2733
Got 3288 / 3555 correct (92.49)

Iteration 200, loss = 0.1059
Got 3141 / 3555 correct (88.35)

Iteration 0, loss = 0.5787
Got 2851 / 3555 correct (80.20)

Iteration 100, loss = 0.2782
Got 3052 / 3555 correct (85.85)

Iteration 200, loss = 0.5050
Got 3156 / 3555 correct (88.78)

Iteration 0, loss = 0.3590
Got 3270 / 3555 correct (91.98)

Iteration 100, loss = 0.3629
Got 3086 / 3555 correct (86.81)

Iteration 200, loss = 0.6072
Got 3287 / 3555 correct (92.46)

Iteration 0, loss = 3.8704
Got 2387 / 3555 correct (67.14)

Iteration 100, loss = 0.2061
Got 3181 / 3555 correct (89.48)

Iteration 200, loss = 0.6247
Got 3242 / 3555 correct (91.20)

Iteration 0, loss = 0.8982
Got 2819 / 3555 correct (79.30)

Iteration 100, loss = 0.3066
Got 3295 / 3555 correct (92.69)

Iteration 200, loss = 0.5845
Got 3298 / 3555 correct (92.77)

Iteration 0, loss = 0.1267
Got 3297 / 3555 correct (92.74)

Iteration 100, loss