# Imports

In [78]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split

# Data Processing

In [None]:
import os
import pickle
import numpy as np
import torch

##################################################
# 1) Define data directory + which seasons go where
##################################################
data_dir = "../data/"
train_seasons = ["2013-2014.pkl", "2014-2015.pkl", "2015-2016.pkl"]
val_seasons   = ["2016-2017.pkl"]
test_seasons  = ["2017-2018.pkl"]

X_train_list, y_train_list = [], []
X_val_list,   y_val_list   = [], []
X_test_list,  y_test_list  = [], []

def load_pkl(path):
    """Helper to load a .pkl file: returns (X, y)."""
    with open(path, 'rb') as f:
        return pickle.load(f, encoding='latin1')

##################################################
# 2) Read each file, bucket into train/val/test
##################################################
for fname in os.listdir(data_dir):
    if not fname.endswith(".pkl"):
        continue
    file_path = os.path.join(data_dir, fname)
    print(f"Found file: {fname}")
    X_data, y_data = load_pkl(file_path)
    
    if fname in train_seasons:
        X_train_list.append(X_data)
        y_train_list.append(y_data)
        print("  -> Assigned to TRAIN")
    elif fname in val_seasons:
        X_val_list.append(X_data)
        y_val_list.append(y_data)
        print("  -> Assigned to VALIDATION")
    elif fname in test_seasons:
        X_test_list.append(X_data)
        y_test_list.append(y_data)
        print("  -> Assigned to TEST")
    else:
        print("  -> Not in any known season list (skipping or handle separately).")

##################################################
# 3) Concatenate each split
##################################################
X_train = np.concatenate(X_train_list, axis=0) if X_train_list else np.empty((0,))
y_train = np.concatenate(y_train_list, axis=0) if y_train_list else np.empty((0,))
X_valid = np.concatenate(X_val_list,   axis=0) if X_val_list   else np.empty((0,))
y_valid = np.concatenate(y_val_list,   axis=0) if y_val_list   else np.empty((0,))
X_test  = np.concatenate(X_test_list,  axis=0) if X_test_list  else np.empty((0,))
y_test  = np.concatenate(y_test_list,  axis=0) if y_test_list  else np.empty((0,))

print(f"Final shapes:")
print(f"  Train: X={X_train.shape}, y={y_train.shape}")
print(f"  Valid: X={X_valid.shape}, y={y_valid.shape}")
print(f"  Test:  X={X_test.shape},  y={y_test.shape}")

##################################################
# 4) Normalize using train stats only
#    Assuming X is [N, 3, 508]
##################################################
X_mean = np.mean(X_train, axis=(0,1), keepdims=True)
X_std  = np.std(X_train,  axis=(0,1), keepdims=True) + 1e-8

X_train_norm = (X_train - X_mean) / X_std
X_valid_norm = (X_valid - X_mean) / X_std
X_test_norm  = (X_test  - X_mean) / X_std

# Store the original y mean/std before normalizing (for later un-scaling if desired)
orig_y_mean = y_train.mean()
orig_y_std  = y_train.std()

y_train_norm = (y_train - orig_y_mean) / orig_y_std
y_valid_norm = (y_valid - orig_y_mean) / orig_y_std
y_test_norm  = (y_test  - orig_y_mean) / orig_y_std  # needed if you plan to evaluate MSE in normalized space

##################################################
# 5) Convert to PyTorch Tensors
##################################################
X_train_t = torch.tensor(X_train_norm, dtype=torch.float32)
y_train_t = torch.tensor(y_train_norm, dtype=torch.float32)
X_val_t   = torch.tensor(X_valid_norm, dtype=torch.float32)
y_val_t   = torch.tensor(y_valid_norm, dtype=torch.float32)
X_test_t  = torch.tensor(X_test_norm,  dtype=torch.float32)
y_test_t  = torch.tensor(y_test_norm,  dtype=torch.float32)

print(f"Tensors ready:")
print(f"  X_train_t={X_train_t.shape}, y_train_t={y_train_t.shape}")
print(f"  X_val_t=  {X_val_t.shape},   y_val_t=  {y_val_t.shape}")
print(f"  X_test_t= {X_test_t.shape},  y_test_t= {y_test_t.shape}")

# Now you can feed X_train_t and y_train_t into your existing model and training loop.


Found file: 2015-2016.pkl
  -> Assigned to TRAIN
Found file: 2014-2015.pkl
  -> Assigned to TRAIN
Found file: 2013-2014.pkl
  -> Assigned to TRAIN
Found file: 2017-2018.pkl
  -> Assigned to TEST
Found file: 2016-2017.pkl
  -> Assigned to VALIDATION
Final shapes:
  Train: X=(3804, 3, 508), y=(3804,)
  Valid: X=(1260, 3, 508), y=(1260,)
  Test:  X=(1264, 3, 508),  y=(1264,)
Tensors ready:
  X_train_t=torch.Size([3804, 3, 508]), y_train_t=torch.Size([3804])
  X_val_t=  torch.Size([1260, 3, 508]),   y_val_t=  torch.Size([1260])
  X_test_t= torch.Size([1264, 3, 508]),  y_test_t= torch.Size([1264])


# Basic NN

In [80]:
# Model matching paper's description
class NBANeuralNetwork(nn.Module):
    def __init__(self):
        super(NBANeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(1524, 500)
        self.fc2 = nn.Linear(500, 100)
        self.fc3 = nn.Linear(100, 20)
        self.fc4 = nn.Linear(20, 1)
        
    def forward(self, x):
        # Reshape input: [batch_size, 3, 508] -> [batch_size, 1524]
        batch_size = x.size(0)
        x = x.reshape(batch_size, -1)  # Flatten the 3×508 to 1524
        
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        return x
network = NBANeuralNetwork()

In [81]:
optimizer = torch.optim.Adam(network.parameters(), lr=0.005, weight_decay=1e-3)
loss = nn.MSELoss() 

In [82]:
# Convert numpy arrays to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_valid = torch.tensor(X_valid, dtype=torch.float32)
y_valid = torch.tensor(y_valid, dtype=torch.float32)

# Training loop with validation phase
for t in range(10000):
    # Training phase
    network.train()                         # Set the model to training mode
    y_train_pred = network(X_train)
    train_loss = loss(y_train_pred.squeeze(), y_train)
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()

    # Validation phase
    network.eval()                          # Set the model to evaluation mode
    with torch.no_grad():
        y_val_pred = network(X_valid)
        val_loss = loss(y_val_pred.squeeze(), y_valid)

    # Print every 100 epochs
    if t % 100 == 0:
        true_train_loss = train_loss.item()
        true_val_loss   = val_loss.item()  
        print(f"Epoch {t}, Train Loss: {true_train_loss:.6f}, Val Loss: {true_val_loss:.6f}")

Epoch 0, Train Loss: 39568.132812, Val Loss: 35502.121094
Epoch 100, Train Loss: 1254.787720, Val Loss: 1958.070801
Epoch 200, Train Loss: 1247.250122, Val Loss: 1959.159912
Epoch 300, Train Loss: 1239.743530, Val Loss: 1960.108521
Epoch 400, Train Loss: 1230.189331, Val Loss: 1960.470215
Epoch 500, Train Loss: 1215.447998, Val Loss: 1959.286499
Epoch 600, Train Loss: 1190.578003, Val Loss: 1958.309326
Epoch 700, Train Loss: 1144.134644, Val Loss: 1957.639893
Epoch 800, Train Loss: 1140.628174, Val Loss: 1996.751221
Epoch 900, Train Loss: 1071.019653, Val Loss: 1978.993286
Epoch 1000, Train Loss: 1116.199097, Val Loss: 1956.599243
Epoch 1100, Train Loss: 1293.598633, Val Loss: 2113.998535
Epoch 1200, Train Loss: 1050.458252, Val Loss: 2113.176270
Epoch 1300, Train Loss: 1005.559265, Val Loss: 2057.912109
Epoch 1400, Train Loss: 955.917908, Val Loss: 2099.799316
Epoch 1500, Train Loss: 933.894714, Val Loss: 2054.260010
Epoch 1600, Train Loss: 899.722351, Val Loss: 2119.898193
Epoch 1700

# More Complex Fully Connected / LSTM Neural Networks 

In [83]:
class AdvancedBasketballNN(nn.Module):
    def __init__(self, input_size=508, hidden_size=256, lstm_layers=2, output_size=1):
        super(AdvancedBasketballNN, self).__init__()
        
        # Bidirectional LSTM for capturing temporal dynamics in both directions
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=lstm_layers,
            batch_first=True,
            bidirectional=True,
            dropout=0.3 if lstm_layers > 1 else 0
        )
        
        # Attention mechanism to focus on important timesteps
        self.attention = nn.Linear(hidden_size*2, 1)  # *2 for bidirectional
        
        # Deeper fully connected layers with batch normalization
        self.bn1 = nn.BatchNorm1d(hidden_size*2)
        self.fc1 = nn.Linear(hidden_size*2, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 256)
        self.bn3 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 128)
        self.bn4 = nn.BatchNorm1d(128)
        self.fc4 = nn.Linear(128, output_size)
        
        # Advanced regularization
        self.dropout = nn.Dropout(0.4)
        
    def attention_mechanism(self, lstm_output):
        # lstm_output shape: (batch_size, seq_len, hidden_size*2)
        attention_weights = torch.softmax(self.attention(lstm_output), dim=1)
        # Apply attention weights to LSTM output
        context_vector = torch.sum(attention_weights * lstm_output, dim=1)
        return context_vector
        
    def forward(self, x):
        # x shape: (batch_size, seq_len=3, features=508)
        
        # Process sequence with bidirectional LSTM
        lstm_output, _ = self.lstm(x)
        # lstm_output shape: (batch_size, seq_len, hidden_size*2)
        
        # Apply attention mechanism
        context = self.attention_mechanism(lstm_output)
        
        # Apply batch normalization
        context = self.bn1(context)
        
        # Deep fully connected layers with residual connections
        residual = context
        out = torch.relu(self.fc1(context))
        out = self.bn2(out)
        out = self.dropout(out)
        
        out = torch.relu(self.fc2(out))
        out = self.bn3(out)
        out = self.dropout(out)
        
        out = torch.relu(self.fc3(out))
        out = self.bn4(out)
        out = self.dropout(out)
        
        # Final output layer
        out = self.fc4(out)
        
        return out
network = AdvancedBasketballNN(input_size=508, hidden_size=256, lstm_layers=2, output_size=1)

In [84]:
# More sophisticated optimizer with learning rate scheduling
optimizer = torch.optim.AdamW(network.parameters(), lr=0.005, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10, verbose=True)




In [None]:
# Training loop with early stopping
best_val_loss = float('inf')
patience = 20
patience_counter = 0

for t in range(1000):  # Increase epochs for better training
    # Training phase
    network.train()
    y_pred = network(X_train)
    train_loss = loss(y_pred.squeeze(), y_train)
    
    optimizer.zero_grad() 
    train_loss.backward()
    # Gradient clipping to prevent exploding gradients
    torch.nn.utils.clip_grad_norm_(network.parameters(), max_norm=1.0)
    optimizer.step()
    
    # Validation phase
    network.eval()
    with torch.no_grad():
        y_pred = network(X_valid)
        val_loss = loss(y_pred.squeeze(), y_valid)
    
    # Learning rate scheduling
    scheduler.step(val_loss)
    
    # Early stopping check
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        # Save best model
        torch.save(network.state_dict(), 'best_basketball_model.pt')
    else:
        patience_counter += 1
        
    # if patience_counter >= patience:
    #     print(f"Early stopping at epoch {t+1}")
    #     break
        
    if t % 25 == 0:  # Print every 25 epochs
        print(f"Epoch {t+1}, Train Loss: {train_loss.item():.6f}, Val Loss: {val_loss.item():.6f}")

Epoch 1, Train Loss: 40330.464844, Val Loss: 43311.515625
Epoch 26, Train Loss: 39290.156250, Val Loss: 40246.968750
Epoch 51, Train Loss: 36117.578125, Val Loss: 36545.511719
Epoch 76, Train Loss: 31135.705078, Val Loss: 33071.394531
Epoch 101, Train Loss: 25103.230469, Val Loss: 26073.726562
Epoch 126, Train Loss: 18501.537109, Val Loss: 19847.583984
Epoch 151, Train Loss: 12156.049805, Val Loss: 13233.961914
Epoch 176, Train Loss: 6597.234375, Val Loss: 7542.374023
Epoch 201, Train Loss: 2817.803955, Val Loss: 3285.694580
Epoch 226, Train Loss: 1551.450195, Val Loss: 1985.763550
Epoch 251, Train Loss: 1513.460938, Val Loss: 1998.112305
Epoch 276, Train Loss: 1510.474487, Val Loss: 1990.132568
Epoch 301, Train Loss: 1501.399536, Val Loss: 1996.257690
Epoch 326, Train Loss: 1489.084595, Val Loss: 1999.914307
Epoch 351, Train Loss: 1499.808960, Val Loss: 2000.380615
Epoch 376, Train Loss: 1502.905396, Val Loss: 2000.390503
Epoch 401, Train Loss: 1510.060913, Val Loss: 2000.561523
Epoch

In [None]:

# Define a proper model for sequential data
class BasketballNN(nn.Module):
    def __init__(self, input_size=508, hidden_size=100, output_size=1):
        super(BasketballNN, self).__init__()
        
        # LSTM to handle sequential data (3 timesteps)
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True  # expects input shape: (batch, seq, features)
        )
        
        # Fully connected layers after processing the sequence
        self.fc1 = nn.Linear(hidden_size, 50)
        self.fc2 = nn.Linear(50, output_size)
        self.dropout = nn.Dropout(0.2)
        
    def forward(self, x):
        # x shape: (batch_size, seq_len=3, features=508)
        
        # Process sequence with LSTM
        # out shape: (batch_size, seq_len, hidden_size)
        out, (hidden, _) = self.lstm(x)
        
        # Get the output from the last timestep
        # hidden shape: (1, batch_size, hidden_size)
        out = hidden.squeeze(0)
        
        # Process through fully connected layers
        out = torch.relu(self.fc1(out))
        out = self.dropout(out)
        out = self.fc2(out)
        
        return out

# Create the model with proper dimensions
network = BasketballNN(input_size=508, hidden_size=100, output_size=1)
