In [1]:
import sklearn.preprocessing as sklp
import sklearn.model_selection as sklm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import numpy as np
import pandas as pd

## LSTM Model: Based on 4701 Implementation

### Dataframe Creation:

In this section, import relevant data on NFL games, preprocessed to include only relevant features and drop NA values:

In [2]:
nfl_df = None # TODO: replace when data importation / preprocessing logic is implemented!!!

### Custom Time Series Dataset:

In this section, define a custom time-series dataset object for input into our LSTM module

In [3]:
class TimeSeriesDataset(Dataset):
    def __init__(self, data, n_lags, forecast_horizon=1, feature_cols=None, label_col='Close'):
        self.n_lags = n_lags
        self.forecast_horizon = forecast_horizon
        self.label_col = label_col

        #allow selection of features
        if feature_cols:
          data = data[feature_cols].values

        #normalize features
        self.scaler = sklp.MinMaxScaler()
        data_scaled = self.scaler.fit_transform(data)

        #create usable data from scaled df
        self.X, self.y = self.create_sequences(data_scaled)

    def create_sequences(self, data):
        X, y = [], []
        for i in range(len(data) - self.n_lags - self.forecast_horizon + 1):
            X.append(data[i:i + self.n_lags])
            y.append(data[i + self.n_lags + self.forecast_horizon - 1][3])
        return np.array(X), np.array(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float), torch.tensor(self.y[idx], dtype=torch.float)


In [5]:
#params for dataloader
n_lags = 4 #look back 4 games
forecast_horizon = 1 #look ahead 1 game

ts_dataset = TimeSeriesDataset(nfl_df, n_lags, forecast_horizon)

ValueError: Expected 2D array, got scalar array instead:
array=nan.
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [None]:
class LSTM(nn.Module):
    def __init__(self, num_layers, input_size, hidden_size, seq_length, num_classes=2):
        """
        Inputs:
        num_layers: Number of recurrent layers
        input_size: Number of features for input
        hidden_size: Number of features in hidden state
        **these could be wrong**
        seq_length: Length of sequences in a batch
        num_classes: Number of categories for labels

        Outputs: none
        """
        super(LSTM, self).__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

        self.dense = nn.Sequential(
            nn.ReLU(),
            nn.Linear(num_layers * hidden_size, 128),
            nn.ReLU(),
            nn.Linear(128, 15) # One output node per "expected points entry" (15 including offense, defense, spt.)
        )

    def forward(self, x):
      '''
      Inputs:
      x: input data

      Outputs:
      out: output of forward pass
      '''

      out, (hn, cn) = self.lstm(x)

      hn = hn.transpose(0, 1).reshape(x.size(0), -1)

      x = self.dense(hn)

      return x

### Training / Validation / Testing:

In [None]:
def val(model, val_loader, criterion):
    """
    Inputs:
    model (torch.nn.Module): The deep learning model to be trained.
    val_data_loader (torch.utils.data.DataLoader): DataLoader for the validation dataset.
    criterion (torch.nn.Module): Loss function to compute the training loss.

    Outputs:
    Validation Loss
    """
    val_running_loss = 0
    num_correct = 0
    total = 0

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(val_loader, 0):

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item()


    return val_running_loss

In [None]:
def train(model, train_loader, val_loader, criterion, epochs, optimizer):
  """
    Inputs:
    model (torch.nn.Module): The deep learning model to be trained.
    train_loader (torch.utils.data.DataLoader): DataLoader for the training dataset.
    val_loader (torch.utils.data.DataLoader): DataLoader for the validation dataset.
    criterion (torch.nn.Module): Loss function to compute the training loss.
    epochs: Number of epochs to train for.
    optimizer: The optimizer to use during training.

    Outputs:
    Tuple of (train_loss_arr, val_loss_arr, val_acc_arr)
  """
  train_loss_arr = []
  val_loss_arr = []
  running_loss = 0.0


  for epoch in range(epochs):
      running_loss = 0.0
      for i, (inputs, labels) in enumerate(train_loader, 0):

        optimizer.zero_grad()
        preds = model(inputs)
        loss = criterion(preds, labels)

        running_loss += loss.item()

        loss.backward()
        optimizer.step()

      val_loss = val(model, val_loader, criterion)
      train_loss_arr.append(running_loss)
      val_loss_arr.append(val_loss)

      print("epoch:", epoch+1, "training loss:", running_loss, 'val loss:', round(val_loss, 3))

  print(running_loss)
  print('Training finished.')

  return train_loss_arr, val_loss_arr

In [None]:
num_layers = 2
input_size = 300
hidden_size = 64
seq_length = 40
num_classes = 2

batch_size = 16

train_size = int(0.8 * len(ts_dataset))
val_size = int(0.1*len(ts_dataset))
test_size = len(ts_dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(ts_dataset, [train_size, val_size, test_size])

# you may change the learning rate and numbers of epochs run
learning_rate = 0.01
lstm_epochs = 10

criterion = nn.MSELoss()

# Initialize LSTM model
lstm_model = LSTM(num_layers, input_size, hidden_size, seq_length, num_classes)

#Initialize optimizer
optimizer = optim.Adam(lstm_model.parameters(), lr=learning_rate)

#run training
lstm_train_loss, lstm_val_loss = train(lstm_model, train_loader, val_loader, criterion, lstm_epochs, optimizer)