In [None]:
#%% import packages:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import os


In [None]:
#%% check if cuda is available:
# Pick the first GPU when CUDA is usable, otherwise fall back to the CPU, and
# report which of the two was selected.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('cuda is available' if device.type == "cuda" else 'cuda is not available')

cuda is available


In [None]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

max_field_width = 53.3  # in yards
max_field_height = 120  # in yards
num_channels = 8  # 2 for position, 3 for player attributes, 3 for ball attributes
max_distance = 50.08118409143298  # not referenced by NFLDataset in this cell

class NFLDataset(Dataset):
    """Rasterise one player/ball row of a CSV into a multi-channel grid.

    Each row of the CSV becomes one sample. The ball is always drawn at the
    centre cell of the grid and the player is drawn at its offset from the
    ball, scaled by the field dimensions and clamped to the grid.

    Channel layout of the returned tensor (``num_channels`` x grid_size):
        0  player position (1 at the player's cell)
        1  ball position (1 at the centre cell)
        2  ``player_s``   at the player's cell
        3  ``player_a``   at the player's cell
        4  ``player_dir`` at the player's cell
        5  ``ball_s``     at the centre cell
        6  ``ball_a``     at the centre cell
        7  ``ball_dir``   at the centre cell

    Args:
        csv_file: Path of the CSV to load with ``pandas.read_csv``. The columns
            read per sample are ``ball_x``, ``ball_y``, ``player_x``,
            ``player_y``, ``player_s``, ``player_a``, ``player_dir``,
            ``ball_s``, ``ball_a``, ``ball_dir`` and ``tackle``.
        grid_size: ``(rows, cols)`` of the output grid.
        transform: Optional callable applied to the grid tensor before it is
            returned.
    """

    def __init__(self, csv_file, grid_size, transform=None):
        self.nfl_data = pd.read_csv(csv_file)
        self.grid_size = grid_size
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the loaded CSV."""
        return len(self.nfl_data)

    def __getitem__(self, idx):
        """Build the ``(grid_tensor, label)`` pair for row ``idx``.

        Returns:
            grid_tensor: float32 tensor of shape ``(num_channels, *grid_size)``.
            label: 0-dim float32 tensor holding the row's ``tackle`` value.
        """
        data_row = self.nfl_data.iloc[idx]

        # Allocate directly in float32: the tensor built below is float32, so a
        # float64 scratch array would only add a conversion pass.
        grid = np.zeros((num_channels, *self.grid_size), dtype=np.float32)

        # Center of the grid (where the ball is drawn).
        center_x, center_y = self.grid_size[0] // 2, self.grid_size[1] // 2

        ball_x, ball_y = data_row['ball_x'], data_row['ball_y']
        player_x, player_y = data_row['player_x'], data_row['player_y']

        # Player offset from the ball, scaled from yards to grid cells and
        # shifted so that a zero offset lands on the centre cell.
        # int() truncates toward zero, so every offset strictly between -1 and
        # +1 cell maps onto the centre cell.
        # NOTE(review): x is scaled by max_field_width (53.3) and y by
        # max_field_height (120) -- confirm this matches the axis convention of
        # the tracking data.
        norm_player_x = int((player_x - ball_x) / max_field_width * self.grid_size[0]) + center_x
        norm_player_y = int((player_y - ball_y) / max_field_height * self.grid_size[1]) + center_y

        # Clamp to the grid so far-away players end up on the border cell
        # instead of raising an IndexError.
        norm_player_x = max(0, min(self.grid_size[0] - 1, norm_player_x))
        norm_player_y = max(0, min(self.grid_size[1] - 1, norm_player_y))

        # Position layers.
        grid[0, norm_player_x, norm_player_y] = 1  # Player position layer
        grid[1, center_x, center_y] = 1  # Ball position layer (always at the center)

        # Attribute layers. The values are written raw: no normalisation is
        # applied here, so their scale differs from the 0/1 position layers.
        grid[2, norm_player_x, norm_player_y] = data_row['player_s']  # Player speed
        grid[3, norm_player_x, norm_player_y] = data_row['player_a']  # Player acceleration
        grid[4, norm_player_x, norm_player_y] = data_row['player_dir']  # Player direction
        grid[5, center_x, center_y] = data_row['ball_s']  # Ball speed
        grid[6, center_x, center_y] = data_row['ball_a']  # Ball acceleration
        grid[7, center_x, center_y] = data_row['ball_dir']  # Ball direction

        grid_tensor = torch.tensor(grid, dtype=torch.float32)
        label = torch.tensor(data_row['tackle'], dtype=torch.float32)

        if self.transform:
            grid_tensor = self.transform(grid_tensor)

        return grid_tensor, label


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class SimpleCNN(nn.Module):
    """Three-conv / three-dense binary classifier with a sigmoid output.

    Args:
        num_channels: Number of channels in the input grid.
        dropout_rate: Dropout probability applied after the third convolution.
        input_size: ``(height, width)`` of the input grid. Defaults to
            ``(150, 150)``, which yields the original ``64 * 75 * 75`` input
            width for the first dense layer.

    ``forward`` takes a ``(batch, num_channels, height, width)`` tensor and
    returns a ``(batch, 1)`` tensor of sigmoid outputs.
    """

    def __init__(self, num_channels=8, dropout_rate=0.0, input_size=(150, 150)):
        super().__init__()
        self.conv1 = nn.Conv2d(num_channels, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.dropout1 = nn.Dropout(p=dropout_rate)  # Dropout layer added
        # Derived from input_size instead of being hard-wired to 150x150.
        self.fc1 = nn.Linear(self._flattened_features(input_size), 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    @staticmethod
    def _flattened_features(input_size):
        """Return the length of the flattened conv output for ``input_size``.

        The 3x3 convolutions use padding=1 and keep the spatial size; the
        single 2x2 max-pool halves each dimension (rounding down), and the last
        convolution has 64 output channels.
        """
        height, width = input_size
        return 64 * (height // 2) * (width // 2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = F.relu(self.conv3(x))
        x = self.dropout1(x)  # Apply dropout after the third convolutional layer
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return torch.sigmoid(x)

class LargerCNN(nn.Module):
    """Four-conv / four-dense binary classifier with a sigmoid output.

    Args:
        num_channels: Number of channels in the input grid.
        input_size: ``(height, width)`` of the input grid. Defaults to
            ``(150, 150)``, which yields the original 175232 (= 128 * 37 * 37)
            input width for the first dense layer.

    ``forward`` takes a ``(batch, num_channels, height, width)`` tensor and
    returns a ``(batch, 1)`` tensor of sigmoid outputs.
    """

    def __init__(self, num_channels=8, input_size=(150, 150)):
        super().__init__()
        self.conv1 = nn.Conv2d(num_channels, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        # Derived from input_size instead of being hard-wired to 150x150.
        self.fc1 = nn.Linear(self._flattened_features(input_size), 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 1)

    @staticmethod
    def _flattened_features(input_size):
        """Return the length of the flattened conv output for ``input_size``.

        The 3x3 convolutions use padding=1 and keep the spatial size; each of
        the two 2x2 max-pools halves both dimensions (rounding down), and the
        last convolution has 128 output channels.
        """
        height, width = input_size
        return 128 * (height // 2 // 2) * (width // 2 // 2)

    def forward(self, x):
        # Convolutional layers with max pooling after the 2nd and 4th conv
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)

        x = F.relu(self.conv3(x))
        x = F.max_pool2d(F.relu(self.conv4(x)), 2)

        x = torch.flatten(x, 1)

        # Fully connected layers with ReLU activation
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))

        # Output layer with sigmoid activation for binary classification
        x = self.fc4(x)
        return torch.sigmoid(x)

In [None]:
def calculate_accuracy(data_loader, model, device=None):
    """Return the model's accuracy over ``data_loader`` as a percentage.

    Model outputs are thresholded at 0.5 and compared element-wise with the
    labels. Both are flattened first: the models in this notebook return
    ``(batch, 1)`` while labels are ``(batch,)``, and comparing those shapes
    directly broadcasts to ``(batch, batch)`` and over-counts for any batch
    size above 1.

    Args:
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        model: Module mapping ``inputs`` to one score per sample. It is put in
            eval mode and left there.
        device: Device to move each batch to. When ``None`` the device of the
            model's first parameter is used; if the model has no parameters the
            batches are left where they are.

    Returns:
        Accuracy in percent (0.0-100.0). An empty loader yields 0.0.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    correct_predictions = 0
    total_samples = 0

    model.eval()  # Set the model to evaluation mode

    with torch.no_grad():
        for inputs, labels in data_loader:
            if device is not None:
                inputs = inputs.to(device)
                labels = labels.to(device)

            # One binary prediction per sample, as a flat vector.
            predictions = (model(inputs).reshape(-1) >= 0.5).float()
            targets = labels.reshape(-1).float()

            correct_predictions += (predictions == targets).sum().item()
            total_samples += targets.numel()

    if total_samples == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return (correct_predictions / total_samples) * 100.0


In [None]:
# Mount Google Drive into the Colab VM at /content/drive so files stored on
# Drive can be read by path. Requires the google.colab package.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Change the working directory to the project folder on the mounted Drive.
# NOTE(review): the path is relative, so this presumably assumes the kernel's
# current directory is /content -- confirm before running out of order.
%cd 'drive/MyDrive/AU/Deep learning'



/content/drive/MyDrive/AU/Deep learning


In [None]:
from torch.utils.data import random_split
from torch.utils.data import DataLoader
# Load the full table; every row becomes one 150x150 multi-channel grid.
nfl_dataset = NFLDataset(csv_file='Data/extended_tackles_with_tracking_full.csv', grid_size=(150, 150))

# Define the proportions
total_size = len(nfl_dataset)
train_size = int(total_size * 0.7)  # 70% of data
val_size = int(total_size * 0.2)  # 20% of data
test_size = total_size - train_size - val_size  # Remainder (about 10%) for testing

# Split the dataset. The generator is seeded so that the same rows end up in
# train/val/test on every run; an unseeded split changes each time the cell
# is executed, which makes results impossible to reproduce or compare.
train_dataset, val_dataset, test_dataset = random_split(
    nfl_dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

# Create DataLoaders for each set (only the training data is shuffled)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)


In [None]:
!pip install ray wandb ray[tune]

Collecting ray
  Downloading ray-2.8.1-cp310-cp310-manylinux2014_x86_64.whl (62.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.6/62.6 MB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting wandb
  Downloading wandb-0.16.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m99.9 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.40-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.38.0-py2.py3-none-any.whl (252 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m252.8/252.8 kB[0m [31m31.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle 

In [None]:
def calculate_test_accuracy(test_loader, model, device):
    """Return the model's accuracy over ``test_loader`` as a percentage.

    Model outputs are thresholded at 0.5 and compared element-wise with the
    labels. Both are flattened first: the models in this notebook return
    ``(batch, 1)`` while labels are ``(batch,)``, and comparing those shapes
    directly broadcasts to ``(batch, batch)`` and over-counts for any batch
    size above 1.

    Args:
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        model: Module mapping ``inputs`` to one score per sample. It is put in
            eval mode and left there.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Accuracy in percent (0.0-100.0). An empty loader yields 0.0.
    """
    correct_predictions = 0
    total_samples = 0

    model.eval()  # Set the model to evaluation mode

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # One binary prediction per sample, as a flat vector.
            predictions = (model(inputs).reshape(-1) >= 0.5).float()
            targets = labels.reshape(-1).float()

            correct_predictions += (predictions == targets).sum().item()
            total_samples += targets.numel()

    if total_samples == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0
    return (correct_predictions / total_samples) * 100.0


In [None]:


import ray
from ray import tune
import time
import wandb
import os
import torch

import torch.nn.functional as F
# Authenticate with Weights & Biases. The API key is deliberately not written
# into the notebook: wandb.login() uses WANDB_API_KEY from the environment when
# it is set and otherwise asks for the key interactively.
wandb.login()

# Each setup is [lr, dropout, weight_decay, optimizer, epochs, run name].
training_setups = [
    [0.00028325, 0.0375715, 1.01857e-06, 'sgd', 20, 'run1'],
    [0.00314019, 0.2020876, 9.64192e-05, 'adam', 10,'run2'],
    [0.000825128, 0.403226, 2.22612e-06, 'sgd', 20,'run3'],
    [0.00075884, 0.0834711, 1.77119e-05, 'adam', 10,'run4'],
    [0.000119576, 0.336194, 0.000175564, 'sgd', 10,'run5'],
    [0.00118866, 0.4767, 0.000567221, 'adam', 20,'run6'],
    [0.296388, 0.357747, 2.3064e-05, 'sgd', 10,'run7'],
    [0.0980849, 0.456781, 0.000452278, 'sgd', 10,'run8'],
    [0.800791832, 0.396527, 0.000238396, 'adam', 20,'run9'],
    [0.820299, 0.30633, 1.63383e-06, 'sgd', 20,'run10'],
    [0.8742011, 0.969196, 0.000151901, 'adam', 20,'run11'],
    [0.0165513, 0.216144, 2.59366e-05, 'adam', 10,'run12']
]
# The subset of setups that is actually trained below.
training_setup3 = [[0.000825128, 0.403226, 2.22612e-06, 'sgd', 10,'run3_new'],
                    [0.00075884, 0.0834711, 1.77119e-05, 'adam', 10,'run4_new'],]

# Seed for the train/val/test split so every setup sees the same partition.
SPLIT_SEED = 42


def calculate_f1_score(data_loader, model, device):
    """Return the binary F1 score (0.0-1.0) of ``model`` over ``data_loader``.

    Outputs are thresholded at 0.5; labels >= 0.5 count as the positive class.
    Outputs and labels are flattened before comparison so ``(batch, 1)``
    outputs line up with ``(batch,)`` labels. Returns 0.0 when there are no
    true positives, false positives or false negatives at all.

    NOTE(review): no definition of this function is visible elsewhere in the
    notebook although the training loop calls it; it is defined here so the
    cell runs on a fresh kernel.
    """
    true_pos = false_pos = false_neg = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            predicted = model(inputs).reshape(-1) >= 0.5
            actual = labels.reshape(-1) >= 0.5
            true_pos += (predicted & actual).sum().item()
            false_pos += (predicted & ~actual).sum().item()
            false_neg += (~predicted & actual).sum().item()
    denominator = 2 * true_pos + false_pos + false_neg
    return 2 * true_pos / denominator if denominator else 0.0


# Load and split the data once. Re-reading the CSV and re-splitting it randomly
# inside the loop gave every setup a different train/val/test partition, so
# their metrics were not comparable.
nfl_dataset = NFLDataset(csv_file='Data/extended_tackles_with_tracking_full.csv', grid_size=(150, 150))

total_size = len(nfl_dataset)
train_size = int(total_size * 0.7)  # 70% of data
val_size = int(total_size * 0.2)  # 20% of data
test_size = total_size - train_size - val_size  # Remainder (about 10%) for testing

train_dataset, val_dataset, test_dataset = random_split(
    nfl_dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Use the GPU when there is one instead of assuming cuda:0 exists.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

for setup in training_setup3:
    lr, dropout, weight_decay, opt, epochs, exp_name = setup

    wandb.init(project="NFL2", name=exp_name,
               config={
                   "lr": lr,
                   "dropout": dropout,
                   "weight_decay": weight_decay,
                   "optimizer": opt,
                   "epochs": epochs

               })
    try:
        num_epochs = epochs
        # Pass the dropout rate through; it was previously logged to wandb but
        # the model was always built with its default of 0.0.
        model = SimpleCNN(dropout_rate=dropout)
        model = model.to(device)
        loss_function = nn.BCELoss()
        if opt == "adam":
            optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        elif opt == "sgd":
            optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
        else:
            # Fail loudly rather than reusing an optimizer from a previous run.
            raise ValueError(f"Unsupported optimizer: {opt!r}")

        for epoch in range(num_epochs):
            model.train()
            total_loss_train = 0.0  # Sum of per-sample training losses
            start_time = time.time()
            for inputs, labels in train_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs).squeeze(1)
                loss = loss_function(outputs, labels.float())
                loss.backward()
                optimizer.step()
                # Weight the batch-mean loss by the number of samples in the
                # batch. len(batch) was used before, but a batch is the
                # [inputs, labels] pair, so that factor was always 2.
                total_loss_train += loss.item() * inputs.size(0)
            average_loss_train = total_loss_train / len(train_loader.dataset)
            # Calculate training accuracy
            train_accuracy = calculate_accuracy(train_loader, model)

            # Validation loss: eval mode and no autograd bookkeeping.
            model.eval()
            total_loss_val = 0.0  # Sum of per-sample validation losses
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    outputs = model(inputs).squeeze(1)
                    loss = loss_function(outputs, labels.float())
                    total_loss_val += loss.item() * inputs.size(0)
            end_time = time.time()
            average_loss_val = total_loss_val / len(val_loader.dataset)
            # Calculate validation accuracy and F1
            val_accuracy = calculate_accuracy(val_loader, model)
            epoch_time = end_time - start_time
            val_f1 = calculate_f1_score(val_loader, model, device)

            print(f'Run name: {exp_name},Epoch {epoch+1}/{num_epochs}, Training Loss: {average_loss_train:.4f}, Training Accuracy: {train_accuracy:.4f}%, Validation Loss: {average_loss_val:.4f}, Validation Accuracy: {val_accuracy:.4f}%, Epoch time: {epoch_time}s')
            wandb.log({"trainLoss": average_loss_train,
                       "trainAcc": train_accuracy,
                       "epoch": epoch,
                       "vLoss": average_loss_val,
                       "vAcc": val_accuracy,
                       "valF1":val_f1})

        # Final evaluation on the held-out test split.
        model.eval()
        with torch.no_grad():
            test_accuracy = calculate_test_accuracy(test_loader, model, device)
            test_f1 = calculate_f1_score(test_loader, model, device)
        wandb.log({"testAcc": test_accuracy, "testF1": test_f1})
    finally:
        # Close the wandb run even if training raised, so the next setup
        # starts a fresh run.
        wandb.finish()



Run name: run3_new,Epoch 1/10, Training Loss: 0.9518, Training Accuracy: 81.9461%, Validation Loss: 0.9740, Validation Accuracy: 80.1115%, Epoch time: 46.33906030654907s
Run name: run3_new,Epoch 2/10, Training Loss: 0.8885, Training Accuracy: 82.6762%, Validation Loss: 0.9072, Validation Accuracy: 80.7156%, Epoch time: 45.0331175327301s
Run name: run3_new,Epoch 3/10, Training Loss: 0.7975, Training Accuracy: 85.0657%, Validation Loss: 0.8185, Validation Accuracy: 82.9926%, Epoch time: 44.96333384513855s
Run name: run3_new,Epoch 4/10, Training Loss: 0.7377, Training Accuracy: 84.9993%, Validation Loss: 0.8619, Validation Accuracy: 83.0855%, Epoch time: 44.52966547012329s
Run name: run3_new,Epoch 5/10, Training Loss: 0.6937, Training Accuracy: 87.3755%, Validation Loss: 0.7908, Validation Accuracy: 84.1078%, Epoch time: 45.210005044937134s
Run name: run3_new,Epoch 6/10, Training Loss: 0.6731, Training Accuracy: 86.5260%, Validation Loss: 0.8284, Validation Accuracy: 84.2937%, Epoch time:

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▃▄▅▆▆▇█
testAcc,▁
trainAcc,▁▂▄▄▆▅▇███
trainLoss,█▇▅▄▃▂▂▁▁▁
vAcc,▁▂▄▅▆▆▇▇██
vLoss,█▆▄▅▃▄▃▂▁▁

0,1
epoch,9.0
testAcc,85.88672
trainAcc,89.12784
trainLoss,0.61095
vAcc,85.7342
vLoss,0.72272


Run name: run4_new,Epoch 1/10, Training Loss: 0.9892, Training Accuracy: 81.2160%, Validation Loss: 0.9170, Validation Accuracy: 82.4349%, Epoch time: 59.50867819786072s
Run name: run4_new,Epoch 2/10, Training Loss: 0.9122, Training Accuracy: 81.2160%, Validation Loss: 0.8112, Validation Accuracy: 82.4349%, Epoch time: 59.449798345565796s
Run name: run4_new,Epoch 3/10, Training Loss: 0.7929, Training Accuracy: 87.2693%, Validation Loss: 0.7026, Validation Accuracy: 86.1524%, Epoch time: 59.70113563537598s
Run name: run4_new,Epoch 4/10, Training Loss: 3.8902, Training Accuracy: 81.2160%, Validation Loss: 35.1301, Validation Accuracy: 82.4349%, Epoch time: 61.548301696777344s
Run name: run4_new,Epoch 5/10, Training Loss: 20.7378, Training Accuracy: 86.4065%, Validation Loss: 0.7638, Validation Accuracy: 85.8271%, Epoch time: 60.63689303398132s
Run name: run4_new,Epoch 6/10, Training Loss: 0.6658, Training Accuracy: 89.1411%, Validation Loss: 0.6950, Validation Accuracy: 87.5000%, Epoch t

VBox(children=(Label(value='0.002 MB of 0.013 MB uploaded\r'), FloatProgress(value=0.13778041015769601, max=1.…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
testAcc,▁
trainAcc,▁▁▅▁▅▇▇▇▇█
trainLoss,▁▁▁▂█▁▁▁▁▁
vAcc,▁▁▅▁▅▇▇▇▆█
vLoss,▁▁▁█▁▁▁▁▁▁

0,1
epoch,9.0
testAcc,89.22934
trainAcc,90.86685
trainLoss,0.60455
vAcc,88.47584
vLoss,0.63489


In [None]:
# Close any wandb run that is still open. The training cell already calls
# wandb.finish() after each setup, so this only matters if that cell was
# interrupted before reaching it.
wandb.finish()

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))