In [3]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder, StandardScaler


In [35]:
## Reading in Data
game_data = pd.read_csv("data/games.csv")
player_play_data = pd.read_csv("data/player_play.csv")
play_data = pd.read_csv("data/plays.csv")
player_data = pd.read_csv("data/players.csv")
tracking_data = pd.read_csv("data/tracking_week_1.csv")

In [33]:
## Attempting to Analyze Defender Effectiveness

# Position variables
off_positions = ['QB', 'RB', 'FB', 'WR', 'TE', 'T', 'C', 'G']
def_positions = ['DE', 'NT', 'SS', 'FS', 'OLB', 'DT', 'CB', 'ILB', 'MLB', 'DB', 'LB']

# Filter for only Defensive Players
df_defensive_players = player_data[player_data["position"].isin(def_positions)]

# Sort by playId
sorted_plays = play_data.sort_values(by=["gameId", "playId"])

sorted_plays.to_csv("Test.csv", index=False)

# print(play_Ids)


In [None]:
## Dataset Class
class NFLPlayDataset(Dataset):
    def __init__(self, df):
        self.df = df.copy()

        # Encode categorical variables
        self.team_encoder = LabelEncoder()
        self.df['possessionTeam_enc'] = self.team_encoder.fit_transform(df['possessionTeam'])
        self.df['defensiveTeam_enc'] = self.team_encoder.fit_transform(df['defensiveTeam'])
        
        # Select features
        self.features = [
            'quarter', 'down', 'yardsToGo', 'absoluteYardlineNumber',
            'possessionTeam_enc', 'defensiveTeam_enc', 'expectedPoints'
        ]
        
        # Normalize numerical features
        self.scaler = StandardScaler()
        self.df[self.features] = self.scaler.fit_transform(self.df[self.features])

        self.target = self.df['yardsGained'].values  # Example: Yards Gained is the main target

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        X = torch.tensor(self.df.iloc[idx][self.features].values, dtype=torch.float32)
        y = torch.tensor(self.target[idx], dtype=torch.float32)
        return X, y


In [None]:
## Transformer Model
class NFLTransformerModel(nn.Module):
    def __init__(self, input_dim, d_model=64, nhead=4, num_layers=2):
        super(NFLTransformerModel, self).__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead), 
            num_layers=num_layers
        )
        
        # Regression head for yards (Mean + Variance for Bayesian output)
        self.yards_mean = nn.Linear(d_model, 1)
        self.yards_logvar = nn.Linear(d_model, 1)  # Predict log variance for stability

        # Classification head for turnover
        self.turnover_head = nn.Linear(d_model, 1)

    def forward(self, x):
        """
        x: (batch_size, sequence_length, input_dim)
        """
        x = self.embedding(x)  # Shape: (batch, seq_len, d_model)
        x = x.permute(1, 0, 2)  # Transformer expects (seq_len, batch, d_model)
        x = self.transformer(x)  # Output: (seq_len, batch, d_model)
        x = x[-1]  # Use the last token for prediction

        # Yardage prediction (Gaussian mean and variance)
        mean = self.yards_mean(x).squeeze()
        log_var = self.yards_logvar(x).squeeze()

        # Turnover classification
        turnover_logits = self.turnover_head(x).squeeze()
        turnover_prob = torch.sigmoid(turnover_logits)

        return mean, log_var, turnover_prob


In [7]:
def gaussian_nll_loss(mean, log_var, target):
    var = torch.exp(log_var)
    return torch.mean(0.5 * torch.log(var) + 0.5 * (target - mean) ** 2 / var)
bce_loss = nn.BCELoss()

In [8]:
def total_loss(mean, log_var, yards_target, turnover_prob, turnover_target):
    yard_loss = gaussian_nll_loss(mean, log_var, yards_target)
    turnover_loss = bce_loss(turnover_prob, turnover_target)
    return yard_loss + turnover_loss

In [11]:
dataset = NFLPlayDataset(play_data)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

model = NFLTransformerModel(input_dim=len(dataset.features))
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(10):
    for X_batch, y_batch in loader:
        optimizer.zero_grad()
        mean, log_var, turnover_prob = model(X_batch.unsqueeze(1))  # Add seq dim
        # Example: Fake binary target for turnover
        turnover_target = (y_batch < 0).float()
        loss = total_loss(mean, log_var, y_batch, turnover_prob, turnover_target)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch}: Loss = {loss.item()}")
    print(4)




TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.