## Kaggle Predictions

### Packages 

In [61]:
import pandas as pd
import polars as pl
import numpy as np
import os
import matplotlib.pyplot as plt
import torch
from torch.nn.utils.rnn import pad_sequence
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Dataset, ConcatDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch.nn.functional as F
from sklearn.preprocessing import MinMaxScaler
import gc
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
# Seed PyTorch's global RNG so randomly initialised tensors are repeatable.
torch.manual_seed(26)
# NOTE(review): machine-specific absolute path; this line fails on any other
# machine. All relative paths used afterwards resolve against this directory.
os.chdir('C:/Users/dalto/OneDrive/Pictures/Documents/Emory/NFL Lab/data/')

In [5]:
# Week-1 2023 tracking input; further down it is only used to fit the
# position/delta scalers (var_mean_scalars(df)).
# NOTE(review): machine-specific absolute path.
df = pd.read_csv('C:/Users/dalto/OneDrive/Pictures/Documents/Emory/NFL Lab/data/train/input_2023_w01.csv')

In [6]:
# One integer index per distinct (game_id, play_id) pair.
df['play_id_n'] = df.groupby(['game_id', 'play_id']).ngroup()

# Clean the direction labels, then mirror every left-moving play on both axes
# (y -> 53.3 - y, x -> 120 - x) so all plays share one orientation.
df['play_direction'] = df['play_direction'].str.strip().str.lower()
moving_left = df['play_direction'] == 'left'
df.loc[moving_left, 'y'] = 53.3 - df.loc[moving_left, 'y']
df.loc[moving_left, 'x'] = 120 - df.loc[moving_left, 'x']

In [7]:
# Number of target-player slots per play; also sets the heatmap channel count
# (2 + max_targets + 1) read by CNN_DownSample.
max_targets = 9
# NOTE(review): not referenced anywhere in the visible code.
max_input = 123
# Number of autoregressive steps the decoder produces (passed as max_seq_len
# when SeqPrediction builds its TransDecoder).
max_output = 94

### Model

In [8]:
class PositionalEncoding(nn.Module):
    """Add a fixed sinusoidal position table to a sequence, then apply dropout.

    Args:
        embed_size: Width of the last dimension of the inputs.
        dropout: Dropout probability applied after the table is added.
        max_length: Number of positions pre-computed in the table; inputs
            longer than this along dim 1 cannot be encoded.
    """

    def __init__(self, embed_size, dropout, max_length=150):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        # (max_length, 1) column of position indices
        steps = torch.arange(0, max_length, dtype=torch.float).unsqueeze(1)

        # one frequency per sin/cos column pair
        freqs = torch.exp(
            torch.arange(0, embed_size, 2).float()
            * (-torch.log(torch.tensor(10000.0)) / embed_size)
        )

        table = torch.zeros(max_length, embed_size)

        # even columns carry the sine terms
        table[:, 0::2] = torch.sin(steps * freqs)

        # odd columns carry the cosine terms; with an odd embed_size there is
        # one fewer odd column, so the last frequency is left out
        if embed_size % 2 == 1:
            odd_freqs = freqs[:-1]
        else:
            odd_freqs = freqs
        table[:, 1::2] = torch.cos(steps * odd_freqs)

        # a buffer (not a parameter): saved in the state dict and moved with
        # the module, but never trained
        self.register_buffer('pe', table)

    def forward(self, x):
        """Return ``dropout(x + pe[:x.size(1)])``; dim 1 of ``x`` indexes position."""
        n_positions = x.size(1)
        encoded = x + self.pe[:n_positions, :].to(x.device)
        return self.dropout(encoded)

In [9]:
class PlayerPositionEmbedding(nn.Module):
    """Learnable embedding for integer player categories, zeroed on masked slots.

    Args:
        embed_size: Width of each embedding vector.
        num_positions: Number of rows in the table; valid ids are
            ``0 .. num_positions - 1``.
    """

    def __init__(self, embed_size, num_positions = 19):
        super().__init__()
        self.embed_size = embed_size
        self.position_projection = nn.Embedding(num_positions, embed_size)

    def forward(self, player_positons, target_masks):
        """Look up one vector per id and multiply it by the slot's mask value.

        A trailing singleton dimension on ``player_positons`` is dropped
        before the lookup. ``target_masks`` must have the shape of the
        squeezed ids; slots where it is False/0 come back as zero vectors.
        """
        ids = player_positons.long().squeeze(-1)
        vectors = self.position_projection(ids)

        # broadcast the per-slot mask over the embedding dimension
        keep = target_masks.unsqueeze(-1).expand_as(vectors).float()
        return vectors * keep

In [10]:
class FourierEmbedding(nn.Module):
    """Random Fourier feature embedding followed by a learned linear layer.

    Inputs are projected through a fixed random matrix ``B`` (drawn from a
    normal distribution and multiplied by ``scale``), mapped to sin/cos
    features, and mixed by ``out_proj``.

    Args:
        input_dim: Size of the last dimension of the inputs.
        embed_dim: Output width; ``embed_dim // 2`` random frequencies are
            drawn, so it should be even for the sin/cos concat to match
            ``out_proj``.
        scale: Multiplier applied to the random matrix.
    """

    def __init__(self, input_dim, embed_dim, scale=10.0):
        super().__init__()
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.scale = scale

        # Stored as a non-trainable parameter so it is saved with the model.
        # It is sampled before out_proj is built, keeping the RNG draw order.
        n_frequencies = embed_dim // 2
        random_matrix = torch.randn(input_dim, n_frequencies) * scale
        self.B = nn.Parameter(random_matrix, requires_grad=False)

        self.out_proj = nn.Linear(embed_dim, embed_dim)

    def forward(self, x):
        """Embed ``x`` (last dim ``input_dim``) into ``embed_dim`` features."""
        # explicit float32: the projection should not run in reduced
        # precision under autocast
        angles = (2 * np.pi * x.float()) @ self.B.float()
        features = torch.cat((torch.sin(angles), torch.cos(angles)), dim=-1)
        return self.out_proj(features)

In [11]:
class SpatialSoftmax(nn.Module):
    """Reduce each feature-map channel to its softmax-weighted (x, y) location.

    Every channel is flattened, turned into a probability distribution with a
    softmax over all pixels, and summarised by the expected x and y
    coordinate on a grid normalised to ``[-1, 1]``.

    Args:
        height: Height of the feature maps that will be passed to ``forward``.
        width: Width of the feature maps that will be passed to ``forward``.
        device: Device on which the coordinate buffers are created. If a CUDA
            device is requested but CUDA is unavailable, the buffers are
            created on the CPU instead of raising. The buffers are registered,
            so ``module.to(...)`` moves them afterwards.
    """

    def __init__(self, height, width, device='cuda'):
        super().__init__()
        self.height = height
        self.width = width

        # Building on a CPU-only machine must not crash just because of the
        # default argument.
        if torch.device(device).type == 'cuda' and not torch.cuda.is_available():
            device = 'cpu'
        self.device = device

        # pos_x varies along the width, pos_y along the height; both are
        # flattened row-major to line up with the flattened feature map.
        pos_x, pos_y = np.meshgrid(np.linspace(-1., 1., width),
                                   np.linspace(-1., 1., height))

        pos_x = torch.from_numpy(pos_x.reshape(self.height * self.width)).float().to(device)
        pos_y = torch.from_numpy(pos_y.reshape(self.height * self.width)).float().to(device)

        # Persistent buffers: part of the state dict, never trained.
        self.register_buffer('pos_x', pos_x)
        self.register_buffer('pos_y', pos_y)

    def forward(self, feature_map):
        """Return expected coordinates, shape ``(B, 2 * C)``.

        Args:
            feature_map: Tensor of shape ``(B, C, H, W)`` with
                ``H * W == height * width``.

        Returns:
            For each channel the pair ``(expected_x, expected_y)``, flattened
            channel by channel.
        """
        B, C, H, W = feature_map.shape
        # flatten(2) also accepts non-contiguous inputs
        feature_flat = feature_map.flatten(2)
        softmax_attn = F.softmax(feature_flat, dim=-1)

        expected_x = torch.sum(self.pos_x * softmax_attn, dim=2, keepdim=True)
        expected_y = torch.sum(self.pos_y * softmax_attn, dim=2, keepdim=True)

        expected_xy = torch.cat([expected_x, expected_y], dim=2)

        return expected_xy.reshape(B, -1)

class CNN_DownSample(nn.Module):
    """Encode one multi-channel heatmap frame into a ``dim``-wide vector.

    Three Conv-BatchNorm-GELU stages (the first with stride 2) are followed
    by a spatial softmax that turns each of the 64 output channels into an
    expected (x, y) location, and a linear layer maps those 128 numbers to
    ``dim`` features.

    Args:
        dim: Width of the output embedding.
    """

    def __init__(self, dim):
        super().__init__()

        def conv_stage(c_in, c_out, stride):
            # one Conv -> BatchNorm -> GELU stage, returned as a flat list so
            # the Sequential keeps indices 0..8
            return [
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, stride=stride, padding=1),
                nn.BatchNorm2d(c_out),
                nn.GELU(),
            ]

        # offense + defense channels, one channel per target slot, one ball
        # channel; reads the module-level max_targets
        input_chan = 2 + max_targets + 1

        self.heatmap_encoder = nn.Sequential(
            *conv_stage(input_chan, 32, 2),
            *conv_stage(32, 64, 1),
            *conv_stage(64, 64, 1),
        )

        # 28 x 61 is what the stride-2 stage produces for a 55 x 121 input
        self.spatial_softmax = SpatialSoftmax(height=28, width=61)

        # 64 channels * (x, y) = 128 keypoint coordinates -> embedding
        self.output_proj = nn.Linear(128, dim)

    def forward(self, x):
        """Run conv stack, spatial softmax and projection on a 4-D batch of frames."""
        feature_maps = self.heatmap_encoder(x)
        keypoints = self.spatial_softmax(feature_maps)
        return self.output_proj(keypoints)

In [12]:
class TransEncoder(nn.Module):
    """Transformer encoder over a sequence of per-frame feature vectors.

    Inputs are linearly projected to ``embed_size``, given sinusoidal
    positional encodings, passed through a stack of pre-norm encoder layers
    and finally layer-normalised.

    Args:
        input_dim: Width of the incoming feature vectors.
        embed_size: Model width (also used as the feed-forward width).
        num_layers: Number of encoder layers.
        nhead: Number of attention heads.
        device: Stored on the instance; not otherwise used here.
        dropout: Dropout probability for the positional encoding and layers.
        mask: Accepted but unused by the constructor.
        max_length: Longest sequence the positional encoding supports.
    """

    def __init__(self, input_dim, embed_size, num_layers, nhead, device, dropout, mask, max_length):
        super().__init__()
        self.embed_size = embed_size
        self.device = device

        # learned projection of the raw features into model width
        self.input_projection = nn.Linear(input_dim, embed_size)

        # fixed sinusoidal positions + dropout
        self.position_encoding = PositionalEncoding(embed_size, dropout, max_length)

        # a single pre-norm layer, replicated num_layers times by the stack
        layer_kwargs = dict(
            d_model=embed_size,
            nhead=nhead,
            dim_feedforward=embed_size,
            dropout=dropout,
            batch_first=True,
            norm_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(**layer_kwargs),
            num_layers=num_layers,
        )

        # final layer norm on the encoder output
        self.norm = nn.LayerNorm(embed_size)

    def forward(self, x, mask):
        """Encode ``x``; ``mask`` is a bool tensor that is True on real (non-padded) steps."""
        tokens = self.position_encoding(self.input_projection(x))
        # PyTorch expects True on the positions to ignore, i.e. the inverse
        padding = ~mask
        attended = self.transformer_encoder(tokens, src_key_padding_mask=padding)
        return self.norm(attended)

In [13]:
class TransDecoder(nn.Module):
    """Autoregressive transformer decoder that emits one 5-value step per target slot.

    Every target slot of every play is decoded as its own sequence: the first
    token is built from the slot's start position plus its role/position
    embeddings, and each later token is built from the previous prediction
    plus the same embeddings. Decoding always runs for ``max_seq_len`` steps.

    Constructor arguments:
        target_mask: Accepted but not used by the constructor.
        embedding: Model width.
        dropout: Dropout of the decoder layers (the positional encoding uses
            a hard-coded 0.15 instead).
        nhead: Number of attention heads.
        layers: Number of decoder layers.
        max_targets: Number of target slots per play.
        max_step_change: Stored as ``self.max_step``; not used in this class.
        max_seq_len: Number of steps generated by ``val_forward``.
    """

    def __init__(self, target_mask, embedding, dropout, nhead, layers, max_targets, max_step_change, max_seq_len):
        super(TransDecoder, self).__init__()
        self.max_targets = max_targets
        self.max_seq_len = max_seq_len
        self.embedding = embedding
        self.max_step = max_step_change
        # widths of the categorical embeddings concatenated before fusion
        self.pos_dim = 8
        self.role_dim = 2

        # embeds the 2-D start coordinates into model width
        self.start_pos_projection = FourierEmbedding(2, embedding, scale=1.0)

        # embeds a previous 5-value prediction into model width
        self.delta_projection = FourierEmbedding(5, embedding, scale=5.0)

        # player position embedding (19 ids)
        self.player_pos_embedding = PlayerPositionEmbedding(self.pos_dim, 19)

        # player role embedding (4 ids)
        self.player_role_embedding = PlayerPositionEmbedding(self.role_dim, 4)

        # fuses [token embedding, categorical embeddings] back to model width
        self.input_fusion = nn.Linear(embedding + self.role_dim + self.pos_dim, embedding)

        # maps a decoder state to the 5 predicted values of one step
        self.output_projection = nn.Linear(embedding, 5)

        # positional encoding of the decoder history
        # NOTE(review): dropout is fixed at 0.15 here; the `dropout` argument is not used for it
        self.pos_embed = PositionalEncoding(embed_size=embedding, dropout=0.15, max_length=150)

        # decoder layer (pre-norm, feed-forward width equal to model width)
        decoder_layer = nn.TransformerDecoderLayer(d_model=embedding,
                                                    nhead=nhead,
                                                    dim_feedforward=embedding,
                                                    dropout=dropout,
                                                    batch_first=True,
                                                    norm_first=True)

        # decoder stack
        self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=layers)

        # final layer norm on the decoder output
        self.norm = nn.LayerNorm(embedding)

    def forward(self, encoded_context, start_positions, target_mask, player_position, player_role, encoder_padding_mask=None):
        """Embed the categorical inputs and run autoregressive decoding.

        ``target_mask`` zeroes the position/role embeddings of unused slots.
        Returns whatever ``val_forward`` returns.
        """
        # position embeddings, zeroed where target_mask is False
        player_pos_embeds = self.player_pos_embedding(player_position, target_mask)
        # role embeddings, zeroed where target_mask is False
        player_role_embeds = self.player_role_embedding(player_role, target_mask)
        # only the autoregressive path exists in this class
        return self.val_forward(encoded_context, start_positions, player_pos_embeds, player_role_embeds, encoder_padding_mask)

    def val_forward(self, encoded_context, start_positions, player_pos_embeds, player_role_embeds, encoder_padding_mask):
        """Generate ``max_seq_len`` steps for every target slot.

        Returns a tensor of shape
        ``(batch_size, max_targets, max_seq_len, 5)``.
        Each step re-runs the decoder over the entire history so far.
        """
        device = encoded_context.device
        batch_size = encoded_context.shape[0]

        # first token: embedded start position, with a length-1 time axis inserted
        current_input = self.start_pos_projection(start_positions).unsqueeze(2)

        # categorical embeddings with the same length-1 time axis
        pos_embeds = player_pos_embeds.unsqueeze(2)
        role_embeds = player_role_embeds.unsqueeze(2)
        # NOTE(review): concat order here is (token, role, pos) but inside the loop
        # below it is (token, pos, role); both feed the same input_fusion layer.
        # Confirm against the training code before changing either one.
        combined_init = torch.cat([current_input, role_embeds, pos_embeds], dim=-1)
        current_input_fused = self.input_fusion(combined_init)

        # one sequence per (play, slot): (batch_size * max_targets, 1, embedding)
        decoder_history = current_input_fused.view(batch_size * self.max_targets, 1, self.embedding)

        # repeat each play's encoder output once per target slot
        expanded_context = encoded_context.repeat_interleave(self.max_targets, dim=0)

        # repeat the encoder padding mask the same way
        if encoder_padding_mask is not None:
            memory_key_padding_mask = encoder_padding_mask.repeat_interleave(self.max_targets, dim=0)
        else:
            memory_key_padding_mask = None

        all_predictions = []

        # generate the output sequence one step at a time
        for step in range(self.max_seq_len):

            # add positional encodings to the whole history
            history_encoded = self.pos_embed(decoder_history)

            # causal mask over the current history length
            seq_len = history_encoded.shape[1]
            tgt_mask = nn.Transformer.generate_square_subsequent_mask(seq_len, device=device)

            # run the full history through the decoder stack
            decoded = self.transformer_decoder(
                tgt = history_encoded,
                memory = expanded_context,
                tgt_mask=tgt_mask,
                memory_key_padding_mask=memory_key_padding_mask)

            decoded = self.norm(decoded)

            # only the newest position yields a prediction
            last_step_output = decoded[:, -1:, :]
            next_delta_pred = self.output_projection(last_step_output)

            all_predictions.append(next_delta_pred.view(batch_size, self.max_targets, 5))

            # embed the prediction so it can be fed back as the next token
            next_input_embed = self.delta_projection(next_delta_pred)

            # re-attach the slot's categorical embeddings (order: pos, role — see note above)
            player_pos_flat = player_pos_embeds.view(batch_size * self.max_targets, 1, -1)
            player_role_flat = player_role_embeds.view(batch_size * self.max_targets, 1, -1)
            combined_next = torch.cat([next_input_embed, player_pos_flat, player_role_flat], dim=-1)
            next_input_fused = self.input_fusion(combined_next)

            # append the new token to the history
            decoder_history = torch.cat([decoder_history, next_input_fused], dim=1)

        # stack the per-step predictions along a new step axis
        return torch.stack(all_predictions, dim=2)

In [14]:
class SeqPrediction(nn.Module):
    """Heatmap-sequence encoder plus autoregressive per-target decoder.

    Each input frame is embedded by ``CNN_DownSample``, the frame embeddings
    are encoded by ``TransEncoder``, and ``TransDecoder`` generates a fixed
    number of steps (the module-level ``max_output``) for every target slot.

    Args:
        embed_size: Model width shared by CNN output, encoder and decoder.
        encoder_layers: Number of transformer encoder layers.
        decoder_layers: Number of transformer decoder layers.
        max_targets: Number of target slots per play.
        dropout: Dropout probability for encoder and decoder layers.
        nheads: Number of attention heads.
        max_step: Forwarded to the decoder as ``max_step_change``.
        dev: Stored on the instance and forwarded to the encoder.
    """

    def __init__(self, embed_size, encoder_layers, decoder_layers,
                 max_targets, dropout, nheads, max_step, dev='cuda') -> None:
        super().__init__()

        self.embedding_size = embed_size
        self.max_targets = max_targets
        self.device = dev

        # per-frame heatmap encoder
        self.context_cnn = CNN_DownSample(dim=embed_size)

        # temporal encoder over the frame embeddings
        self.encoder = TransEncoder(input_dim=embed_size,
                                    embed_size=embed_size,
                                    num_layers=encoder_layers,
                                    device=dev,
                                    nhead=nheads,
                                    mask=None,
                                    dropout=dropout,
                                    max_length=150)

        # autoregressive decoder; output length comes from the global max_output
        self.decoder = TransDecoder(target_mask=None,
                                    embedding=embed_size,
                                    dropout=dropout,
                                    nhead=nheads,
                                    layers=decoder_layers,
                                    max_targets=max_targets,
                                    max_step_change=max_step,
                                    max_seq_len=max_output)

    def forward(self, heatmap_sequence, start_pos, target_mask,
                input_lengths, player_positions, player_role):
        """Predict the decoder output for a batch of plays.

        Args:
            heatmap_sequence: Tensor whose first two dimensions are
                ``(batch, time)``; every time slice is passed to the CNN.
            start_pos: Start coordinates per target slot.
            target_mask: Bool mask of the target slots that are in use.
            input_lengths: Iterable with the number of real (non-padded)
                frames of each play, in batch order.
            player_positions: Integer position id per target slot.
            player_role: Integer role id per target slot.

        Returns:
            The decoder output, shape ``(batch, max_targets, steps, 5)``.

        Raises:
            ValueError: If the encoder output contains NaN or Inf.
        """
        batch_size, seq_len = heatmap_sequence.shape[:2]

        # The CNN is applied frame by frame, as before, so BatchNorm sees the
        # same per-call batches.
        sequence_feat = torch.stack(
            [self.context_cnn(heatmap_sequence[:, t]).flatten(1) for t in range(seq_len)],
            dim=1,
        )

        # True on real frames, False on padding
        encoder_mask = torch.zeros(batch_size, seq_len, device=heatmap_sequence.device, dtype=torch.bool)
        for i, length in enumerate(input_lengths):
            encoder_mask[i, :length] = True

        encoded_context = self.encoder(sequence_feat, mask=encoder_mask)

        # Fail loudly with an actionable message instead of dumping the whole
        # tensor to stdout and raising an empty error.
        if not torch.isfinite(encoded_context).all():
            bad_plays = (~torch.isfinite(encoded_context)).flatten(1).any(dim=1).nonzero().flatten().tolist()
            raise ValueError(
                "Encoder output contains NaN/Inf values for batch indices "
                f"{bad_plays}; check for zero-length input sequences or "
                "non-finite heatmap values."
            )

        # the decoder expects True on padded encoder positions
        encoder_padding_mask = ~encoder_mask

        return self.decoder(encoded_context, start_pos, target_mask,
                            player_positions, player_role, encoder_padding_mask)

In [15]:
def var_mean_scalars(df_tracking):
    """Fit the min-max scalers for field positions and per-frame movement.

    Args:
        df_tracking: Tracking frame with columns ``x``, ``y``, ``play_id_n``,
            ``nfl_id`` and ``frame_id``.

    Returns:
        ``(pos_scaler, delta_scaler)``: two ``MinMaxScaler`` objects mapping
        to ``[-1, 1]``, fitted on the raw (x, y) values and on the
        frame-to-frame (x, y) differences of each player within a play.
    """
    # field coordinates -> [-1, 1]
    pos_scaler = MinMaxScaler(feature_range=(-1, 1))
    pos_scaler.fit(df_tracking[['x', 'y']].values)

    # order each player's frames in time, then difference consecutive frames
    ordered = df_tracking.sort_values(['play_id_n', 'nfl_id', 'frame_id'])
    per_player = ordered.groupby(['play_id_n', 'nfl_id'])[['x', 'y']]
    step_deltas = per_player.diff()
    step_deltas.columns = ['delta_x', 'delta_y']

    # the first frame of every player has no predecessor, so its diff is NaN
    step_deltas = step_deltas.dropna()

    # movement per frame -> [-1, 1]
    delta_scaler = MinMaxScaler(feature_range=(-1, 1))
    delta_scaler.fit(step_deltas[['delta_x', 'delta_y']].values)

    return pos_scaler, delta_scaler

pos, delta = var_mean_scalars(df)

In [16]:
# Build the network on the GPU. The layer sizes here must match the checkpoint
# that is loaded into it afterwards with load_state_dict.
# NOTE(review): 'cuda' is hard-coded, so this needs a CUDA device.
model = SeqPrediction(embed_size=64, encoder_layers=7, decoder_layers=2,
                   max_targets=max_targets, max_step=1.4, dropout=0.2, nheads=2 , dev='cuda').to('cuda')



In [17]:
# Deserialize onto the CPU (load_state_dict then copies into the model's own
# device) and restrict unpickling to tensors and plain containers, which is
# all a state dict needs.
state_dict = torch.load(
    "C:/Users/dalto/OneDrive/Pictures/Documents/Emory/NFL Lab/model_weights/11_24_2.8_64_7_2.pth",
    map_location="cpu",
    weights_only=True,
)
model.load_state_dict(state_dict)

<All keys matched successfully>

##### Pixel Mapping

In [18]:
def create_gaussian_kernel(sigma=0.8, kernel_size=11):
    """Return a square 2-D Gaussian bump whose largest value is exactly 1.

    Args:
        sigma: Standard deviation of the Gaussian, in grid cells.
        kernel_size: Side length of the kernel.

    Returns:
        ``(kernel_size, kernel_size)`` float array, centred on the middle of
        the kernel and divided by its maximum.
    """
    half_extent = (kernel_size - 1) / 2.
    offsets = np.linspace(-half_extent, half_extent, kernel_size)
    col_offsets, row_offsets = np.meshgrid(offsets, offsets)
    squared_radius = np.square(col_offsets) + np.square(row_offsets)
    bump = np.exp(-0.5 * squared_radius / np.square(sigma))
    return bump / bump.max()

# shared kernel stamped onto the heatmaps
GAUSSIAN_KERNEL = create_gaussian_kernel(sigma=0.8, kernel_size=11)

In [42]:
def pixel_map_vectorized(player_data, target_player_ids, max_targets, grid_width=121, grid_height=55, kernel=None):
    """Rasterise one frame of tracking data into a stack of heatmaps.

    Channels: 0 = offense, 1 = defense, ``2 .. 2 + max_targets - 1`` = one
    channel per target player (in the order given), last = ball landing spot.
    Every point is stamped as a copy of ``kernel`` centred on its rounded
    (x, y) cell and clipped at the grid border. Points whose kernel footprint
    lies completely outside the grid, or whose coordinates are not finite,
    contribute nothing.

    Args:
        player_data: DataFrame for a single frame with columns
            ``player_side``, ``nfl_id``, ``x``, ``y``, ``ball_land_x`` and
            ``ball_land_y``.
        target_player_ids: Ids of the target players; only the first
            ``max_targets`` are used.
        max_targets: Number of target channels.
        grid_width: Number of grid columns (x axis).
        grid_height: Number of grid rows (y axis).
        kernel: Square 2-D array to stamp; defaults to the module-level
            ``GAUSSIAN_KERNEL``.

    Returns:
        ``float32`` array of shape ``(2 + max_targets + 1, grid_height, grid_width)``.
    """
    if kernel is None:
        kernel = GAUSSIAN_KERNEL
    kernel_r = kernel.shape[0] // 2

    num_channels = 2 + max_targets + 1
    output_map = np.zeros((num_channels, grid_height, grid_width), dtype=np.float32)

    def add_points_to_channel(channel_idx, x_coords, y_coords):
        """Stamp the kernel onto ``channel_idx`` at every given point."""
        xs = np.asarray(x_coords, dtype=float)
        ys = np.asarray(y_coords, dtype=float)

        # NaN/Inf cannot be turned into a grid cell
        usable = np.isfinite(xs) & np.isfinite(ys)
        x_ints = np.round(xs[usable]).astype(int)
        y_ints = np.round(ys[usable]).astype(int)

        for x, y in zip(x_ints, y_ints):
            # footprint of the kernel, clipped to the grid
            x_start = max(0, x - kernel_r)
            x_end = min(grid_width, x + kernel_r + 1)
            y_start = max(0, y - kernel_r)
            y_end = min(grid_height, y + kernel_r + 1)

            # Footprint entirely off the grid: without this guard the two
            # slices below have different shapes (or a negative end index
            # wraps around) and NumPy raises a broadcast error.
            if x_start >= x_end or y_start >= y_end:
                continue

            # matching window inside the kernel
            k_x_start = kernel_r - (x - x_start)
            k_x_end = k_x_start + (x_end - x_start)
            k_y_start = kernel_r - (y - y_start)
            k_y_end = k_y_start + (y_end - y_start)

            output_map[channel_idx, y_start:y_end, x_start:x_end] += kernel[k_y_start:k_y_end, k_x_start:k_x_end]

    # offense (channel 0) and defense (channel 1)
    for channel_idx, side in enumerate(('Offense', 'Defense')):
        side_data = player_data[player_data['player_side'] == side]
        if not side_data.empty:
            add_points_to_channel(channel_idx, side_data['x'].values, side_data['y'].values)

    # one channel per target player; targets missing from this frame stay blank
    player_locs = dict(zip(player_data['nfl_id'], zip(player_data['x'], player_data['y'])))
    for i, target_id in enumerate(target_player_ids[:max_targets]):
        if target_id in player_locs:
            x, y = player_locs[target_id]
            add_points_to_channel(2 + i, [x], [y])

    # ball landing spot (last channel); taken from the first row of the frame
    b_x = player_data['ball_land_x'].iloc[0]
    b_y = player_data['ball_land_y'].iloc[0]
    add_points_to_channel(-1, [b_x], [b_y])

    return output_map

### Prediction

In [59]:
def inference(test_input: pd.DataFrame):
    """Predict future (x, y) positions for the flagged players of every play.

    For each (play_id, game_id) group the frames are rasterised with
    ``pixel_map_vectorized``, all plays are padded into one batch and run
    through the module-level ``model``, and the first two predicted values of
    every step are un-scaled with ``delta`` and accumulated from the player's
    last observed position (un-scaled with ``pos``).

    Args:
        test_input: Tracking frame with columns ``play_id``, ``game_id``,
            ``frame_id``, ``nfl_id``, ``x``, ``y``, ``player_side``,
            ``player_position``, ``player_role``, ``player_to_predict``,
            ``num_frames_output``, ``ball_land_x`` and ``ball_land_y``.

    Returns:
        DataFrame with columns ``x``, ``y``, ``nfl_id`` and ``step``
        (1-based). Rows are ordered play by play, target by target. Each
        player gets ``num_frames_output`` rows, capped at the number of steps
        the model generates. An empty frame with the same columns is returned
        when there is nothing to predict.

    NOTE(review): this function uses the coordinates as given. If the model
    was trained on direction-normalised coordinates, the caller presumably
    has to apply the same mirroring to ``test_input`` and undo it on the
    result — confirm against the training pipeline.
    """
    # must equal the number of target slots the model was built with
    MAX_TARGETS = 9
    output_columns = ['x', 'y', 'nfl_id', 'step']

    if test_input.empty:
        return pd.DataFrame(columns=output_columns)

    # run on whatever device the model lives on
    device = next(model.parameters()).device

    # label -> integer id; labels that are not listed map to 0
    # NOTE(review): 'P' maps to 19, which is outside the range 0..18 of the
    # decoder's 19-row position embedding — confirm against training.
    position_ids = {'WR': 1, 'TE': 2, 'QB': 3, 'FB': 4, 'RB': 13,
                    'SS': 5, 'CB': 6, 'FS': 7, 'S': 8, 'ILB': 9,
                    'LB': 10, 'MLB': 11, 'DE': 12, 'NT': 14,
                    'OLB': 15, 'DT': 16, 'T': 17, 'K': 18, 'P': 19}
    role_ids = {'Passer': 0, 'Targeted Receiver': 1,
                'Defensive Coverage': 2, 'Other Route Runner': 3}

    # model inputs, one entry per play
    batch_grids = []
    batch_start_pos_list = []
    batch_player_pos_list = []
    batch_player_role_list = []
    batch_target_mask_list = []
    batch_input_lengths = []

    # bookkeeping needed to rebuild the output frame
    meta_target_ids = []
    meta_output_lens = []

    for _, play_df in test_input.groupby(['play_id', 'game_id']):
        play_df = play_df.sort_values('frame_id')

        # players to predict, capped at the number of slots
        # (== True kept on purpose: the column may not be a strict bool dtype)
        flagged = play_df[play_df['player_to_predict'] == True]  # noqa: E712
        target_ids = flagged['nfl_id'].unique().tolist()
        current_targets = target_ids[:MAX_TARGETS]
        meta_target_ids.append(current_targets)

        # requested number of output frames per target
        meta_output_lens.append([
            int(play_df.loc[play_df['nfl_id'] == tid, 'num_frames_output'].iloc[0])
            for tid in current_targets
        ])

        # one heatmap stack per frame, in ascending frame order; a single
        # groupby pass replaces one boolean filter per frame
        grids = []
        last_frame = None
        for _, frame_data in play_df.groupby('frame_id', sort=True):
            grid = pixel_map_vectorized(frame_data, target_ids, max_targets=MAX_TARGETS)
            grids.append(torch.from_numpy(grid).float())
            last_frame = frame_data

        batch_grids.append(torch.stack(grids))
        batch_input_lengths.append(len(grids))

        # per-slot inputs, taken from the last observed frame
        p_start_pos = torch.zeros(MAX_TARGETS, 2)
        p_pos_ids = torch.zeros(MAX_TARGETS, dtype=torch.long)
        p_role_ids = torch.zeros(MAX_TARGETS, dtype=torch.long)
        p_mask = torch.zeros(MAX_TARGETS, dtype=torch.bool)

        for i, tid in enumerate(current_targets):
            p_data = last_frame[last_frame['nfl_id'] == tid]
            if p_data.empty:
                # target absent from the last frame: slot stays masked
                continue
            row = p_data.iloc[0]

            scaled_xy = pos.transform([[row['x'], row['y']]])[0]
            p_start_pos[i] = torch.as_tensor(scaled_xy, dtype=torch.float32)
            p_pos_ids[i] = position_ids.get(row['player_position'], 0)
            p_role_ids[i] = role_ids.get(row['player_role'], 0)
            p_mask[i] = True

        batch_start_pos_list.append(p_start_pos)
        batch_player_pos_list.append(p_pos_ids)
        batch_player_role_list.append(p_role_ids)
        batch_target_mask_list.append(p_mask)

    # pad the plays to a common number of frames and move everything to the model
    batch_sequence = pad_sequence(batch_grids, batch_first=True).to(device)
    batch_start_pos = torch.stack(batch_start_pos_list).to(device)       # (B, 9, 2)
    player_pos_tensor = torch.stack(batch_player_pos_list).to(device)    # (B, 9)
    player_role_tensor = torch.stack(batch_player_role_list).to(device)  # (B, 9)
    target_mask = torch.stack(batch_target_mask_list).to(device)         # (B, 9)

    model.eval()
    with torch.no_grad():
        predictions = model(batch_sequence, batch_start_pos, target_mask,
                            batch_input_lengths, player_pos_tensor, player_role_tensor)

    # only the first two predicted values per step are position deltas
    pred_deltas_xy = predictions[:, :, :, :2]
    n_plays, n_slots, n_steps, _ = pred_deltas_xy.shape

    # the scalers work on flat (N, 2) arrays on the CPU
    deltas_unscaled = delta.inverse_transform(pred_deltas_xy.reshape(-1, 2).cpu().numpy())
    starts_unscaled = pos.inverse_transform(batch_start_pos.reshape(-1, 2).cpu().numpy())

    deltas_unscaled = torch.tensor(deltas_unscaled).view(n_plays, n_slots, n_steps, 2)
    starts_unscaled = torch.tensor(starts_unscaled).view(n_plays, n_slots, 2)

    # trajectory = start position + running sum of the deltas over the steps
    pred_traj_np = (torch.cumsum(deltas_unscaled, dim=2) + starts_unscaled.unsqueeze(2)).numpy()

    final_trajs = []
    final_ids = []
    final_steps = []

    for play_traj, targets, lens in zip(pred_traj_np, meta_target_ids, meta_output_lens):
        for slot, (tid, requested) in enumerate(zip(targets, lens)):
            # the model cannot deliver more steps than it generated
            seq_len = min(requested, play_traj.shape[1])

            final_trajs.append(play_traj[slot, :seq_len, :])
            final_ids.extend([tid] * seq_len)
            final_steps.extend(range(1, seq_len + 1))

    # np.concatenate refuses an empty list
    if not final_trajs:
        return pd.DataFrame(columns=output_columns)

    df_pred = pd.DataFrame(np.concatenate(final_trajs, axis=0), columns=['x', 'y'])
    df_pred['nfl_id'] = final_ids
    df_pred['step'] = final_steps

    return df_pred
    