In [None]:
#Below is the original authors note, this notebook is actually based on another notebook that only provided
#a inference notebook. 

#This notebook is found https://www.kaggle.com/code/royalacecat/training-nfl-2-5d-cnn-lb-0-671-with-tta

#Unless otherwise suggested, all comments in cells are from Joe Pehlke 

# If the training notebook is useful please upvote !!!

### This notebook is based on the notebooks made by zzy.

Please upvote the LB:0.667 original notebooks:

https://www.kaggle.com/code/zzy990106/nfl-2-5d-cnn-baseline-inference

also can use to LB:0.671, 2.5D CNN Baseline（More TTA trick）

https://www.kaggle.com/code/royalacecat/lb-0-671-2-5d-cnn-baseline-more-tta-trick

In [1]:
import os
import sys
import glob
import numpy as np
import pandas as pd
import random
import math
import gc
#Here we import cv2 only to load images, tried with pillow for a little bit, but gave in
#as it was easy to just load with cv2.
import cv2
from tqdm import tqdm
from PIL import Image
import time
from functools import lru_cache
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
#timm is a great library for pretrained models
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from timm.scheduler import CosineLRScheduler
#I attempted a step scheduler to see if there would be a speedup, but did not find much
from timm.scheduler import StepLRScheduler
sys.path.append('../input/timm-0-6-9/pytorch-image-models-master')

In [2]:
#!pip install dill

In [3]:
#Here I changed the model to resnet18 and the learning rate to 1e-1, I noticed a large speedup when I made these
#changes of approx 10 minutes an epoch
CFG = {
    'seed': 42,
    'model': 'resnet18',
    'img_size': 256,
    'epochs': 10,
    'train_bs': 8, 
    'valid_bs': 4,
    'lr': 1e-1, 
    'weight_decay': 1e-6,
    'num_workers': 20,
    'max_grad_norm' : 1000,
    'epochs_warmup' : 1.0
}

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['seed'])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
torch.cuda.is_available()

cuda


True

In [5]:
def expand_contact_id(df):
    """
    Splits out contact_id into seperate columns.
    """
    df["game_play"] = df["contact_id"].str[:12]
    df["step"] = df["contact_id"].str.split("_").str[-3].astype("int")
    df["nfl_player_id_1"] = df["contact_id"].str.split("_").str[-2]
    df["nfl_player_id_2"] = df["contact_id"].str.split("_").str[-1]
    return df
torch.cuda.is_available()

True

In [6]:
labels = expand_contact_id(pd.read_csv("kaggle/train_labels.csv"))
train_tracking = pd.read_csv("kaggle/train_player_tracking.csv")
train_helmets = pd.read_csv("kaggle/train_baseline_helmets.csv")
train_video_metadata = pd.read_csv("kaggle/train_video_metadata.csv")

In [9]:
#!mkdir -p kaggle/train/frames

#for video in tqdm(train_helmets.video.unique()):
#    if 'Endzone2' not in video:
#        !ffmpeg -i kaggle/train/{video} -q:v 2 -f image2 kaggle/train/frames/{video}_%04d.jpg -hide_banner -loglevel error

In [9]:
def create_features(df, tr_tracking, merge_col="step", use_cols=["x_position", "y_position"]):
    output_cols = []
    df_combo = (
        df.astype({"nfl_player_id_1": "str"})
        .merge(
            tr_tracking.astype({"nfl_player_id": "str"})[
                ["game_play", merge_col, "nfl_player_id",] + use_cols
            ],
            left_on=["game_play", merge_col, "nfl_player_id_1"],
            right_on=["game_play", merge_col, "nfl_player_id"],
            how="left",
        )
        .rename(columns={c: c+"_1" for c in use_cols})
        .drop("nfl_player_id", axis=1)
        .merge(
            tr_tracking.astype({"nfl_player_id": "str"})[
                ["game_play", merge_col, "nfl_player_id"] + use_cols
            ],
            left_on=["game_play", merge_col, "nfl_player_id_2"],
            right_on=["game_play", merge_col, "nfl_player_id"],
            how="left",
        )
        .drop("nfl_player_id", axis=1)
        .rename(columns={c: c+"_2" for c in use_cols})
        .sort_values(["game_play", merge_col, "nfl_player_id_1", "nfl_player_id_2"])
        .reset_index(drop=True)
    )
    output_cols += [c+"_1" for c in use_cols]
    output_cols += [c+"_2" for c in use_cols]
    
    if ("x_position" in use_cols) & ("y_position" in use_cols):
        index = df_combo['x_position_2'].notnull()
        
        distance_arr = np.full(len(index), np.nan)
        tmp_distance_arr = np.sqrt(
            np.square(df_combo.loc[index, "x_position_1"] - df_combo.loc[index, "x_position_2"])
            + np.square(df_combo.loc[index, "y_position_1"]- df_combo.loc[index, "y_position_2"])
        )
        
        distance_arr[index] = tmp_distance_arr
        df_combo['distance'] = distance_arr
        output_cols += ["distance"]
        
    df_combo['G_flug'] = (df_combo['nfl_player_id_2']=="G")
    output_cols += ["G_flug"]
    return df_combo, output_cols


use_cols = [
    'x_position', 'y_position', 'speed', 'distance',
    'direction', 'orientation', 'acceleration', 'sa'
]

train, feature_cols = create_features(labels, train_tracking, use_cols=use_cols)

In [10]:
#The first query is a simple way to filter out false positives as two players cannot be touching if they are 
#more than 2 yards away from each other (are very unlikely to be at the least)
train_filtered = train.query('not distance>2').reset_index(drop=True)

#This query is done by me in order to reduce the number of frames seen in the hope that this will lead to a greater
#temporal understanding as the frames can be farther away, what I did not realize is that a better solution would 
#be to simply pick 24 frames that are farther away than the ones that are right next to the current frame! 

#I did not realize this until after seeing the winning solution
train_filtered = train_filtered.query('step % 5 == 0').reset_index(drop=True)


#Control for the frames not being exactly 60 fps
train_filtered['frame'] = (train_filtered['step']/10*59.94+5*59.94).astype('int')+1

train_filtered

Unnamed: 0,contact_id,game_play,datetime,step,nfl_player_id_1,nfl_player_id_2,contact,x_position_1,y_position_1,speed_1,...,y_position_2,speed_2,distance_2,direction_2,orientation_2,acceleration_2,sa_2,distance,G_flug,frame
0,58168_003392_0_37084_38567,58168_003392,2020-09-11T03:01:48.100Z,0,37084,38567,0,41.90,20.08,0.54,...,19.88,0.66,0.07,136.70,88.92,0.90,0.89,1.543017,False,300
1,58168_003392_0_37084_G,58168_003392,2020-09-11T03:01:48.100Z,0,37084,G,0,41.90,20.08,0.54,...,,,,,,,,,True,300
2,58168_003392_0_37211_46445,58168_003392,2020-09-11T03:01:48.100Z,0,37211,46445,0,39.59,17.07,0.53,...,18.08,1.10,0.10,148.93,92.39,2.03,2.03,1.258014,False,300
3,58168_003392_0_37211_G,58168_003392,2020-09-11T03:01:48.100Z,0,37211,G,0,39.59,17.07,0.53,...,,,,,,,,,True,300
4,58168_003392_0_38556_G,58168_003392,2020-09-11T03:01:48.100Z,0,38556,G,0,41.93,30.61,0.67,...,,,,,,,,,True,300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135229,58582_003121_90_48220_G,58582_003121,2021-10-12T02:42:29.000Z,90,48220,G,0,32.92,25.29,2.52,...,,,,,,,,,True,840
135230,58582_003121_90_52493_G,58582_003121,2021-10-12T02:42:29.000Z,90,52493,G,0,65.01,38.81,1.33,...,,,,,,,,,True,840
135231,58582_003121_90_52500_G,58582_003121,2021-10-12T02:42:29.000Z,90,52500,G,0,58.80,40.24,1.50,...,,,,,,,,,True,840
135232,58582_003121_90_52609_G,58582_003121,2021-10-12T02:42:29.000Z,90,52609,G,0,60.47,25.96,1.33,...,,,,,,,,,True,840


del train, labels, train_tracking
gc.collect()

In [11]:

#Simple transforms to avoid overfitting to a particular view
train_aug = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    A.Normalize(mean=[0.], std=[1.]),
    ToTensorV2()
])

#Standard transforms for image data
valid_aug = A.Compose([
    A.Normalize(mean=[0.], std=[1.]),
    ToTensorV2()
])

In [12]:
#Create dictionary that can 
video2helmets = {}
train_helmets_new = train_helmets.set_index('video')
for video in tqdm(train_helmets.video.unique()):
    video2helmets[video] = train_helmets_new.loc[video].reset_index(drop=True)

100%|█████████████████████████████████████████| 481/481 [00:14<00:00, 32.39it/s]


del train_helmets, train_helmets_new
gc.collect()

In [13]:
video2frames = {}

for game_play in tqdm(train_video_metadata.game_play.unique()):
    for view in ['Endzone', 'Sideline']:
        video = game_play + f'_{view}.mp4'
        video2frames[video] = max(list(map(lambda x:int(x.split('_')[-1].split('.')[0]), \
                                           glob.glob(f'kaggle/train/frames/{video}*'))))

100%|█████████████████████████████████████████| 240/240 [01:40<00:00,  2.39it/s]


In [14]:

#I believe this dataset is best part of the provided notebook. 
#I think that I would have been able to put a model together, but the construction of the dataset and dataloader
#are done brilliantly. They are a pairwise join of all of the players at each time frame, the window option is 
#an easy way to determine how many frames one wants to use in order to incorporate temporal data

#It is currently set to 10 as I was training a resnet with 10 neighbors who are all at least 5 frames away.
class MyDataset(Dataset):
    def __init__(self, df, aug=train_aug, mode='train'):
        self.df = df
        self.frame = df.frame.values
        self.feature = df[feature_cols].fillna(-1).values
        self.players = df[['nfl_player_id_1','nfl_player_id_2']].values
        self.game_play = df.game_play.values
        self.aug = aug
        self.mode = mode
        
    def __len__(self):
        return len(self.df)
    
    # @lru_cache(1024)
    # def read_img(self, path):
    #     return cv2.imread(path, 0)
   
    def __getitem__(self, idx):   
        window = 10
        frame = self.frame[idx]
        
        if self.mode == 'train':
            frame = frame + random.randint(-6, 6)

        players = []
        for p in self.players[idx]:
            #G represents the ground in this dataset
            if p == 'G':
                players.append(p)
            else:
                players.append(int(p))
        
        imgs = []
        for view in ['Endzone', 'Sideline']:
            video = self.game_play[idx] + f'_{view}.mp4'

            tmp = video2helmets[video]
            #below is commented out from the original notebook
#             tmp = tmp.query('@frame-@window<=frame<=@frame+@window')
            tmp[tmp['frame'].between(frame-window, frame+window)]
            #below is also commented out from the original notebook
            tmp = tmp[tmp.nfl_player_id.isin(players)]#.sort_values(['nfl_player_id', 'frame'])
            tmp_frames = tmp.frame.values
            tmp = tmp.groupby('frame')[['left','width','top','height']].mean()
#0.002s

            bboxes = []
    #here we go though the window range and set the bounding boxes, then we do it with the images themselves
    #this contructs a 3d list of images that we can feed into the neural network
            for f in range(frame-window, frame+window+1, 1):
                if f in tmp_frames:
                    x, w, y, h = tmp.loc[f][['left','width','top','height']]
                    bboxes.append([x, w, y, h])
                else:
                    bboxes.append([np.nan, np.nan, np.nan, np.nan])
            bboxes = pd.DataFrame(bboxes).interpolate(limit_direction='both').values
            bboxes = bboxes[::4]

            if bboxes.sum() > 0:
                flag = 1
            else:
                flag = 0
#0.03s
                    
            for i, f in enumerate(range(frame-window, frame+window+1, 4)):
                img_new = np.zeros((256, 256), dtype=np.float32)

                if flag == 1 and f <= video2frames[video]:
                    img = cv2.imread(f'kaggle/train/frames/{video}_{f:04d}.jpg', 0)
                    #This may need to be grayscale to work
                    #img = np.asarray(Image.fromarray(f'kaggle/train/frames/{video}_{f:04d}.jpg').convert('L'))
                    #print(img)
                    x, w, y, h = bboxes[i]
                    #A crop based on the bounding box
                    img = img[int(y+h/2)-128:int(y+h/2)+128,int(x+w/2)-128:int(x+w/2)+128].copy()
                    img_new[:img.shape[0], :img.shape[1]] = img
                    
                    #plt.imshow(img_new)
                
                
                imgs.append(img_new)
                
            
#0.06s
                
        feature = np.float32(self.feature[idx])
        #change dimensions so they line up, return features as the non-contact features in the dataset to be 
        #used for prediciton
        img = np.array(imgs).transpose(1, 2, 0)    
        img = self.aug(image=img)["image"]
        label = np.float32(self.df.contact.values[idx])
        

        return img, feature, label

In [15]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        #Pretrained on imagenet, uses the resnet to get 500 dimension representation of the input
        #in_chans is dependent on the size of the window (see better code for switching models in the
        #evals models notebook)
        
        #The backbone output for endzone and sideline is then combined with MLP that takes in position features
        #of the players. Output is single number representing whether or not there is collision based on some
        #threshold
        self.backbone = timm.create_model(CFG['model'], pretrained=True, num_classes=500, in_chans=6)
        self.mlp = nn.Sequential(
            nn.Linear(18, 64),
            nn.LayerNorm(64),
            nn.ReLU(),
            nn.Dropout(0.2),
        )
        self.fc = nn.Linear(64+500*2, 1)

        #Change batch size and cut number of channels in half before going through the model to fit resnet
    def forward(self, img, feature):
        b, c, h, w = img.shape
        img = img.reshape(b*2, c//2, h, w)
        img = self.backbone(img).reshape(b, -1)
        feature = self.mlp(feature)
        y = self.fc(torch.cat([img, feature], dim=1))
        return y
    
torch.cuda.is_available()

True

In [16]:
#This did not work to fix my jupyter woes when it would crash
#import dill
#dill.dump_session('notebook_env.db')

In [17]:
#Output of model, actually a fairly simple system
model = Model()
model.to(device)
model.train()

Model(
  (backbone): ResNet(
    (conv1): Conv2d(6, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (drop_block): Identity()
        (act1): ReLU(inplace=True)
        (aa): Identity()
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): ReLU(inplace=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)

In [None]:
#Use this code to evaluate on validation set.

In [135]:
#BCE loss for the output
import torch.nn as nn
criterion = nn.BCEWithLogitsLoss()

In [136]:
#Simple loss computation, used to store the best checkpoint
def evaluate(model, loader_val, *, compute_score=True, pbar=None):
    """
    Predict and compute loss and score
    """
    tb = time.time()
    in_training = model.training
    model.eval()

    loss_sum = 0.0
    n_sum = 0
    y_all = []
    y_pred_all = []

    if pbar is not None:
        pbar = tqdm(desc='Predict', nrows=78, total=pbar)
        
    total= len(loader_val)

    for ibatch,(img, feature, label) in tqdm(enumerate(loader_val),total = total):
        # img, feature, label = [x.to(device) for x in batch]
        img = img.to(device)
        feature = feature.to(device)
        n = label.size(0)
        label = label.to(device)

        with torch.no_grad():
            y_pred = model(img, feature)
        loss = criterion(y_pred.view(-1), label)

        n_sum += n
        loss_sum += n * loss.item()
        
        if pbar is not None:
            pbar.update(len(img))
        
        del loss, img, label
        gc.collect()

    loss_val = loss_sum / n_sum


    ret = {'loss': loss_val,
           'time': time.time() - tb}
    
    model.train(in_training) 
    gc.collect()
    return ret

In [19]:

#test and validation loaders, saw big increase in speed when I increased num_workers.
train_set,valid_set = train_test_split(train_filtered,test_size=0.05, random_state=42,stratify = train_filtered['contact'])
train_set = MyDataset(train_set, train_aug, 'train')
train_loader = DataLoader(train_set, batch_size=CFG['train_bs'], shuffle=True, num_workers=20, pin_memory=True,drop_last=True)
valid_set = MyDataset(valid_set, valid_aug, 'test')
valid_loader = DataLoader(valid_set, batch_size=CFG['valid_bs'], shuffle=False, num_workers=20, pin_memory=True)

In [20]:
print(valid_set.df.shape)
print(valid_set.df.head())

(6762, 26)
                        contact_id     game_play                  datetime  \
83364      58418_000637_25_37161_G  58418_000637  2021-01-03T18:24:39.900Z   
93974      58510_000152_65_43307_G  58510_000152  2021-09-12T17:10:13.800Z   
78337  58401_002419_25_48259_52486  58401_002419  2020-12-27T19:49:37.900Z   
15955   58202_000546_5_42445_47939  58202_000546  2020-09-27T17:20:41.500Z   
4731       58180_000986_80_45008_G  58180_000986  2020-09-13T21:03:11.800Z   

       step nfl_player_id_1 nfl_player_id_2  contact  x_position_1  \
83364    25           37161               G        0         38.44   
93974    65           43307               G        0         70.89   
78337    25           48259           52486        1         29.26   
15955     5           42445           47939        0        108.65   
4731     80           45008               G        0         45.56   

       y_position_1  speed_1  ...  y_position_2  speed_2  distance_2  \
83364         30.40     1.7

In [139]:
#AdamW, fast optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay'])
nbatch = len(train_loader)
warmup = CFG['epochs_warmup'] * nbatch
nsteps = CFG['epochs'] * nbatch 

In [140]:
#Cosine LR worked better for me
scheduler = CosineLRScheduler(optimizer,warmup_t=warmup, warmup_lr_init=0.0, warmup_prefix=True,t_initial=(nsteps - warmup), lr_min=1e-6) 
#scheduler = StepLRScheduler(optimizer, decay_t = 5, decay_rate=1.)


In [141]:
time_val = 0.0
tb = time.time()
best_cv = 0
best_loss = 1e10
#Fairly boilerplate training loop, saving best version on val
for iepoch in range(CFG['epochs']):
    print('Epoch:', iepoch+1)
    loss_sum = 0.0
    n_sum = 0
    total = len(train_loader)

    # Train (ORIGINAL COMMENT)
    for ibatch,(img, feature, label) in tqdm(enumerate(train_loader),total = total):
        img = img.to(device)
        feature = feature.to(device)
        n = label.size(0)
        label = label.to(device)
        

        optimizer.zero_grad()
        y_pred = model(img, feature).squeeze(-1)
        loss = criterion(y_pred, label)
        loss_train = loss.item()
        loss_sum += n * loss_train
        n_sum += n

        loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),CFG['max_grad_norm'])

        optimizer.step()
        scheduler.step(iepoch * nbatch + ibatch + 1)
        
    val = evaluate(model, valid_loader)
    time_val += val['time']
    loss_train = loss_sum / n_sum
    dt = (time.time() - tb) / 60
    print('Epoch: %d Train Loss: %.4f Test Loss: %.4f Time: %.2f min' %
          (iepoch + 1, loss_train, val['loss'],dt))
    if val['loss'] < best_loss:
        best_loss = val['loss']
        # Save model (ORIGINAL COMMENT)
        #I changed filenames manually, this is old filename
        ofilename = 'res18_1_wind_200_out_mod_frames.pytorch'
        torch.save(model.state_dict(), ofilename)
        print(ofilename, 'written')
    del val
    gc.collect()

dt = time.time() - tb
print(' %.2f min total, %.2f min val' % (dt / 60, time_val / 60))
gc.collect()

Epoch: 1


100%|█████████████████████████████████████| 16059/16059 [11:02<00:00, 24.25it/s]
100%|███████████████████████████████████████| 1691/1691 [04:19<00:00,  6.52it/s]


Epoch: 1 Train Loss: 1.9964 Test Loss: 0.4009 Time: 15.44 min
res18_1_wind_200_out_mod_frames.pytorch written
Epoch: 2


100%|█████████████████████████████████████| 16059/16059 [11:12<00:00, 23.88it/s]
100%|███████████████████████████████████████| 1691/1691 [04:19<00:00,  6.53it/s]


Epoch: 2 Train Loss: 0.5166 Test Loss: 0.3634 Time: 31.06 min
res18_1_wind_200_out_mod_frames.pytorch written
Epoch: 3


100%|█████████████████████████████████████| 16059/16059 [10:52<00:00, 24.61it/s]
100%|███████████████████████████████████████| 1691/1691 [04:20<00:00,  6.49it/s]


Epoch: 3 Train Loss: 0.4187 Test Loss: 0.4833 Time: 46.37 min
Epoch: 4


100%|█████████████████████████████████████| 16059/16059 [10:52<00:00, 24.60it/s]
100%|███████████████████████████████████████| 1691/1691 [04:23<00:00,  6.42it/s]


Epoch: 4 Train Loss: 0.4349 Test Loss: 0.2722 Time: 61.73 min
res18_1_wind_200_out_mod_frames.pytorch written
Epoch: 5


100%|█████████████████████████████████████| 16059/16059 [10:52<00:00, 24.62it/s]
100%|███████████████████████████████████████| 1691/1691 [04:18<00:00,  6.53it/s]


Epoch: 5 Train Loss: 0.3658 Test Loss: 0.2837 Time: 77.00 min
Epoch: 6


100%|█████████████████████████████████████| 16059/16059 [10:38<00:00, 25.13it/s]
100%|███████████████████████████████████████| 1691/1691 [04:17<00:00,  6.58it/s]


Epoch: 6 Train Loss: 0.3269 Test Loss: 0.4404 Time: 92.02 min
Epoch: 7


 74%|███████████████████████████▌         | 11941/16059 [07:54<02:45, 24.94it/s]