# If the training notebook is useful please upvote !!!

### This notebook is based on the notebooks made by zzy.

Please upvote the LB:0.667 original notebooks:

https://www.kaggle.com/code/zzy990106/nfl-2-5d-cnn-baseline-inference

also can use to LB:0.671, 2.5D CNN Baseline（More TTA trick）

https://www.kaggle.com/code/royalacecat/lb-0-671-2-5d-cnn-baseline-more-tta-trick

In [1]:
import os
import sys
import glob
import numpy as np
import pandas as pd
import random
import math
import gc
#Here we import cv2 only to load images, tried with pillow for a little bit, but gave in
#as it was easy to just load with cv2.
import cv2
from tqdm import tqdm
from PIL import Image
import time
from functools import lru_cache
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from timm.scheduler import CosineLRScheduler
from timm.scheduler import StepLRScheduler
sys.path.append('../input/timm-0-6-9/pytorch-image-models-master')

In [2]:
#!pip install dill

In [2]:
CFG18 = {
    'seed': 42,
    'model': 'resnet18',
    'img_size': 256,
    'epochs': 10,
    'train_bs': 8, 
    'valid_bs': 4,
    'lr': 1e-1, 
    'weight_decay': 1e-6,
    'num_workers': 20,
    'max_grad_norm' : 1000,
    'epochs_warmup' : 1.0
}

CFG50 = {
    'seed': 42,
    'model': 'resnet50',
    'img_size': 256,
    'epochs': 10,
    'train_bs': 8, 
    'valid_bs': 4,
    'lr': 1e-01, 
    'weight_decay': 1e-6,
    'num_workers': 20,
    'max_grad_norm' : 1000,
    'epochs_warmup' : 1.0
}

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG18['seed'])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
torch.cuda.is_available()

cuda


True

In [4]:
def expand_contact_id(df):
    """
    Splits out contact_id into seperate columns.
    """
    df["game_play"] = df["contact_id"].str[:12]
    df["step"] = df["contact_id"].str.split("_").str[-3].astype("int")
    df["nfl_player_id_1"] = df["contact_id"].str.split("_").str[-2]
    df["nfl_player_id_2"] = df["contact_id"].str.split("_").str[-1]
    return df
torch.cuda.is_available()

True

In [5]:
#I changed nothing to cell

labels = expand_contact_id(pd.read_csv("kaggle/train_labels.csv"))
train_tracking = pd.read_csv("kaggle/train_player_tracking.csv")
train_helmets = pd.read_csv("kaggle/train_baseline_helmets.csv")
train_video_metadata = pd.read_csv("kaggle/train_video_metadata.csv")

In [6]:
#I changed nothing to cell
def create_features(df, tr_tracking, merge_col="step", use_cols=["x_position", "y_position"]):
    output_cols = []
    df_combo = (
        df.astype({"nfl_player_id_1": "str"})
        .merge(
            tr_tracking.astype({"nfl_player_id": "str"})[
                ["game_play", merge_col, "nfl_player_id",] + use_cols
            ],
            left_on=["game_play", merge_col, "nfl_player_id_1"],
            right_on=["game_play", merge_col, "nfl_player_id"],
            how="left",
        )
        .rename(columns={c: c+"_1" for c in use_cols})
        .drop("nfl_player_id", axis=1)
        .merge(
            tr_tracking.astype({"nfl_player_id": "str"})[
                ["game_play", merge_col, "nfl_player_id"] + use_cols
            ],
            left_on=["game_play", merge_col, "nfl_player_id_2"],
            right_on=["game_play", merge_col, "nfl_player_id"],
            how="left",
        )
        .drop("nfl_player_id", axis=1)
        .rename(columns={c: c+"_2" for c in use_cols})
        .sort_values(["game_play", merge_col, "nfl_player_id_1", "nfl_player_id_2"])
        .reset_index(drop=True)
    )
    output_cols += [c+"_1" for c in use_cols]
    output_cols += [c+"_2" for c in use_cols]
    
    if ("x_position" in use_cols) & ("y_position" in use_cols):
        index = df_combo['x_position_2'].notnull()
        
        distance_arr = np.full(len(index), np.nan)
        tmp_distance_arr = np.sqrt(
            np.square(df_combo.loc[index, "x_position_1"] - df_combo.loc[index, "x_position_2"])
            + np.square(df_combo.loc[index, "y_position_1"]- df_combo.loc[index, "y_position_2"])
        )
        
        distance_arr[index] = tmp_distance_arr
        df_combo['distance'] = distance_arr
        output_cols += ["distance"]
        
    df_combo['G_flug'] = (df_combo['nfl_player_id_2']=="G")
    output_cols += ["G_flug"]
    return df_combo, output_cols


use_cols = [
    'x_position', 'y_position', 'speed', 'distance',
    'direction', 'orientation', 'acceleration', 'sa'
]

train, feature_cols = create_features(labels, train_tracking, use_cols=use_cols)

In [7]:
#Setting up my different dataset to validate on every fifth frame 
train_filtered = train.query('not distance>2').reset_index(drop=True)

train_filtered_step_mod_5 = train_filtered.query('step % 5 == 0').reset_index(drop=True)
train_filtered_step_mod_5['frame'] = (train_filtered_step_mod_5['step']/10*59.94+5*59.94).astype('int')+1

train_filtered['frame'] = (train_filtered['step']/10*59.94+5*59.94).astype('int')+1

train_filtered

Unnamed: 0,contact_id,game_play,datetime,step,nfl_player_id_1,nfl_player_id_2,contact,x_position_1,y_position_1,speed_1,...,y_position_2,speed_2,distance_2,direction_2,orientation_2,acceleration_2,sa_2,distance,G_flug,frame
0,58168_003392_0_37084_38567,58168_003392,2020-09-11T03:01:48.100Z,0,37084,38567,0,41.90,20.08,0.54,...,19.88,0.66,0.07,136.70,88.92,0.90,0.89,1.543017,False,300
1,58168_003392_0_37084_G,58168_003392,2020-09-11T03:01:48.100Z,0,37084,G,0,41.90,20.08,0.54,...,,,,,,,,,True,300
2,58168_003392_0_37211_46445,58168_003392,2020-09-11T03:01:48.100Z,0,37211,46445,0,39.59,17.07,0.53,...,18.08,1.10,0.10,148.93,92.39,2.03,2.03,1.258014,False,300
3,58168_003392_0_37211_G,58168_003392,2020-09-11T03:01:48.100Z,0,37211,G,0,39.59,17.07,0.53,...,,,,,,,,,True,300
4,58168_003392_0_38556_G,58168_003392,2020-09-11T03:01:48.100Z,0,38556,G,0,41.93,30.61,0.67,...,,,,,,,,,True,300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660548,58582_003121_91_48220_G,58582_003121,2021-10-12T02:42:29.100Z,91,48220,G,0,33.18,25.26,2.55,...,,,,,,,,,True,846
660549,58582_003121_91_52493_G,58582_003121,2021-10-12T02:42:29.100Z,91,52493,G,0,65.04,38.68,1.31,...,,,,,,,,,True,846
660550,58582_003121_91_52500_G,58582_003121,2021-10-12T02:42:29.100Z,91,52500,G,0,58.74,40.11,1.34,...,,,,,,,,,True,846
660551,58582_003121_91_52609_G,58582_003121,2021-10-12T02:42:29.100Z,91,52609,G,0,60.32,25.93,1.38,...,,,,,,,,,True,846


In [8]:
train_filtered_step_mod_5

Unnamed: 0,contact_id,game_play,datetime,step,nfl_player_id_1,nfl_player_id_2,contact,x_position_1,y_position_1,speed_1,...,y_position_2,speed_2,distance_2,direction_2,orientation_2,acceleration_2,sa_2,distance,G_flug,frame
0,58168_003392_0_37084_38567,58168_003392,2020-09-11T03:01:48.100Z,0,37084,38567,0,41.90,20.08,0.54,...,19.88,0.66,0.07,136.70,88.92,0.90,0.89,1.543017,False,300
1,58168_003392_0_37084_G,58168_003392,2020-09-11T03:01:48.100Z,0,37084,G,0,41.90,20.08,0.54,...,,,,,,,,,True,300
2,58168_003392_0_37211_46445,58168_003392,2020-09-11T03:01:48.100Z,0,37211,46445,0,39.59,17.07,0.53,...,18.08,1.10,0.10,148.93,92.39,2.03,2.03,1.258014,False,300
3,58168_003392_0_37211_G,58168_003392,2020-09-11T03:01:48.100Z,0,37211,G,0,39.59,17.07,0.53,...,,,,,,,,,True,300
4,58168_003392_0_38556_G,58168_003392,2020-09-11T03:01:48.100Z,0,38556,G,0,41.93,30.61,0.67,...,,,,,,,,,True,300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135229,58582_003121_90_48220_G,58582_003121,2021-10-12T02:42:29.000Z,90,48220,G,0,32.92,25.29,2.52,...,,,,,,,,,True,840
135230,58582_003121_90_52493_G,58582_003121,2021-10-12T02:42:29.000Z,90,52493,G,0,65.01,38.81,1.33,...,,,,,,,,,True,840
135231,58582_003121_90_52500_G,58582_003121,2021-10-12T02:42:29.000Z,90,52500,G,0,58.80,40.24,1.50,...,,,,,,,,,True,840
135232,58582_003121_90_52609_G,58582_003121,2021-10-12T02:42:29.000Z,90,52609,G,0,60.47,25.96,1.33,...,,,,,,,,,True,840


In [9]:
#I changed nothing to cell

train_aug = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    A.Normalize(mean=[0.], std=[1.]),
    ToTensorV2()
])

valid_aug = A.Compose([
    A.Normalize(mean=[0.], std=[1.]),
    ToTensorV2()
])

In [10]:
#I changed nothing to cell

video2helmets = {}
train_helmets_new = train_helmets.set_index('video')
for video in tqdm(train_helmets.video.unique()):
    video2helmets[video] = train_helmets_new.loc[video].reset_index(drop=True)

100%|█████████████████████████████████████████| 481/481 [00:15<00:00, 31.47it/s]


del train_helmets, train_helmets_new
gc.collect()

In [11]:
#I changed nothing to cell

video2frames = {}

for game_play in tqdm(train_video_metadata.game_play.unique()):
    for view in ['Endzone', 'Sideline']:
        video = game_play + f'_{view}.mp4'
        video2frames[video] = max(list(map(lambda x:int(x.split('_')[-1].split('.')[0]), \
                                           glob.glob(f'kaggle/train/frames/{video}*'))))

100%|█████████████████████████████████████████| 240/240 [01:40<00:00,  2.40it/s]


In [12]:
class MyDataset(Dataset):
    def __init__(self, df, window = 24, aug=train_aug, mode='train'):
        self.df = df
        self.frame = df.frame.values
        self.feature = df[feature_cols].fillna(-1).values
        self.players = df[['nfl_player_id_1','nfl_player_id_2']].values
        self.game_play = df.game_play.values
        self.aug = aug
        self.mode = mode
        self.window = window
        
    def __len__(self):
        return len(self.df)
    
    # @lru_cache(1024)
    # def read_img(self, path):
    #     return cv2.imread(path, 0)
   
    def __getitem__(self, idx):   
        #I added window param to dataset to customize window without having to redefine class, only change
        window = self.window
        frame = self.frame[idx]
        
        if self.mode == 'train':
            frame = frame + random.randint(-6, 6)

        players = []
        for p in self.players[idx]:
            if p == 'G':
                players.append(p)
            else:
                players.append(int(p))
        
        imgs = []
        for view in ['Endzone', 'Sideline']:
            video = self.game_play[idx] + f'_{view}.mp4'

            tmp = video2helmets[video]
#             tmp = tmp.query('@frame-@window<=frame<=@frame+@window')
            tmp[tmp['frame'].between(frame-window, frame+window)]
            tmp = tmp[tmp.nfl_player_id.isin(players)]#.sort_values(['nfl_player_id', 'frame'])
            tmp_frames = tmp.frame.values
            tmp = tmp.groupby('frame')[['left','width','top','height']].mean()
#0.002s

            bboxes = []
            for f in range(frame-window, frame+window+1, 1):
                if f in tmp_frames:
                    x, w, y, h = tmp.loc[f][['left','width','top','height']]
                    bboxes.append([x, w, y, h])
                else:
                    bboxes.append([np.nan, np.nan, np.nan, np.nan])
            bboxes = pd.DataFrame(bboxes).interpolate(limit_direction='both').values
            bboxes = bboxes[::4]

            if bboxes.sum() > 0:
                flag = 1
            else:
                flag = 0
#0.03s
                    
            for i, f in enumerate(range(frame-window, frame+window+1, 4)):
                img_new = np.zeros((256, 256), dtype=np.float32)

                if flag == 1 and f <= video2frames[video]:
                    img = cv2.imread(f'kaggle/train/frames/{video}_{f:04d}.jpg', 0)
                    #This may need to be grayscale to work
                    #img = np.asarray(Image.fromarray(f'kaggle/train/frames/{video}_{f:04d}.jpg').convert('L'))
                    #print(img)
                    x, w, y, h = bboxes[i]

                    img = img[int(y+h/2)-128:int(y+h/2)+128,int(x+w/2)-128:int(x+w/2)+128].copy()
                    img_new[:img.shape[0], :img.shape[1]] = img
                    
                    #plt.imshow(img_new)
                
                
                imgs.append(img_new)
                
            
#0.06s
                
        feature = np.float32(self.feature[idx])

        img = np.array(imgs).transpose(1, 2, 0)    
        img = self.aug(image=img)["image"]
        label = np.float32(self.df.contact.values[idx])
        

        return img, feature, label

In [13]:
class Model(nn.Module):
    #added window, CFG, and num_classes parameters to make it easier to define models
    def __init__(self, window=24, CFG=CFG18, num_classes=500):
        super(Model, self).__init__()
        #changing to 1 to make work
        self.backbone = timm.create_model(CFG['model'], pretrained=True, num_classes=num_classes, in_chans=int((window/2)+1))
        self.mlp = nn.Sequential(
            nn.Linear(18, 64),
            nn.LayerNorm(64),
            nn.ReLU(),
            nn.Dropout(0.2),
        )
        self.fc = nn.Linear(64+num_classes*2, 1)

    def forward(self, img, feature):
        b, c, h, w = img.shape
        img = img.reshape(b*2, c//2, h, w)
        img = self.backbone(img).reshape(b, -1)
        feature = self.mlp(feature)
        y = self.fc(torch.cat([img, feature], dim=1))
        return y
    
torch.cuda.is_available()

True

In [30]:
#Bit tricky, way it works is the valid_set 1 is for a window of 1 for resnet 18, valid_set 24
#is for the 24 window resnet 50 that was trained by the first author (NOT TRAINED BY ME)
train_set,valid_set = train_test_split(train_filtered,test_size=0.05, random_state=42,stratify = train_filtered['contact'])
valid_set_1 = MyDataset(valid_set, window = 1, aug=valid_aug, mode='test')
valid_loader_1 = DataLoader(valid_set_1, batch_size=CFG18['valid_bs'], shuffle=False, num_workers=20, pin_memory=True)

valid_set_24 = MyDataset(valid_set, window = 24, aug=valid_aug, mode='test')
valid_loader_24 = DataLoader(valid_set_24, batch_size=CFG18['valid_bs'], shuffle=False, num_workers=20, pin_memory=True)


In [47]:
#Last val set is for the model which tries to take every fifth frame. I was going to predict the others
#by filling in averages between every fifth frame, but the performance was very bad on predicted frames.
train_set_mod5,valid_set_5 = train_test_split(train_filtered_step_mod_5,test_size=0.05, random_state=42,stratify = train_filtered_step_mod_5['contact'])
valid_set_5 = MyDataset(valid_set_5,window = 10, aug=valid_aug, mode='test')
valid_loader_5 = DataLoader(valid_set_5, batch_size=CFG18['valid_bs'], shuffle=False, num_workers=20, pin_memory=True)

In [45]:
print(valid_set_1.df.shape)
print(valid_set5.df.shape)

(33028, 26)
(6762, 26)


In [55]:
#load models, res_version_window_size
model18_1 = Model(window=0,CFG=CFG18, num_classes=200).to('cuda')
model18_1.load_state_dict(torch.load('res18_1.pytorch'))
model18_10 = Model(window=10, CFG=CFG18, num_classes=500).to('cuda')
model18_10.load_state_dict(torch.load('res18_10.pytorch'))
model55_24 = Model(window=24, CFG=CFG50, num_classes=500).to('cuda')
model55_24.load_state_dict(torch.load('resnet50_fold0.pt'))

<All keys matched successfully>

In [34]:

#Not a pretty way to write the evaluations, but it works 
#Repeated version with TTA, given in notebook all that is changed is the model and the loader
y_pred = []
with torch.no_grad():
    tk = tqdm(valid_loader_1, total=len(valid_loader_1))
    for step, batch in enumerate(tk):
        if(step % 4 != 3):
            img, feature, label = [x.to('cuda') for x in batch]
            output1 = model18_1(img, feature).squeeze(-1)
            output2 = model18_1(img.flip(-1), feature).squeeze(-1)
            
            y_pred.extend(0.2*(output1.sigmoid().cpu().numpy()) + 0.8*(output2.sigmoid().cpu().numpy()))
        else:
            img, feature, label = [x.to(device) for x in batch]
            output = model18_1(img.flip(-1), feature).squeeze(-1)
            y_pred.extend(output.sigmoid().cpu().numpy())    

y_pred_18_1 = np.array(y_pred)
#len(y_pred)
#print(y_pred)

100%|██████████████████████████████████████| 8257/8257 [01:11<00:00, 115.45it/s]


In [35]:
#Test on filtered by mod == 0
th = .11

res_18_1 = (y_pred_18_1 >= th).astype('int')


In [61]:
r = np.asarray(valid_set_1.df['contact'], dtype=int)

In [63]:
import sklearn
print(sklearn.metrics.matthews_corrcoef(r, res_18_1))
print(sklearn.metrics.accuracy_score(r, res_18_1))

0.25155636076972787
0.5284304226716725


In [57]:

#Not a pretty way to write the evaluations, but it works 
y_pred_18_10 = []
with torch.no_grad():
    tk = tqdm(valid_loader_5, total=len(valid_loader_5))
    for step, batch in enumerate(tk):
        if(step % 4 != 3):
            img, feature, label = [x.to('cuda') for x in batch]
            output1 = model18_10(img, feature).squeeze(-1)
            output2 = model18_10(img.flip(-1), feature).squeeze(-1)
            
            y_pred_18_10.extend(0.2*(output1.sigmoid().cpu().numpy()) + 0.8*(output2.sigmoid().cpu().numpy()))
        else:
            img, feature, label = [x.to(device) for x in batch]
            output = model18_10(img.flip(-1), feature).squeeze(-1)
            y_pred_18_10.extend(output.sigmoid().cpu().numpy())    

y_pred_18_10 = np.array(y_pred_18_10)
#len(y_pred)
#print(y_pred)

100%|███████████████████████████████████████| 1691/1691 [00:28<00:00, 58.39it/s]


In [58]:
th = .11

res_18_10 = (y_pred_18_10 >= th).astype('int')
r = np.asarray(valid_set_5.df['contact'], dtype=int)
print(sklearn.metrics.matthews_corrcoef(r, res_18_10))
print(sklearn.metrics.accuracy_score(r, res_18_10))

0.24796176862713953
0.5865128660159716


In [59]:

#Not a pretty way to write the evaluations, but it works 
y_pred_50_24 = []
with torch.no_grad():
    tk = tqdm(valid_loader_24, total=len(valid_loader_24))
    for step, batch in enumerate(tk):
        if(step % 4 != 3):
            img, feature, label = [x.to('cuda') for x in batch]
            output1 = model55_24(img, feature).squeeze(-1)
            output2 = model55_24(img.flip(-1), feature).squeeze(-1)
            
            y_pred_50_24.extend(0.2*(output1.sigmoid().cpu().numpy()) + 0.8*(output2.sigmoid().cpu().numpy()))
        else:
            img, feature, label = [x.to(device) for x in batch]
            output = model55_24(img.flip(-1), feature).squeeze(-1)
            y_pred_50_24.extend(output.sigmoid().cpu().numpy())    

y_pred_50_24 = np.array(y_pred_50_24)
#len(y_pred)
#print(y_pred)

100%|███████████████████████████████████████| 8257/8257 [04:49<00:00, 28.53it/s]


In [60]:
th = .11

res_50_24 = (y_pred_50_24 >= th).astype('int')
r = np.asarray(valid_set_24.df['contact'], dtype=int)
print(sklearn.metrics.matthews_corrcoef(r, res_50_24))
print(sklearn.metrics.accuracy_score(r, res_50_24))

0.5670892553446354
0.8720479593072544
