In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
cd ../src/

/home/theo/kaggle/nfl_impact/src


## Imports

In [3]:
import os
import re
import cv2
import time
import json
import torch
import imageio
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from collections import Counter
from tqdm.notebook import tqdm
from skimage.transform import resize

from sklearn.metrics import roc_auc_score, f1_score

In [4]:
from params import *
from utils.plot import visualize_preds_indexed, visualize_preds
from utils.metrics import boxes_f1_score, precision_calc, get_boxes_from_df

## Load

In [5]:
df_train = pd.read_csv(DATA_PATH + 'df_train.csv')
folds = pd.read_csv(OUT_DIR + "folds.csv")
df_train = df_train.merge(folds, on="video")
df_train['truth'] = (df_train['impact'] == 1) & (df_train['confidence'] > 1) & (df_train['visibility'] > 0) 


df_val = df_train[df_train["val_idx"] == 0]

In [6]:
videos = df_val['video'].unique()

In [7]:
# PREDS_PATH = OUT_DIR + '21_12/'
# epoch = 11
# preds = pd.read_csv(PREDS_PATH + f"pred_0_fold_epoch_{epoch}_score_001.csv")

PREDS_PATH = OUT_DIR + '22_12/'
epoch = 9
preds = pd.read_csv(PREDS_PATH + f"pred_0_fold_epoch_{epoch}_score_001_aug.csv")

In [8]:
preds = preds[preds['pred'] > 0.1].reset_index(drop=True)

### Params

In [9]:
# Probability thresholding
THRESHOLD_PRED = 0.8

# Adjacency post-processing
NMS_THRESHOLD = 0.35
THRESHOLD_IOU = 0.35
MAX_DIST = 4
MIN_CLUST_SIZE = 0

# View post-processing
MIN_DIST = 6

# Impact post-processing
MAX_FRAME_DIST = 10

## Score

In [10]:
pred_val = preds[preds['pred'] > THRESHOLD_PRED].reset_index(drop=True)

In [11]:
# scores = pred_val.groupby('video').agg(list)['pred'][videos].tolist()

In [12]:
gt_boxes = get_boxes_from_df(df_val[df_val['truth'] == 1], videos)
pred_boxes = get_boxes_from_df(pred_val, videos)

In [13]:
score = boxes_f1_score(pred_boxes, gt_boxes)

print(f' -> CV score is {score:.4f}')

 -> CV score is 0.1417


### Post-processing

In [14]:
from post_processing.adjacency import post_process_adjacency

In [15]:
df_pred_pp = pred_val.copy()

In [16]:
df_pred_pp = post_process_adjacency(
    df_pred_pp,
    threshold=THRESHOLD_IOU,
    max_dist=MAX_DIST,
    min_clust_size=MIN_CLUST_SIZE,
)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=24.0), HTML(value='')))




In [17]:
pred_boxes_pp = get_boxes_from_df(df_pred_pp, videos)
score = boxes_f1_score(pred_boxes_pp, gt_boxes)

print(f' -> CV score is {score:.4f}')

 -> CV score is 0.3454


In [18]:
df_pred = preds.copy()
df_pred = pd.read_csv('../output/df_preds.csv')

In [19]:
df_pred

Unnamed: 0,gameKey,playID,view,video,frame,left,width,top,height,pred,...,pred_cls_3d_29_6,pred_cls_3d_30_1,pred_cls_3d_30_0,pred_cls_blend,pred_cls_3d_30_3,pred_cls_3d_30_6,pred_cls_3d_02_5,scores,pred_cls_3d_02_7,pred_cls_3d_02_13
0,57586,540,Endzone,57586_000540_Endzone.mp4,1,262,22,311,33,0.184864,...,0.001065,0.001398,0.000895,0.015899,0.000864,0.001965,0.000728,0.184864,0.000091,0.002800
1,57586,540,Endzone,57586_000540_Endzone.mp4,2,262,22,311,33,0.230077,...,0.001065,0.001398,0.000895,0.013188,0.000864,0.001965,0.000728,0.230077,0.000091,0.002800
2,57586,540,Endzone,57586_000540_Endzone.mp4,3,262,22,311,33,0.240801,...,0.001065,0.001398,0.000895,0.012845,0.000864,0.001965,0.000728,0.240801,0.000091,0.002800
3,57586,540,Endzone,57586_000540_Endzone.mp4,4,262,22,311,33,0.216231,...,0.001065,0.001398,0.000895,0.012742,0.000864,0.001965,0.000728,0.216231,0.000091,0.002800
4,57586,540,Endzone,57586_000540_Endzone.mp4,5,262,22,311,33,0.150308,...,0.001065,0.001398,0.000895,0.012606,0.000864,0.001965,0.000728,0.150308,0.000091,0.002800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19880,58107,4362,Sideline,58107_004362_Sideline.mp4,469,300,38,450,35,0.987954,...,0.625361,0.040857,0.090510,0.133984,0.059651,0.210945,0.005276,0.987954,0.290548,0.001630
19881,58107,4362,Sideline,58107_004362_Sideline.mp4,470,298,37,450,35,0.994125,...,0.476308,0.031630,0.177316,0.094148,0.151503,0.464457,0.003448,0.994125,0.278926,0.002118
19882,58107,4362,Sideline,58107_004362_Sideline.mp4,471,293,38,450,35,0.989209,...,0.230620,0.055825,0.240954,0.135178,0.324442,0.569888,0.014813,0.989209,0.906283,0.006567
19883,58107,4362,Sideline,58107_004362_Sideline.mp4,472,291,36,450,35,0.991600,...,0.232335,0.023035,0.312849,0.180950,0.493354,0.806137,0.061685,0.991600,0.895978,0.020090


# Classifier 3D inference

In [21]:
from inference.classifier_3d import *



### Data

In [22]:
if "nb_frame" not in df_pred.columns:
    df_max_frame = df_pred[['video', 'frame']].groupby('video').max().rename(columns={"frame": "nb_frame"}).reset_index()
    df_pred = df_pred.merge(df_max_frame, on="video")

In [23]:
df_pred_vid = df_pred[df_pred["video"] == df_pred['video'].unique()[1]]

dataset = NFLDatasetClsInference3D(
    df_pred_vid,
    visualize=True,
    stride=2,
    n_frames=9,
    root=IMG_PATH_F,
)

### Main

In [24]:
images = df_pred["image_name"].unique()

In [26]:
models = []
CP_FOLDER = "../logs_cls_3d/2021-01-02/13/"

configs = {
    "slowonly": {
        "name": "slowonly",
        "num_classes": 1,
        "num_classes_aux": 0,
        "k": 5,
        "stride": 2,
        "num_frames": 9
    },
}

for model in configs:
    models += [retrieve_model(configs[model], fold=0, log_folder=CP_FOLDER)]
    
    
preds = []
for vid in tqdm(df_pred['video'].unique()):
    df_pred_vid = df_pred[df_pred["video"] == vid]

    pred = inference(df_pred_vid, models, root=IMG_PATH_F, stride=2, n_frames=9, batch_size=128)
    preds.append(pred)
    
df_pred['pred_cls_3d_02_13'] = np.concatenate(preds)

# df_pred.to_csv('../output/df_preds.csv', index=False)


 -> Loading weights from ../logs_cls_3d/2021-01-02/13/slowonly_0.pt



FileNotFoundError: [Errno 2] No such file or directory: '../logs_cls_3d/2021-01-02/13/slowonly_0.pt'

# Post-processing

In [28]:
from post_processing.adjacency import post_process_adjacency
from post_processing.expansion import expand_boxes
from post_processing.view import post_process_view

In [29]:
df_pred = pd.read_csv('../output/df_preds.csv')

In [30]:
df_pred['scores'] = df_pred['pred']

In [62]:
weights = {  
    'pred_cls_3d_29_6': 0.25,  #r18 ext
    'pred_cls_3d_30_1': 0.25, #r34
    'pred_cls_3d_30_0': 0.25, #r18
    'pred_cls_3d_30_3': 0.25, #r18 aux
    'pred_cls_3d_02_5': 0.66, #i3d
    'pred_cls_3d_02_7': 0.66, # slowfast
    'pred_cls_3d_02_13': 0.66,  # slowonly
}

In [63]:
#     df_pred['pred_cls_3d_29_6'].values, # 1-fold - 18 extended target
# #     df_pred['pred_cls_3d_30_8'].values, # 5-fold - 18 extended target
#     df_pred['pred_cls_3d_30_1'].values, # 5-fold - 34  (31/1)
#     df_pred['pred_cls_3d_30_0'].values, # 5-fold - 18
#     df_pred['pred_cls_3d_30_3'].values, # 5-fold - 18 aux   (31/0)
#     df_pred['pred_cls_3d_02_5'].values, # 1-fold - i3d
#     df_pred['pred_cls_3d_02_7'].values, # 1-fold - slowfast
#     df_pred['pred_cls_3d_02_13'].values, # 1-fold - slowonly

In [64]:
df_pred['pred_cls_blend'] = 0

for col in weights:
    df_pred['pred_cls_blend'] += df_pred[col] * weights[col]
    
df_pred['pred_cls_blend'] /= np.sum(list(weights.values()))

### Params

In [65]:
DET_THRESHOLD = 0.35
CLS_THRESHOLD = 0.48

# Change threshold after frame
SWITCH_FRAME = 150
DET_THRESHOLD2 = 0.40
CLS_THRESHOLD2 = 0.65

# Lower thresholds for sideline
DELTA_CLS = -0.07
DELTA_DET = -0.05

# Adjacency post-processing
NMS_THRESHOLD = 0.41
MAX_FRAME_DIST = 9
N_TIMES = 1

# View post-processing
MIN_DIST = 4
VIEW_THRESHOLD = 0.86  

# Boxes expansion
R = 0.22

# Ensemble
CLS_3D_NEW = 2 #these are i3d, slowonly, slowfast
CLS_3D_OLD = 1 # these are three resnet18, and one resnet34
CLS_2D = 0 #these are three 2d classification models

### Thresholding

In [66]:
df_pred_pp = df_pred.copy()

df_pred_pp1 = df_pred_pp.loc[
    (df_pred_pp.scores > DET_THRESHOLD)
    & (df_pred_pp.frame <= SWITCH_FRAME)
    & (df_pred_pp.view == "Endzone")
]
df_pred_pp2 = df_pred_pp.loc[
    (df_pred_pp.scores > DET_THRESHOLD2)
    & (df_pred_pp.frame > SWITCH_FRAME)
    & (df_pred_pp.view == "Endzone")
]
df_pred_pp3 = df_pred_pp.loc[
    (df_pred_pp.scores > DET_THRESHOLD - DELTA_DET)
    & (df_pred_pp.frame <= SWITCH_FRAME)
    & (df_pred_pp.view == "Sideline")
]
df_pred_pp4 = df_pred_pp.loc[
    (df_pred_pp.scores > DET_THRESHOLD2 - DELTA_DET)
    & (df_pred_pp.frame > SWITCH_FRAME)
    & (df_pred_pp.view == "Sideline")
]
df_pred_pp = pd.concat([df_pred_pp1, df_pred_pp2, df_pred_pp3, df_pred_pp4], axis=0).reset_index(drop=True)

In [67]:
df_pred_pp_cls1 = df_pred_pp.copy()[
    (df_pred_pp["pred_cls_blend"] > CLS_THRESHOLD)
    & (df_pred_pp.frame <= SWITCH_FRAME)
    & (df_pred_pp.view == "Endzone")
]
df_pred_pp_cls2 = df_pred_pp.copy()[
    (df_pred_pp["pred_cls_blend"] > CLS_THRESHOLD2)
    & (df_pred_pp.frame > SWITCH_FRAME)
    & (df_pred_pp.view == "Endzone")
]
df_pred_pp_cls3 = df_pred_pp.copy()[
    (df_pred_pp["pred_cls_blend"] > CLS_THRESHOLD - DELTA_CLS)
    & (df_pred_pp.frame <= SWITCH_FRAME)
    & (df_pred_pp.view == "Sideline")
]
df_pred_pp_cls4 = df_pred_pp.copy()[
    (df_pred_pp["pred_cls_blend"] > CLS_THRESHOLD2 - DELTA_CLS)
    & (df_pred_pp.frame > SWITCH_FRAME)
    & (df_pred_pp.view == "Sideline")
]
df_pred_pp = pd.concat(
    [df_pred_pp_cls1, df_pred_pp_cls2, df_pred_pp_cls3, df_pred_pp_cls4], axis=0
).reset_index(drop=True)

In [68]:
pred_boxes_pp = get_boxes_from_df(df_pred_pp, videos)
score = boxes_f1_score(pred_boxes_pp, gt_boxes)

print(f' -> CV score is {score:.4f}')

 -> CV score is 0.2191


### Box expansion

In [69]:
df_pred_pp = expand_boxes(df_pred_pp, r=R)

### Adjacency Post-processing

In [70]:
for i in range(N_TIMES):
    df_pred_pp = post_process_adjacency(
        df_pred_pp,
        threshold=THRESHOLD_IOU,
        max_dist=MAX_DIST,
        min_clust_size=MIN_CLUST_SIZE,
    )

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=24.0), HTML(value='')))




In [71]:
# df_pred_pp= df_pred_pp[df_pred_pp['predicted_impact_type'] != "ground"]

In [72]:
pred_boxes_pp = get_boxes_from_df(df_pred_pp, videos)
score = boxes_f1_score(pred_boxes_pp, gt_boxes)

print(f' -> CV score is {score:.4f}')

 -> CV score is 0.5423


### View PP

In [73]:
if MIN_DIST > 0:
    df_pred_pp_view = post_process_view(
        df_pred_pp, 
        min_dist=MIN_DIST,
        threshold=VIEW_THRESHOLD,
        cls_col="pred_cls_blend",
    )

In [74]:
pred_boxes_pp = get_boxes_from_df(df_pred_pp_view, videos)
score = boxes_f1_score(pred_boxes_pp, gt_boxes)

print(f' -> CV score is {score:.4f}')

 -> CV score is 0.5404
