In [1]:
import os
import numpy as np
import pandas as pd

from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import train_test_split

In [2]:
!ls /kaggle/input/nfl-split

train_labels.csv      train_labels_p2p.csv  valid_labels_all.csv
train_labels_all.csv  train_p2g_all.csv     valid_labels_p2g.csv
train_labels_p2g.csv  train_p2p_all.csv     valid_labels_p2p.csv


In [3]:
!ls /kaggle/input/nfl-player-contact-detection/

sample_submission.csv	   train
test			   train_baseline_helmets.csv
test_baseline_helmets.csv  train_labels.csv
test_player_tracking.csv   train_player_tracking.csv
test_video_metadata.csv    train_video_metadata.csv


In [4]:
class CFG:
    """Notebook configuration: input paths, label-file registry and merge helpers.

    Everything lives at class level, so attributes are read as ``CFG.<name>``.
    The helper functions are static methods: they can be called either as
    ``CFG.helper(...)`` or on an instance without the instance being passed
    as the first argument.
    """

    input_dir = '/kaggle/input/nfl-player-contact-detection'
    split_dir = '/kaggle/input/nfl-split'
    # --------------------- FIXED --------------------- #
    # NOTE: despite the `_dir` suffix, every value below is the path of a CSV file.
    train_labels_dir = os.path.join(input_dir, 'train_labels.csv')
    train_tracking_dir = os.path.join(input_dir, 'train_player_tracking.csv')
    test_sub_dir = os.path.join(input_dir, 'sample_submission.csv')
    test_tracking_dir = os.path.join(input_dir, 'test_player_tracking.csv')
    train_helmets_dir = os.path.join(input_dir, 'train_baseline_helmets.csv')
    test_helmets_dir = os.path.join(input_dir, 'test_baseline_helmets.csv')

    # Label files from the split dataset.
    train_all = os.path.join(split_dir, 'train_labels.csv')
    train_labels_all = os.path.join(split_dir, 'train_labels_all.csv')
    train_labels_p2p = os.path.join(split_dir, 'train_labels_p2p.csv')
    train_labels_p2g = os.path.join(split_dir, 'train_labels_p2g.csv')
    valid_labels_all = os.path.join(split_dir, 'valid_labels_all.csv')
    valid_labels_p2p = os.path.join(split_dir, 'valid_labels_p2p.csv')
    valid_labels_p2g = os.path.join(split_dir, 'valid_labels_p2g.csv')
    train_p2p_labels = os.path.join(split_dir, 'train_p2p_all.csv')
    train_p2g_labels = os.path.join(split_dir, 'train_p2g_all.csv')

    # Short name -> label CSV path; the keys are also used as output file prefixes.
    labels_dict = {
        'train': train_all,
        'train_all': train_labels_all,
        'train_p2p': train_labels_p2p,
        'train_p2g': train_labels_p2g,
        'valid_all': valid_labels_all,
        'valid_p2p': valid_labels_p2p,
        'valid_p2g': valid_labels_p2g,
        'p2p_all': train_p2p_labels,
        'p2g_all': train_p2g_labels,
    }

    # --------------------- SPECIFY ----------------------- #
    default_labels = labels_dict['valid_all']

    @staticmethod
    def expand_contact_id(df):
        """Split ``contact_id`` into its components, adding them as columns.

        The id is expected to be underscore separated, e.g.
        ``58168_003392_0_37084_G``.

        Args:
            df: DataFrame with a string ``contact_id`` column. It is modified
                in place.

        Returns:
            The same ``df`` with ``game_play``, ``game``, ``play``, ``step``
            (int), ``nfl_player_id_1`` and ``nfl_player_id_2`` set.
        """
        contact_id = df['contact_id']
        # Split once and index into the parts instead of re-splitting per column.
        parts = contact_id.str.split('_')
        # First 12 characters, i.e. '<5-char game>_<6-char play>' for ids like the example above.
        df['game_play'] = contact_id.str[:12]
        df['game'] = parts.str[0]
        df['play'] = parts.str[1]
        df['step'] = parts.str[2].astype(int)
        # The two player ids are always the last two parts.
        df['nfl_player_id_1'] = parts.str[-2]
        df['nfl_player_id_2'] = parts.str[-1]
        return df

    @staticmethod
    def merge_helmet_views(helmets):
        """Put the Endzone and Sideline helmet boxes of a player on one row.

        Args:
            helmets: helmet table with at least ``play_id``, ``video``,
                ``view``, ``game_key``, ``game_play``, ``frame``,
                ``nfl_player_id`` and ``player_label`` columns. Not modified.

        Returns:
            A new DataFrame keyed by game_key/game_play/frame/nfl_player_id in
            which every remaining column appears twice, suffixed ``_end``
            (Endzone) and ``_side`` (Sideline). The merge is an outer join, so
            a view with no matching row yields NaN in its columns.
        """
        df = helmets.drop(columns=['play_id', 'video'])
        on_cols = ['game_key', 'game_play', 'frame', 'nfl_player_id', 'player_label']
        endzone = df[df['view'] == 'Endzone']
        sideline = df[df['view'] == 'Sideline']
        df_view = endzone.merge(sideline, on=on_cols, how='outer', suffixes=('_end', '_side'))
        # The view names are now encoded in the suffixes; player_label is not needed downstream.
        return df_view.drop(columns=['view_end', 'view_side', 'player_label'])

    @staticmethod
    def merge_label_helmet(label, helmet):
        """Attach the helmet columns of both players to every label row.

        Args:
            label: pair table with ``game_play``, ``nfl_player_id_1`` and
                ``nfl_player_id_2`` (strings), plus either ``frame`` or
                ``step``. Not modified.
            helmet: per-player helmet table as returned by
                ``merge_helmet_views``. Not modified.

        Returns:
            A new DataFrame: ``label`` left-joined twice with ``helmet``, the
            helmet columns suffixed ``_p1`` / ``_p2`` for player 1 / player 2.
        """
        if 'frame' not in label.columns:
            # assign() builds a new frame, so the caller's `label` is left untouched.
            # Presumably 10 steps per second, 59.94 fps video and a 5 s lead-in
            # before step 0 -- TODO confirm against the competition data description.
            label = label.assign(
                frame=np.round(label['step']/10*59.94+5*59.94).astype(int))
        # The label ids are strings (they may be 'G'), so join on string ids.
        helmet = helmet.astype({'nfl_player_id': str})
        df = label.merge(helmet,
                         left_on=['game_play', 'frame', 'nfl_player_id_1'],
                         right_on=['game_play', 'frame', 'nfl_player_id'],
                         how='left')
        # Second join: all helmet columns now collide and pick up the _p1/_p2 suffixes.
        df = df.merge(helmet,
                      left_on=['game_play', 'frame', 'nfl_player_id_2'],
                      right_on=['game_play', 'frame', 'nfl_player_id'],
                      how='left', suffixes=('_p1', '_p2'))
        df = df.drop(columns=['nfl_player_id_p1', 'nfl_player_id_p2', 'game_key_p1', 'game_key_p2'])
        return df

In [5]:
# Label split selected by CFG.default_labels; the first CSV column is used as the index.
train_labels = pd.read_csv(CFG.default_labels, index_col=0)
# Training player-tracking table, joined onto the labels by create_features.
train_tracking = pd.read_csv(CFG.train_tracking_dir)
# Not loaded in this cell (kept for reference):
# train_helmets = pd.read_csv("/kaggle/input/nfl-player-contact-detection/train_baseline_helmets.csv")
# train_video_metadata = pd.read_csv("/kaggle/input/nfl-player-contact-detection/train_video_metadata.csv")

In [6]:
def create_features(df_label, df_tracking, cols, merge_col="step"):
    """Join tracking columns for both players of each pair and add distances.

    Args:
        df_label: pair table with ``game_play``, ``merge_col``,
            ``nfl_player_id_1`` and ``nfl_player_id_2``. Not modified.
        df_tracking: tracking table with ``game_play``, ``merge_col``,
            ``nfl_player_id`` and every name in ``cols``.
        cols: tracking columns to attach; must include ``x_position`` and
            ``y_position`` because the distance features are built from them.
        merge_col: time key shared by both tables.

    Returns:
        A new DataFrame sorted by game_play / merge_col / player ids with a
        fresh index. Each tracking column appears as ``<col>_1`` and
        ``<col>_2``, followed by ``G_flag`` (1 when player 2 is ``'G'``),
        ``distance_x``, ``distance_y`` and ``distance``. Pairs without a
        tracking match get NaN.
    """
    time_keys = ["game_play", merge_col]

    # Ids are compared as strings on both sides because player 2 may be 'G'.
    pairs = df_label.astype({"nfl_player_id_1": str, "nfl_player_id_2": str})
    tracking_slim = (
        df_tracking[time_keys + ["nfl_player_id"] + cols]
        .astype({"nfl_player_id": "str"})
    )

    # First join: tracking of player 1 (column names stay unsuffixed for now).
    with_first = pairs.merge(
        tracking_slim,
        left_on=time_keys + ["nfl_player_id_1"],
        right_on=time_keys + ["nfl_player_id"],
        how="left",
    ).drop(columns=["nfl_player_id"])

    # Second join: the clashing tracking columns become <col>_1 / <col>_2.
    with_both = with_first.merge(
        tracking_slim,
        left_on=time_keys + ["nfl_player_id_2"],
        right_on=time_keys + ["nfl_player_id"],
        how="left",
        suffixes=['_1', '_2'],
    ).drop(columns=["nfl_player_id"])

    df_combo = (
        with_both
        .sort_values(time_keys + ["nfl_player_id_1", "nfl_player_id_2"])
        .reset_index(drop=True)
    )

    df_combo['G_flag'] = (df_combo['nfl_player_id_2'] == 'G').astype(int)

    # Per-axis gaps first, then the Euclidean distance between the two players.
    df_combo['distance_x'] = (df_combo['x_position_1'] - df_combo['x_position_2']).abs()
    df_combo['distance_y'] = (df_combo['y_position_1'] - df_combo['y_position_2']).abs()
    squared_gap = df_combo['distance_x']**2 + df_combo['distance_y']**2
    df_combo['distance'] = np.sqrt(squared_gap)

    return df_combo

In [7]:
# Tracking columns that create_features attaches to each player of a pair.
use_cols = [
    'x_position', 'y_position',
    'speed', 'distance', 'direction', 'orientation',
    'acceleration', 'sa',
]

# Id/target columns, the ground flag, every tracking column for player 1 and
# then player 2, and finally the pairwise distance.
feats = (
    ['contact_id', 'contact', 'G_flag']
    + [f'{col}_1' for col in use_cols]
    + [f'{col}_2' for col in use_cols]
    + ['distance']
)

df_train = create_features(train_labels, train_tracking, use_cols)

### Add Helmets Features

In [8]:
# Video frame index for each step, rounded to the nearest whole frame.
# NOTE(review): presumably 10 steps/s, 59.94 fps video and a 5 s lead-in -- confirm.
df_train['frame'] = (df_train['step'] / 10 * 59.94 + 5 * 59.94).round().astype(int)

In [9]:
# Helmet table for the training plays (one row per box, tagged with its camera view).
helmets = pd.read_csv(CFG.train_helmets_dir)
# Merge the two helmet views of the same player: one row per game_play/frame/player
# with `_end` and `_side` suffixed columns.
df_helmets = CFG.merge_helmet_views(helmets)
# Merge the helmet views of both players onto each pair (`_p1` / `_p2` suffixes),
# matching on game_play, frame and player id.
df_feats = CFG.merge_label_helmet(df_train, df_helmets)

In [10]:
df_feats.columns

Index(['contact_id', 'game_play', 'datetime', 'step', 'nfl_player_id_1',
       'nfl_player_id_2', 'contact', 'game', 'play', 'x_position_1',
       'y_position_1', 'speed_1', 'distance_1', 'direction_1', 'orientation_1',
       'acceleration_1', 'sa_1', 'x_position_2', 'y_position_2', 'speed_2',
       'distance_2', 'direction_2', 'orientation_2', 'acceleration_2', 'sa_2',
       'G_flag', 'distance_x', 'distance_y', 'distance', 'frame',
       'left_end_p1', 'width_end_p1', 'top_end_p1', 'height_end_p1',
       'left_side_p1', 'width_side_p1', 'top_side_p1', 'height_side_p1',
       'left_end_p2', 'width_end_p2', 'top_end_p2', 'height_end_p2',
       'left_side_p2', 'width_side_p2', 'top_side_p2', 'height_side_p2'],
      dtype='object')

In [11]:
# Tracking-based columns plus the helmet box (left/width/top/height) of both
# players in both camera views. The Endzone box of player 1 (`*_end_p1`) was
# missing from this list although the other three view/player groups were
# included; the order below follows df_feats.columns.
feats = ['contact_id', 'contact', 'G_flag', 'x_position_1',
       'y_position_1', 'speed_1', 'distance_1', 'direction_1', 'orientation_1',
       'acceleration_1', 'sa_1', 'x_position_2', 'y_position_2', 'speed_2',
       'distance_2', 'direction_2', 'orientation_2', 'acceleration_2', 'sa_2',
       'distance', ] + [
       'left_end_p1', 'width_end_p1', 'top_end_p1', 'height_end_p1',
       'left_side_p1', 'width_side_p1', 'top_side_p1', 'height_side_p1',
       'left_end_p2', 'width_end_p2', 'top_end_p2', 'height_end_p2',
       'left_side_p2', 'width_side_p2', 'top_side_p2', 'height_side_p2']

### Save to dataset

In [12]:
# The helmet table does not depend on the label file: read and reshape it once
# instead of on every iteration.
helmets = pd.read_csv(CFG.train_helmets_dir)
# merge two helmet views of the same player
df_helmets = CFG.merge_helmet_views(helmets)

for label, dir_ in CFG.labels_dict.items():
    print(label, dir_)
    # NOTE(review): read without index_col=0 (unlike the earlier load of the
    # default labels), so a saved index column is carried along as
    # 'Unnamed: 0' -- confirm whether that is intended.
    train_labels = pd.read_csv(dir_)
    df_train = create_features(train_labels, train_tracking, use_cols)
    df_train['frame'] = np.round(df_train['step']/10*59.94+5*59.94).astype(int)
    # merge helmet views of two players
    df_feats = CFG.merge_label_helmet(df_train, df_helmets)
    # Write the helmet-augmented frame. Saving df_train here discarded the
    # helmet columns that were just merged.
    df_feats.to_csv(f'{label}_feats.csv')

train /kaggle/input/nfl-split/train_labels.csv
train_all /kaggle/input/nfl-split/train_labels_all.csv
train_p2p /kaggle/input/nfl-split/train_labels_p2p.csv
train_p2g /kaggle/input/nfl-split/train_labels_p2g.csv
valid_all /kaggle/input/nfl-split/valid_labels_all.csv
valid_p2p /kaggle/input/nfl-split/valid_labels_p2p.csv
valid_p2g /kaggle/input/nfl-split/valid_labels_p2g.csv
p2p_all /kaggle/input/nfl-split/train_p2p_all.csv
p2g_all /kaggle/input/nfl-split/train_p2g_all.csv


In [13]:
df_train

Unnamed: 0.1,Unnamed: 0,contact_id,game_play,datetime,step,nfl_player_id_1,nfl_player_id_2,contact,game,play,...,distance_2,direction_2,orientation_2,acceleration_2,sa_2,G_flag,distance_x,distance_y,distance,frame
0,244,58168_003392_0_37084_G,58168_003392,2020-09-11T03:01:48.100Z,0,37084,G,0,58168,3392,...,,,,,,1,,,,300
1,241,58168_003392_0_37211_G,58168_003392,2020-09-11T03:01:48.100Z,0,37211,G,0,58168,3392,...,,,,,,1,,,,300
2,240,58168_003392_0_38556_G,58168_003392,2020-09-11T03:01:48.100Z,0,38556,G,0,58168,3392,...,,,,,,1,,,,300
3,242,58168_003392_0_38567_G,58168_003392,2020-09-11T03:01:48.100Z,0,38567,G,0,58168,3392,...,,,,,,1,,,,300
4,231,58168_003392_0_38590_G,58168_003392,2020-09-11T03:01:48.100Z,0,38590,G,0,58168,3392,...,,,,,,1,,,,300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
410628,4721613,58582_003121_91_48220_G,58582_003121,2021-10-12T02:42:29.100Z,91,48220,G,0,58582,3121,...,,,,,,1,,,,845
410629,4721606,58582_003121_91_52493_G,58582_003121,2021-10-12T02:42:29.100Z,91,52493,G,0,58582,3121,...,,,,,,1,,,,845
410630,4721612,58582_003121_91_52500_G,58582_003121,2021-10-12T02:42:29.100Z,91,52500,G,0,58582,3121,...,,,,,,1,,,,845
410631,4721596,58582_003121_91_52609_G,58582_003121,2021-10-12T02:42:29.100Z,91,52609,G,0,58582,3121,...,,,,,,1,,,,845


In [14]:
!ls /kaggle/working

__notebook__.ipynb   train_feats.csv	  valid_p2g_feats.csv
p2g_all_feats.csv    train_p2g_feats.csv  valid_p2p_feats.csv
p2p_all_feats.csv    train_p2p_feats.csv
train_all_feats.csv  valid_all_feats.csv


## Test

In [15]:
# The sample submission lists the contact ids to score; expand each id into
# game_play / step / player-id columns.
test_submission = CFG.test_sub_dir
test_labels = CFG.expand_contact_id(pd.read_csv(test_submission))
test_tracking = pd.read_csv(CFG.test_tracking_dir)
# test_helmets = pd.read_csv("/kaggle/input/nfl-player-contact-detection/test_baseline_helmets.csv")

# Tracking columns that create_features attaches to each player of a pair.
use_cols = [
    'x_position', 'y_position',
    'speed', 'distance', 'direction', 'orientation',
    'acceleration', 'sa',
]

# Id/target columns, the ground flag, every tracking column for player 1 and
# then player 2, and finally the pairwise distance.
feats = (
    ['contact_id', 'contact', 'G_flag']
    + [f'{col}_1' for col in use_cols]
    + [f'{col}_2' for col in use_cols]
    + ['distance']
)

df_test = create_features(test_labels, test_tracking, use_cols)
del test_submission



In [16]:
# Video frame index for each step, rounded to the nearest whole frame.
# NOTE(review): presumably 10 steps/s, 59.94 fps video and a 5 s lead-in -- confirm.
df_test['frame'] = (df_test['step'] / 10 * 59.94 + 5 * 59.94).round().astype(int)
test_helmets = pd.read_csv(CFG.test_helmets_dir)
# One row per game_play/frame/player carrying both camera views (`_end` / `_side`).
df_test_helmets = CFG.merge_helmet_views(test_helmets)
# Helmet columns of both players joined onto every pair (`_p1` / `_p2`).
df_feats = CFG.merge_label_helmet(df_test, df_test_helmets)

In [17]:
# Re-derive game_play / game / play / step / player-id columns from contact_id,
# overwriting the copies df_feats already carries from test_labels.
df_feats = CFG.expand_contact_id(df_feats)
# Display the merged test features.
df_feats

Unnamed: 0,contact_id,contact,game_play,game,play,step,nfl_player_id_1,nfl_player_id_2,x_position_1,y_position_1,...,top_side_p1,height_side_p1,left_end_p2,width_end_p2,top_end_p2,height_end_p2,left_side_p2,width_side_p2,top_side_p2,height_side_p2
0,58168_003392_0_37084_37211,0,58168_003392,58168,003392,0,37084,37211,41.90,20.08,...,463.0,17.0,149.0,26.0,264.0,32.0,374.0,15.0,511.0,18.0
1,58168_003392_0_37084_38556,0,58168_003392,58168,003392,0,37084,38556,41.90,20.08,...,463.0,17.0,1212.0,28.0,284.0,22.0,544.0,14.0,282.0,18.0
2,58168_003392_0_37084_38567,0,58168_003392,58168,003392,0,37084,38567,41.90,20.08,...,463.0,17.0,380.0,26.0,274.0,35.0,439.0,15.0,457.0,20.0
3,58168_003392_0_37084_38590,0,58168_003392,58168,003392,0,37084,38590,41.90,20.08,...,463.0,17.0,808.0,25.0,287.0,34.0,468.0,14.0,370.0,18.0
4,58168_003392_0_37084_39947,0,58168_003392,58168,003392,0,37084,39947,41.90,20.08,...,463.0,17.0,940.0,26.0,278.0,33.0,474.0,14.0,340.0,18.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49583,58172_003247_125_52521_52939,0,58172_003247,58172,003247,125,52521,52939,23.44,4.04,...,,,681.0,15.0,313.0,15.0,837.0,30.0,376.0,30.0
49584,58172_003247_125_52521_G,0,58172_003247,58172,003247,125,52521,G,23.44,4.04,...,,,,,,,,,,
49585,58172_003247_125_52852_52939,0,58172_003247,58172,003247,125,52852,52939,32.67,2.18,...,311.0,31.0,681.0,15.0,313.0,15.0,837.0,30.0,376.0,30.0
49586,58172_003247_125_52852_G,0,58172_003247,58172,003247,125,52852,G,32.67,2.18,...,311.0,31.0,,,,,,,,
