In [2]:
import numpy as np
import pandas as pd

import sklearn.metrics as mtr
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import keras
from keras.models import Sequential
from keras.callbacks import Callback, EarlyStopping
from keras.models import Model
from keras.layers import Input, Dense, Concatenate, Reshape, Dropout, merge, Add
from keras.layers.embeddings import Embedding

from sklearn.model_selection import KFold,GroupKFold

import warnings
import random as rn
import math
import datetime
import tensorflow as tf
from keras.models import load_model
import os
import tqdm

warnings.filterwarnings("ignore")
pd.options.display.max_columns = 200

In [3]:
# evaluation metric
def crps(y_true, y_pred):
    y_true = np.clip(np.cumsum(y_true, axis=1), 0, 1)
    y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
    return ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * y_true.shape[0]) 


# author : nlgn
# Link : https://www.kaggle.com/kingychiu/keras-nn-starter-crps-early-stopping
class Metric(Callback):
    def __init__(self, model, callbacks, data):
        super().__init__()
        self.model = model
        self.callbacks = callbacks
        self.data = data

    def on_train_begin(self, logs=None):
        for callback in self.callbacks:
            callback.on_train_begin(logs)

    def on_train_end(self, logs=None):
        for callback in self.callbacks:
            callback.on_train_end(logs)

    def on_epoch_end(self, batch, logs=None):
        X_train, y_train = self.data[0][0], self.data[0][1]
        y_pred = self.model.predict(X_train)
        y_true = np.clip(np.cumsum(y_train, axis=1), 0, 1)
        y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
        tr_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * X_train[-1].shape[0])
        tr_s = np.round(tr_s, 6)
        logs['tr_CRPS'] = tr_s

        X_valid, y_valid = self.data[1][0], self.data[1][1]

        y_pred = self.model.predict(X_valid)
        y_true = np.clip(np.cumsum(y_valid, axis=1), 0, 1)
        y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
        val_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * X_valid[-1].shape[0])
        val_s = np.round(val_s, 6)
        logs['val_CRPS'] = val_s
        print('tr CRPS', tr_s, 'val CRPS', val_s)

        for callback in self.callbacks:
            callback.on_epoch_end(batch, logs)

In [4]:
def create_features(df):

    def back_features(df):
        carriers = df[df['NflId'] == df['NflIdRusher']][['GameId','PlayId','NflIdRusher','X','Y','Orientation','Dir','YardLine']]
        carriers['RusherDisYardLine'] = carriers['YardLine'] - carriers['X']
        carriers['back_oriented_down_field'] = carriers['Orientation'].apply(lambda x: back_direction(x))
        carriers['back_moving_down_field'] = carriers['Dir'].apply(lambda x: back_direction(x))
        carriers = carriers.rename(columns={'X':'back_X',
                                            'Y':'back_Y'})
        carriers = carriers[['GameId','PlayId','NflIdRusher','back_X','back_Y',
                             'RusherDisYardLine','back_oriented_down_field','back_moving_down_field']]
        return carriers

    def features_relative_to_back(df, carriers):
        player_distance = df[['GameId','PlayId','NflId','X','Y']]
        player_distance = pd.merge(player_distance, carriers, on=['GameId','PlayId'], how='inner')
        player_distance = player_distance[player_distance['NflId'] != player_distance['NflIdRusher']]
        player_distance['dist_to_back'] = player_distance[['X','Y','back_X','back_Y']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)
        player_distance = player_distance.groupby(['GameId','PlayId','RusherDisYardLine','back_oriented_down_field','back_moving_down_field'])\
                                         .agg({'dist_to_back':['min','max','mean','std']})\
                                         .reset_index()
        player_distance.columns = ['GameId','PlayId','RusherDisYardLine','back_oriented_down_field','back_moving_down_field',
                                   'min_dist','max_dist','mean_dist','std_dist']
        return player_distance
def create_features(df):
    
    def new_X(x_coordinate, play_direction):
        if play_direction == 'left':
            return 120.0 - x_coordinate
        else:
            return x_coordinate

    def new_line(rush_team, field_position, yardline):
        if rush_team == field_position:
            # offense starting at X = 0 plus the 10 yard endzone plus the line of scrimmage
            return 10.0 + yardline
        else:
            # half the field plus the yards between midfield and the line of scrimmage
            return 60.0 + (50 - yardline)

    def new_orientation(angle, play_direction):
        if play_direction == 'left':
            new_angle = 360.0 - angle
            if new_angle == 360.0:
                new_angle = 0.0
            return new_angle
        else:
            return angle

    def euclidean_distance(x1,y1,x2,y2):
        x_diff = (x1-x2)**2
        y_diff = (y1-y2)**2
        return np.sqrt(x_diff + y_diff)

    def back_direction(orientation):
        if orientation > 180.0:
            return 1
        else:
            return 0

    def map_team_name(df):
        map_abbr = {'ARI': 'ARZ', 'BAL': 'BLT', 'CLE': 'CLV', 'HOU': 'HST'}
        for abb in df['PossessionTeam'].unique():
            map_abbr[abb] = abb
        df['PossessionTeam'] = df['PossessionTeam'].map(map_abbr)
        for abb in df['HomeTeamAbbr'].unique():
            map_abbr[abb] = abb
        df['HomeTeamAbbr'] = df['HomeTeamAbbr'].map(map_abbr)
        for abb in df['VisitorTeamAbbr'].unique():
            map_abbr[abb] = abb
        df['VisitorTeamAbbr'] = df['VisitorTeamAbbr'].map(map_abbr)
        for abb in df['FieldPosition'].unique():
            map_abbr[abb] = abb
        df['FieldPosition'] = df['FieldPosition'].map(map_abbr)
        return df
    
    def clean_position(df):
        def get_position(pos):
            if pos == 'SAF':
                return 'DB'
            if pos == 'S':
                return 'DB'
            elif pos == 'OG':
                return 'G'
            elif pos == "OT":
                return 'T'
            else:
                return pos
        df['Position'] = df['Position'].apply(get_position)
        return df
            
    def update_yardline(df):
        new_yardline = df[df['NflId'] == df['NflIdRusher']]
        new_yardline['YardLine'] = new_yardline[['PossessionTeam','FieldPosition','YardLine']].apply(lambda x: new_line(x[0],x[1],x[2]), axis=1)
        new_yardline = new_yardline[['GameId','PlayId','YardLine']]
        return new_yardline

    def update_orientation(df, yardline):
        df['X'] = df[['X','PlayDirection']].apply(lambda x: new_X(x[0],x[1]), axis=1)
        df['Orientation'] = df[['Orientation','PlayDirection']].apply(lambda x: new_orientation(x[0],x[1]), axis=1)
        df['Dir'] = df[['Dir','PlayDirection']].apply(lambda x: new_orientation(x[0],x[1]), axis=1)
        df = df.drop('YardLine', axis=1)
        df = pd.merge(df, yardline, on=['GameId','PlayId'], how='inner')
        return df

    def back_features(df):
        carriers = df[df['NflId'] == df['NflIdRusher']][['GameId','PlayId','NflIdRusher','X','Y','Orientation','Dir','YardLine']]
        carriers['RusherDisYardLine'] = carriers['YardLine'] - carriers['X']
        carriers['back_oriented_down_field'] = carriers['Orientation'].apply(lambda x: back_direction(x))
        carriers['back_moving_down_field'] = carriers['Dir'].apply(lambda x: back_direction(x))
        carriers = carriers.rename(columns={'X':'back_X',
                                            'Y':'back_Y'})
        carriers = carriers[['GameId','PlayId','NflIdRusher','back_X','back_Y',
                             'RusherDisYardLine','back_oriented_down_field','back_moving_down_field']]
        return carriers

    def features_relative_to_back(df, carriers):
        player_distance = df[['GameId','PlayId','NflId','X','Y']]
        player_distance = pd.merge(player_distance, carriers, on=['GameId','PlayId'], how='inner')
        player_distance = player_distance[player_distance['NflId'] != player_distance['NflIdRusher']]
        player_distance['dist_to_back'] = player_distance[['X','Y','back_X','back_Y']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)
        player_distance = player_distance.groupby(['GameId','PlayId','RusherDisYardLine','back_oriented_down_field','back_moving_down_field'])\
                                         .agg({'dist_to_back':['min','max','mean','std']})\
                                         .reset_index()
        player_distance.columns = ['GameId','PlayId','RusherDisYardLine','back_oriented_down_field','back_moving_down_field',
                                   'min_dist','max_dist','mean_dist','std_dist']
        return player_distance
    
    def create_general_position(df):
        def get_general_position(pos):
            if pos == 'SS' or pos == 'FS' or pos == 'CB' or pos == 'DB':
                return 'DB'
            elif pos == 'DE' or pos == 'DT' or pos == 'DL':
                return 'DL'
            elif pos == 'ILB' or pos == 'OLB' or pos == 'MLB' or pos == 'LB':
                return 'LB'
            elif pos == 'WR':
                return 'WR'
            elif pos == 'TE':
                return 'TE'
            elif pos == 'T' or pos == 'G' or pos == 'C' or pos == 'NT' or pos == 'OL':
                return 'OL'
            elif pos == 'QB' or pos == 'RB' or pos == 'FB' or pos == 'HB' or pos == 'TB' or pos == 'WB':
                return 'OB'
            else:
                return 'Other'
        df['GeneralPosition'] = df['Position'].apply(get_general_position)
        return df
    
    def get_team_on_offense(df):
        df['TeamOnOffense'] = "home"
        df.loc[df.PossessionTeam != df.HomeTeamAbbr, 'TeamOnOffense'] = "away"
        df['IsOnOffense'] = df.Team == df.TeamOnOffense 
        return df
    
    def map_offense_defense_team(df):
        df['OffenseTeam'] = df['VisitorTeamAbbr']
        df.loc[df.TeamOnOffense == 'home', 'OffenseTeam'] = df['HomeTeamAbbr']    
        df['DefenseTeam'] = df['VisitorTeamAbbr']
        df.loc[df.TeamOnOffense == 'away', 'DefenseTeam'] = df['HomeTeamAbbr']
        df['IsOffenseAtHome'] = True
        df.loc[df.TeamOnOffense == 'away', 'IsOffenseAtHome'] = False
        return df
    
    def get_is_offense_winning(df):
        df['OffenseScore'] = df['HomeScoreBeforePlay']
        df.loc[df.TeamOnOffense == 'away', 'OffenseScore'] = df['VisitorScoreBeforePlay']
        df['DefenseScore'] = df['VisitorScoreBeforePlay']
        df.loc[df.TeamOnOffense == 'away', 'DefenseScore'] = df['HomeScoreBeforePlay']
        df['OffenseLessDefenseScore'] = df['OffenseScore'] - df['DefenseScore']
        df['OffenseInOwnTerritory'] = False
        df.loc[df.FieldPosition == df.OffenseTeam, 'OffenseInOwnTerritory'] = True
        df.drop(['OffenseScore','DefenseScore'], axis=1, inplace=True)
        return df

    def get_general_pos_counts(df):
        df['NumberOfBacksOnPlay'] = 0
        df['NumberOfOLinemenOnPlay'] = 0
        df['NumberOfWRsOnPlay'] = 0
        df['NumberOfTEsOnPlay'] = 0
        df['NumberOfDBsOnPlay'] = 0
        df['NumberOfDLinemenOnPlay'] = 0 
        df['NumberOfLBsOnPlay'] = 0
        # Pivot to find counts of each general position
        gen_pos_counts = df[['PlayId','GeneralPosition']].pivot_table(index='PlayId', columns='GeneralPosition', 
                                                                      aggfunc=len, fill_value=0)
        gen_pos_counts = gen_pos_counts.rename(columns = 
                              {'DB':'NumberOfDBsOnPlay', 'DL':'NumberOfDLinemenOnPlay', 
                               'LB':'NumberOfLBsOnPlay', 'OB':'NumberOfBacksOnPlay',
                               'OL':'NumberOfOLinemenOnPlay', 'TE':'NumberOfTEsOnPlay',
                               'WR':'NumberOfWRsOnPlay'})
        gen_pos_counts = gen_pos_counts.reset_index(drop=False)
        del gen_pos_counts.columns.name
        gen_pos_counts_cols = gen_pos_counts.columns.values.tolist()
        gen_pos_counts = gen_pos_counts.loc[gen_pos_counts.index.repeat(22)].reset_index(drop=True)
        df.update(gen_pos_counts)
        return df

    def utc2sec(x):
        return int(x.split("-")[2].split(":")[2].split(".")[0])
    def gameclock2secs(x):
        clock = x.split(":")
        return (60 * int(clock[0])) + int(clock[1])        
    
    def str_to_float(txt):
        try:
            return float(txt)
        except:
            return -1

    def get_time_features(df):
        df['TimeBetweenSnapHandoff'] = df['TimeHandoff'].apply(utc2sec) - df['TimeSnap'].apply(utc2sec)
        df['QuarterGameSecs'] = df['GameClock'].apply(gameclock2secs)
        df['TotalGameSecsPlayed'] = (900 - df['QuarterGameSecs']) + ((df['Quarter'] - 1) * 900)
        df['HalfGameSecsLeft'] = df['QuarterGameSecs']
        df.loc[(df['Quarter'].isin([1,3])), 'HalfGameSecsLeft'] = (900 + df['QuarterGameSecs'])
        return(df)
    
    def get_player_age(df):
        def timesnap2day(x):
            days = x.split("-")
            return 365 * int(days[0]) + 30 * int(days[1]) + int(days[2][:2])    
        def birthday2day(x):
            days = x.split("/")
            return 30 * int(days[0]) + int(days[1]) + 365 * int(days[2])        
        df['PlayerAge'] = df['TimeSnap'].apply(timesnap2day) - df['PlayerBirthDate'].apply(birthday2day)
        df.drop('PlayerBirthDate', axis=1, inplace=True)
        return df
        
    def get_player_weights_bmi(df):
        def height2inch(x):
            height = x.split("-")
            return 12 * int(height[0]) + int(height[1])
        df['PlayerHeight'] = df['PlayerHeight'].apply(height2inch)
        df = df.rename(columns={'PlayerWeight':'PlayerMass'})
        df['PlayerBMI'] = df['PlayerMass'] / df['PlayerHeight']
        return df
    def get_is_rusher(df):
        df['IsRusher'] = df.NflId == df.NflIdRusher 
        return df

    def get_redzone(df):
        df['InOffenseRedzone'] = False
        df.loc[df.YardLine <= 30, 'InOffenseRedzone'] = True
        df['InDefenseRedzone'] = False
        df.loc[df.YardLine >= 90, 'InDefenseRedzone'] = True    
        return df
    
    def get_qb_kneel(df):
        df['QBKneel'] = False
        df.loc[
            ((df.Quarter == 2) | (df.Quarter == 4)) &
            (df.GameClock <= '02:00') &
            (df.OffenseLessDefenseScore > 0) &
            (df.NumberOfBacksOnPlay >= 3) &
            (df.NumberOfTEsOnPlay >= 2), 'QBKneel'
        ] = True
        return df

    def get_dis_yardline(df):
        """
        For defender use only
        """
        df['DisYardLine'] = 0
        df.loc[df.IsOnOffense == True, 'DisYardLine'] = df['YardLine'] - df['X']
        df.loc[df.IsOnOffense == False, 'DisYardLine'] = df['X'] - df['YardLine']
        return df
    
    def get_no_defenders_yl(df):
        df['NoDefenderYL'] = 'NaN'
        df.loc[(df.IsOnOffense == False) & (df.DisYardLine < 0), 'NoDefenderYL'] = 'NoDefendersBelow0YL'
        df.loc[(df.IsOnOffense == False) & ((df.DisYardLine >= 0) & (df.DisYardLine < 3)), 'NoDefenderYL'] = 'NoDefenders0_2YL'
        df.loc[(df.IsOnOffense == False) & ((df.DisYardLine >= 3) & (df.DisYardLine < 6)), 'NoDefenderYL'] = 'NoDefenders3_5YL'
        df.loc[(df.IsOnOffense == False) & ((df.DisYardLine >= 6) & (df.DisYardLine < 9)), 'NoDefenderYL'] = 'NoDefenders6_8YL'
        df.loc[(df.IsOnOffense == False) & (df.DisYardLine >= 9), 'NoDefenderYL'] = 'NoDefendersAbove9YL'
        df['NoDefendersBelow0YL'] = 0
        df['NoDefenders0_2YL'] = 0
        df['NoDefenders3_5YL'] = 0
        df['NoDefenders6_8YL'] = 0
        df['NoDefendersAbove9YL'] = 0
        # Pivot to find counts of each general position
        no_defenders = df[['PlayId','NoDefenderYL']].pivot_table(index='PlayId', columns='NoDefenderYL', 
                                                                      aggfunc=len, fill_value=0)
        no_defenders = no_defenders.reset_index(drop=False).drop('NaN', axis=1)
        del no_defenders.columns.name
        no_defenders_cols = no_defenders.columns.values.tolist()
        no_defenders = no_defenders.loc[no_defenders.index.repeat(22)].reset_index(drop=True)
        df.update(no_defenders)
        return df
    
    def get_inside_runs(df):
        # Outside runs
        #df['IsOutsideRun'] = 0
        #outside1 = df[ # Outside seams and running out
        #    (((df.RusherY > -2.00) & (df.RusherY <= 23.55)) & ((df.RusherDir > 90) & (df.RusherDir <= 270))) | 
        #    (((df.RusherY > 29.75) & (df.RusherY <= 55.00)) & ((df.RusherDir > 270) | (df.RusherDir <=  90)))
        #]['PlayId']
        #outside2 = df[ # Inside seams and running out
        #    (((df.RusherY > 23.55) & (df.RusherY <= 29.75)) & ((df.RusherDir > 270) | (df.RusherDir <= 40))) |
        #    (((df.RusherY > 23.55) & (df.RusherY <= 29.75)) & ((df.RusherDir > 140) & (df.RusherDir <= 270)))
        #]['PlayId']
        #outside = outside1.tolist() + outside2.tolist()
        
        # Inside runs
        df['IsInside'] = 0
        inside1 = df[ # Outside seams and running in
            (((df.RusherY > -2.00) & (df.RusherY <= 23.55)) & ((df.RusherDir > 270) | (df.RusherDir <= 90))) |
            (((df.RusherY > 29.75) & (df.RusherY <= 55.00)) & ((df.RusherDir > 90) & (df.RusherDir <= 270)))
        ]['PlayId']
        inside2 = df[ # Inside the seams and running in
            (((df.RusherY > 23.55) & (df.RusherY <= 29.75)) & ((df.RusherDir > 40) & (df.RusherDir <= 140)))
        ]['PlayId']
        inside = inside1.tolist() + inside2.tolist()
        df.loc[df.PlayId.isin(inside), 'IsInside'] = 1
        return df
    
    def get_dis_from_yl(df):
        """
        For both off and def
        """
        df['DisFromYL'] = abs(df['YardLine'] - df['X'])
        return df
    
    def get_dis_rusher(df):
        rusher_xy = df.loc[df.IsRusher == True, ['GameId','PlayId','X','Y']].rename(columns={'X':'RusherX','Y':'RusherY'})
        df = df.merge(rusher_xy, on=['GameId','PlayId'])
        df['DisRusher'] = df[['X','Y','RusherX','RusherY']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)
        df.drop(['RusherX','RusherY'], axis=1,inplace=True)
        return df

    def get_dis_features(df):
        """
        Returns DisRusherNearestYardLine, RusherDisQB, RusherDisC and RusherDisMLB, DisC, DisQB
        """
        def get_rusher_dis_mlb(df):
            lb_xy = df.loc[(df.Position == 'MLB') | (df.Position == 'ILB'), 
                                   ['PlayId','X','Y']].rename(columns={'X':'MLBX', 'Y':'MLBY'})
            rusher_lb_xy = lb_xy.merge(rusher_xy, on=['PlayId'], how='left')
            rusher_lb_xy['RusherDisMLB'] = rusher_lb_xy[
                ['RusherX','RusherY','MLBX','MLBY']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)
            rusher_lb_xy.drop(['RusherX','RusherY','MLBX','MLBY'],axis=1, inplace=True)
            rusher_lb_dis = rusher_lb_xy.groupby(['PlayId']).agg({'RusherDisMLB':['min'],}).reset_index()
            rusher_lb_dis.columns = ['PlayId','RusherDisMLB']
            return rusher_lb_dis
        
        rusher_xy = df.loc[df.IsRusher == True, ['PlayId','X','Y']].rename(columns={'X':'RusherX','Y':'RusherY'})
        qb_xy = df.loc[df.Position == 'QB', ['PlayId','X','Y']].rename(columns={'X':'QBX','Y':'QBY'})
        c_xy = df.loc[df.Position == 'C', ['PlayId','X','Y']].rename(columns={'X':'CX','Y':'CY'})
        try:
            rusher_lb_dis = get_rusher_dis_mlb(df)
        except:
            rusher_lb_dis = np.nan
        rusherxy_qbxy = rusher_xy.merge(qb_xy, on=['PlayId'])
        rusherxy_qbxy_cxy = rusherxy_qbxy.merge(c_xy, on=['PlayId'])
        try:
            dis_total_xy = rusherxy_qbxy_cxy.merge(rusher_lb_dis, on=['PlayId'])
        except:
            dis_total_xy = rusherxy_qbxy_cxy
            dis_total_xy['RusherDisMLB'] = np.nan
        dis_total_xy = dis_total_xy.loc[dis_total_xy.index.repeat(22)].reset_index(drop=True)
        dis_total_xy.drop(['PlayId'], axis=1, inplace=True)
        df['RusherX'] = 0
        df['RusherY'] = 0
        df['QBX'] = 0
        df['QBY'] = 0
        df['CX'] = 0
        df['CY'] = 0
        df['RusherDisMLB'] = 0
        df.update(dis_total_xy) 

        df['DisRusherNearestYardLine'] = df[['YardLine','RusherY','X','Y']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)    
        df['RusherDisQB'] = df[['RusherX','RusherY','QBX','QBY']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)    
        df['RusherDisC'] = df[['RusherX','RusherY','CX','CY']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)
        df['DisC'] = df[['X','Y','CX','CY']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)    
        df['DisQB'] = df[['X','Y','QBX','QBY']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)    

        df.drop(['RusherX','RusherY','QBX','QBY','CX','CY'], axis=1,inplace=True)
        return df 

    
    def get_team_aggs(df, col, for_offense=True):
        aggs = ['Avg','Min','Max','Std']
        if for_offense == True:
            team_agg = df[df.IsOnOffense == True][['PlayId'] + [col]]
            team_agg = df[['PlayId'] + [col]]
            team_agg = team_agg.groupby(['PlayId']).agg({col:['mean','min','max','std']}).reset_index()
            avg_col = 'AvgOffense' + col
            min_col = 'MinOffense' + col
            max_col = 'MaxOffense' + col
            std_col = 'StdOffense' + col
        if for_offense == False:
            team_agg = df[df.IsOnOffense == False][['PlayId'] + [col]]
            team_agg = team_agg.groupby(['PlayId']).agg({col:['mean','min','max','std']}).reset_index()
            avg_col = 'AvgDefense' + col
            min_col = 'MinDefense' + col
            max_col = 'MaxDefense' + col
            std_col = 'StdDefense' + col
                
        team_agg.drop(['PlayId'], axis=1, inplace=True)
        team_agg_cols = [avg_col,min_col,max_col,std_col]
        team_agg.columns = team_agg_cols
        team_agg = team_agg.loc[team_agg.index.repeat(22)].reset_index(drop=True)
        for col in team_agg_cols:
            df[col] = 0
        df.update(team_agg)
        return df
    
    def get_rusher_dis_mlb_inside(df):
        try:
            df['RusherDisMLBByIsInside'] = (1 / df['RusherDisMLB']) * df['IsInside']
            df['RusherDisMLBByIsInside'] = df['RusherDisMLBByIsInside'].replace([np.inf, -np.inf], np.nan)
            return df
        except:
            df['RusherDisMLBByIsInside'] = np.nan
            return df
        
    def get_yards_by_down(df):
        df['YardsByDownSqrt'] = (df['Distance'] * df['Down']) **(1/2)
        return df
    
    def get_diff_rusher_dir_otation(df):
        df['DiffRusherDirOtation'] = df['RusherDir'] - df['RusherOrientation']
        return df
    
    def get_mech_feats(df):
        df['Weight'] = df['PlayerMass'] * 9.806 # acceleration gravity
        df['ChangeTime'] = df['Dis'] / df['S']
        df['Force'] = df['PlayerMass'] * df['A']
        df['Momentum'] = df['PlayerMass'] * df['S']
        df['KE'] = 0.5 * df['PlayerMass'] * (df['S']**2)
        df['Work'] = df['Force'] * df['Dis']
        df['Power'] = df['Work'] / df['ChangeTime']
        df['Impulse'] = df['Force'] * df['ChangeTime']
        df['theta'] = 0
        dir_0_to_90 = (df.Dir > 0) & (df.Dir < 90)
        df.loc[dir_0_to_90, 'theta'] = (df.loc[dir_0_to_90, 'Dir']).apply(math.radians)
        dir_90_to_180 = (df.Dir > 90) & (df.Dir < 180)
        df.loc[dir_90_to_180, 'theta'] = (180 - df.loc[dir_90_to_180, 'Dir']).apply(math.radians)
        dir_180_to_270 = (df.Dir > 180) & (df.Dir < 270)
        df.loc[dir_180_to_270, 'theta'] = (df.loc[dir_180_to_270, 'Dir'] - 180).apply(math.radians)
        dir_270_to_360 = (df.Dir > 270) & (df.Dir < 360)
        df.loc[dir_270_to_360, 'theta'] = (360 - df.loc[dir_270_to_360, 'Dir']).apply(math.radians)
        dir_0_180_360 = (df.Dir == 0) | (df.Dir == 180) | (df.Dir == 360)
        df.loc[dir_0_180_360, 'theta'] = 0
        dir_90_270 = (df.Dir == 90) | (df.Dir == 270)
        df.loc[dir_90_270, 'theta'] = 90
        df['SY'] = np.abs(df['S'] * np.cos(df.theta))
        df['SX'] = np.abs(df['S'] * np.sin(df.theta))    
        df['ForceY'] = np.abs(df['Force'] * np.cos(df.theta))
        df['ForceX'] = np.abs(df['Force'] * np.sin(df.theta))
        df['MomentumY'] = np.abs(df['Momentum'] * np.cos(df.theta))
        df['MomentumX'] = np.abs(df['Momentum'] * np.sin(df.theta))
        df['WorkY'] = np.abs(df['Work'] * np.cos(df.theta))
        df['WorkX'] = np.abs(df['Work'] * np.sin(df.theta))
        df['PowerY'] = np.abs(df['Power'] * np.cos(df.theta))
        df['PowerX'] = np.abs(df['Power'] * np.sin(df.theta))
        df['ImpulseY'] = np.abs(df['Impulse'] * np.cos(df.theta))
        df['ImpulseX'] = np.abs(df['Impulse'] * np.sin(df.theta))
        del [dir_0_to_90,dir_90_to_180,dir_180_to_270,dir_270_to_360,dir_0_180_360,dir_90_270]
        df['player_adj'] = np.abs(df.RusherY - df.Y)
        df['player_opp'] = np.abs(df.RusherX - df.X)
        df['player_theta'] = np.degrees((df.player_adj / df.player_opp).apply(math.atan))
        df.loc[(df.Y > df.RusherY) & (df.X > df.RusherX), 'player_theta'] = np.radians(df.player_theta)
        df.loc[(df.Y < df.RusherY) & (df.X > df.RusherX), 'player_theta'] = np.radians(180 - df.player_theta)
        df.loc[(df.Y < df.RusherY) & (df.X < df.RusherX), 'player_theta'] = np.radians(180 + df.player_theta)
        df.loc[(df.Y > df.RusherY) & (df.X < df.RusherX), 'player_theta'] = np.radians(360 - df.player_theta)
        df['PlayerAngleToRusher'] = np.abs(df.rusher_theta - df.player_theta)
        df.drop(['ChangeTime','theta'],axis=1,inplace=True)
        return df
    
    def get_gen_position_feats(df, position): 
        pos_feat = df.loc[df.GeneralPosition == position, ['PlayId','A','S','Dir',
                                                    'Orientation','Dis']]
        pos_feat = pos_feat.rename(columns={'A':position+'A','S':position+'S','Dir':position+'Dir',
                                            'Orientation':position+'Orientation',
                                            'Dis':position+'Dis','PlayerMass':position+'Weight',
                                            'PlayerHeight':position+'Height'})
        pos_feat = pos_feat.groupby(['PlayId']).agg(
            {position+'A':['mean','min','max'], 
             position+'S':['mean','min','max'], 
             position+'Dir':['mean','min','max'], 
             position+'Orientation':['mean','min','max'], 
             position+'Dis':['mean','min','max']}).reset_index()
        pos_feat.columns = [''.join(col) for col in pos_feat.columns.values]
        pos_feat_columns = pos_feat.columns.tolist()
        pos_feat_columns.remove('PlayId')
        pos_feat.drop('PlayId',axis=1,inplace=True)
        pos_feat = pos_feat.loc[pos_feat.index.repeat(22)].reset_index(drop=True)
        for feat in pos_feat_columns:
            df[feat] = 0
        df.update(pos_feat)
        return df

    def get_off_less_def_feats(df, feat):
        off_feat = df.loc[df.IsOnOffense == True, ['PlayId',feat]]
        off_feat = off_feat.groupby(['PlayId']).agg({feat:['sum']}).reset_index()
        off_feat.drop('PlayId', axis=1,inplace=True)
        off_feat.columns = ['Off'+feat]
        def_feat = df.loc[df.IsOnOffense == False, ['PlayId',feat]]
        def_feat = def_feat.groupby(['PlayId']).agg({feat:['sum']}).reset_index()
        def_feat.drop('PlayId', axis=1,inplace=True)
        def_feat.columns = ['Def'+feat]
        off_def_feat = pd.DataFrame(off_feat['Off'+feat] - def_feat['Def'+feat], columns=['OffLessDef'+feat])

        df['OffLessDef'+feat] = 0
        off_def_feat = off_def_feat.loc[off_def_feat.index.repeat(22)].reset_index(drop=True)
        df.update(off_def_feat)
        return df
    
    def get_rusher_feats(df):
        rusher_feats = df.loc[df.IsRusher == True,['X','Y','S','A','Dis',
                                                   'Orientation','Dir','DisFromYL',
                                                   'PlayerMass','PlayerHeight']]
        rusher_feats = rusher_feats.loc[rusher_feats.index.repeat(22)].reset_index(drop=True)
        rusher_feats = rusher_feats.rename(columns={'X':'RusherX','Y':'RusherY','S':'RusherS',
                                                    'A':'RusherA','Dis':'RusherDis',
                                                    'Orientation':'RusherOrientation',
                                                    'Dir':'RusherDir','DisFromYL':'RusherDisYL',
                                                    'PlayerMass':'RusherMass',
                                                    'PlayerHeight':'RusherHeight'})
        df['RusherX'] = 0
        df['RusherY'] = 0 
        df['RusherS'] = 0 
        df['RusherA'] = 0 
        df['RusherDis'] = 0 
        df['RusherOrientation'] = 0 
        df['RusherDir'] = 0 
        df['RusherDisYL'] = 0 
        df['RusherMass'] = 0 
        df['RusherHeight'] = 0 
    
        df.update(rusher_feats)
        df['RusherWeight'] = df['RusherMass'] * 9.806 # acceleration gravity
        df['ChangeTime'] = df['RusherDis'] / df['RusherS']
        df['RusherForce'] = df['RusherMass'] * df['RusherA']
        df['RusherMomentum'] = df['RusherMass'] * df['RusherS']
        df['RusherKE'] = 0.5 * df['RusherMass'] * (df['RusherS']**2)
        df['RusherWork'] = df['RusherForce'] * df['RusherDis']
        df['RusherPower'] = df['RusherWork'] / df['ChangeTime']
        df['RusherImpulse'] = df['RusherForce'] * df['ChangeTime']
        
        rusher_feats = df.loc[df.IsRusher == True, ['RusherX','RusherY','RusherDir']]
        rusher_feats['rusher_theta'] = 0
        rusher_feats['rusher_adj'] = 0
        rusher_feats['rusher_hyp'] = 0
        rusher_feats['rusher_opp'] = 0
        rusher_feats['rusher_gradient'] = 0
        rusher_feats['rusher_con'] = 0
        rusher_feats['rusher_RusherDir'] = np.round(rusher_feats.RusherDir,2)
        dir_0_to_90 = (df.RusherDir > 0) & (rusher_feats.RusherDir < 90)
        rusher_feats.loc[dir_0_to_90, 'rusher_theta'] = (rusher_feats.loc[dir_0_to_90, 'RusherDir']).apply(math.radians)
        rusher_feats.loc[dir_0_to_90, 'rusher_adj'] = 53.3 - rusher_feats.loc[dir_0_to_90, 'RusherY']
        rusher_feats.loc[dir_0_to_90, 'rusher_con'] = rusher_feats.loc[dir_0_to_90, 'RusherY']
        dir_90_to_180 = (rusher_feats.RusherDir > 90) & (rusher_feats.RusherDir < 180)
        rusher_feats.loc[dir_90_to_180, 'rusher_theta'] = (180 - rusher_feats.loc[dir_90_to_180, 'RusherDir']).apply(math.radians)
        rusher_feats.loc[dir_90_to_180, 'rusher_adj'] = rusher_feats.loc[dir_90_to_180, 'RusherY']
        rusher_feats.loc[dir_90_to_180, 'rusher_con'] = rusher_feats.loc[dir_90_to_180, 'RusherY']
        dir_180_to_270 = (rusher_feats.RusherDir > 180) & (rusher_feats.RusherDir < 270)
        rusher_feats.loc[dir_180_to_270, 'rusher_theta'] = (rusher_feats.loc[dir_180_to_270, 'RusherDir'] - 180).apply(math.radians)
        rusher_feats.loc[dir_180_to_270, 'rusher_adj'] = rusher_feats.loc[dir_180_to_270, 'RusherY']
        rusher_feats.loc[dir_180_to_270, 'rusher_con'] = rusher_feats.loc[dir_180_to_270, 'RusherY']
        dir_270_to_360 = (rusher_feats.RusherDir > 270) & (rusher_feats.RusherDir < 360)
        rusher_feats.loc[dir_270_to_360, 'rusher_theta'] = (360 - rusher_feats.loc[dir_270_to_360, 'RusherDir']).apply(math.radians)
        rusher_feats.loc[dir_270_to_360, 'rusher_adj'] = 53.3 - rusher_feats.loc[dir_270_to_360, 'RusherY']
        rusher_feats.loc[dir_270_to_360, 'rusher_con'] = rusher_feats.loc[dir_270_to_360, 'RusherY']
        rusher_feats['rusher_opp'] = rusher_feats.rusher_adj * np.tan(rusher_feats.rusher_theta)
        rusher_feats['rusher_hyp'] = rusher_feats.rusher_adj / np.cos(rusher_feats.rusher_theta)
        rusher_feats['rusher_gradient'] = rusher_feats.rusher_adj / rusher_feats.rusher_opp
        dir_0_90_180_270_360 = (rusher_feats.RusherDir == 0) | (rusher_feats.RusherDir == 90) | (rusher_feats.RusherDir == 180) | (rusher_feats.RusherDir == 270) | (rusher_feats.RusherDir == 360)
        rusher_feats.loc[dir_0_90_180_270_360, 'rusher_hyp'] = 0
        rusher_feats.loc[dir_0_90_180_270_360, 'rusher_opp'] = 0
        rusher_feats.loc[dir_0_90_180_270_360, 'rusher_gradient'] = 0
        dir_0_180_360 = (rusher_feats.RusherDir == 0) | (rusher_feats.RusherDir == 180) | (rusher_feats.RusherDir == 360)
        rusher_feats.loc[dir_0_180_360, 'rusher_theta'] = 0
        dir_90_270 = (rusher_feats.RusherDir == 90) | (rusher_feats.RusherDir == 270)
        rusher_feats.loc[dir_90_270, 'rusher_theta'] = 90
        dir_90_180_270 = ((rusher_feats.RusherDir == 90) | (rusher_feats.RusherDir == 180) | (rusher_feats.RusherDir == 270))
        rusher_feats.loc[dir_90_180_270, 'rusher_adj'] = rusher_feats.loc[dir_90_180_270, 'RusherY']
        dir_0 = (rusher_feats.RusherDir == 0)
        rusher_feats.loc[dir_0, 'rusher_adj'] = 53.3 - rusher_feats.loc[dir_90_180_270, 'RusherY']
        dir_0 = (rusher_feats.RusherDir == 0)
        rusher_feats.loc[dir_0, 'rusher_hyp'] = 53.3 - rusher_feats.loc[dir_0, 'RusherY']
        rusher_feats.loc[dir_0, 'rusher_opp'] = 0
        rusher_feats.loc[dir_0, 'rusher_gradient'] = 0
        dir_90 = (rusher_feats.RusherDir == 90)
        rusher_feats.loc[dir_90, 'rusher_hyp'] = 120 - rusher_feats.loc[dir_90, 'RusherX']
        rusher_feats.loc[dir_90, 'rusher_opp'] = 0
        rusher_feats.loc[dir_90, 'rusher_gradient'] = 0
        dir_180 = (rusher_feats.RusherDir == 180)
        rusher_feats.loc[dir_180, 'rusher_hyp'] = rusher_feats.loc[dir_180, 'RusherY']
        rusher_feats.loc[dir_180, 'rusher_opp'] = 0
        rusher_feats.loc[dir_180, 'rusher_gradient'] = 0
        dir_270 = (rusher_feats.RusherDir == 270)
        rusher_feats.loc[dir_270, 'rusher_hyp'] = rusher_feats.loc[dir_270, 'RusherX']
        rusher_feats.loc[dir_270, 'rusher_opp'] = 0
        rusher_feats.loc[dir_270, 'rusher_gradient'] = 0
        rusher_feats['gradient_dir'] = 1
        rusher_feats.loc[
            ((rusher_feats.RusherDir > 90) & (rusher_feats.RusherDir < 180)) | ((rusher_feats.RusherDir > 270) & (rusher_feats.RusherDir < 360)), 'gradient_dir'
        ] = -1
        df['rusher_theta'] = 0
        df['rusher_adj'] = 0
        df['rusher_hyp'] = 0
        df['rusher_opp'] = 0
        df['rusher_gradient'] = 0
        df['rusher_con'] = 0
        df['gradient_dir'] = 0
        rusher_feats = rusher_feats.loc[rusher_feats.index.repeat(22)].reset_index(drop=True)
        df.update(rusher_feats)
        
        df['RusherSY'] = df['RusherS'] * np.cos(df.rusher_theta)
        df['RusherSX'] = df['RusherS'] * np.sin(df.rusher_theta) 
        df['RusherForceY'] = df['RusherForce'] * np.cos(df.rusher_theta)
        df['RusherForceX'] = df['RusherForce'] * np.sin(df.rusher_theta)
        df['RusherMomentumY'] = df['RusherMomentum'] * np.cos(df.rusher_theta)
        df['RusherMomentumX'] = df['RusherMomentum'] * np.sin(df.rusher_theta)
        df['RusherWorkY'] = df['RusherWork'] * np.cos(df.rusher_theta)
        df['RusherWorkX'] = df['RusherWork'] * np.sin(df.rusher_theta)
        df['RusherPowerY'] = df['RusherPower'] * np.cos(df.rusher_theta)
        df['RusherPowerX'] = df['RusherPower'] * np.sin(df.rusher_theta)
        df['RusherImpulseY'] = df['RusherImpulse'] * np.cos(df.rusher_theta)
        df['RusherImpulseX'] = df['RusherImpulse'] * np.sin(df.rusher_theta)
        df['fe1'] = pd.Series(np.sqrt(np.absolute(np.square(df.RusherX.values) - np.square(df.RusherY.values))))
        df['fe5'] = np.square(df['RusherS'].values) + 2 * df['RusherA'].values * df['RusherDis'].values  # N
        df['fe8'] = df['RusherS'].values / np.clip(df['fe1'].values, 0.6, None)
        df.drop(['ChangeTime'],axis=1,inplace=True)
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.fillna(0)
        return df
    
    def get_gap_feats(df):
        df['X_gapmedian'] = 0
        df['X_gapmax'] = 0
        df['Y_gapmedian'] = 0
        df['Y_gapmax'] = 0
        plays = df.loc[df.IsOnOffense == False, ['PlayId','X','Y','RusherX']]
        gaps_df = pd.DataFrame(columns=['PlayId','X_gap','Y_gap'])
        for play in plays['PlayId'].unique():
            RusherX_val = df.loc[df.PlayId == play, 'RusherX'].unique()[0]
            X_vals = plays.loc[plays.PlayId == play, 'X']
            X_vals = X_vals.append(pd.Series([RusherX_val,120]), ignore_index=True).sort_values().reset_index(drop=True)
            X_vals = np.diff(X_vals)
            Y_vals = plays.loc[plays.PlayId == play, 'Y']
            Y_vals = Y_vals.append(pd.Series([0,53.3]), ignore_index=True).sort_values().reset_index(drop=True)
            Y_vals = np.diff(Y_vals)
            gaps_play = pd.DataFrame()
            gaps_play['X_gap'] = X_vals
            gaps_play['Y_gap'] = Y_vals
            gaps_play['PlayId'] = play
            gaps_df = pd.concat([gaps_df, gaps_play], axis=0, ignore_index=True)
        gaps_agg_x = gaps_df.groupby('PlayId').agg({'X_gap':['median','max']}).reset_index()
        gaps_agg_x.columns = [''.join(col) for col in gaps_agg_x.columns.values]
        gaps_agg_x = gaps_agg_x.loc[gaps_agg_x.index.repeat(22)].reset_index(drop=True)
        gaps_agg_y = gaps_df.groupby('PlayId').agg({'Y_gap':['median','max']}).reset_index()
        gaps_agg_y.columns = [''.join(col) for col in gaps_agg_y.columns.values]
        gaps_agg_y = gaps_agg_y.loc[gaps_agg_y.index.repeat(22)].reset_index(drop=True)
        df.update(gaps_agg_x)
        df.update(gaps_agg_y)
        df['XY_gap_area'] = df['X_gapmax'] * df['Y_gapmax']
        #df.drop(['X','Y'], axis=1, inplace=True)
        return df
    
    def combine_features(df): 
        df = map_team_name(df)
        df = get_team_on_offense(df)
        df = map_offense_defense_team(df)
        df = clean_position(df)
        df = get_is_rusher(df)
        df = get_is_offense_winning(df)
        df = create_general_position(df)
        #df = get_general_pos_counts(df)
        df = get_time_features(df)
        df = get_player_age(df)
        df = get_player_weights_bmi(df)
        yardline = update_yardline(df)
        df = update_orientation(df, yardline)         
        df = get_redzone(df)
        #df = get_qb_kneel(df)
        df = get_dis_yardline(df) # use for defender distance only
        #df = get_no_defenders_yl(df)
        df = get_dis_from_yl(df) # absolute distance for both off and def
        df = get_dis_rusher(df)
        df = get_dis_features(df)
        df = get_rusher_feats(df)
        df = get_mech_feats(df)
        #df = get_inside_runs(df)
        #df = get_rusher_dis_mlb_inside(df)
        df = get_yards_by_down(df)
        #df = get_diff_rusher_dir_otation(df)
        df = get_gap_feats(df)
        agg_cols = ['X','Y','A','Dir','DisFromYL','DisRusher','Force','Momentum','Dis',
                    'ForceX','ForceY','MomentumX',
                    #'Orientation','DisRusherNearestYardLine',
                    #'S','PlayerBMI','DisC','DisQB','KE','Work',
                    #'Power','Impulse','SX','WorkX','PowerX','ImpulseX',
                    #'player_adj','player_opp','player_theta',
                    'PlayerAngleToRusher'
                   ]
        
        for agg_col in agg_cols:
            df = get_team_aggs(df, col=agg_col, for_offense=True)
            df = get_team_aggs(df, col=agg_col, for_offense=False)
        del agg_cols
        df.drop([#'MinOffenseDisC', 'MinOffenseDisQB',
            'DisQB','DisC','MinOffenseDisRusher'],axis=1,inplace=True)
        #df = get_gen_position_feats(df, position='OL')
        #df = get_gen_position_feats(df, position='DL')
        #df = get_gen_position_feats(df, position='LB')
        off_less_def_feats = ['X'#,'Y','A','Momentum','Force','ForceX','ForceY','MomentumX','MomentumY',
                             #'SX','SY','Weight','Impulse','KE','Work','Power','WorkX','WorkY','PowerX','PowerY','ImpulseX','ImpulseY'
                             ]
        for feat in off_less_def_feats:
            df = get_off_less_def_feats(df, feat)

        return df
    
    df = combine_features(df)
    df = df.fillna(-999)
    df = df.select_dtypes(exclude=['object'])
    df.drop(['X','Y','S','A','Dis','Orientation','Dir','PlayerAge','PlayerMass','PlayerHeight',
             'PlayerBMI','DisYardLine','DisFromYL',
             'DisRusher','NflIdRusher','IsOnOffense',
             'NflId','JerseyNumber','IsRusher','DisRusherNearestYardLine',
             'Weight','Force','Momentum','KE','Work','Power','Impulse',
             'SX','SY','ForceX','ForceY','MomentumX','MomentumY','WorkX',
             'WorkY','PowerX','PowerY','ImpulseX','ImpulseY',
             'player_adj','player_opp','player_theta','PlayerAngleToRusher'
    ], axis=1, inplace=True)

    df = df.drop_duplicates().reset_index(drop=True)
    return df

In [5]:
train = pd.read_csv('../input/nfl-big-data-bowl-2020/train.csv')
outcomes = train[['GameId','PlayId','Yards']].drop_duplicates()

train_basetable = create_features(train)

X = train_basetable.copy()
X = X.sample(frac=1).reset_index(drop=True)

yards = X.Yards

y = np.zeros((yards.shape[0], 199))
for idx, target in enumerate(list(yards)):
    y[idx][99 + target] = 1
print(train_basetable.shape)
train_basetable.head()

(23171, 175)


Unnamed: 0,GameId,PlayId,Season,Quarter,Down,Distance,HomeScoreBeforePlay,VisitorScoreBeforePlay,DefendersInTheBox,Yards,Week,Temperature,Humidity,IsOffenseAtHome,OffenseLessDefenseScore,OffenseInOwnTerritory,TimeBetweenSnapHandoff,QuarterGameSecs,TotalGameSecsPlayed,HalfGameSecsLeft,YardLine,InOffenseRedzone,InDefenseRedzone,RusherDisMLB,RusherDisQB,RusherDisC,RusherX,RusherY,RusherS,RusherA,RusherDis,RusherOrientation,RusherDir,RusherDisYL,RusherMass,RusherHeight,RusherWeight,RusherForce,RusherMomentum,RusherKE,RusherWork,RusherPower,RusherImpulse,rusher_theta,rusher_adj,rusher_hyp,rusher_opp,rusher_gradient,rusher_con,gradient_dir,RusherSY,RusherSX,RusherForceY,RusherForceX,RusherMomentumY,RusherMomentumX,RusherWorkY,RusherWorkX,RusherPowerY,RusherPowerX,RusherImpulseY,RusherImpulseX,fe1,fe5,fe8,YardsByDownSqrt,X_gapmedian,X_gapmax,Y_gapmedian,Y_gapmax,XY_gap_area,AvgOffenseX,MinOffenseX,MaxOffenseX,StdOffenseX,AvgDefenseX,MinDefenseX,MaxDefenseX,StdDefenseX,AvgOffenseY,MinOffenseY,MaxOffenseY,StdOffenseY,AvgDefenseY,MinDefenseY,MaxDefenseY,StdDefenseY,AvgOffenseA,MinOffenseA,MaxOffenseA,StdOffenseA,AvgDefenseA,MinDefenseA,MaxDefenseA,StdDefenseA,AvgOffenseDir,MinOffenseDir,MaxOffenseDir,StdOffenseDir,AvgDefenseDir,MinDefenseDir,MaxDefenseDir,StdDefenseDir,AvgOffenseDisFromYL,MinOffenseDisFromYL,MaxOffenseDisFromYL,StdOffenseDisFromYL,AvgDefenseDisFromYL,MinDefenseDisFromYL,MaxDefenseDisFromYL,StdDefenseDisFromYL,AvgOffenseDisRusher,MaxOffenseDisRusher,StdOffenseDisRusher,AvgDefenseDisRusher,MinDefenseDisRusher,MaxDefenseDisRusher,StdDefenseDisRusher,AvgOffenseForce,MinOffenseForce,MaxOffenseForce,StdOffenseForce,AvgDefenseForce,MinDefenseForce,MaxDefenseForce,StdDefenseForce,AvgOffenseMomentum,MinOffenseMomentum,MaxOffenseMomentum,StdOffenseMomentum,AvgDefenseMomentum,MinDefenseMomentum,MaxDefenseMomentum,StdDefenseMomentum,AvgOffenseDis,MinOffenseDis,MaxOffenseDis,StdOffenseDis,AvgDefenseDis,MinDefenseDis,MaxDefenseDis,StdDefenseDis,AvgOffenseForceX,MinOffenseForceX,MaxOffenseForceX,StdOffenseForceX,AvgDefenseForceX,MinDefenseForceX,MaxDefenseForceX,StdDefenseForceX,AvgOffenseForceY,MinOffenseForceY,MaxOffenseForceY,StdOffenseForceY,AvgDefenseForceY,MinDefenseForceY,MaxDefenseForceY,StdDefenseForceY,AvgOffenseMomentumX,MinOffenseMomentumX,MaxOffenseMomentumX,StdOffenseMomentumX,AvgDefenseMomentumX,MinDefenseMomentumX,MaxDefenseMomentumX,StdDefenseMomentumX,AvgOffensePlayerAngleToRusher,MinOffensePlayerAngleToRusher,MaxOffensePlayerAngleToRusher,StdOffensePlayerAngleToRusher,AvgDefensePlayerAngleToRusher,MinDefensePlayerAngleToRusher,MaxDefensePlayerAngleToRusher,StdDefensePlayerAngleToRusher,OffLessDefX
0,2017090700,20170907000118,2017,1,3,2,0,0,6.0,8,1,63.0,77.0,True,0,True,1,854,46,1754,45.0,False,False,7.820038,1.449724,4.401931,41.25,30.53,3.63,3.35,0.38,198.02,114.26,3.75,205,70,2010.23,686.75,744.15,1350.63225,260.965,2492.9025,71.891185,1.147379,30.53,74.304299,67.742512,0.450677,30.53,-1,1.491487,3.309436,282.170452,626.103345,305.754848,678.434371,107.224772,237.919271,1024.278741,2272.755142,29.538505,65.542499,27.739531,15.7229,0.13086,2.44949,0.53,56.63,2.495,16.64,942.3232,46.118182,40.24,63.37,4.392297,48.179091,44.94,63.37,5.294079,28.713182,16.64,38.83,6.308114,28.890909,16.64,38.83,7.014714,1.191818,0.32,3.35,0.691807,1.025455,0.32,2.43,0.631338,152.131818,37.41,304.69,69.10972,173.046364,37.41,304.69,81.429631,2.226364,0.06,18.37,3.925483,3.19,0.06,18.37,5.286855,7.680806,22.415872,5.056308,9.752491,4.59331,22.415872,5.327299,286.34,80.64,686.75,153.918266,233.012727,80.64,500.58,133.860076,399.645,54.08,1081.5,253.965203,287.580909,54.08,896.35,226.936523,0.245,0.0,0.59,0.184462,0.184545,0.01,0.51,0.17201,201.117822,11.785973,626.103345,158.975296,122.749438,11.785973,318.109521,87.513997,155.502527,0.099707,481.927869,128.862687,162.540048,0.099707,481.927869,156.041288,248.476002,17.626808,678.434371,175.826729,130.85271,17.626808,253.547599,81.889063,0.981352,0.080194,2.794244,0.645974,0.926503,0.153382,1.831558,0.542587,-45.34
1,2017090700,20170907000139,2017,1,1,10,0,0,6.0,3,1,63.0,77.0,True,0,True,1,832,68,1732,53.0,False,False,7.760058,0.792023,3.787941,48.93,27.16,3.06,2.41,0.34,149.3,47.8,4.07,205,70,2010.23,494.05,627.3,959.769,167.977,1511.793,54.894444,0.834267,26.14,38.91499,28.828403,0.906745,27.16,1,2.055465,2.266862,331.863557,365.994511,421.370326,464.706723,112.833609,124.438134,1015.502485,1119.943203,36.873729,40.666057,40.699869,11.0024,0.075185,3.162278,0.61,48.05,2.395,19.42,933.131,54.214545,48.21,71.95,4.647315,56.561818,53.2,71.95,5.406292,24.827727,9.43,33.88,6.892898,24.892727,9.43,33.88,7.190716,1.567273,0.55,2.67,0.617554,1.592727,0.55,2.67,0.660425,166.613182,12.72,355.85,110.152689,207.282727,12.72,355.85,118.90526,2.5,0.08,18.95,4.073731,3.561818,0.2,18.95,5.406292,8.223049,23.025872,5.76419,10.297028,4.287773,23.025872,5.833217,365.933182,169.4,556.1,114.967154,351.486364,169.4,550.02,113.172284,440.061364,144.0,806.96,178.123475,367.016364,197.1,574.74,107.84506,0.236364,0.01,0.5,0.121088,0.197273,0.01,0.34,0.101793,222.593726,29.725817,548.977479,138.988588,168.186969,29.725817,337.725662,110.610771,241.921652,31.67445,545.034822,144.785376,276.797314,80.143942,545.034822,145.176386,272.296165,21.519303,763.93854,191.964628,179.920402,21.519303,360.523762,121.54326,1.052558,0.035171,5.019156,1.092825,0.963307,0.065063,2.24585,0.676699,-51.64
2,2017090700,20170907000189,2017,1,1,10,0,0,7.0,5,1,63.0,77.0,True,0,False,2,782,118,1682,75.0,False,False,6.69003,1.64639,3.329625,71.34,19.11,5.77,2.42,0.6,219.18,138.04,3.66,205,70,2010.23,496.1,1182.85,3412.52225,297.66,2862.497,51.587522,0.73234,19.11,25.698893,17.182579,1.112173,19.11,-1,4.29064,3.857889,368.905807,331.69823,879.581201,790.867267,221.343484,199.018938,2128.586506,1913.898786,38.361089,34.492017,68.732841,36.1969,0.083948,3.162278,0.88,28.24,2.215,20.41,576.3784,76.618636,70.49,91.76,4.003402,78.389091,75.25,91.76,4.720893,22.026818,9.08,32.89,6.192438,22.602727,9.08,32.89,6.57762,1.755909,0.46,3.63,0.935733,2.092727,1.04,3.11,0.749134,157.578636,74.03,232.83,36.419174,164.201818,74.03,193.63,34.4427,2.435909,0.12,16.76,3.543412,3.389091,0.25,16.76,4.720893,8.097011,20.726285,4.877886,9.903689,4.22167,20.726285,5.07329,410.418636,84.18,708.48,179.398905,477.791818,197.6,708.48,156.435161,845.866364,248.9,1252.4,244.104383,754.953636,248.9,1028.7,208.588768,0.380455,0.12,0.6,0.103853,0.360909,0.12,0.45,0.094282,158.65386,19.798554,429.532477,118.235118,125.771761,19.798554,268.548964,86.000012,356.822781,20.279439,687.255253,186.8525,443.63465,54.366479,687.255253,185.192382,347.884308,50.031481,949.772463,260.539798,186.875741,50.031481,417.742833,124.134572,0.872223,0.05655,4.522552,1.058777,0.574583,0.05655,1.780604,0.539388,-38.95
3,2017090700,20170907000345,2017,1,2,2,0,0,9.0,2,1,63.0,77.0,True,0,False,2,732,168,1632,108.0,False,True,5.704849,0.918096,5.24187,104.47,25.36,4.45,3.2,0.46,173.78,84.56,3.53,210,71,2059.26,672.0,934.5,2079.2625,309.12,2990.4,69.465169,1.47585,27.94,294.715406,293.388014,0.095232,25.36,1,0.421875,4.429957,63.707834,668.973327,88.593706,930.291033,29.305603,307.72773,283.499859,2976.931304,6.585529,69.152299,101.345209,22.7465,0.043909,2.0,0.3,9.2,1.54,19.65,180.78,108.515,103.7,110.8,1.679696,109.310909,107.79,110.8,0.962418,25.766818,18.48,33.65,3.795418,25.733636,18.48,33.65,4.505029,1.087273,0.07,3.2,0.825499,1.293636,0.27,2.62,0.762119,119.335909,5.41,348.87,125.543391,146.282727,5.41,348.87,150.729268,1.304091,0.05,4.3,1.147866,1.349091,0.09,2.8,0.902513,5.297134,9.791231,2.26833,6.309354,4.528002,9.791231,1.834174,273.020909,23.45,672.0,181.174812,318.250909,72.9,641.9,174.044609,474.225,46.2,1136.2,265.785962,389.686364,167.4,695.36,163.680695,0.226818,0.09,0.5,0.116062,0.199091,0.13,0.32,0.064102,167.614868,19.055293,668.973327,161.756125,148.42973,21.086185,493.893927,139.329047,175.555447,13.667418,629.826968,151.754142,249.595102,64.551109,629.826968,171.839863,293.684819,45.696102,930.291033,236.189264,161.67956,46.627198,338.149509,104.032039,1.042222,0.391438,2.241661,0.399535,0.946594,0.466143,1.453324,0.261819,-17.51
4,2017090700,20170907000395,2017,1,1,10,7,0,7.0,7,1,63.0,77.0,False,-7,True,1,728,172,1628,35.0,False,False,7.188992,0.502892,4.92062,29.99,27.12,3.9,2.53,0.44,34.27,157.92,5.01,216,71,2118.096,546.48,842.4,1642.68,240.4512,2131.272,61.654154,0.385369,27.12,29.266401,11.001264,2.465171,27.12,-1,3.613974,1.466013,506.401103,205.422281,780.618301,316.658851,222.816485,90.385803,1974.964302,801.146894,57.132432,23.175847,12.802566,17.4364,0.304626,3.162278,1.14,70.59,3.17,14.12,996.7308,35.774545,29.51,49.41,4.811638,38.413636,33.95,49.41,5.391251,26.824091,10.91,39.18,7.466987,26.586364,12.4,39.18,7.864325,1.870909,0.48,3.26,0.627489,2.121818,1.26,3.26,0.607055,170.335,30.63,341.78,76.029425,198.156364,141.22,322.72,46.7747,3.026364,0.07,14.41,3.766343,3.959091,0.53,14.41,4.964066,8.752055,21.214806,5.81445,11.056456,4.288088,21.214806,5.900009,475.914091,110.4,941.5,179.104215,518.136364,313.9,941.5,202.298265,654.256818,206.4,1165.5,286.321443,580.593636,206.4,1165.5,266.876529,0.288182,0.0,0.49,0.125381,0.245455,0.0,0.37,0.121767,145.861543,14.137867,254.902552,79.935506,156.934523,44.066344,254.902552,62.63771,434.51801,0.448672,930.316238,207.081928,482.890984,264.534322,930.316238,220.725087,198.508763,26.190403,439.587784,120.773502,188.747562,26.190403,287.037345,95.354948,1.583429,0.087147,5.594932,1.255504,1.342073,0.087147,2.705731,0.983452,-58.06


In [6]:
cat = ['InDefenseRedzone'#,'OffenseInOwnTerritory','InOffenseRedzone',
       #'IsInside'#,'IsOffenseAtHome','QBKneel',
      ]

num = list(set(X.columns.values.tolist()) - set(cat))
num.remove('GameId')
num.remove('PlayId')
print(len(cat))
print(len(num))

1
172


In [7]:
features = list(set(['GameId','PlayId',
                     'RusherX',
                     'RusherY',
                     'RusherA',
                     'RusherDir',
                     'RusherDis',
                     'RusherDisYL', 
                     'YardLine',
                     'StdDefenseX', 
                     'StdOffenseX',
                     'StdOffenseY', 
                     'StdDefenseY', 
                     'AvgOffenseA', 
                     'AvgDefenseA', 
                     'StdOffenseDir',
                     'StdDefenseDir',
                     'MaxDefenseDisFromYL',
                     'AvgDefenseDisRusher',
                     'MinDefenseDisRusher',
                     
                     'AvgOffenseForce',
                     'AvgDefenseForce',
                     
                     'OffLessDefX', 
                     'InDefenseRedzone',
                     'AvgOffenseDis',
                     'AvgDefenseDis',
                     'AvgOffenseDisFromYL',
                     'AvgDefenseDisFromYL',
                                          
                     'AvgDefenseForceX',
                     'AvgOffenseForceX',
                     'AvgDefenseForceY',
                     'AvgOffenseForceY',
                     
                     'RusherForce',
                     'RusherMomentum',
                     'RusherKE',
                     'RusherWork', 
                     
                     'RusherForceX',
                     'RusherForceY',
                     'RusherMomentumX',
                     'RusherImpulseX',
                     'RusherSX',
                     'rusher_hyp',
                     
                     'AvgOffensePlayerAngleToRusher',
                     'StdOffensePlayerAngleToRusher',
                     'AvgDefensePlayerAngleToRusher',
                     'StdDefensePlayerAngleToRusher',
                     
                     'X_gapmax',
                     'Y_gapmax'
                    ]))
X = X[features]

In [8]:
scaler = StandardScaler()
num = list(set(features) & set(num)) # update num to only show intersection with features selected
X[num] = scaler.fit_transform(X[num])

In [9]:
import keras
def model_396_1():
    inputs = []
    embeddings = []
    for i in cat:
        input_ = Input(shape=(1,))
        embedding = Embedding(int(np.absolute(X[i]).max() + 1), 10, input_length=1)(input_)
        embedding = Reshape(target_shape=(10,))(embedding)
        inputs.append(input_)
        embeddings.append(embedding)
    input_numeric = Input(shape=(len(num),))
    embedding_numeric = Dense(512, activation='relu')(input_numeric) 
    inputs.append(input_numeric)
    embeddings.append(embedding_numeric)
    
    x = Concatenate()(embeddings)
    #fc1 = keras.layers.Dense(units=450)(x)
    #act1 = keras.layers.PReLU()(fc1)
    #bn1 = keras.layers.BatchNormalization()(act1)
    #dp1 = keras.layers.Dropout(0.55)(bn1)
    #gn1 = keras.layers.GaussianNoise(0.15)(dp1)
    #concat1 = keras.layers.Concatenate()([x, gn1])
    #fc2 = keras.layers.Dense(units=600)(concat1)
    #act2 = keras.layers.PReLU()(fc2)
    #bn2 = keras.layers.BatchNormalization()(act2)
    #dp2 = keras.layers.Dropout(0.55)(bn2)
    #gn2 = keras.layers.GaussianNoise(0.15)(dp2)
    #concat2 = keras.layers.Concatenate()([concat1, gn2])
    #fc3 = keras.layers.Dense(units=400)(concat2)
    #act3 = keras.layers.PReLU()(fc3)
    #bn3 = keras.layers.BatchNormalization()(act3)
    #dp3 = keras.layers.Dropout(0.55)(bn3)
    #gn3 = keras.layers.GaussianNoise(0.15)(dp3)
    #concat3 = keras.layers.Concatenate([concat2, gn3])
    #output = keras.layers.Dense(units=199, activation='softmax')(concat2)
    #model = keras.models.Model(inputs, output)
    x = Dense(256, activation='relu')(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = keras.layers.GaussianNoise(0.2)(x)
    output = Dense(199, activation='softmax')(x)
    model = Model(inputs, output)
    return model


n_splits = 5
kf = GroupKFold(n_splits=n_splits)
score = []
for i_369, (tdx, vdx) in enumerate(kf.split(X, y, X['GameId'])):
    print(f'Fold : {i_369}')
    X_train, X_val, y_train, y_val = X.iloc[tdx], X.iloc[vdx], y[tdx], y[vdx]
    X_train = [np.absolute(X_train[i]) for i in cat] + [X_train[num]]
    X_val = [np.absolute(X_val[i]) for i in cat] + [X_val[num]]
    model = model_396_1()
    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=[])
    es = EarlyStopping(monitor='val_CRPS', 
                   mode='min',
                   restore_best_weights=True, 
                   verbose=2, 
                   patience=5)
    es.set_model(model)
    metric = Metric(model, [es], [(X_train,y_train), (X_val,y_val)])
    for i in range(1):
        model.fit(X_train, y_train, verbose=False)
    for i in range(1):
        model.fit(X_train, y_train, batch_size=64, verbose=False)
    for i in range(1):
        model.fit(X_train, y_train, batch_size=128, verbose=False)
    for i in range(1):
        model.fit(X_train, y_train, batch_size=256, verbose=False)
    model.fit(X_train, y_train, callbacks=[metric], epochs=100, batch_size=1024, verbose=False)
    score_ = crps(y_val, model.predict(X_val))
    model.save(f'keras_369_{i_369}.h5')
    print(score_)
    score.append(score_)

Fold : 0
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
tr CRPS 0.0126 val CRPS 0.012537
tr CRPS 0.012573 val CRPS 0.012517
tr CRPS 0.012557 val CRPS 0.012531
tr CRPS 0.012554 val CRPS 0.012521
tr CRPS 0.012519 val CRPS 0.012504
tr CRPS 0.01251 val CRPS 0.012499
tr CRPS 0.01248 val CRPS 0.0125
tr CRPS 0.012483 val CRPS 0.0125
tr CRPS 0.012449 val CRPS 0.012492
tr CRPS 0.012426 val CRPS 0.012485
tr CRPS 0.012416 val CRPS 0.012495
tr CRPS 0.012407 val CRPS 0.01249
tr CRPS 0.012389 val CRPS 0.012505
tr CRPS 0.01236 val CRPS 0.012487
tr CRPS 0.012331 val CRPS 0.012453
tr CRPS 0.012312 val CRPS 0.012477
tr CRPS 0.012291 val CRPS 0.012447
tr CRPS 0.012263 val CRPS 0.012454
tr CRPS 0.01226 val CRPS 0.012473
tr CRPS 0.012221 val CRPS 0.012455
tr CRPS 0.012208 val CRPS 0.012481
tr CRPS 0.012195 val CRPS 0.0124

In [None]:
feature_scores = pd.DataFrame(columns=['feature','score'])
for feature in enumerate(features):
    print('###############################')
    print(f'BEGINNING TRAIN - {feature[1]} Permuted')
    print(f'{np.round(feature[0] / len(num) * 100,1)} %')
    print('###############################')
    X = train_basetable.copy()
    X = X.sample(frac=1).reset_index(drop=True)
    yards = X.Yards
    y = np.zeros((yards.shape[0], 199))
    for idx, target in enumerate(list(yards)):
        y[idx][99 + target] = 1
    cat = ['InDefenseRedzone']
    num = list(set(X.columns.values.tolist()) - set(cat))
    num.remove('GameId')
    num.remove('PlayId')
    
    # Permute feature
    X[feature[1]] = np.random.permutation(X[feature[1]].values)
    
    scaler = StandardScaler()
    num = list(set(features) & set(num)) # update num to only show intersection with features selected
    X[num] = scaler.fit_transform(X[num])
    
    n_splits = 5
    kf = GroupKFold(n_splits=n_splits)
    scores = []
    for i_369, (tdx, vdx) in enumerate(kf.split(X, y, X['GameId'])):
        print(f'Fold : {i_369}')
        X_train, X_val, y_train, y_val = X.iloc[tdx], X.iloc[vdx], y[tdx], y[vdx]
        X_train = [np.absolute(X_train[i]) for i in cat] + [X_train[num]]
        X_val = [np.absolute(X_val[i]) for i in cat] + [X_val[num]]
        model = model_396_1()
        model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=[])
        es = EarlyStopping(monitor='val_CRPS', 
                       mode='min',
                       restore_best_weights=True, 
                       verbose=2, 
                       patience=5)
        es.set_model(model)
        metric = Metric(model, [es], [(X_train,y_train), (X_val,y_val)])
        for i in range(1):
            model.fit(X_train, y_train, verbose=False)
        for i in range(1):
            model.fit(X_train, y_train, batch_size=64, verbose=False)
        for i in range(1):
            model.fit(X_train, y_train, batch_size=128, verbose=False)
        for i in range(1):
            model.fit(X_train, y_train, batch_size=256, verbose=False)
        model.fit(X_train, y_train, callbacks=[metric], epochs=100, batch_size=1024, verbose=False)
        score_ = crps(y_val, model.predict(X_val))
        scores.append(score_)
    cv_score = np.mean(scores)
    feature_scores = feature_scores.append({'feature':feature[1], 'score':cv_score}, ignore_index=True)
    print('')

###############################
BEGINNING TRAIN - StdDefenseDir Permuted
0.0 %
###############################
Fold : 0
tr CRPS 0.012628 val CRPS 0.012541
tr CRPS 0.012597 val CRPS 0.0125
tr CRPS 0.012566 val CRPS 0.012517
tr CRPS 0.012556 val CRPS 0.012497
tr CRPS 0.012525 val CRPS 0.012498
tr CRPS 0.012508 val CRPS 0.012489
tr CRPS 0.012495 val CRPS 0.012485
tr CRPS 0.01248 val CRPS 0.01248
tr CRPS 0.012456 val CRPS 0.012489
tr CRPS 0.012433 val CRPS 0.012467
tr CRPS 0.012421 val CRPS 0.01247
tr CRPS 0.012417 val CRPS 0.012475
tr CRPS 0.012402 val CRPS 0.012468
tr CRPS 0.012365 val CRPS 0.012451
tr CRPS 0.01235 val CRPS 0.012455
tr CRPS 0.012329 val CRPS 0.012453
tr CRPS 0.012328 val CRPS 0.012475
tr CRPS 0.01231 val CRPS 0.012459
tr CRPS 0.012256 val CRPS 0.012452
Restoring model weights from the end of the best epoch
Epoch 00019: early stopping
Fold : 1
tr CRPS 0.012488 val CRPS 0.012943
tr CRPS 0.012457 val CRPS 0.012929
tr CRPS 0.012444 val CRPS 0.012919
tr CRPS 0.012425 val CRPS

tr CRPS 0.012585 val CRPS 0.012351
tr CRPS 0.012569 val CRPS 0.012346
tr CRPS 0.01253 val CRPS 0.012333
tr CRPS 0.012524 val CRPS 0.01233
tr CRPS 0.012502 val CRPS 0.012319
tr CRPS 0.012477 val CRPS 0.012325
tr CRPS 0.012468 val CRPS 0.012313
tr CRPS 0.012451 val CRPS 0.0123
tr CRPS 0.012437 val CRPS 0.012309
tr CRPS 0.012414 val CRPS 0.012304
tr CRPS 0.012398 val CRPS 0.012316
tr CRPS 0.012369 val CRPS 0.012308
tr CRPS 0.012358 val CRPS 0.012292
tr CRPS 0.012335 val CRPS 0.012299
tr CRPS 0.012341 val CRPS 0.012319
tr CRPS 0.012311 val CRPS 0.012299
tr CRPS 0.0123 val CRPS 0.012292
tr CRPS 0.012257 val CRPS 0.012294
Restoring model weights from the end of the best epoch
Epoch 00020: early stopping

###############################
BEGINNING TRAIN - StdOffenseY Permuted
4.4 %
###############################
Fold : 0
tr CRPS 0.012581 val CRPS 0.012534
tr CRPS 0.012558 val CRPS 0.012516
tr CRPS 0.012528 val CRPS 0.012501
tr CRPS 0.012533 val CRPS 0.012515
tr CRPS 0.012497 val CRPS 0.012499