In [1]:
import numpy as np
import pandas as pd

import sklearn.metrics as mtr
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import keras
import keras.backend as K
from keras.models import Sequential
from keras.callbacks import Callback, EarlyStopping
from keras.models import Model
from keras.layers import Input, Dense, Concatenate, Reshape, Dropout, merge, Add
from keras.layers.embeddings import Embedding

from sklearn.model_selection import KFold,GroupKFold

import warnings
import random as rn
import math
import datetime
import time
import tensorflow as tf
from keras.models import load_model
import os
import tqdm

warnings.filterwarnings("ignore")
pd.options.display.max_columns = 200

from kaggle.competitions import nflrush
env = nflrush.make_env()
iter_test = env.iter_test()

import lightgbm as lgb
from keras.callbacks import ModelCheckpoint


Using TensorFlow backend.


In [2]:
# author : nlgn
# Link : https://www.kaggle.com/kingychiu/keras-nn-starter-crps-early-stopping
class Metric(Callback):
    def __init__(self, model, callbacks, data):
        super().__init__()
        self.model = model
        self.callbacks = callbacks
        self.data = data

    def on_train_begin(self, logs=None):
        for callback in self.callbacks:
            callback.on_train_begin(logs)

    def on_train_end(self, logs=None):
        for callback in self.callbacks:
            callback.on_train_end(logs)

    def on_epoch_end(self, batch, logs=None):
        X_train, y_train = self.data[0][0], self.data[0][1]
        y_pred = self.model.predict(X_train)
        y_true = np.clip(np.cumsum(y_train, axis=1), 0, 1)
        y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
        tr_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * X_train[-1].shape[0])
        tr_s = np.round(tr_s, 6)
        logs['tr_CRPS'] = tr_s

        X_valid, y_valid = self.data[1][0], self.data[1][1]

        y_pred = self.model.predict(X_valid)
        y_true = np.clip(np.cumsum(y_valid, axis=1), 0, 1)
        y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
        val_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * X_valid[-1].shape[0])
        val_s = np.round(val_s, 6)
        logs['val_CRPS'] = val_s
        print('tr CRPS', tr_s, 'val CRPS', val_s)

        for callback in self.callbacks:
            callback.on_epoch_end(batch, logs)

In [7]:
def create_features(df):
    
    def new_X(x_coordinate, play_direction):
        if play_direction == 'left':
            return 120.0 - x_coordinate
        else:
            return x_coordinate

    def new_line(rush_team, field_position, yardline):
        if rush_team == field_position:
            # offense starting at X = 0 plus the 10 yard endzone plus the line of scrimmage
            return 10.0 + yardline
        else:
            # half the field plus the yards between midfield and the line of scrimmage
            return 60.0 + (50 - yardline)

    def new_orientation(angle, play_direction):
        if play_direction == 'left':
            new_angle = 360.0 - angle
            if new_angle == 360.0:
                new_angle = 0.0
            return new_angle
        else:
            return angle

    def euclidean_distance(x1,y1,x2,y2):
        x_diff = (x1-x2)**2
        y_diff = (y1-y2)**2
        return np.sqrt(x_diff + y_diff)

    def back_direction(orientation):
        if orientation > 180.0:
            return 1
        else:
            return 0

    def map_team_name(df):
        map_abbr = {'ARI': 'ARZ', 'BAL': 'BLT', 'CLE': 'CLV', 'HOU': 'HST'}
        for abb in df['PossessionTeam'].unique():
            map_abbr[abb] = abb
        df['PossessionTeam'] = df['PossessionTeam'].map(map_abbr)
        for abb in df['HomeTeamAbbr'].unique():
            map_abbr[abb] = abb
        df['HomeTeamAbbr'] = df['HomeTeamAbbr'].map(map_abbr)
        for abb in df['VisitorTeamAbbr'].unique():
            map_abbr[abb] = abb
        df['VisitorTeamAbbr'] = df['VisitorTeamAbbr'].map(map_abbr)
        for abb in df['FieldPosition'].unique():
            map_abbr[abb] = abb
        df['FieldPosition'] = df['FieldPosition'].map(map_abbr)
        return df
            
    def update_yardline(df):
        new_yardline = df[df['NflId'] == df['NflIdRusher']]
        new_yardline['YardLine'] = new_yardline[['PossessionTeam','FieldPosition','YardLine']].apply(lambda x: new_line(x[0],x[1],x[2]), axis=1)
        new_yardline = new_yardline[['GameId','PlayId','YardLine']]
        return new_yardline

    def update_orientation(df, yardline):
        df['X'] = df[['X','PlayDirection']].apply(lambda x: new_X(x[0],x[1]), axis=1)
        df['Orientation'] = df[['Orientation','PlayDirection']].apply(lambda x: new_orientation(x[0],x[1]), axis=1)
        df['Dir'] = df[['Dir','PlayDirection']].apply(lambda x: new_orientation(x[0],x[1]), axis=1)
        df = df.drop('YardLine', axis=1)
        df = pd.merge(df, yardline, on=['GameId','PlayId'], how='inner')
        return df
    
    def get_team_on_offense(df):
        df['TeamOnOffense'] = "home"
        df.loc[df.PossessionTeam != df.HomeTeamAbbr, 'TeamOnOffense'] = "away"
        df['IsOnOffense'] = df.Team == df.TeamOnOffense 
        return df
    
    def map_offense_defense_team(df):
        df['OffenseTeam'] = df['VisitorTeamAbbr']
        df.loc[df.TeamOnOffense == 'home', 'OffenseTeam'] = df['HomeTeamAbbr']    
        df['DefenseTeam'] = df['VisitorTeamAbbr']
        df.loc[df.TeamOnOffense == 'away', 'DefenseTeam'] = df['HomeTeamAbbr']
        df['IsOffenseAtHome'] = True
        df.loc[df.TeamOnOffense == 'away', 'IsOffenseAtHome'] = False
        return df     
    
    def str_to_float(txt):
        try:
            return float(txt)
        except:
            return -1
        
    def get_player_weights(df):
        df = df.rename(columns={'PlayerWeight':'PlayerMass'})
        return df
    
    def get_is_rusher(df):
        df['IsRusher'] = df.NflId == df.NflIdRusher 
        return df

    def get_redzone(df):
        df['InOffenseRedzone'] = False
        df.loc[df.YardLine <= 30, 'InOffenseRedzone'] = True
        df['InDefenseRedzone'] = False
        df.loc[df.YardLine >= 90, 'InDefenseRedzone'] = True    
        return df
    
    def get_dis_from_yl(df):
        """
        For both off and def
        """
        df['DisFromYL'] = abs(df['YardLine'] - df['X'])
        return df
    
    def get_dis_rusher(df):
        rusher_xy = df.loc[df.IsRusher == True, ['GameId','PlayId','X','Y']].rename(columns={'X':'RusherX','Y':'RusherY'})
        df = df.merge(rusher_xy, on=['GameId','PlayId'])
        df['DisRusher'] = df[['X','Y','RusherX','RusherY']].apply(lambda x: euclidean_distance(x[0],x[1],x[2],x[3]), axis=1)
        df.drop(['RusherX','RusherY'], axis=1,inplace=True)
        return df
    
    def get_team_aggs(df, col, agg_type, for_offense=True):
        aggs = ['Avg','Min','Max','Std']
        if for_offense == True:
            team_agg = df[df.IsOnOffense == True][['PlayId'] + [col]]
            team_agg = df[['PlayId'] + [col]]
            team_agg = team_agg.groupby(['PlayId']).agg({col:[agg_type]}).reset_index()
            if agg_type == 'mean':
                col_name = 'AvgOffense' + col
            if agg_type == 'min':
                col_name = 'MinOffense' + col
            if agg_type == 'max':
                col_name = 'MaxOffense' + col
            if agg_type == 'std':
                col_name = 'StdOffense' + col
        if for_offense == False:
            team_agg = df[df.IsOnOffense == False][['PlayId'] + [col]]
            team_agg = team_agg.groupby(['PlayId']).agg({col:[agg_type]}).reset_index()
            if agg_type == 'mean':
                col_name = 'AvgDefense' + col
            if agg_type == 'min':
                col_name = 'MinDefense' + col
            if agg_type == 'max':
                col_name = 'MaxDefense' + col
            if agg_type == 'std':
                col_name = 'StdDefense' + col
        team_agg.drop(['PlayId'], axis=1, inplace=True)
        team_agg_cols = [col_name]
        team_agg.columns = team_agg_cols
        team_agg = team_agg.loc[team_agg.index.repeat(22)].reset_index(drop=True)
        for col in team_agg_cols:
            df[col] = 0
        df.update(team_agg)
        return df
    
    def get_mech_feats(df):
        df['ChangeTime'] = df['Dis'] / df['S']
        df['Force'] = df['PlayerMass'] * df['A']
        df['Momentum'] = df['PlayerMass'] * df['S']
        df['KE'] = 0.5 * df['PlayerMass'] * (df['S']**2)
        df['Work'] = df['Force'] * df['Dis']
        df['Power'] = df['Work'] / df['ChangeTime']
        df['Impulse'] = df['Force'] * df['ChangeTime']
        df['theta'] = 0
        dir_0_to_90 = (df.Dir > 0) & (df.Dir < 90)
        df.loc[dir_0_to_90, 'theta'] = (df.loc[dir_0_to_90, 'Dir']).apply(math.radians)
        dir_90_to_180 = (df.Dir > 90) & (df.Dir < 180)
        df.loc[dir_90_to_180, 'theta'] = (180 - df.loc[dir_90_to_180, 'Dir']).apply(math.radians)
        dir_180_to_270 = (df.Dir > 180) & (df.Dir < 270)
        df.loc[dir_180_to_270, 'theta'] = (df.loc[dir_180_to_270, 'Dir'] - 180).apply(math.radians)
        dir_270_to_360 = (df.Dir > 270) & (df.Dir < 360)
        df.loc[dir_270_to_360, 'theta'] = (360 - df.loc[dir_270_to_360, 'Dir']).apply(math.radians)
        dir_0_180_360 = (df.Dir == 0) | (df.Dir == 180) | (df.Dir == 360)
        df.loc[dir_0_180_360, 'theta'] = 0
        dir_90_270 = (df.Dir == 90) | (df.Dir == 270)
        df.loc[dir_90_270, 'theta'] = 90
        df['SY'] = np.abs(df['S'] * np.cos(df.theta))
        df['SX'] = np.abs(df['S'] * np.sin(df.theta))    
        df['ForceY'] = np.abs(df['Force'] * np.cos(df.theta))
        df['ForceX'] = np.abs(df['Force'] * np.sin(df.theta))
        df['MomentumY'] = np.abs(df['Momentum'] * np.cos(df.theta))
        df['MomentumX'] = np.abs(df['Momentum'] * np.sin(df.theta))
        df['WorkY'] = np.abs(df['Work'] * np.cos(df.theta))
        df['WorkX'] = np.abs(df['Work'] * np.sin(df.theta))
        df['PowerY'] = np.abs(df['Power'] * np.cos(df.theta))
        df['PowerX'] = np.abs(df['Power'] * np.sin(df.theta))
        df['ImpulseY'] = np.abs(df['Impulse'] * np.cos(df.theta))
        df['ImpulseX'] = np.abs(df['Impulse'] * np.sin(df.theta))
        del [dir_0_to_90,dir_90_to_180,dir_180_to_270,dir_270_to_360,dir_0_180_360,dir_90_270]
        df['player_adj'] = np.abs(df.RusherY - df.Y)
        df['player_opp'] = np.abs(df.RusherX - df.X)
        df['player_theta'] = np.degrees((df.player_adj / df.player_opp).apply(math.atan))
        df.loc[(df.Y > df.RusherY) & (df.X > df.RusherX), 'player_theta'] = np.radians(df.player_theta)
        df.loc[(df.Y < df.RusherY) & (df.X > df.RusherX), 'player_theta'] = np.radians(180 - df.player_theta)
        df.loc[(df.Y < df.RusherY) & (df.X < df.RusherX), 'player_theta'] = np.radians(180 + df.player_theta)
        df.loc[(df.Y > df.RusherY) & (df.X < df.RusherX), 'player_theta'] = np.radians(360 - df.player_theta)
        df['PlayerAngleToRusher'] = np.abs(df.rusher_theta - df.player_theta)
        df.drop(['ChangeTime','theta'],axis=1,inplace=True)
        return df
    
    def get_gen_position_feats(df, position): 
        pos_feat = df.loc[df.GeneralPosition == position, ['PlayId','A','S','Dir',
                                                    'Orientation','Dis']]
        pos_feat = pos_feat.rename(columns={'A':position+'A','S':position+'S','Dir':position+'Dir',
                                            'Orientation':position+'Orientation',
                                            'Dis':position+'Dis','PlayerMass':position+'Weight',
                                            'PlayerHeight':position+'Height'})
        pos_feat = pos_feat.groupby(['PlayId']).agg(
            {position+'A':['mean','min','max'], 
             position+'S':['mean','min','max'], 
             position+'Dir':['mean','min','max'], 
             position+'Orientation':['mean','min','max'], 
             position+'Dis':['mean','min','max']}).reset_index()
        pos_feat.columns = [''.join(col) for col in pos_feat.columns.values]
        pos_feat_columns = pos_feat.columns.tolist()
        pos_feat_columns.remove('PlayId')
        pos_feat.drop('PlayId',axis=1,inplace=True)
        pos_feat = pos_feat.loc[pos_feat.index.repeat(22)].reset_index(drop=True)
        for feat in pos_feat_columns:
            df[feat] = 0
        df.update(pos_feat)
        return df

    def get_off_less_def_feats(df, feat):
        off_feat = df.loc[df.IsOnOffense == True, ['PlayId',feat]]
        off_feat = off_feat.groupby(['PlayId']).agg({feat:['sum']}).reset_index()
        off_feat.drop('PlayId', axis=1,inplace=True)
        off_feat.columns = ['Off'+feat]
        def_feat = df.loc[df.IsOnOffense == False, ['PlayId',feat]]
        def_feat = def_feat.groupby(['PlayId']).agg({feat:['sum']}).reset_index()
        def_feat.drop('PlayId', axis=1,inplace=True)
        def_feat.columns = ['Def'+feat]
        off_def_feat = pd.DataFrame(off_feat['Off'+feat] - def_feat['Def'+feat], columns=['OffLessDef'+feat])

        df['OffLessDef'+feat] = 0
        off_def_feat = off_def_feat.loc[off_def_feat.index.repeat(22)].reset_index(drop=True)
        df.update(off_def_feat)
        return df
    
    def get_rusher_feats(df):
        rusher_feats = df.loc[df.IsRusher == True,['X','Y','S','A','Dis',
                                                   'Dir','DisFromYL','PlayerMass']]
        rusher_feats = rusher_feats.loc[rusher_feats.index.repeat(22)].reset_index(drop=True)
        rusher_feats = rusher_feats.rename(columns={'X':'RusherX','Y':'RusherY','S':'RusherS',
                                                    'A':'RusherA','Dis':'RusherDis',
                                                    'Dir':'RusherDir','DisFromYL':'RusherDisYL',
                                                    'PlayerMass':'RusherMass'})
        df['RusherX'] = 0
        df['RusherY'] = 0 
        df['RusherS'] = 0 
        df['RusherA'] = 0 
        df['RusherDis'] = 0 
        df['RusherDir'] = 0 
        df['RusherDisYL'] = 0 
        df['RusherMass'] = 0 

        df.update(rusher_feats)
        df['ChangeTime'] = df['RusherDis'] / df['RusherS']
        df['RusherForce'] = df['RusherMass'] * df['RusherA']
        df['RusherMomentum'] = df['RusherMass'] * df['RusherS']
        df['RusherKE'] = 0.5 * df['RusherMass'] * (df['RusherS']**2)
        df['RusherImpulse'] = df['RusherForce'] * df['ChangeTime']
        df['RusherWork'] = df['RusherForce'] * df['RusherDis']

        rusher_feats = df.loc[df.IsRusher == True, ['RusherX','RusherY','RusherDir']]
        rusher_feats['rusher_theta'] = 0
        rusher_feats['rusher_adj'] = 0
        rusher_feats['rusher_hyp'] = 0
        rusher_feats['rusher_opp'] = 0
        rusher_feats['rusher_gradient'] = 0
        rusher_feats['rusher_con'] = 0
        rusher_feats['rusher_RusherDir'] = np.round(rusher_feats.RusherDir,2)
        dir_0_to_90 = (df.RusherDir > 0) & (rusher_feats.RusherDir < 90)
        rusher_feats.loc[dir_0_to_90, 'rusher_theta'] = (rusher_feats.loc[dir_0_to_90, 'RusherDir']).apply(math.radians)
        rusher_feats.loc[dir_0_to_90, 'rusher_adj'] = 53.3 - rusher_feats.loc[dir_0_to_90, 'RusherY']
        rusher_feats.loc[dir_0_to_90, 'rusher_con'] = rusher_feats.loc[dir_0_to_90, 'RusherY']
        dir_90_to_180 = (rusher_feats.RusherDir > 90) & (rusher_feats.RusherDir < 180)
        rusher_feats.loc[dir_90_to_180, 'rusher_theta'] = (180 - rusher_feats.loc[dir_90_to_180, 'RusherDir']).apply(math.radians)
        rusher_feats.loc[dir_90_to_180, 'rusher_adj'] = rusher_feats.loc[dir_90_to_180, 'RusherY']
        rusher_feats.loc[dir_90_to_180, 'rusher_con'] = rusher_feats.loc[dir_90_to_180, 'RusherY']
        dir_180_to_270 = (rusher_feats.RusherDir > 180) & (rusher_feats.RusherDir < 270)
        rusher_feats.loc[dir_180_to_270, 'rusher_theta'] = (rusher_feats.loc[dir_180_to_270, 'RusherDir'] - 180).apply(math.radians)
        rusher_feats.loc[dir_180_to_270, 'rusher_adj'] = rusher_feats.loc[dir_180_to_270, 'RusherY']
        rusher_feats.loc[dir_180_to_270, 'rusher_con'] = rusher_feats.loc[dir_180_to_270, 'RusherY']
        dir_270_to_360 = (rusher_feats.RusherDir > 270) & (rusher_feats.RusherDir < 360)
        rusher_feats.loc[dir_270_to_360, 'rusher_theta'] = (360 - rusher_feats.loc[dir_270_to_360, 'RusherDir']).apply(math.radians)
        rusher_feats.loc[dir_270_to_360, 'rusher_adj'] = 53.3 - rusher_feats.loc[dir_270_to_360, 'RusherY']
        rusher_feats.loc[dir_270_to_360, 'rusher_con'] = rusher_feats.loc[dir_270_to_360, 'RusherY']
        rusher_feats['rusher_opp'] = rusher_feats.rusher_adj * np.tan(rusher_feats.rusher_theta)
        rusher_feats['rusher_hyp'] = rusher_feats.rusher_adj / np.cos(rusher_feats.rusher_theta)
        rusher_feats['rusher_gradient'] = rusher_feats.rusher_adj / rusher_feats.rusher_opp
        dir_0_90_180_270_360 = (rusher_feats.RusherDir == 0) | (rusher_feats.RusherDir == 90) | (rusher_feats.RusherDir == 180) | (rusher_feats.RusherDir == 270) | (rusher_feats.RusherDir == 360)
        rusher_feats.loc[dir_0_90_180_270_360, 'rusher_hyp'] = 0
        rusher_feats.loc[dir_0_90_180_270_360, 'rusher_opp'] = 0
        rusher_feats.loc[dir_0_90_180_270_360, 'rusher_gradient'] = 0
        dir_0_180_360 = (rusher_feats.RusherDir == 0) | (rusher_feats.RusherDir == 180) | (rusher_feats.RusherDir == 360)
        rusher_feats.loc[dir_0_180_360, 'rusher_theta'] = 0
        dir_90_270 = (rusher_feats.RusherDir == 90) | (rusher_feats.RusherDir == 270)
        rusher_feats.loc[dir_90_270, 'rusher_theta'] = 90
        dir_90_180_270 = ((rusher_feats.RusherDir == 90) | (rusher_feats.RusherDir == 180) | (rusher_feats.RusherDir == 270))
        rusher_feats.loc[dir_90_180_270, 'rusher_adj'] = rusher_feats.loc[dir_90_180_270, 'RusherY']
        dir_0 = (rusher_feats.RusherDir == 0)
        rusher_feats.loc[dir_0, 'rusher_adj'] = 53.3 - rusher_feats.loc[dir_90_180_270, 'RusherY']
        dir_0 = (rusher_feats.RusherDir == 0)
        rusher_feats.loc[dir_0, 'rusher_hyp'] = 53.3 - rusher_feats.loc[dir_0, 'RusherY']
        rusher_feats.loc[dir_0, 'rusher_opp'] = 0
        rusher_feats.loc[dir_0, 'rusher_gradient'] = 0
        dir_90 = (rusher_feats.RusherDir == 90)
        rusher_feats.loc[dir_90, 'rusher_hyp'] = 120 - rusher_feats.loc[dir_90, 'RusherX']
        rusher_feats.loc[dir_90, 'rusher_opp'] = 0
        rusher_feats.loc[dir_90, 'rusher_gradient'] = 0
        dir_180 = (rusher_feats.RusherDir == 180)
        rusher_feats.loc[dir_180, 'rusher_hyp'] = rusher_feats.loc[dir_180, 'RusherY']
        rusher_feats.loc[dir_180, 'rusher_opp'] = 0
        rusher_feats.loc[dir_180, 'rusher_gradient'] = 0
        dir_270 = (rusher_feats.RusherDir == 270)
        rusher_feats.loc[dir_270, 'rusher_hyp'] = rusher_feats.loc[dir_270, 'RusherX']
        rusher_feats.loc[dir_270, 'rusher_opp'] = 0
        rusher_feats.loc[dir_270, 'rusher_gradient'] = 0
        rusher_feats['gradient_dir'] = 1
        rusher_feats.loc[
            ((rusher_feats.RusherDir > 90) & (rusher_feats.RusherDir < 180)) | ((rusher_feats.RusherDir > 270) & (rusher_feats.RusherDir < 360)), 'gradient_dir'
        ] = -1
        df['rusher_theta'] = 0
        df['rusher_adj'] = 0
        df['rusher_hyp'] = 0
        df['rusher_opp'] = 0
        df['rusher_gradient'] = 0
        df['rusher_con'] = 0
        df['gradient_dir'] = 0
        rusher_feats = rusher_feats.loc[rusher_feats.index.repeat(22)].reset_index(drop=True)
        df.update(rusher_feats)
        
        df['RusherSX'] = df['RusherS'] * np.sin(df.rusher_theta) 
        df['RusherForceY'] = df['RusherForce'] * np.cos(df.rusher_theta)
        df['RusherForceX'] = df['RusherForce'] * np.sin(df.rusher_theta)
        df['RusherMomentumX'] = df['RusherMomentum'] * np.sin(df.rusher_theta)
        df['RusherImpulseX'] = df['RusherImpulse'] * np.sin(df.rusher_theta)
        df.drop(['ChangeTime'],axis=1,inplace=True)
        df = df.replace([np.inf, -np.inf], np.nan)
        return df
    
    def get_gap_feats(df):
        df['Y_gapmax'] = 0
        plays = df.loc[df.IsOnOffense == False, ['PlayId','Y']]
        gaps_df = pd.DataFrame(columns=['PlayId','Y_gap'])
        for play in plays['PlayId'].unique():
            Y_vals = plays.loc[plays.PlayId == play, 'Y']
            Y_vals = Y_vals.append(pd.Series([0,53.3]), ignore_index=True).sort_values().reset_index(drop=True)
            Y_vals = np.diff(Y_vals)
            gaps_play = pd.DataFrame()
            gaps_play['Y_gap'] = Y_vals
            gaps_play['PlayId'] = play
            gaps_df = pd.concat([gaps_df, gaps_play], axis=0, ignore_index=True)        
        
        gaps_agg_y = gaps_df.groupby('PlayId').agg({'Y_gap':['max']}).reset_index()
        gaps_agg_y.columns = [''.join(col) for col in gaps_agg_y.columns.values]
        gaps_agg_y = gaps_agg_y.loc[gaps_agg_y.index.repeat(22)].reset_index(drop=True)
        df.update(gaps_agg_y)
        return df
    
    def get_err_feats(df):
        df['AdjustedX'] = df['X'] - df['RusherX']
        df['Y_err'] = np.abs(df['Y'] - ((df['gradient_dir'] * df['rusher_gradient'] * df['X']) + df['rusher_con']))
        df['X_err'] = np.abs(df['X'] - ((df['Y'] - df['rusher_con']) / df['gradient_dir'] * df['rusher_gradient']))
        df.loc[(df.RusherDir > 0)   & (df.RusherDir < 180) & (df.X < df.RusherX), ['Y_err','X_err']] = np.nan
        df.loc[(df.RusherDir > 180) & (df.RusherDir < 360) & (df.X > df.RusherX), ['Y_err','X_err']] = np.nan
        err_df = df[['PlayId','Y_err','X_err']]#.groupby('PlayId').transform(lambda x: x.fillna(x.max()))
        df.update(err_df)
        df.drop('AdjustedX',axis=1, inplace=True)
        return df

    def combine_features(df): 
        df = map_team_name(df)
        df = get_team_on_offense(df)
        df = map_offense_defense_team(df)
        df = get_is_rusher(df)
        df = get_player_weights(df)
        yardline = update_yardline(df)
        df = update_orientation(df, yardline)         
        df = get_redzone(df)
        df = get_dis_from_yl(df) # absolute distance for both off and def
        df = get_dis_rusher(df)
        df = get_rusher_feats(df)
        df = get_mech_feats(df)
        df = get_gap_feats(df)
        df = get_err_feats(df)
        df = get_team_aggs(df, col='Y', for_offense=True, agg_type='std')
        df = get_team_aggs(df, col='Y', for_offense=False, agg_type='std')
        df = get_team_aggs(df, col='X', for_offense=True, agg_type='std')
        df = get_team_aggs(df, col='X', for_offense=False, agg_type='std')
        df = get_team_aggs(df, col='A', for_offense=True, agg_type='mean')
        df = get_team_aggs(df, col='A', for_offense=False, agg_type='mean')
        df = get_team_aggs(df, col='Dir', for_offense=True, agg_type='std')
        df = get_team_aggs(df, col='Dir', for_offense=False, agg_type='std')
        df = get_team_aggs(df, col='DisRusher', for_offense=False, agg_type='mean')
        df = get_team_aggs(df, col='DisRusher', for_offense=False, agg_type='min')
        df = get_team_aggs(df, col='Force', for_offense=True, agg_type='mean')
        df = get_team_aggs(df, col='Force', for_offense=False, agg_type='mean')
        df = get_team_aggs(df, col='Dis', for_offense=True, agg_type='mean')
        df = get_team_aggs(df, col='Dis', for_offense=False, agg_type='mean')
        df = get_team_aggs(df, col='DisFromYL', for_offense=True, agg_type='mean')
        df = get_team_aggs(df, col='DisFromYL', for_offense=False, agg_type='mean')
        df = get_team_aggs(df, col='DisFromYL', for_offense=False, agg_type='max')
        df = get_team_aggs(df, col='ForceX', for_offense=False, agg_type='mean')
        df = get_team_aggs(df, col='ForceX', for_offense=True, agg_type='mean')
        df = get_team_aggs(df, col='ForceY', for_offense=True, agg_type='mean')
        df = get_team_aggs(df, col='PlayerAngleToRusher', for_offense=False, agg_type='mean')
        df = get_team_aggs(df, col='PlayerAngleToRusher', for_offense=False, agg_type='std')
        df = get_team_aggs(df, col='PlayerAngleToRusher', for_offense=True, agg_type='mean')
        df = get_team_aggs(df, col='Y_err', for_offense=False, agg_type='mean')
        df = get_team_aggs(df, col='X_err', for_offense=False, agg_type='mean')
        df = get_team_aggs(df, col='Y_err', for_offense=False, agg_type='min')
        df = get_team_aggs(df, col='X_err', for_offense=False, agg_type='min')
        df = get_team_aggs(df, col='Y_err', for_offense=False, agg_type='std')
        df = get_team_aggs(df, col='X_err', for_offense=False, agg_type='std')        
        df = get_off_less_def_feats(df, 'X')
        return df
    
    df = combine_features(df)
    df.drop(['X','Y','S','A','Dis','Orientation','Dir','PlayerMass','DisFromYL','DisRusher',
             'NflIdRusher','IsOnOffense','NflId','JerseyNumber','IsRusher','Force',
             'Momentum','KE','Work','Power','Impulse','SX','SY','ForceX','ForceY',
             'MomentumX','MomentumY','WorkX','WorkY','PowerX','PowerY','ImpulseX',
             'ImpulseY','player_adj','player_opp','player_theta','PlayerAngleToRusher',
             'Y_err','X_err','rusher_theta','rusher_adj','rusher_hyp','rusher_opp',
             'rusher_gradient','rusher_con','gradient_dir'], axis=1, inplace=True)
    
    df = df.select_dtypes(exclude=['object'])
    df = df.fillna(df.mean())

    df = df.drop_duplicates().reset_index(drop=True)
    return df

In [11]:
train = pd.read_csv('../input/nfl-big-data-bowl-2020/train.csv')
outcomes = train[['GameId','PlayId','Yards']].drop_duplicates()

train_basetable = create_features(train)
X = train_basetable.copy()
X = X.sample(frac=1).reset_index(drop=True)

yards = X.Yards
y = np.zeros((yards.shape[0], 199))
for idx, target in enumerate(list(yards)):
    y[idx][99 + target] = 1
    
print(train_basetable.shape)
train_basetable.head()

(23171, 66)


Unnamed: 0,GameId,PlayId,Season,Quarter,Down,Distance,HomeScoreBeforePlay,VisitorScoreBeforePlay,DefendersInTheBox,Yards,Week,Temperature,Humidity,IsOffenseAtHome,YardLine,InOffenseRedzone,InDefenseRedzone,RusherX,RusherY,RusherS,RusherA,RusherDis,RusherDir,RusherDisYL,RusherMass,RusherForce,RusherMomentum,RusherKE,RusherImpulse,RusherWork,RusherSX,RusherForceY,RusherForceX,RusherMomentumX,RusherImpulseX,Y_gapmax,StdOffenseY,StdDefenseY,StdOffenseX,StdDefenseX,AvgOffenseA,AvgDefenseA,StdOffenseDir,StdDefenseDir,AvgDefenseDisRusher,MinDefenseDisRusher,AvgOffenseForce,AvgDefenseForce,AvgOffenseDis,AvgDefenseDis,AvgOffenseDisFromYL,AvgDefenseDisFromYL,MaxDefenseDisFromYL,AvgDefenseForceX,AvgOffenseForceX,AvgOffenseForceY,AvgDefensePlayerAngleToRusher,StdDefensePlayerAngleToRusher,AvgOffensePlayerAngleToRusher,AvgDefenseY_err,AvgDefenseX_err,MinDefenseY_err,MinDefenseX_err,StdDefenseY_err,StdDefenseX_err,OffLessDefX
0,2017090700,20170907000118,2017,1,3,2,0,0,6.0,8,1,63.0,77.0,True,45.0,False,False,41.25,30.53,3.63,3.35,0.38,114.26,3.75,205,686.75,744.15,1350.63225,71.891185,260.965,3.309436,282.170452,626.103345,678.434371,65.542499,16.64,6.308114,7.014714,4.392297,5.294079,1.191818,1.025455,69.10972,81.429631,9.752491,4.59331,286.34,233.012727,0.245,0.184545,2.226364,3.19,18.37,122.749438,201.117822,155.502527,0.926503,0.542587,0.981352,20.074123,47.44039,6.791573,39.630095,7.394532,6.148318,-45.34
1,2017090700,20170907000139,2017,1,1,10,0,0,6.0,3,1,63.0,77.0,True,53.0,False,False,48.93,27.16,3.06,2.41,0.34,47.8,4.07,205,494.05,627.3,959.769,54.894444,167.977,2.266862,331.863557,365.994511,464.706723,40.666057,19.42,6.892898,7.190716,4.647315,5.406292,1.567273,1.592727,110.152689,118.90526,10.297028,4.287773,365.933182,351.486364,0.236364,0.197273,2.5,3.561818,18.95,168.186969,222.593726,241.921652,0.963307,0.676699,1.052558,53.554398,58.617656,42.489031,48.176676,8.315906,8.072008,-51.64
2,2017090700,20170907000189,2017,1,1,10,0,0,7.0,5,1,63.0,77.0,True,75.0,False,False,71.34,19.11,5.77,2.42,0.6,138.04,3.66,205,496.1,1182.85,3412.52225,51.587522,297.66,3.857889,368.905807,331.69823,790.867267,34.492017,20.41,6.192438,6.57762,4.003402,4.720893,1.755909,2.092727,36.419174,34.4427,9.903689,4.22167,410.418636,477.791818,0.380455,0.360909,2.435909,3.389091,16.76,125.771761,158.65386,356.822781,0.574583,0.539388,0.872223,90.674956,82.273608,76.029945,66.224905,8.791224,9.069508,-38.95
3,2017090700,20170907000345,2017,1,2,2,0,0,9.0,2,1,63.0,77.0,True,108.0,False,True,104.47,25.36,4.45,3.2,0.46,84.56,3.53,210,672.0,934.5,2079.2625,69.465169,309.12,4.429957,63.707834,668.973327,930.291033,69.152299,19.65,3.795418,4.505029,1.679696,0.962418,1.087273,1.293636,125.543391,150.729268,6.309354,4.528002,273.020909,318.250909,0.226818,0.199091,1.304091,1.349091,2.8,148.42973,167.614868,175.555447,0.946594,0.261819,1.042222,10.036287,109.275327,2.155073,108.105219,4.486292,0.966148,-17.51
4,2017090700,20170907000395,2017,1,1,10,7,0,7.0,7,1,63.0,77.0,False,35.0,False,False,29.99,27.12,3.9,2.53,0.44,157.92,5.01,216,546.48,842.4,1642.68,61.654154,240.4512,1.466013,506.401103,205.422281,316.658851,23.175847,14.12,7.466987,7.864325,4.811638,5.391251,1.870909,2.121818,76.029425,46.7747,11.056456,4.288088,475.914091,518.136364,0.288182,0.245455,3.026364,3.959091,14.41,156.934523,145.861543,434.51801,1.342073,0.983452,1.583429,94.162556,37.098131,77.692564,1.322679,15.17389,19.916916,-58.06


In [12]:
cat = ['InDefenseRedzone']# find a way to remove
num = list(set(X.columns.values.tolist()) - set(cat))
num.remove('GameId')
num.remove('PlayId')
print(len(cat))
print(len(num))

1
63


In [13]:
features = list(set(['GameId', 
                     'RusherX',
                     'RusherDir',
                     'YardLine',
                     'StdDefenseX', 
                     'StdOffenseX',
                     'StdDefenseY', 
                     'MaxDefenseDisFromYL',
                     'AvgDefenseForce',
                     'RusherWork', 
                     'RusherMomentumX',
                     'AvgOffensePlayerAngleToRusher',

                     'RusherA',
                     'RusherDis',
                     'RusherDisYL', 
                     'StdOffenseY', 
                     'AvgOffenseA', 
                     'AvgDefenseA', 
                     'StdOffenseDir',
                     'StdDefenseDir',
                     'AvgDefenseDisRusher',
                     'MinDefenseDisRusher',
                     'AvgOffenseForce',
                     'OffLessDefX', 
                     'InDefenseRedzone', # 
                     'AvgOffenseDis',
                     'AvgDefenseDis',
                     'AvgOffenseDisFromYL',
                     'AvgDefenseDisFromYL',   
                     'AvgDefenseForceX',
                     'AvgOffenseForceX',
                     'AvgOffenseForceY',
                     'RusherForce',
                     'RusherMomentum',
                     'RusherKE',
                     'RusherForceX',
                     'RusherForceY',
                     'RusherImpulseX',
                     'RusherSX',
                     'AvgDefensePlayerAngleToRusher',
                     'StdDefensePlayerAngleToRusher',
                     'Y_gapmax'
                    ]))
X = X[features]

In [14]:
scaler = StandardScaler()
num = list(set(features) & set(num)) # update num to only show intersection with features selected
X[num] = np.array(scaler.fit_transform(X[num]))

In [None]:
## MODEL 1 ##

class CRPSCallback(Callback):
    
    def __init__(self,validation, predict_batch_size=20, include_on_batch=False):
        super(CRPSCallback, self).__init__()
        self.validation = validation
        self.predict_batch_size = predict_batch_size
        self.include_on_batch = include_on_batch
        
        print('validation shape',len(self.validation))

    def on_batch_begin(self, batch, logs={}):
        pass

    def on_train_begin(self, logs={}):
        if not ('CRPS_score_val' in self.params['metrics']):
            self.params['metrics'].append('CRPS_score_val')

    def on_batch_end(self, batch, logs={}):
        if (self.include_on_batch):
            logs['CRPS_score_val'] = float('-inf')

    def on_epoch_end(self, epoch, logs={}):
        logs['CRPS_score_val'] = float('-inf')
            
        if (self.validation):
            X_valid, y_valid = self.validation[0], self.validation[1]
            y_pred = self.model.predict(X_valid)
            y_true = np.clip(np.cumsum(y_valid, axis=1), 0, 1)
            y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
            val_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * X_valid.shape[0])
            val_s = np.round(val_s, 6)
            logs['CRPS_score_val'] = val_s
            
            
def crps_score(y_prediction, y_valid, shape=X.shape[0]):
    y_true = np.clip(np.cumsum(y_valid, axis=1), 0, 1)
    y_pred = np.clip(np.cumsum(y_prediction, axis=1), 0, 1)
    val_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * shape)
    crps = np.round(val_s, 6)
    
    return crps


def get_nn(x_tr, y_tr, x_val, y_val, shape):
        
    K.clear_session()
    inp = Input(shape = (x_tr.shape[1],))
    x = Dense(512, input_dim=X.shape[1], activation='relu')(inp)
    #x = Dropout(0.5)(x)
    #x = keras.layers.BatchNormalization()(x)
    x = Dense(256, activation='relu')(x)
    #x = Dropout(0.5)(x)
    #x = keras.layers.BatchNormalization()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = keras.layers.GaussianNoise(0.15)(x)
    #x = keras.layers.BatchNormalization()(x)
    out = Dense(199, activation='softmax')(x)
    model = Model(inp,out)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=[])
    es = EarlyStopping(monitor='CRPS_score_val', 
                       mode='min',
                       restore_best_weights=True, 
                       verbose=1, 
                       patience=10)
    mc = ModelCheckpoint('best_model.h5',monitor='CRPS_score_val',mode='min',
                                   save_best_only=True, verbose=1, save_weights_only=True)
    bsz = 1024
    steps = x_tr.shape[0]/bsz
    model.fit(x_tr, y_tr,callbacks=[CRPSCallback(validation = (x_val,y_val)),es,mc], epochs=100, batch_size=bsz, verbose=1)
    model.load_weights("best_model.h5")
    y_pred = model.predict(x_val)
    y_valid = y_val
    crps = crps_score(y_pred, y_valid, shape=shape)
    return model,crps



metric = "multi_logloss"
param = {'num_leaves': 50,
         'min_data_in_leaf': 30,
         'objective':'multiclass',
         'num_class': 199,
         'max_depth': 6, # -1
         'learning_rate': 0.01,
         "min_child_samples": 20,
         "boosting": "gbdt",
         "feature_fraction": 0.4, #0.7
         "bagging_freq": 1,
         "bagging_fraction": 0.9,
         "bagging_seed": 11,
         "metric": metric,
         "lambda_l1": 0.1,
         "verbosity": -1,
         'n_jobs': -1,
         "seed":1234}


def get_lgbm(x_tr, y_tr, x_val, y_val, shape):
    y_valid = y_val
    y_tr = np.argmax(y_tr, axis=1)
    y_val = np.argmax(y_val, axis=1)
    trn_data = lgb.Dataset(x_tr, label=y_tr, categorical_feature=cat)
    val_data = lgb.Dataset(x_val, label=y_val, categorical_feature=cat)
    model = lgb.train(param, trn_data, 10000, valid_sets = [val_data], verbose_eval = 100, early_stopping_rounds = 200)
    
    y_pred = model.predict(x_val, num_iteration=model.best_iteration)
    crps = crps_score(y_pred, y_valid, shape=shape)
    
    return model, crps


loop = 2
fold = 5

oof_nn = np.zeros([loop, y.shape[0], y.shape[1]])
oof_lgbm = np.zeros([loop, y.shape[0], y.shape[1]])
oof_rf = np.zeros([loop, y.shape[0], y.shape[1]])

models_nn = []
crps_csv_nn = []

models_lgbm = []
crps_csv_lgbm = []

models_rf = []
crps_csv_rf = []

feature_importance = np.zeros([loop, fold, X.shape[1]])

s_time = time.time()

for k in range(loop):
    kfold = KFold(fold, random_state = 42 + k, shuffle = True)
    for k_fold, (tr_inds, val_inds) in enumerate(kfold.split(yards)):
        print("-----------")
        print(f'Loop {k+1}/{loop}' + f' Fold {k_fold+1}/{fold}')
        print("-----------")
        tr_x, tr_y = X.loc[tr_inds], y[tr_inds]
        val_x, val_y = X.loc[val_inds], y[val_inds]
        
        # Train NN
        nn, crps_nn = get_nn(tr_x, tr_y, val_x, val_y, shape=val_x.shape[0])
        models_nn.append(nn)
        print("the %d fold crps (NN) is %f"%((k_fold+1), crps_nn))
        crps_csv_nn.append(crps_nn)
        
        # Train LGBM
        lgbm, crps_lgbm = get_lgbm(tr_x, tr_y, val_x, val_y, shape=val_x.shape[0])
        models_lgbm.append(lgbm)
        print("the %d fold crps (LGBM) is %f"%((k_fold+1), crps_lgbm))
        crps_csv_lgbm.append(crps_lgbm)
        

        #Predict OOF
        oof_nn[k, val_inds, :] = nn.predict(val_x)
        oof_lgbm[k, val_inds, :] = lgbm.predict(val_x, num_iteration=lgbm.best_iteration)


a = lgbm.predict(val_x, num_iteration=lgbm.best_iteration)

crps_oof_nn = []
crps_oof_lgbm = []
crps_oof_rf = []

for k in range(loop):
    crps_oof_nn.append(crps_score(oof_nn[k,...], y))
    crps_oof_lgbm.append(crps_score(oof_lgbm[k,...], y))

print("mean crps (NN) is %f"%np.mean(crps_csv_nn))
print("mean crps (LGBM) is %f"%np.mean(crps_csv_lgbm))

print("mean OOF crps (NN) is %f"%np.mean(crps_oof_nn))
print("mean OOF crps (LGBM) is %f"%np.mean(crps_oof_lgbm))

def weight_opt(oof_nn, oof_rf, y_true):
    weight_nn = np.inf
    best_crps = np.inf
    
    for i in np.arange(0, 1.01, 0.05):
        crps_blend = np.zeros(oof_nn.shape[0])
        for k in range(oof_nn.shape[0]):
            crps_blend[k] = crps_score(i * oof_nn[k,...] + (1-i) * oof_rf[k,...], y_true)
        if np.mean(crps_blend) < best_crps:
            best_crps = np.mean(crps_blend)
            weight_nn = round(i, 2)
            
        print(str(round(i, 2)) + ' : mean crps (Blend) is ', round(np.mean(crps_blend), 6))
        
    print('-'*36)
    print('Best weight for NN: ', weight_nn)
    print('Best weight for LGBM: ', round(1-weight_nn, 2))
    print('Best mean crps (Blend): ', round(best_crps, 6))
    
    return weight_nn, round(1-weight_nn, 2)

weight_nn, weight_lgbm = weight_opt(oof_nn, oof_lgbm, y)

-----------
Loop 1/2 Fold 1/5
-----------
validation shape 2
Epoch 1/100

Epoch 00001: CRPS_score_val improved from inf to 0.08428, saving model to best_model.h5
Epoch 2/100

Epoch 00002: CRPS_score_val improved from 0.08428 to 0.08374, saving model to best_model.h5
Epoch 3/100

Epoch 00003: CRPS_score_val improved from 0.08374 to 0.08316, saving model to best_model.h5
Epoch 4/100

Epoch 00004: CRPS_score_val improved from 0.08316 to 0.08257, saving model to best_model.h5
Epoch 5/100

Epoch 00005: CRPS_score_val improved from 0.08257 to 0.08198, saving model to best_model.h5
Epoch 6/100

Epoch 00006: CRPS_score_val improved from 0.08198 to 0.08139, saving model to best_model.h5
Epoch 7/100

Epoch 00007: CRPS_score_val improved from 0.08139 to 0.08080, saving model to best_model.h5
Epoch 8/100

Epoch 00008: CRPS_score_val improved from 0.08080 to 0.08020, saving model to best_model.h5
Epoch 9/100

Epoch 00009: CRPS_score_val improved from 0.08020 to 0.07959, saving model to best_model.h


Epoch 00046: CRPS_score_val improved from 0.05535 to 0.05467, saving model to best_model.h5
Epoch 47/100

Epoch 00047: CRPS_score_val improved from 0.05467 to 0.05399, saving model to best_model.h5
Epoch 48/100

Epoch 00048: CRPS_score_val improved from 0.05399 to 0.05332, saving model to best_model.h5
Epoch 49/100

Epoch 00049: CRPS_score_val improved from 0.05332 to 0.05264, saving model to best_model.h5
Epoch 50/100

Epoch 00050: CRPS_score_val improved from 0.05264 to 0.05197, saving model to best_model.h5
Epoch 51/100

Epoch 00051: CRPS_score_val improved from 0.05197 to 0.05131, saving model to best_model.h5
Epoch 52/100

Epoch 00052: CRPS_score_val improved from 0.05131 to 0.05065, saving model to best_model.h5
Epoch 53/100

Epoch 00053: CRPS_score_val improved from 0.05065 to 0.05000, saving model to best_model.h5
Epoch 54/100

Epoch 00054: CRPS_score_val improved from 0.05000 to 0.04935, saving model to best_model.h5
Epoch 55/100

Epoch 00055: CRPS_score_val improved from 0.0


Epoch 00020: CRPS_score_val improved from 0.07404 to 0.07340, saving model to best_model.h5
Epoch 21/100

Epoch 00021: CRPS_score_val improved from 0.07340 to 0.07276, saving model to best_model.h5
Epoch 22/100

Epoch 00022: CRPS_score_val improved from 0.07276 to 0.07211, saving model to best_model.h5
Epoch 23/100

Epoch 00023: CRPS_score_val improved from 0.07211 to 0.07146, saving model to best_model.h5
Epoch 24/100

Epoch 00024: CRPS_score_val improved from 0.07146 to 0.07080, saving model to best_model.h5
Epoch 25/100

Epoch 00025: CRPS_score_val improved from 0.07080 to 0.07014, saving model to best_model.h5
Epoch 26/100

Epoch 00026: CRPS_score_val improved from 0.07014 to 0.06948, saving model to best_model.h5
Epoch 27/100

Epoch 00027: CRPS_score_val improved from 0.06948 to 0.06881, saving model to best_model.h5
Epoch 28/100

Epoch 00028: CRPS_score_val improved from 0.06881 to 0.06814, saving model to best_model.h5
Epoch 29/100

Epoch 00029: CRPS_score_val improved from 0.0


Epoch 00008: CRPS_score_val improved from 0.08087 to 0.08027, saving model to best_model.h5
Epoch 9/100

Epoch 00009: CRPS_score_val improved from 0.08027 to 0.07966, saving model to best_model.h5
Epoch 10/100

Epoch 00010: CRPS_score_val improved from 0.07966 to 0.07904, saving model to best_model.h5
Epoch 11/100

Epoch 00011: CRPS_score_val improved from 0.07904 to 0.07841, saving model to best_model.h5
Epoch 12/100

Epoch 00012: CRPS_score_val improved from 0.07841 to 0.07779, saving model to best_model.h5
Epoch 13/100

Epoch 00013: CRPS_score_val improved from 0.07779 to 0.07716, saving model to best_model.h5
Epoch 14/100

Epoch 00014: CRPS_score_val improved from 0.07716 to 0.07651, saving model to best_model.h5
Epoch 15/100

Epoch 00015: CRPS_score_val improved from 0.07651 to 0.07586, saving model to best_model.h5
Epoch 16/100

Epoch 00016: CRPS_score_val improved from 0.07586 to 0.07521, saving model to best_model.h5
Epoch 17/100

Epoch 00017: CRPS_score_val improved from 0.07


Epoch 00098: CRPS_score_val improved from 0.02765 to 0.02732, saving model to best_model.h5
Epoch 99/100

Epoch 00099: CRPS_score_val improved from 0.02732 to 0.02700, saving model to best_model.h5
Epoch 100/100

Epoch 00100: CRPS_score_val improved from 0.02700 to 0.02668, saving model to best_model.h5
the 4 fold crps (NN) is 0.026679
Training until validation scores don't improve for 200 rounds
[100]	valid_0's multi_logloss: 2.83496
[200]	valid_0's multi_logloss: 2.8024
[300]	valid_0's multi_logloss: 2.7892
[400]	valid_0's multi_logloss: 2.78669
[500]	valid_0's multi_logloss: 2.78892
Early stopping, best iteration is:
[395]	valid_0's multi_logloss: 2.78656
the 4 fold crps (LGBM) is 0.013035
-----------
Loop 1/2 Fold 5/5
-----------
validation shape 2
Epoch 1/100

Epoch 00001: CRPS_score_val improved from inf to 0.08434, saving model to best_model.h5
Epoch 2/100

Epoch 00002: CRPS_score_val improved from 0.08434 to 0.08386, saving model to best_model.h5
Epoch 3/100

Epoch 00003: CRPS


Epoch 00041: CRPS_score_val improved from 0.06042 to 0.05974, saving model to best_model.h5
Epoch 42/100

Epoch 00042: CRPS_score_val improved from 0.05974 to 0.05905, saving model to best_model.h5
Epoch 43/100

Epoch 00043: CRPS_score_val improved from 0.05905 to 0.05837, saving model to best_model.h5
Epoch 44/100

Epoch 00044: CRPS_score_val improved from 0.05837 to 0.05768, saving model to best_model.h5
Epoch 45/100

Epoch 00045: CRPS_score_val improved from 0.05768 to 0.05700, saving model to best_model.h5
Epoch 46/100

Epoch 00046: CRPS_score_val improved from 0.05700 to 0.05631, saving model to best_model.h5
Epoch 47/100

Epoch 00047: CRPS_score_val improved from 0.05631 to 0.05563, saving model to best_model.h5
Epoch 48/100

Epoch 00048: CRPS_score_val improved from 0.05563 to 0.05495, saving model to best_model.h5
Epoch 49/100

Epoch 00049: CRPS_score_val improved from 0.05495 to 0.05427, saving model to best_model.h5
Epoch 50/100

Epoch 00050: CRPS_score_val improved from 0.0


Epoch 00029: CRPS_score_val improved from 0.06840 to 0.06774, saving model to best_model.h5
Epoch 30/100

Epoch 00030: CRPS_score_val improved from 0.06774 to 0.06707, saving model to best_model.h5
Epoch 31/100

Epoch 00031: CRPS_score_val improved from 0.06707 to 0.06640, saving model to best_model.h5
Epoch 32/100

Epoch 00032: CRPS_score_val improved from 0.06640 to 0.06574, saving model to best_model.h5
Epoch 33/100

Epoch 00033: CRPS_score_val improved from 0.06574 to 0.06506, saving model to best_model.h5
Epoch 34/100

Epoch 00034: CRPS_score_val improved from 0.06506 to 0.06439, saving model to best_model.h5
Epoch 35/100

Epoch 00035: CRPS_score_val improved from 0.06439 to 0.06371, saving model to best_model.h5
Epoch 36/100

Epoch 00036: CRPS_score_val improved from 0.06371 to 0.06305, saving model to best_model.h5
Epoch 37/100

Epoch 00037: CRPS_score_val improved from 0.06305 to 0.06237, saving model to best_model.h5
Epoch 38/100

Epoch 00038: CRPS_score_val improved from 0.0

In [15]:
## MODEL 2 ##

#def crps(y_true, y_pred):
#    y_true = np.clip(np.cumsum(y_true, axis=1), 0, 1)
#    y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
#    return ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * y_true.shape[0]) 
#
#def model_396_1():
#    inputs = []
#    embeddings = []
#    for i in cat:
#        input_ = Input(shape=(1,))
#        embedding = Embedding(int(np.absolute(X[i]).max() + 1), 10, input_length=1)(input_)
#        embedding = Reshape(target_shape=(10,))(embedding)
#        inputs.append(input_)
#        embeddings.append(embedding)
#    input_numeric = Input(shape=(len(num),))
#    embedding_numeric = Dense(512, activation='relu')(input_numeric) 
#    inputs.append(input_numeric)
#    embeddings.append(embedding_numeric)
#    
#    x = Concatenate()(embeddings)
#    x = Dense(256, activation='relu')(x)
#    x = Dense(128, activation='relu')(x)
#    x = Dropout(0.5)(x)
#    x = keras.layers.GaussianNoise(0.15)(x)
#    output = Dense(199, activation='softmax')(x)
#    model = Model(inputs, output)
#    return model
#
#
#n_splits = 5
#kf = GroupKFold(n_splits=n_splits)
#score = []
#for i_369, (tdx, vdx) in enumerate(kf.split(X, y, X['GameId'])):
#    print(f'Fold : {i_369}')
#    X_train, X_val, y_train, y_val = X.iloc[tdx], X.iloc[vdx], y[tdx], y[vdx]
#    X_train = [np.absolute(X_train[i]) for i in cat] + [X_train[num]]
#    X_val = [np.absolute(X_val[i]) for i in cat] + [X_val[num]]
#    model = model_396_1()
#    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=[])
#    es = EarlyStopping(monitor='val_CRPS', 
#                   mode='min',
#                   restore_best_weights=True, 
#                   verbose=2, 
#                   patience=5)
#    es.set_model(model)
#    metric = Metric(model, [es], [(X_train,y_train), (X_val,y_val)])
#    for i in range(1):
#        model.fit(X_train, y_train, verbose=False)
#    for i in range(1):
#        model.fit(X_train, y_train, batch_size=64, verbose=False)
#    for i in range(1):
#        model.fit(X_train, y_train, batch_size=128, verbose=False)
#    for i in range(1):
#        model.fit(X_train, y_train, batch_size=256, verbose=False)
#    model.fit(X_train, y_train, callbacks=[metric], epochs=100, batch_size=1024, verbose=False)
#    score_ = crps(y_val, model.predict(X_val))
#    model.save(f'keras_369_{i_369}.h5')
#    print(score_)
#    score.append(score_)
#
#print('')
#print('')
#print(f'Final Score: {np.mean(score)}')

Fold : 0
tr CRPS 0.012581 val CRPS 0.012509
tr CRPS 0.01257 val CRPS 0.012505
tr CRPS 0.012544 val CRPS 0.012486
tr CRPS 0.012529 val CRPS 0.012511
tr CRPS 0.012507 val CRPS 0.012476
tr CRPS 0.012485 val CRPS 0.012481
tr CRPS 0.012474 val CRPS 0.012473
tr CRPS 0.01245 val CRPS 0.012471
tr CRPS 0.01243 val CRPS 0.012471
tr CRPS 0.012414 val CRPS 0.012456
tr CRPS 0.012405 val CRPS 0.01246
tr CRPS 0.012378 val CRPS 0.012439
tr CRPS 0.012365 val CRPS 0.012456
tr CRPS 0.012346 val CRPS 0.012452
tr CRPS 0.012313 val CRPS 0.01244
tr CRPS 0.012296 val CRPS 0.012437
tr CRPS 0.012275 val CRPS 0.012449
tr CRPS 0.012284 val CRPS 0.012457
tr CRPS 0.012268 val CRPS 0.012454
tr CRPS 0.012237 val CRPS 0.012456
tr CRPS 0.012212 val CRPS 0.01246
Restoring model weights from the end of the best epoch
Epoch 00021: early stopping


NameError: name 'crps' is not defined

In [None]:
## MODEL 1 ##
def predict(x_te, models_nn, models_rf, weight_nn, weight_rf, iteration=False):
    model_num_nn = len(models_nn)
    model_num_rf = len(models_rf)
    for k,m in enumerate(models_nn):
        if k==0:
            y_pred_nn = m.predict(x_te, batch_size=1024)
            if iteration:
                y_pred_rf = models_rf[k].predict(x_te, num_iteration=models_rf[k].best_iteration)
            else:
                y_pred_rf = models_rf[k].predict(x_te)
        else:
            y_pred_nn += m.predict(x_te, batch_size=1024)
            if iteration:
                y_pred_rf += models_rf[k].predict(x_te, num_iteration=models_rf[k].best_iteration)
            else:
                y_pred_rf += models_rf[k].predict(x_te)
            
    y_pred_nn = y_pred_nn / model_num_nn
    y_pred_rf = y_pred_rf / model_num_rf
    
    return weight_nn * y_pred_nn + weight_rf * y_pred_rf

In [None]:
## MODEL 1 ##
for (test_df, sample_prediction_df) in iter_test:
    basetable = create_features(test_df)
    
    basetable = basetable[features]
    basetable[num] = scaler.transform(basetable[num])
    
     y_pred = predict(basetable, models_nn, models_lgbm, weight_nn, weight_lgbm, iteration=True)
     y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1).tolist()[0]

     preds_df = pd.DataFrame(data=[y_pred], columns=sample_prediction_df.columns)
     env.predict(preds_df)

 env.write_submission_file()

In [None]:
## MODEL 2 ##
#models = []
#for i in range(n_splits):
#    models.append(load_model(f'keras_369_{i}.h5'))

In [None]:
## MODEL 2 ##
#for (test_df, sample_prediction_df) in tqdm.tqdm(iter_test):
#    basetable = create_features(test_df)
#    
#    basetable = basetable[features]
#    basetable[num] = scaler.transform(basetable[num])
#    test_ = [np.absolute(basetable[i]) for i in cat] + [basetable[num]]
#    
#    y_pred = np.mean([model.predict(test_) for model in models], axis=0)
#    y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1).tolist()[0]
#    
#    preds_df = pd.DataFrame(data=[y_pred], columns=sample_prediction_df.columns)
#    
#    env.predict(preds_df)
#env.write_submission_file()