In [1]:
# IMPORTS 
import os
import random as rn
import warnings

import numpy as np
import pandas as pd

import sklearn.metrics as mtr
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GroupShuffleSplit
from sklearn.model_selection import KFold,GroupKFold

import tensorflow as tf
from keras.layers import Dense
from keras.models import Sequential
from keras.callbacks import Callback, EarlyStopping
from keras.models import Model
from keras.layers import Input, Dense, Concatenate, Reshape, Dropout, merge, Add
from keras.layers import BatchNormalization, LeakyReLU
from keras.layers.embeddings import Embedding
from keras.models import load_model
warnings.filterwarnings("ignore")

pd.set_option('display.max_columns', 250)
pd.set_option('display.max_rows', 150)


Using TensorFlow backend.


In [2]:
# On Kaggle the same file lives at '../input/nfl-big-data-bowl-2020/train.csv'.
train = pd.read_csv(
    '../data/train.csv',
    dtype={'WindSpeed': 'object'},  # read WindSpeed as raw strings
)
# One (GameId, PlayId, Yards) row per play; merged back in by create_features.
outcomes = train.loc[:, ['GameId', 'PlayId', 'Yards']].drop_duplicates()

In [3]:
# evaluation metric
def crps(y_true, y_pred):
    """Score predicted per-bin probabilities against one-hot targets.

    Both inputs are 2-D (rows = samples, columns = bins). Each row is turned
    into a cumulative distribution (cumulative sum clipped to [0, 1]); the
    result is the summed squared difference between the two distributions
    divided by ``199 * number_of_rows``.
    """
    cdf_true = np.clip(np.cumsum(y_true, axis=1), 0, 1)
    cdf_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
    squared_gap = (cdf_true - cdf_pred) ** 2
    total = squared_gap.sum(axis=1).sum(axis=0)
    n_rows = cdf_true.shape[0]
    return total / (199 * n_rows)

In [4]:
# author : nlgn
# Link : https://www.kaggle.com/kingychiu/keras-nn-starter-crps-early-stopping
class Metric(Callback):
    """Keras callback that reports train/validation CRPS after every epoch.

    The scores are written into the epoch ``logs`` under ``'tr_CRPS'`` and
    ``'val_CRPS'`` and then the wrapped callbacks are invoked with those logs,
    so e.g. an ``EarlyStopping(monitor='val_CRPS')`` can act on them.

    Args:
        model: model whose ``predict`` is used to score the data.
        callbacks: callbacks that receive the train-begin, train-end and
            epoch-end events forwarded by this wrapper.
        data: ``[(X_train, y_train), (X_valid, y_valid)]``.
    """

    def __init__(self, model, callbacks, data):
        super().__init__()
        self.model = model
        self.callbacks = callbacks
        self.data = data

    def on_train_begin(self, logs=None):
        """Forward the train-begin event to every wrapped callback."""
        for callback in self.callbacks:
            callback.on_train_begin(logs)

    def on_train_end(self, logs=None):
        """Forward the train-end event to every wrapped callback."""
        for callback in self.callbacks:
            callback.on_train_end(logs)

    def _rounded_crps(self, features, targets):
        """Return ``crps`` of the model's predictions, rounded to 6 decimals."""
        # crps() normalises by the number of target rows, so this works for
        # both a list of input arrays and a single input array.
        return np.round(crps(targets, self.model.predict(features)), 6)

    def on_epoch_end(self, batch, logs=None):
        """Score both datasets, record the scores in ``logs`` and forward them.

        ``batch`` is whatever index Keras passes to ``on_epoch_end``; it is
        handed to the wrapped callbacks unchanged.
        """
        # The signature allows logs=None; without this guard the item
        # assignments below would raise TypeError.
        if logs is None:
            logs = {}

        X_train, y_train = self.data[0][0], self.data[0][1]
        tr_s = self._rounded_crps(X_train, y_train)
        logs['tr_CRPS'] = tr_s

        X_valid, y_valid = self.data[1][0], self.data[1][1]
        val_s = self._rounded_crps(X_valid, y_valid)
        logs['val_CRPS'] = val_s
        print('tr CRPS', tr_s, 'val CRPS', val_s)

        for callback in self.callbacks:
            callback.on_epoch_end(batch, logs)

In [5]:
# author : ryancaldwell
# Link : https://www.kaggle.com/ryancaldwell/location-eda
def create_features(df, deploy=False):
    """Build one row of engineered features per play from player-level rows.

    The input frame is not modified: coordinate mirroring is applied to an
    internal copy, so calling this function repeatedly on the same frame gives
    the same result.

    Args:
        df: player-level tracking rows. Columns read here: GameId, PlayId,
            NflId, NflIdRusher, Team, X, Y, S, A, Dis, Orientation, Dir,
            PlayDirection, PossessionTeam, FieldPosition, YardLine, Quarter,
            Down, Distance, DefendersInTheBox, OffensePersonnel,
            DefensePersonnel.
        deploy: when False, the module-level ``outcomes`` frame is
            inner-merged in on (GameId, PlayId); when True that merge is
            skipped.

    Returns:
        DataFrame with one row per (GameId, PlayId) holding the rusher's
        distance features, defensive distance features, rusher "static"
        columns and personnel counts (plus the ``outcomes`` columns when
        ``deploy`` is False).
    """
    play_keys = ['GameId', 'PlayId']

    # The helpers below are vectorised: they accept whole columns rather than
    # scalars, which avoids row-wise apply() with positional x[0]/x[1] access.
    def new_X(x_coordinate, play_direction):
        # Mirror X for plays whose PlayDirection is 'left'.
        return np.where(play_direction == 'left', 120.0 - x_coordinate, x_coordinate)

    def new_line(rush_team, field_position, yardline):
        return np.where(
            rush_team == field_position,
            # offense starting at X = 0 plus the 10 yard endzone plus the line of scrimmage
            10.0 + yardline,
            # half the field plus the yards between midfield and the line of scrimmage
            60.0 + (50 - yardline),
        )

    def new_orientation(angle, play_direction):
        # Mirror the angle for 'left' plays; a mirrored value of 360 wraps to 0.
        mirrored = 360.0 - angle
        mirrored = np.where(mirrored == 360.0, 0.0, mirrored)
        return np.where(play_direction == 'left', mirrored, angle)

    def euclidean_distance(x1, y1, x2, y2):
        return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)

    def back_direction(orientation):
        # 1 where the angle exceeds 180 degrees, else 0 (NaN compares False -> 0).
        return (orientation > 180.0).astype(int)

    def rusher_rows(frame, columns):
        # Rows describing the ball carrier, as an independent copy.
        return frame.loc[frame['NflId'] == frame['NflIdRusher'], columns].copy()

    def update_yardline(df):
        new_yardline = rusher_rows(df, play_keys + ['PossessionTeam', 'FieldPosition', 'YardLine'])
        new_yardline['YardLine'] = new_line(new_yardline['PossessionTeam'],
                                            new_yardline['FieldPosition'],
                                            new_yardline['YardLine'])
        return new_yardline[play_keys + ['YardLine']]

    def update_orientation(df, yardline):
        # Work on a copy so the caller's frame keeps its original coordinates.
        df = df.copy()
        df['X'] = new_X(df['X'], df['PlayDirection'])
        df['Orientation'] = new_orientation(df['Orientation'], df['PlayDirection'])
        df['Dir'] = new_orientation(df['Dir'], df['PlayDirection'])

        # Replace the raw YardLine with the recomputed one.
        df = df.drop('YardLine', axis=1)
        return pd.merge(df, yardline, on=play_keys, how='inner')

    def back_features(df):
        carriers = rusher_rows(df, play_keys + ['NflIdRusher', 'X', 'Y', 'Orientation', 'Dir', 'YardLine'])
        carriers['back_from_scrimmage'] = carriers['YardLine'] - carriers['X']
        carriers['back_oriented_down_field'] = back_direction(carriers['Orientation'])
        carriers['back_moving_down_field'] = back_direction(carriers['Dir'])
        carriers = carriers.rename(columns={'X': 'back_X', 'Y': 'back_Y'})
        return carriers[play_keys + ['NflIdRusher', 'back_X', 'back_Y', 'back_from_scrimmage',
                                     'back_oriented_down_field', 'back_moving_down_field']]

    def features_relative_to_back(df, carriers):
        player_distance = pd.merge(df[play_keys + ['NflId', 'X', 'Y']], carriers, on=play_keys, how='inner')
        # Every player except the rusher himself.
        player_distance = player_distance[player_distance['NflId'] != player_distance['NflIdRusher']].copy()
        player_distance['dist_to_back'] = euclidean_distance(player_distance['X'], player_distance['Y'],
                                                             player_distance['back_X'], player_distance['back_Y'])

        group_cols = play_keys + ['back_from_scrimmage', 'back_oriented_down_field', 'back_moving_down_field']
        player_distance = player_distance.groupby(group_cols)\
                                         .agg({'dist_to_back': ['min', 'max', 'mean', 'std']})\
                                         .reset_index()
        player_distance.columns = group_cols + ['min_dist', 'max_dist', 'mean_dist', 'std_dist']

        return player_distance

    def defense_features(df):
        rusher = rusher_rows(df, play_keys + ['Team', 'X', 'Y'])
        rusher.columns = play_keys + ['RusherTeam', 'RusherX', 'RusherY']

        defense = pd.merge(df, rusher, on=play_keys, how='inner')
        # Players whose Team differs from the rusher's Team.
        defense = defense.loc[defense['Team'] != defense['RusherTeam'],
                              play_keys + ['X', 'Y', 'RusherX', 'RusherY']].copy()
        defense['def_dist_to_back'] = euclidean_distance(defense['X'], defense['Y'],
                                                         defense['RusherX'], defense['RusherY'])

        defense = defense.groupby(play_keys)\
                         .agg({'def_dist_to_back': ['min', 'max', 'mean', 'std']})\
                         .reset_index()
        defense.columns = play_keys + ['def_min_dist', 'def_max_dist', 'def_mean_dist', 'def_std_dist']

        return defense

    def static_features(df):
        static = rusher_rows(df, play_keys + ['X', 'Y', 'S', 'A', 'Dis', 'Orientation', 'Dir',
                                              'YardLine', 'Quarter', 'Down', 'Distance',
                                              'DefendersInTheBox']).drop_duplicates()
        # Missing values get the mean of the rows passed in; if every value is
        # missing the mean is NaN and the column stays NaN.
        static['DefendersInTheBox'] = static['DefendersInTheBox'].fillna(static['DefendersInTheBox'].mean())

        return static

    def split_personnel(s):
        # '1 RB, 1 TE, 3 WR' -> ['1 RB', '1 TE', '3 WR']
        return [part.strip() for part in s.split(',')]

    def defense_formation(l):
        dl = 0
        lb = 0
        db = 0
        other = 0  # never incremented; kept so the tuple shape is unchanged

        for position in l:
            sub_string = position.split(' ')
            if sub_string[1] == 'DL':
                dl += int(sub_string[0])
            elif sub_string[1] in ['LB', 'OL']:
                lb += int(sub_string[0])
            else:
                db += int(sub_string[0])

        return (dl, lb, db, other)

    def offense_formation(l):
        qb = 0
        rb = 0
        wr = 0
        te = 0
        ol = 0

        sub_total = 0
        qb_listed = False
        for position in l:
            sub_string = position.split(' ')
            pos = sub_string[1]
            cnt = int(sub_string[0])
            sub_total += cnt

            if pos == 'QB':
                qb += cnt
                qb_listed = True
            # Assuming LB is a line backer lined up as full back
            elif pos in ['RB', 'LB']:
                rb += cnt
            # Assuming DB is a defensive back and lined up as WR
            elif pos in ['WR', 'DB']:
                wr += cnt
            elif pos == 'TE':
                te += cnt
            # Assuming DL is a defensive lineman lined up as an additional line man
            else:
                ol += cnt

        # If not all 11 players were noted at given positions we need to make some assumptions
        # I will assume if a QB is not listed then there was 1 QB on the play
        # If a QB is listed then I'm going to assume the rest of the positions are at OL
        # This might be flawed but it looks like RB, TE and WR are always listed in the personnel
        if sub_total < 11:
            diff = 11 - sub_total
            if not qb_listed:
                qb += 1
                diff -= 1
            ol += diff

        return (qb, rb, wr, te, ol)

    def personnel_features(df):
        personnel = df[play_keys + ['OffensePersonnel', 'DefensePersonnel']].drop_duplicates()

        defense_counts = personnel['DefensePersonnel'].apply(split_personnel).apply(defense_formation)
        personnel['DefensePersonnel'] = defense_counts
        personnel['num_DL'] = defense_counts.apply(lambda counts: counts[0])
        personnel['num_LB'] = defense_counts.apply(lambda counts: counts[1])
        personnel['num_DB'] = defense_counts.apply(lambda counts: counts[2])

        offense_counts = personnel['OffensePersonnel'].apply(split_personnel).apply(offense_formation)
        personnel['OffensePersonnel'] = offense_counts
        personnel['num_QB'] = offense_counts.apply(lambda counts: counts[0])
        personnel['num_RB'] = offense_counts.apply(lambda counts: counts[1])
        personnel['num_WR'] = offense_counts.apply(lambda counts: counts[2])
        personnel['num_TE'] = offense_counts.apply(lambda counts: counts[3])
        personnel['num_OL'] = offense_counts.apply(lambda counts: counts[4])

        # Let's create some features to specify if the OL is covered
        personnel['OL_diff'] = personnel['num_OL'] - personnel['num_DL']
        personnel['OL_TE_diff'] = (personnel['num_OL'] + personnel['num_TE']) - personnel['num_DL']
        # Let's create a feature to specify if the defense is preventing the run
        # Let's just assume 7 or more DL and LB is run prevention
        personnel['run_def'] = (personnel['num_DL'] + personnel['num_LB'] > 6).astype(int)

        return personnel.drop(['OffensePersonnel', 'DefensePersonnel'], axis=1)

    def combine_features(relative_to_back, defense, static, personnel, deploy=deploy):
        df = pd.merge(relative_to_back, defense, on=play_keys, how='inner')
        df = pd.merge(df, static, on=play_keys, how='inner')
        df = pd.merge(df, personnel, on=play_keys, how='inner')

        if not deploy:
            # `outcomes` is the module-level (GameId, PlayId, Yards) frame.
            df = pd.merge(df, outcomes, on=play_keys, how='inner')

        return df

    yardline = update_yardline(df)
    df = update_orientation(df, yardline)
    back_feats = back_features(df)
    rel_back = features_relative_to_back(df, back_feats)
    def_feats = defense_features(df)
    static_feats = static_features(df)
    personnel = personnel_features(df)
    basetable = combine_features(rel_back, def_feats, static_feats, personnel, deploy=deploy)
    return basetable

In [6]:
# Build the per-play training table; deploy=False so the outcomes frame is merged in.
train_basetable = create_features(train, False)

In [7]:
X = train_basetable.copy()
yards = X.Yards

# One-hot targets over 199 bins: a play worth `yards` sets column 99 + yards.
y = np.zeros((yards.shape[0], 199))
row_index = np.arange(yards.shape[0])
y[row_index, 99 + yards.values] = 1

In [8]:
def process_two(t_):
    """Add the derived columns fe1, fe5, fe7, fe8, fe10 and fe11 to ``t_``.

    The columns are written into ``t_`` itself (in place) and the same frame
    is returned. Reads columns X, Y, S, A, Dis and Dir.

        fe1  = sqrt(|X^2 - Y^2|)
        fe5  = S^2 + 2 * A * Dis
        fe7  = arccos(clip(X / Y, -1, 1))
        fe8  = S / max(fe1, 0.6)
        fe10 = |S * cos(radians(90 - Dir))|
        fe11 = |S * sin(radians(90 - Dir))|
    """
    # Assign the raw array: wrapping it in pd.Series would give it a fresh
    # 0..n-1 index and pandas would align on labels, producing NaN for any
    # frame whose index is not the default RangeIndex.
    t_['fe1'] = np.sqrt(np.absolute(np.square(t_['X'].values) - np.square(t_['Y'].values)))
    t_['fe5'] = np.square(t_['S'].values) + 2 * t_['A'].values * t_['Dis'].values  # N
    t_['fe7'] = np.arccos(np.clip(t_['X'].values / t_['Y'].values, -1, 1))  # N
    # Floor the denominator at 0.6 to avoid dividing by (near) zero.
    t_['fe8'] = t_['S'].values / np.clip(t_['fe1'].values, 0.6, None)
    radian_angle = (90 - t_['Dir']) * np.pi / 180.0
    t_['fe10'] = np.abs(t_['S'] * np.cos(radian_angle))
    t_['fe11'] = np.abs(t_['S'] * np.sin(radian_angle))
    return t_

In [9]:
# Add the derived fe* columns; process_two writes into X and returns the same frame.
X = process_two(X)

In [10]:
# Preview the first rows of the feature table.
X.head()

Unnamed: 0,GameId,PlayId,back_from_scrimmage,back_oriented_down_field,back_moving_down_field,min_dist,max_dist,mean_dist,std_dist,def_min_dist,def_max_dist,def_mean_dist,def_std_dist,X,Y,S,A,Dis,Orientation,Dir,YardLine,Quarter,Down,Distance,DefendersInTheBox,num_DL,num_LB,num_DB,num_QB,num_RB,num_WR,num_TE,num_OL,OL_diff,OL_TE_diff,run_def,Yards,fe1,fe5,fe7,fe8,fe10,fe11
0,2017090700,20170907000118,3.75,1,0,1.449724,22.415872,8.046559,4.873845,4.59331,22.415872,9.752491,5.327299,41.25,30.53,3.63,3.35,0.38,198.02,114.26,45.0,1,3,2,6.0,2,3,6,1,1,3,1,5,3,4,0,8,27.739531,15.7229,0.0,0.13086,3.309436,1.491487
1,2017090700,20170907000139,4.07,0,0,0.792023,23.025872,8.614623,5.598683,4.287773,23.025872,10.297028,5.833217,48.93,27.16,3.06,2.41,0.34,149.3,47.8,53.0,1,1,10,6.0,2,3,6,1,1,3,1,5,3,4,0,3,40.699869,11.0024,0.0,0.075185,2.266862,2.055465
2,2017090700,20170907000189,3.66,1,0,1.64639,20.726285,8.482583,4.642121,4.22167,20.726285,9.903689,5.07329,71.34,19.11,5.77,2.42,0.6,219.18,138.04,75.0,1,1,10,7.0,2,3,6,1,1,3,1,5,3,4,0,5,68.732841,36.1969,0.0,0.083948,3.857889,4.29064
3,2017090700,20170907000345,3.53,0,0,0.918096,9.791231,5.549379,1.983128,4.528002,9.791231,6.309354,1.834174,104.47,25.36,4.45,3.2,0.46,173.78,84.56,108.0,1,2,2,9.0,4,4,3,1,2,0,2,6,2,4,1,2,101.345209,22.7465,0.0,0.043909,4.429957,0.421875
4,2017090700,20170907000395,5.01,0,0,0.502892,21.214806,9.168819,5.611232,4.288088,21.214806,11.056456,5.900009,29.99,27.12,3.9,2.53,0.44,34.27,157.92,35.0,1,1,10,7.0,3,2,6,1,1,1,3,5,2,5,0,7,12.802566,17.4364,0.0,0.304626,1.466013,3.613974


In [11]:
# Columns fed to the model through one embedding input each (order matters:
# it fixes the order of the model's inputs).
cat = [
    'back_oriented_down_field', 'back_moving_down_field',
    'Quarter', 'Down', 'DefendersInTheBox',
    'num_DL', 'num_LB', 'num_DB',
    'num_QB', 'num_RB', 'num_WR', 'num_TE', 'num_OL',
    'OL_diff', 'OL_TE_diff', 'run_def',
]

# Columns that are standardised and fed to the model as one numeric input.
num = [
    'back_from_scrimmage',
    'min_dist', 'max_dist', 'mean_dist', 'std_dist',
    'def_min_dist', 'def_max_dist', 'def_mean_dist', 'def_std_dist',
    'X', 'Y', 'S', 'A', 'Dis', 'Orientation', 'Dir', 'YardLine', 'Distance',
    'fe1', 'fe5', 'fe7', 'fe8', 'fe10', 'fe11',
]

In [12]:
# Standardise the numeric columns. The fitted scaler is kept in `scaler`
# because the inference cell applies the same transform to test plays.
scaler = StandardScaler()
scaler.fit(X[num])
X[num] = scaler.transform(X[num])

In [27]:
def get_model():
    """Build the (uncompiled) network used for every fold.

    Inputs: one scalar input per column in ``cat`` (each passed through its
    own 10-wide embedding sized from the largest absolute value of that column
    in the global ``X``), followed by one input holding all ``num`` columns
    (passed through a 512-unit ReLU layer). The concatenated branches go
    through three Dense -> BatchNormalization -> LeakyReLU -> Dropout stages
    and a 199-way softmax. Compilation is left to the caller.
    """
    model_inputs = []
    branches = []

    for column in cat:
        cat_input = Input(shape=(1,))
        n_levels = int(np.absolute(X[column]).max() + 1)
        embedded = Embedding(n_levels, 10, input_length=1)(cat_input)
        embedded = Reshape(target_shape=(10,))(embedded)
        model_inputs.append(cat_input)
        branches.append(embedded)

    numeric_input = Input(shape=(len(num),))
    numeric_branch = Dense(512, activation='relu')(numeric_input)
    model_inputs.append(numeric_input)
    branches.append(numeric_branch)

    x = Concatenate()(branches)
    # (units, dropout rate) for each hidden stage, in order.
    for units, drop_rate in ((384, 0.5), (256, 0.4), (192, 0.3)):
        x = Dense(units, activation=None)(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(alpha=0.05)(x)
        x = Dropout(drop_rate)(x)

    outputs = Dense(199, activation='softmax')(x)
    return Model(model_inputs, outputs)


In [28]:
# Grouped 5-fold CV: all plays of a game stay in the same fold.
n_splits = 5
kf = GroupKFold(n_splits=n_splits)
score = []
for fold, (tdx, vdx) in enumerate(kf.split(X, y, X['GameId'])):
    print(f'Fold : {fold}')
    X_train, X_val, y_train, y_val = X.iloc[tdx], X.iloc[vdx], y[tdx], y[vdx]
    # Model inputs: one array per categorical column (absolute value, so the
    # embedding index is non-negative) followed by the numeric block.
    X_train = [np.absolute(X_train[col]) for col in cat] + [X_train[num]]
    X_val = [np.absolute(X_val[col]) for col in cat] + [X_val[num]]

    model = get_model()
    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=[])

    # EarlyStopping is driven through Metric, which supplies the 'val_CRPS'
    # log entry it monitors.
    es = EarlyStopping(monitor='val_CRPS',
                       mode='min',
                       restore_best_weights=True,
                       verbose=1,
                       patience=15)
    es.set_model(model)
    metric = Metric(model, [es], [(X_train, y_train), (X_val, y_val)])

    # Warm-up: one epoch each at increasing batch sizes (None = Keras default)
    # before the main run with early stopping.
    for warmup_batch_size in (None, 64, 128, 256):
        model.fit(X_train, y_train, batch_size=warmup_batch_size)
    model.fit(X_train, y_train, callbacks=[metric], epochs=250, batch_size=1024)

    score_ = crps(y_val, model.predict(X_val))
    model.save(f'keras_369_{fold}.h5')
    print(score_)
    score.append(score_)


print(np.mean(score))

Fold : 0
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/250
tr CRPS 0.012946 val CRPS 0.012669
Epoch 2/250
tr CRPS 0.012935 val CRPS 0.012661
Epoch 3/250
tr CRPS 0.012916 val CRPS 0.012647
Epoch 4/250
tr CRPS 0.01291 val CRPS 0.012648
Epoch 5/250
tr CRPS 0.012903 val CRPS 0.012644
Epoch 6/250
tr CRPS 0.012897 val CRPS 0.012641
Epoch 7/250
tr CRPS 0.012885 val CRPS 0.012633
Epoch 8/250
tr CRPS 0.012874 val CRPS 0.012627
Epoch 9/250
tr CRPS 0.012858 val CRPS 0.012616
Epoch 10/250
tr CRPS 0.012848 val CRPS 0.012608
Epoch 11/250
tr CRPS 0.012839 val CRPS 0.012606
Epoch 12/250
tr CRPS 0.012831 val CRPS 0.012602
Epoch 13/250
tr CRPS 0.012821 val CRPS 0.012597
Epoch 14/250
tr CRPS 0.012808 val CRPS 0.012593
Epoch 15/250
tr CRPS 0.012792 val CRPS 0.01258
Epoch 16/250
tr CRPS 0.012781 val CRPS 0.012579
Epoch 17/250
tr CRPS 0.012772 val CRPS 0.012569
Epoch 18/250
tr CRPS 0.012771 val CRPS 0.012571
Epoch 19/250
tr CRPS 0.012765 val CRPS 0.012582
Epoch 20/250
tr CRPS 0.01275 val CRPS 0.012571
Epo

tr CRPS 0.012164 val CRPS 0.012514
Epoch 66/250
tr CRPS 0.012128 val CRPS 0.012539
Restoring model weights from the end of the best epoch
Epoch 00066: early stopping
0.012483160885387938
Fold : 1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/250
tr CRPS 0.012814 val CRPS 0.013102
Epoch 2/250
tr CRPS 0.012812 val CRPS 0.013105
Epoch 3/250
tr CRPS 0.0128 val CRPS 0.0131
Epoch 4/250
tr CRPS 0.01279 val CRPS 0.013093
Epoch 5/250
tr CRPS 0.012789 val CRPS 0.013099
Epoch 6/250
tr CRPS 0.012771 val CRPS 0.013089
Epoch 7/250
tr CRPS 0.012766 val CRPS 0.013084
Epoch 8/250
tr CRPS 0.012755 val CRPS 0.013073
Epoch 9/250
tr CRPS 0.012745 val CRPS 0.013072
Epoch 10/250
tr CRPS 0.012732 val CRPS 0.013056
Epoch 11/250
tr CRPS 0.012722 val CRPS 0.01305
Epoch 12/250
tr CRPS 0.012719 val CRPS 0.013053
Epoch 13/250
tr CRPS 0.012713 val CRPS 0.013061
Epoch 14/250
tr CRPS 0.012702 val CRPS 0.013056
Epoch 15/250
tr CRPS 0.012687 val CRPS 0.013048
Epoch 16/250
tr CRPS 0.012679 val CRPS 0.013041
Epoch 17/25

tr CRPS 0.012061 val CRPS 0.012937
Epoch 63/250
tr CRPS 0.012048 val CRPS 0.012937
Epoch 64/250
tr CRPS 0.012042 val CRPS 0.012946
Epoch 65/250
tr CRPS 0.01203 val CRPS 0.012961
Epoch 66/250
tr CRPS 0.012016 val CRPS 0.012977
Epoch 67/250
tr CRPS 0.012 val CRPS 0.012978
Epoch 68/250
tr CRPS 0.011985 val CRPS 0.012968
Epoch 69/250
tr CRPS 0.011945 val CRPS 0.012967
Epoch 70/250
tr CRPS 0.011935 val CRPS 0.012963
Epoch 71/250
tr CRPS 0.01193 val CRPS 0.012985
Restoring model weights from the end of the best epoch
Epoch 00071: early stopping
0.012927113574228618
Fold : 2
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/250
tr CRPS 0.012758 val CRPS 0.013319
Epoch 2/250
tr CRPS 0.012741 val CRPS 0.013303
Epoch 3/250
tr CRPS 0.012731 val CRPS 0.0133
Epoch 4/250
tr CRPS 0.012722 val CRPS 0.013296
Epoch 5/250
tr CRPS 0.012715 val CRPS 0.013295
Epoch 6/250
tr CRPS 0.012706 val CRPS 0.013289
Epoch 7/250
tr CRPS 0.012695 val CRPS 0.013284
Epoch 8/250
tr CRPS 0.012684 val CRPS 0.013276
Epoch 9/250

tr CRPS 0.01214 val CRPS 0.013123
Epoch 55/250
tr CRPS 0.012134 val CRPS 0.013136
Epoch 56/250
tr CRPS 0.012117 val CRPS 0.013139
Epoch 57/250
tr CRPS 0.012094 val CRPS 0.013135
Epoch 58/250
tr CRPS 0.012079 val CRPS 0.013147
Epoch 59/250
tr CRPS 0.012066 val CRPS 0.013122
Epoch 60/250
tr CRPS 0.012034 val CRPS 0.01312
Epoch 61/250
tr CRPS 0.012023 val CRPS 0.013139
Epoch 62/250
tr CRPS 0.012 val CRPS 0.013136
Epoch 63/250
tr CRPS 0.011997 val CRPS 0.013116
Epoch 64/250
tr CRPS 0.011976 val CRPS 0.013131
Epoch 65/250
tr CRPS 0.011959 val CRPS 0.013129
Epoch 66/250
tr CRPS 0.011943 val CRPS 0.013126
Epoch 67/250
tr CRPS 0.011912 val CRPS 0.013121
Epoch 68/250
tr CRPS 0.011905 val CRPS 0.013151
Epoch 69/250
tr CRPS 0.011893 val CRPS 0.013127
Epoch 70/250
tr CRPS 0.01187 val CRPS 0.013149
Epoch 71/250
tr CRPS 0.011846 val CRPS 0.013115
Epoch 72/250
tr CRPS 0.011854 val CRPS 0.013134
Epoch 73/250
tr CRPS 0.011829 val CRPS 0.013158
Epoch 74/250
tr CRPS 0.01179 val CRPS 0.013114
Epoch 75/250

tr CRPS 0.012479 val CRPS 0.013203
Epoch 29/250
tr CRPS 0.012468 val CRPS 0.013202
Epoch 30/250
tr CRPS 0.012459 val CRPS 0.013201
Epoch 31/250
tr CRPS 0.012448 val CRPS 0.013193
Epoch 32/250
tr CRPS 0.012436 val CRPS 0.013198
Epoch 33/250
tr CRPS 0.012427 val CRPS 0.013195
Epoch 34/250
tr CRPS 0.012419 val CRPS 0.013191
Epoch 35/250
tr CRPS 0.01241 val CRPS 0.01319
Epoch 36/250
tr CRPS 0.012397 val CRPS 0.01319
Epoch 37/250
tr CRPS 0.012386 val CRPS 0.013194
Epoch 38/250
tr CRPS 0.012366 val CRPS 0.013179
Epoch 39/250
tr CRPS 0.012364 val CRPS 0.013183
Epoch 40/250
tr CRPS 0.012352 val CRPS 0.013175
Epoch 41/250
tr CRPS 0.012331 val CRPS 0.013158
Epoch 42/250
tr CRPS 0.012321 val CRPS 0.01315
Epoch 43/250
tr CRPS 0.0123 val CRPS 0.013148
Epoch 44/250
tr CRPS 0.01229 val CRPS 0.013154
Epoch 45/250
tr CRPS 0.012292 val CRPS 0.013173
Epoch 46/250
tr CRPS 0.012276 val CRPS 0.013172
Epoch 47/250
tr CRPS 0.012265 val CRPS 0.013162
Epoch 48/250
tr CRPS 0.012243 val CRPS 0.013154
Epoch 49/250

tr CRPS 0.012808 val CRPS 0.012479
Epoch 18/250
tr CRPS 0.012793 val CRPS 0.012465
Epoch 19/250
tr CRPS 0.012786 val CRPS 0.012466
Epoch 20/250
tr CRPS 0.012778 val CRPS 0.012467
Epoch 21/250
tr CRPS 0.012768 val CRPS 0.012465
Epoch 22/250
tr CRPS 0.012754 val CRPS 0.01245
Epoch 23/250
tr CRPS 0.012744 val CRPS 0.012443
Epoch 24/250
tr CRPS 0.012736 val CRPS 0.012443
Epoch 25/250
tr CRPS 0.012727 val CRPS 0.012435
Epoch 26/250
tr CRPS 0.012716 val CRPS 0.012429
Epoch 27/250
tr CRPS 0.012707 val CRPS 0.012429
Epoch 28/250
tr CRPS 0.012694 val CRPS 0.01242
Epoch 29/250
tr CRPS 0.012683 val CRPS 0.012418
Epoch 30/250
tr CRPS 0.012672 val CRPS 0.012418
Epoch 31/250
tr CRPS 0.01266 val CRPS 0.012413
Epoch 32/250
tr CRPS 0.012649 val CRPS 0.012404
Epoch 33/250
tr CRPS 0.012638 val CRPS 0.012401
Epoch 34/250
tr CRPS 0.012628 val CRPS 0.012393
Epoch 35/250
tr CRPS 0.012623 val CRPS 0.012394
Epoch 36/250
tr CRPS 0.012611 val CRPS 0.012391
Epoch 37/250
tr CRPS 0.012599 val CRPS 0.012388
Epoch 38

tr CRPS 0.011777 val CRPS 0.012337
Restoring model weights from the end of the best epoch
Epoch 00084: early stopping
0.012292237450475356
0.012787221490381435


In [None]:
# The competition API is imported here rather than in the import cell because
# the rest of the notebook does not need it.
from kaggle.competitions import nflrush
env = nflrush.make_env()
iter_test = env.iter_test()

# Load the fold models under the exact names the training loop saved them as
# ('keras_369_<fold>.h5'); 'keras_<fold>.h5' is never written.
models = [load_model(f'keras_369_{fold}.h5') for fold in range(n_splits)]

for (test_df, sample_prediction_df) in iter_test:
    # Same feature pipeline as training, without the outcome merge.
    basetable = create_features(test_df, deploy=True)
    basetable = process_two(basetable)
    basetable[num] = scaler.transform(basetable[num])
    test_ = [np.absolute(basetable[col]) for col in cat] + [basetable[num]]

    # Average the fold models' per-bin probabilities, then submit the
    # cumulative distribution of the first row.
    y_pred = np.mean([fold_model.predict(test_) for fold_model in models], axis=0)
    y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1).tolist()[0]

    preds_df = pd.DataFrame(data=[y_pred], columns=sample_prediction_df.columns)
    env.predict(preds_df)

env.write_submission_file()