In [6]:
import numpy as np
import pandas as pd

import sklearn.metrics as mtr
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from keras.layers import Dense
from keras.models import Sequential
from keras.callbacks import Callback, EarlyStopping
from keras.models import Model
from keras.layers import Input, Dense, Concatenate, Reshape, Dropout, merge, Add
from keras.layers.embeddings import Embedding

from sklearn.model_selection import KFold,GroupKFold

import warnings
import random as rn
import math
import datetime
import tensorflow as tf
from keras.models import load_model
import os
import tqdm

# Notebook-wide configuration: silence every warning and let pandas display
# up to 100 columns when rendering a frame.
warnings.simplefilter("ignore")
pd.set_option("display.max_columns", 100)

#from kaggle.competitions import nflrush
#env = nflrush.make_env()
#iter_test = env.iter_test()

In [7]:
# evaluation metric
def crps(y_true, y_pred):
    """Continuous ranked probability score between two sets of per-bin masses.

    Both inputs are 2-D (rows = samples, columns = bins). Each row is turned
    into a cumulative distribution (cumulative sum clipped to [0, 1]); the
    squared differences between the two cumulative distributions are summed
    over all rows and bins and divided by ``199 * number_of_rows``.

    Returns a single float.
    """
    cdf_true = np.clip(np.cumsum(y_true, axis=1), 0, 1)
    cdf_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
    squared_gap = np.square(cdf_true - cdf_pred)
    total = squared_gap.sum(axis=1).sum(axis=0)
    # 199 is hard-coded as the bin count, independent of the actual column count.
    return total / (199 * cdf_true.shape[0])


# author : nlgn
# Link : https://www.kaggle.com/kingychiu/keras-nn-starter-crps-early-stopping
class Metric(Callback):
    """Keras callback that logs train/validation CRPS after every epoch.

    After each epoch the model predicts on both data splits, the CRPS of each
    is stored in ``logs`` under ``'tr_CRPS'`` and ``'val_CRPS'``, and the
    updated ``logs`` are forwarded to the wrapped callbacks (so that, e.g., an
    EarlyStopping instance can monitor ``'val_CRPS'``).

    Parameters
    ----------
    model : the model used for ``predict``.
    callbacks : iterable of callbacks driven by this one; their
        ``on_train_begin`` / ``on_train_end`` / ``on_epoch_end`` are called
        from the matching hook here.
    data : sequence whose first two items are ``(X_train, y_train)`` and
        ``(X_valid, y_valid)``.
    """

    def __init__(self, model, callbacks, data):
        super().__init__()
        # set_model is the Callback hook for attaching the model; it avoids
        # assigning the ``model`` attribute directly.
        self.set_model(model)
        self.callbacks = callbacks
        self.data = data

    @staticmethod
    def _rounded_crps(y_onehot, y_prob):
        """CRPS of predicted bin masses against one-hot targets, rounded to 6 decimals."""
        y_true = np.clip(np.cumsum(y_onehot, axis=1), 0, 1)
        y_pred = np.clip(np.cumsum(y_prob, axis=1), 0, 1)
        # The sample count is taken from the targets, so it does not depend on
        # whether the model inputs are a single array or a list of arrays.
        score = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * y_true.shape[0])
        return np.round(score, 6)

    def on_train_begin(self, logs=None):
        for callback in self.callbacks:
            callback.on_train_begin(logs)

    def on_train_end(self, logs=None):
        for callback in self.callbacks:
            callback.on_train_end(logs)

    def on_epoch_end(self, batch, logs=None):
        # ``logs`` is written to below, so the declared default of None must
        # be replaced by a real dict.
        if logs is None:
            logs = {}

        X_train, y_train = self.data[0][0], self.data[0][1]
        tr_s = self._rounded_crps(y_train, self.model.predict(X_train))
        logs['tr_CRPS'] = tr_s

        X_valid, y_valid = self.data[1][0], self.data[1][1]
        val_s = self._rounded_crps(y_valid, self.model.predict(X_valid))
        logs['val_CRPS'] = val_s
        print('tr CRPS', tr_s, 'val CRPS', val_s)

        # Forward the enriched logs so wrapped callbacks can monitor the CRPS keys.
        for callback in self.callbacks:
            callback.on_epoch_end(batch, logs)

In [22]:
def create_features(df, deploy=False):
    """Build one row of numeric features per play from player-level rows.

    The input holds one row per player per play. The result holds the
    rusher's row of each play (the row where ``NflId == NflIdRusher``),
    enriched with play-level aggregates:

    * X, Orientation and Dir mirrored for plays whose ``PlayDirection`` is
      ``'left'`` (Y is left as is), and ``YardLine`` re-expressed on the
      0-120 X axis;
    * rusher-relative features (``back_*``), distance statistics from all
      other players to the rusher (``min_dist`` ...) and from the opposing
      team only (``def_*``);
    * counts of each general position group on the play, game-clock features,
      and the player's height in inches and weight/height ratio.

    Parameters
    ----------
    df : pandas.DataFrame
        Player-level rows. It is copied first, so the caller's frame is not
        modified and the function can be called again on the same frame.
    deploy : bool, default False
        When True the ``Yards`` target column is neither required in ``df``
        nor included in the result (inference-time use).

    Returns
    -------
    pandas.DataFrame
        One row per play, with every ``object``-dtype column removed.
    """
    play_keys = ['GameId', 'PlayId']

    # Abbreviations rewritten before team columns are compared with each other
    # (presumably the columns use different abbreviation schemes -- TODO confirm).
    abbr_fixes = {'ARI': 'ARZ', 'BAL': 'BLT', 'CLE': 'CLV', 'HOU': 'HST'}
    position_fixes = {'SAF': 'DB', 'S': 'DB', 'OG': 'G', 'OT': 'T'}
    general_position = {
        'SS': 'DB', 'FS': 'DB', 'CB': 'DB', 'DB': 'DB',
        'DE': 'DL', 'DT': 'DL', 'DL': 'DL',
        'ILB': 'LB', 'OLB': 'LB', 'MLB': 'LB', 'LB': 'LB',
        'WR': 'WR',
        'TE': 'TE',
        'T': 'OL', 'G': 'OL', 'C': 'OL', 'NT': 'OL', 'OL': 'OL',
        'QB': 'OB', 'RB': 'OB', 'FB': 'OB', 'HB': 'OB', 'TB': 'OB', 'WB': 'OB',
    }
    # General position group -> name of its per-play count column.
    count_columns = {
        'OB': 'NumberOfBacksOnPlay',
        'OL': 'NumberOfOLinemenOnPlay',
        'WR': 'NumberOfWRsOnPlay',
        'TE': 'NumberOfTEsOnPlay',
        'DB': 'NumberOfDBsOnPlay',
        'DL': 'NumberOfDLinemenOnPlay',
        'LB': 'NumberOfLBsOnPlay',
    }

    def euclidean_distance(x1, y1, x2, y2):
        """Element-wise straight-line distance between (x1, y1) and (x2, y2)."""
        return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)

    def flip_angle(angle, is_left):
        """Mirror an angle (360 - angle, with 360 folded to 0) where ``is_left`` is True."""
        flipped = 360.0 - angle
        flipped = flipped.where(flipped != 360.0, 0.0)
        return flipped.where(is_left, angle)

    def map_team_name(df):
        """Normalise team abbreviations in the four team columns."""
        abbr = dict(abbr_fixes)
        for team in df['PossessionTeam'].unique():
            abbr[team] = team
        for column in ('PossessionTeam', 'HomeTeamAbbr', 'VisitorTeamAbbr', 'FieldPosition'):
            # Unknown abbreviations are kept as they are instead of becoming NaN,
            # which matters when the frame contains only a few possession teams.
            df[column] = df[column].map(lambda team: abbr.get(team, team))
        return df

    def clean_position(df):
        """Collapse alternative spellings of the same position."""
        df['Position'] = df['Position'].map(lambda pos: position_fixes.get(pos, pos))
        return df

    def get_team_on_offense(df):
        """Flag which side ('home'/'away') has the ball and whether each player is on it."""
        df['TeamOnOffense'] = np.where(df['PossessionTeam'] != df['HomeTeamAbbr'], 'away', 'home')
        df['IsOnOffense'] = df['Team'] == df['TeamOnOffense']
        return df

    def map_offense_defense_team(df):
        """Add the offense/defense team abbreviations and the home-offense flag."""
        offense_is_home = df['TeamOnOffense'] == 'home'
        df['OffenseTeam'] = df['HomeTeamAbbr'].where(offense_is_home, df['VisitorTeamAbbr'])
        df['DefenseTeam'] = df['VisitorTeamAbbr'].where(offense_is_home, df['HomeTeamAbbr'])
        df['IsOffenseAtHome'] = offense_is_home
        return df

    def get_is_rusher(df):
        """Mark the row of the ball carrier."""
        df['IsRusher'] = df['NflId'] == df['NflIdRusher']
        return df

    def get_is_offense_winning(df):
        """Add the offense-minus-defense score and the own-territory flag."""
        offense_is_home = df['TeamOnOffense'] == 'home'
        home_score = df['HomeScoreBeforePlay']
        visitor_score = df['VisitorScoreBeforePlay']
        df['OffenseLessDefenseScore'] = (home_score - visitor_score).where(
            offense_is_home, visitor_score - home_score)
        df['OffenseInOwnTerritory'] = df['FieldPosition'] == df['OffenseTeam']
        return df

    def create_general_position(df):
        """Map each position to a coarse group; anything unlisted becomes 'Other'."""
        df['GeneralPosition'] = df['Position'].map(lambda pos: general_position.get(pos, 'Other'))
        return df

    def get_general_pos_counts(df):
        """Add, to every row, how many players of each general group are on its play."""
        counts = df.groupby(['PlayId', 'GeneralPosition']).size().unstack(fill_value=0)
        for group, column in count_columns.items():
            if group in counts.columns:
                # Looked up by PlayId, so neither the row order, the index nor
                # the number of players per play matters.
                df[column] = df['PlayId'].map(counts[group])
            else:
                df[column] = 0
        return df

    def get_time_features(df):
        """Add snap-to-handoff delay and game-clock derived features."""
        snap = pd.to_datetime(df['TimeSnap'])
        handoff = pd.to_datetime(df['TimeHandoff'])
        # Full timestamps are subtracted: comparing only the seconds field goes
        # wrong whenever the handoff falls in the minute after the snap.
        df['TimeBetweenSnapHandoff'] = (handoff - snap).dt.total_seconds().astype(int)

        clock = df['GameClock'].str.split(':', expand=True)
        df['QuarterGameSecs'] = 60 * clock[0].astype(int) + clock[1].astype(int)
        df['TotalGameSecsPlayed'] = (900 - df['QuarterGameSecs']) + ((df['Quarter'] - 1) * 900)
        # Quarters 1 and 3 still have a further 900-second quarter left in the half.
        df['HalfGameSecsLeft'] = df['QuarterGameSecs'] + 900 * df['Quarter'].isin([1, 3]).astype(int)
        return df

    def get_player_weights_bmi(df):
        """Convert 'feet-inches' height to inches and add the weight/height ratio."""
        height = df['PlayerHeight'].str.split('-', expand=True)
        df['PlayerHeight'] = 12 * height[0].astype(int) + height[1].astype(int)
        # Despite its name this is weight divided by height, not a standard BMI.
        df['PlayerBMI'] = df['PlayerWeight'] / df['PlayerHeight']
        return df

    def update_yardline(df):
        """Per play, express the yard line on the 0-120 X axis."""
        rusher = df.loc[df['NflId'] == df['NflIdRusher'],
                        play_keys + ['PossessionTeam', 'FieldPosition', 'YardLine']].copy()
        own_side = rusher['PossessionTeam'] == rusher['FieldPosition']
        rusher['YardLine'] = np.where(
            own_side,
            # 10-yard end zone plus the yard line on the offense's own side
            10.0 + rusher['YardLine'],
            # half the axis plus the yards between midfield and the yard line
            60.0 + (50 - rusher['YardLine']))
        return rusher[play_keys + ['YardLine']]

    def update_orientation(df, yardline):
        """Mirror 'left' plays and swap in the re-expressed yard line."""
        is_left = df['PlayDirection'] == 'left'
        df['X'] = (120.0 - df['X']).where(is_left, df['X'])
        df['Orientation'] = flip_angle(df['Orientation'], is_left)
        df['Dir'] = flip_angle(df['Dir'], is_left)
        df = df.drop('YardLine', axis=1)
        return pd.merge(df, yardline, on=play_keys, how='inner')

    def back_features(df):
        """Rusher position plus its offset from the yard line and angle flags."""
        carriers = df.loc[df['NflId'] == df['NflIdRusher'],
                          play_keys + ['NflIdRusher', 'X', 'Y', 'Orientation', 'Dir', 'YardLine']].copy()
        carriers['back_from_scrimmage'] = carriers['YardLine'] - carriers['X']
        carriers['back_oriented_down_field'] = (carriers['Orientation'] > 180.0).astype(int)
        carriers['back_moving_down_field'] = (carriers['Dir'] > 180.0).astype(int)
        carriers = carriers.rename(columns={'X': 'back_X', 'Y': 'back_Y'})
        return carriers[play_keys + ['NflIdRusher', 'back_X', 'back_Y', 'back_from_scrimmage',
                                     'back_oriented_down_field', 'back_moving_down_field']]

    def features_relative_to_back(df, carriers):
        """Distance statistics from every non-rusher player to the rusher, per play."""
        player_distance = pd.merge(df[play_keys + ['NflId', 'X', 'Y']], carriers,
                                   on=play_keys, how='inner')
        player_distance = player_distance[
            player_distance['NflId'] != player_distance['NflIdRusher']].copy()
        player_distance['dist_to_back'] = euclidean_distance(
            player_distance['X'], player_distance['Y'],
            player_distance['back_X'], player_distance['back_Y'])
        group_keys = play_keys + ['back_from_scrimmage', 'back_oriented_down_field',
                                  'back_moving_down_field']
        player_distance = player_distance.groupby(group_keys)\
                                         .agg({'dist_to_back': ['min', 'max', 'mean', 'std']})\
                                         .reset_index()
        player_distance.columns = group_keys + ['min_dist', 'max_dist', 'mean_dist', 'std_dist']
        return player_distance

    def defense_features(df):
        """Distance statistics from the opposing team's players to the rusher, per play."""
        rusher = df.loc[df['NflId'] == df['NflIdRusher'], play_keys + ['Team', 'X', 'Y']]
        rusher.columns = play_keys + ['RusherTeam', 'RusherX', 'RusherY']
        defense = pd.merge(df, rusher, on=play_keys, how='inner')
        defense = defense.loc[defense['Team'] != defense['RusherTeam'],
                              play_keys + ['X', 'Y', 'RusherX', 'RusherY']].copy()
        defense['def_dist_to_back'] = euclidean_distance(
            defense['X'], defense['Y'], defense['RusherX'], defense['RusherY'])
        defense = defense.groupby(play_keys)\
                         .agg({'def_dist_to_back': ['min', 'max', 'mean', 'std']})\
                         .reset_index()
        defense.columns = play_keys + ['def_min_dist', 'def_max_dist', 'def_mean_dist', 'def_std_dist']
        return defense

    def static_features(df):
        """Rusher-row columns that are carried into the result unchanged."""
        columns = ['GameId', 'PlayId', 'X', 'Y', 'S', 'A', 'Dis', 'Orientation', 'Dir',
                   'YardLine', 'Quarter', 'Down', 'Distance', 'DefendersInTheBox']
        if not deploy:
            # The target is only available (and only wanted) at training time.
            columns.append('Yards')
        columns += ['PlayerHeight', 'PlayerWeight', 'IsOffenseAtHome', 'OffenseInOwnTerritory',
                    'NumberOfBacksOnPlay', 'NumberOfOLinemenOnPlay', 'NumberOfWRsOnPlay',
                    'NumberOfTEsOnPlay', 'NumberOfDBsOnPlay', 'NumberOfDLinemenOnPlay',
                    'NumberOfLBsOnPlay', 'TimeBetweenSnapHandoff', 'QuarterGameSecs',
                    'TotalGameSecsPlayed', 'HalfGameSecsLeft', 'PlayerBMI']
        static = df.loc[df['NflId'] == df['NflIdRusher'], columns].drop_duplicates()
        # Missing values are replaced by the mean of the frame being processed.
        # NOTE(review): for a frame whose values are all missing this stays NaN.
        static['DefendersInTheBox'] = static['DefendersInTheBox'].fillna(
            static['DefendersInTheBox'].mean())
        return static

    def combine_features(df):
        """Run every step in order and join the per-play tables."""
        # Work on a copy: the steps below overwrite columns such as X and
        # PlayerHeight, which must not leak into the caller's frame.
        df = df.copy()
        df = map_team_name(df)
        df = get_team_on_offense(df)
        df = map_offense_defense_team(df)
        df = clean_position(df)
        df = get_is_rusher(df)
        df = get_is_offense_winning(df)
        df = create_general_position(df)
        df = get_general_pos_counts(df)
        df = get_time_features(df)
        df = get_player_weights_bmi(df)

        # The yard line must be derived before update_orientation drops the raw column.
        yardline = update_yardline(df)
        df = update_orientation(df, yardline)
        back_feats = back_features(df)
        rel_back = features_relative_to_back(df, back_feats)
        def_feats = defense_features(df)
        static_feats = static_features(df)
        per_play = rel_back.merge(def_feats, on=play_keys, how='left')
        return static_feats.merge(per_play, on=play_keys, how='left')

    features = combine_features(df)
    return features.select_dtypes(exclude=['object'])

In [23]:
# Load the player-level training rows and keep the per-play outcome table.
train = pd.read_csv('../input/nfl-big-data-bowl-2020/train.csv')
outcomes = train[['GameId','PlayId','Yards']].drop_duplicates()

# One feature row per play.
train_basetable = create_features(train)

X = train_basetable.copy()
yards = X.Yards

# One-hot targets over 199 bins: a gain of `target` yards sets bin 99 + target.
y = np.zeros((yards.shape[0], 199))
y[np.arange(yards.shape[0]), 99 + yards.values] = 1

train_basetable.head()

Unnamed: 0,GameId,PlayId,X,Y,S,A,Dis,Orientation,Dir,YardLine,Quarter,Down,Distance,DefendersInTheBox,Yards,PlayerHeight,PlayerWeight,IsOffenseAtHome,OffenseInOwnTerritory,NumberOfBacksOnPlay,NumberOfOLinemenOnPlay,NumberOfWRsOnPlay,NumberOfTEsOnPlay,NumberOfDBsOnPlay,NumberOfDLinemenOnPlay,NumberOfLBsOnPlay,TimeBetweenSnapHandoff,QuarterGameSecs,TotalGameSecsPlayed,HalfGameSecsLeft,PlayerBMI,back_from_scrimmage,back_oriented_down_field,back_moving_down_field,min_dist,max_dist,mean_dist,std_dist,def_min_dist,def_max_dist,def_mean_dist,def_std_dist
0,2017090700,20170907000118,41.25,30.53,3.63,3.35,0.38,198.02,114.26,45.0,1,3,2,6.0,8,70,205,True,True,2,5,3,1,6,4,1,1,854,46,1754,2.928571,3.75,1,0,1.449724,22.415872,8.046559,4.873845,4.59331,22.415872,9.752491,5.327299
1,2017090700,20170907000139,48.93,27.16,3.06,2.41,0.34,149.3,47.8,53.0,1,1,10,6.0,3,70,205,True,True,2,5,3,1,6,4,1,1,832,68,1732,2.928571,4.07,0,0,0.792023,23.025872,8.614623,5.598683,4.287773,23.025872,10.297028,5.833217
2,2017090700,20170907000189,71.34,19.11,5.77,2.42,0.6,219.18,138.04,75.0,1,1,10,7.0,5,70,205,True,False,2,5,3,1,6,4,1,2,782,118,1682,2.928571,3.66,1,0,1.64639,20.726285,8.482583,4.642121,4.22167,20.726285,9.903689,5.07329
3,2017090700,20170907000345,104.47,25.36,4.45,3.2,0.46,173.78,84.56,108.0,1,2,2,9.0,2,71,210,True,False,3,7,0,2,3,4,3,2,732,168,1632,2.957746,3.53,0,0,0.918096,9.791231,5.549379,1.983128,4.528002,9.791231,6.309354,1.834174
4,2017090700,20170907000395,29.99,27.12,3.9,2.53,0.44,34.27,157.92,35.0,1,1,10,7.0,7,71,216,False,True,2,5,1,3,6,3,2,1,728,172,1628,3.042254,5.01,0,0,0.502892,21.214806,9.168819,5.611232,4.288088,21.214806,11.056456,5.900009


In [24]:
def process_two(t_):
    """Add six derived columns (fe1, fe5, fe7, fe8, fe10, fe11) to ``t_``.

    The frame is modified in place and also returned. It must contain the
    columns X, Y, S, A, Dis and Dir.

    * fe1  -- sqrt(|X^2 - Y^2|)
    * fe5  -- S^2 + 2 * A * Dis
    * fe7  -- arccos of X / Y clipped to [-1, 1]
    * fe8  -- S divided by fe1, with fe1 floored at 0.6
    * fe10 -- |S * cos(angle)| and fe11 -- |S * sin(angle)|, where
      angle = (90 - Dir) converted from degrees to radians
    """
    x = t_['X'].values
    y = t_['Y'].values
    speed = t_['S'].values

    # Assigned as a plain array: wrapping it in a new Series would align on a
    # fresh 0..n-1 index and produce NaN for frames indexed any other way.
    t_['fe1'] = np.sqrt(np.absolute(np.square(x) - np.square(y)))
    t_['fe5'] = np.square(speed) + 2 * t_['A'].values * t_['Dis'].values  # N
    t_['fe7'] = np.arccos(np.clip(x / y, -1, 1))  # N
    # The lower clip keeps the divisor away from zero.
    t_['fe8'] = speed / np.clip(t_['fe1'].values, 0.6, None)

    radian_angle = (90 - t_['Dir']) * np.pi / 180.0
    t_['fe10'] = np.abs(t_['S'] * np.cos(radian_angle))
    t_['fe11'] = np.abs(t_['S'] * np.sin(radian_angle))
    return t_

In [28]:
X = process_two(X)

# Columns allowed to reach the numeric branch of the model.
important = [
    'back_from_scrimmage', 'min_dist', 'max_dist', 'mean_dist', 'std_dist',
    'def_min_dist', 'def_max_dist', 'def_mean_dist', 'def_std_dist',
    'X', 'Y', 'S', 'A', 'Dis', 'Orientation', 'Dir', 'YardLine',
]

# Columns fed through embeddings.
cat = [
    'IsOffenseAtHome', 'OffenseInOwnTerritory',
    'back_oriented_down_field', 'back_moving_down_field',
]

# Every numeric candidate, including the process_two columns; only the
# candidates that also appear in `important` are kept, in candidate order.
engineered = ['fe1', 'fe5', 'fe7','fe8', 'fe10', 'fe11']
candidates = [
    'back_from_scrimmage', 'min_dist', 'max_dist', 'mean_dist', 'std_dist',
    'def_min_dist', 'def_max_dist', 'def_mean_dist', 'def_std_dist',
    'X', 'Y', 'S', 'A', 'Dis', 'Orientation', 'Dir', 'YardLine',
    'Distance', 'Quarter', 'Down', 'DefendersInTheBox', 'PlayerHeight',
    'PlayerWeight', 'Week', 'OffenseScore', 'DefenseScore',
    'NumberOfBacksOnPlay', 'NumberOfOLinemenOnPlay', 'NumberOfWRsOnPlay',
    'NumberOfTEsOnPlay', 'NumberOfDBsOnPlay', 'NumberOfDLinemenOnPlay',
    'NumberOfLBsOnPlay', 'TimeBetweenSnapHandoff', 'QuarterGameSecs',
    'TotalGameSecsPlayed', 'HalfGameSecsLeft', 'PlayerBMI',
] + engineered
num = [name for name in candidates if name in important]
print(len(cat), len(num), sep='\n')

4
17


In [29]:
# Fit the scaler on the numeric columns of the full training table, then
# overwrite those columns with their standardised values. The fitted `scaler`
# is reused at inference time.
scaler = StandardScaler().fit(X[num])
X[num] = scaler.transform(X[num])

In [30]:
def model_396_1():
    """Build the (uncompiled) network: embedded categoricals plus a dense numeric branch.

    Reads the module-level ``cat``, ``num`` and ``X``. Each column in ``cat``
    gets its own single-value input and a 10-wide embedding whose vocabulary
    size is the column's largest absolute value in ``X`` plus one. The
    ``len(num)`` numeric features go through a 512-unit ReLU layer. All
    branches are concatenated and passed through 256- and 128-unit ReLU
    layers, dropout of 0.5 and a 199-way softmax.

    The model's inputs are ordered as the ``cat`` columns followed by the
    numeric block.
    """
    def embedded_categorical(column):
        # One scalar input -> 10-dimensional embedding, flattened to shape (10,).
        cat_input = Input(shape=(1,))
        vocab_size = int(np.absolute(X[column]).max() + 1)
        embedded = Embedding(vocab_size, 10, input_length=1)(cat_input)
        return cat_input, Reshape(target_shape=(10,))(embedded)

    branches = [embedded_categorical(column) for column in cat]

    numeric_input = Input(shape=(len(num),))
    branches.append((numeric_input, Dense(512, activation='relu')(numeric_input)))

    inputs = [branch_input for branch_input, _ in branches]
    hidden = Concatenate()([branch_output for _, branch_output in branches])
    for units in (256, 128):
        hidden = Dense(units, activation='relu')(hidden)
    hidden = Dropout(0.5)(hidden)
    output = Dense(199, activation='softmax')(hidden)
    return Model(inputs, output)


# Grouped 5-fold cross-validation: folds are split on GameId, so all plays of
# a game land in the same fold.
n_splits = 5
kf = GroupKFold(n_splits=n_splits)
score = []
for i_369, (tdx, vdx) in enumerate(kf.split(X, y, X['GameId'])):
    print(f'Fold : {i_369}')
    fold_train, fold_val = X.iloc[tdx], X.iloc[vdx]
    y_train, y_val = y[tdx], y[vdx]

    # Model inputs: one array per categorical column, then the numeric block.
    X_train = [np.absolute(fold_train[col]) for col in cat] + [fold_train[num]]
    X_val = [np.absolute(fold_val[col]) for col in cat] + [fold_val[num]]

    model = model_396_1()
    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=[])

    # Early stopping watches the 'val_CRPS' value that Metric writes into the logs.
    es = EarlyStopping(monitor='val_CRPS',
                       mode='min',
                       restore_best_weights=True,
                       verbose=2,
                       patience=5)
    es.set_model(model)
    metric = Metric(model, [es], [(X_train, y_train), (X_val, y_val)])

    # Four single-epoch passes with growing batch size (None = the fit default)
    # before the monitored run.
    for warmup_batch_size in (None, 64, 128, 256):
        model.fit(X_train, y_train, batch_size=warmup_batch_size, verbose=False)
    model.fit(X_train, y_train, callbacks=[metric], epochs=100, batch_size=1024, verbose=False)

    score_ = crps(y_val, model.predict(X_val))
    model.save(f'keras_369_{i_369}.h5')
    print(score_)
    score.append(score_)

Fold : 0
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
tr CRPS 0.012891 val CRPS 0.012681
tr CRPS 0.012873 val CRPS 0.012673
tr CRPS 0.012869 val CRPS 0.012666
tr CRPS 0.012853 val CRPS 0.012664
tr CRPS 0.01283 val CRPS 0.012661
tr CRPS 0.012811 val CRPS 0.012649
tr CRPS 0.012803 val CRPS 0.012637
tr CRPS 0.012776 val CRPS 0.012633
tr CRPS 0.012767 val CRPS 0.012614
tr CRPS 0.012748 val CRPS 0.012613
tr CRPS 0.012742 val CRPS 0.012616
tr CRPS 0.012735 val CRPS 0.012617
tr CRPS 0.012705 val CRPS 0.012589
tr CRPS 0.012682 val CRPS 0.012575
tr CRPS 0.012669 val CRPS 0.012563
tr CRPS 0.012662 val CRPS 0.012571
tr CRPS 0.012646 val CRPS 0.012574
tr CRPS 0.012616 val CRPS 0.012558
tr CRPS 0.012604 val CRPS 0.012565
tr CRPS 0.012588 val CRPS 0.012546
tr CRPS 0.012574 val CRPS 0.012544
tr CRPS 0.012537 val C

In [34]:
# Average of the per-fold validation CRPS values collected in `score`.
# NOTE(review): the stored output of this cell is orders of magnitude below the
# per-epoch 'val CRPS' values logged during training -- re-run on a fresh
# kernel to confirm which figure is current.
print(np.asarray(score).mean())

2.0063890230834214e-07


In [None]:
# Reload the model saved for each cross-validation fold.
models = [load_model(f'keras_369_{i}.h5') for i in range(n_splits)]

In [None]:
# NOTE(review): `env` and `iter_test` are only created by the nflrush setup
# lines that are commented out in the import cell, and this call requires
# create_features to accept a `deploy` keyword.
for (test_df, sample_prediction_df) in tqdm.tqdm(iter_test):
    # Same pipeline as training: features, derived columns, fitted scaler.
    basetable = process_two(create_features(test_df, deploy=True))
    basetable[num] = scaler.transform(basetable[num])
    test_ = [np.absolute(basetable[col]) for col in cat] + [basetable[num]]

    # Average the fold models, then turn the bin masses of the first row into
    # a cumulative distribution clipped to [0, 1].
    fold_predictions = [model.predict(test_) for model in models]
    y_pred = np.mean(fold_predictions, axis=0)
    y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)[0].tolist()

    preds_df = pd.DataFrame(data=[y_pred], columns=sample_prediction_df.columns)
    env.predict(preds_df)

env.write_submission_file()