In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from kaggle.competitions import nflrush
from string import punctuation
from tqdm import tqdm
import gc, re
import pickle

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, KFold

import lightgbm as lgb
from catboost import Pool, CatBoostRegressor

from keras.layers import Dense,Input,Flatten,concatenate,Dropout,Lambda,BatchNormalization, LeakyReLU
from keras.models import Sequential
import keras.backend as K
from keras.callbacks import Callback,EarlyStopping,ModelCheckpoint
from  keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.losses import binary_crossentropy
from keras.utils import to_categorical

import codecs
import time
import datetime
import re

TRAIN_OFFLINE = False

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

# Widen the pandas display limits so wide frames are not truncated when shown.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 150)

Using TensorFlow backend.


/kaggle/input/nfl-big-data-bowl-2020/train.csv
/kaggle/input/nfl-big-data-bowl-2020/kaggle/competitions/nflrush/sample_submission.csv.encrypted
/kaggle/input/nfl-big-data-bowl-2020/kaggle/competitions/nflrush/competition.cpython-36m-x86_64-linux-gnu.so
/kaggle/input/nfl-big-data-bowl-2020/kaggle/competitions/nflrush/test.csv.encrypted
/kaggle/input/nfl-big-data-bowl-2020/kaggle/competitions/nflrush/__init__.py


In [2]:
%%time
# Read the raw competition training CSV into memory.
train_df_org = pd.read_csv('/kaggle/input/nfl-big-data-bowl-2020/train.csv', low_memory=False)

CPU times: user 6.44 s, sys: 1.3 s, total: 7.74 s
Wall time: 7.68 s


In [3]:
# Work on a copy so the frame read from disk (train_df_org) keeps its raw values.
train_df = train_df_org.copy()

In [4]:
def get_player_specific_cols(col_names):
    """Expand base column names into their per-player variants.

    Every name in ``col_names`` yields ``<name>_player0`` .. ``<name>_player21``
    (22 players). All variants of one base name stay together, in player order,
    and base names keep the order they were given in.
    """
    return [f'{base}_player{slot}' for base in col_names for slot in range(22)]
def mean_without_overflow_fast(col):
    """Return the mean of ``col`` while keeping intermediate sums small.

    Each element is divided by the length first, the scaled values are
    averaged, and the result is multiplied back by the length.

    The scaling is done on a new object (``col / n``) rather than in place, so
    the caller's data is left untouched and integer arrays are accepted.

    Args:
        col: A NumPy array or pandas Series of numbers.

    Returns:
        The mean of ``col`` as computed by the container's own ``mean()``.
    """
    n = len(col)
    scaled = col / n  # new object: the caller's array/Series is not modified
    return scaled.mean() * n
def encode_cyclic_feature(df, col, max_vals):
    """Replace a periodic column by its sine and cosine projections.

    The value is turned into an angle ``2*pi*value/max_vals`` and stored as two
    new columns, ``<col>_sin`` and ``<col>_cos``. The original column is then
    removed. ``df`` is modified in place and also returned.
    """
    angle = 2 * np.pi * df[col] / max_vals
    df[col + '_sin'] = np.sin(angle)
    df[col + '_cos'] = np.cos(angle)
    df.pop(col)
    return df
def extract_timestamp(df, timestamp_col):
    """Add hour, minute and second columns derived from a datetime column.

    Three columns named ``<timestamp_col>Hour``, ``<timestamp_col>Minute`` and
    ``<timestamp_col>Second`` are written into ``df`` (converted with
    ``np.uint8``). The source column is left in place. ``df`` is modified in
    place and also returned.
    """
    clock = df[timestamp_col].dt
    parts = (('Hour', clock.hour), ('Minute', clock.minute), ('Second', clock.second))
    for suffix, values in parts:
        df[f'{timestamp_col}{suffix}'] = np.uint8(values)
    return df
def get_player_specific_cols(col_names, total_players=22):
    """Expand base column names into one column name per player slot.

    This is the second definition of this name in the cell, so it is the one
    that stays bound after the cell runs.

    Args:
        col_names: Iterable of base column names, e.g. ``['Position']``.
        total_players: Number of player slots per play. Defaults to 22, which
            keeps existing one-argument calls unchanged.

    Returns:
        A list ``['<name>_player0', ..., '<name>_player<total_players-1>']``
        for each base name, in the order the base names were given.
    """
    return [
        f'{col}_player{player}'
        for col in col_names
        for player in range(total_players)
    ]
def height_to_inches(player_height):
    """Convert a ``'<feet>-<inches>'`` height string to total inches."""
    parts = player_height.split('-')
    feet = int(parts[0])
    inches = int(parts[1])
    return feet * 12 + inches
def bdate_to_age(bdate):
    """Return ages in whole years for a datetime Series of birth dates.

    The age is the calendar-year difference to the current time, reduced by
    one when the birth month is later than the current month. Only year and
    month are compared; the day of the month is not taken into account.
    """
    today = pd.to_datetime('now')
    year_gap = today.year - bdate.dt.year
    birth_month_still_ahead = today.month < bdate.dt.month
    return year_gap - birth_month_still_ahead
def get_grouping_dict(df, key):
    """Parse a personnel-style column into one dict per row.

    Each cell of ``df[key]`` is expected to look like ``'1 RB, 1 TE, 3 WR'``:
    comma-separated entries made of a count followed by a group label. The
    result for a row maps label -> count, with the count kept as a string.
    """
    parsed_rows = []
    for personnel in df[key]:
        counts_by_group = {}
        for entry in personnel.split(','):
            tokens = entry.split()
            counts_by_group[tokens[1]] = tokens[0]
        parsed_rows.append(counts_by_group)
    return parsed_rows

In [5]:

def groupby_playid(df, is_training=True):
    """Collapse the per-player rows of each play into a single wide row.

    The result has one row per ``PlayId`` (sorted ascending). Play-level
    columns are taken from the first non-null value within the play. Every
    player-level feature is spread over 22 columns named
    ``<feature>_player0`` .. ``<feature>_player21``, following the row order
    of the players inside the play.

    Args:
        df: Frame with one row per player per play. It is not modified; the
            derived ``X_speed`` / ``Y_speed`` columns are added to a copy.
        is_training: When True the target column ``Yards`` is carried over and
            a tqdm progress bar is shown while plays are flattened.

    Returns:
        A new DataFrame with the play-level columns followed by the
        per-player columns.

    Note:
        Assumes every play contributes exactly 22 rows -- TODO confirm; the
        final assignment expects 22 values per player feature.
    """
    total_players = 22
    non_player_features = ['GameId', 'PlayId', 'Season', 'YardLine', 'Quarter', 'GameClock',
       'PossessionTeam', 'Down', 'Distance', 'FieldPosition',
       'HomeScoreBeforePlay', 'VisitorScoreBeforePlay',
       'OffenseFormation', 'OffensePersonnel', 'DefendersInTheBox',
       'DefensePersonnel', 'PlayDirection', 'TimeHandoff', 'TimeSnap',
       'Yards', 'HomeTeamAbbr', 'VisitorTeamAbbr', 'Week', 'Stadium',
       'Location', 'StadiumType', 'Turf', 'GameWeather', 'Temperature',
       'Humidity', 'WindSpeed', 'WindDirection', 'NflId']

    if not is_training:
        non_player_features.remove('Yards')

    # NOTE(review): np.cos / np.sin interpret their argument as radians. If
    # `Dir` is stored in degrees these components are wrong -- confirm the unit
    # against the dataset description before changing anything here.
    # assign() returns a new frame, so the caller's DataFrame gains no columns.
    df = df.assign(
        X_speed=np.cos(df['Dir']) * df['S'],
        Y_speed=np.sin(df['Dir']) * df['S'],
    )

    player_features = ['Team', 'X', 'Y', 'S', 'A', 'Dis', 'Orientation', 'Dir','X_speed',
                       'Y_speed', 'DisplayName', 'JerseyNumber', 'PlayerHeight', 'PlayerWeight',
                       'PlayerBirthDate', 'PlayerCollegeName', 'Position', 'NflIdRusher']

    # Feature-major order: all 22 slots of one feature, then the next feature.
    # melt() below emits values in exactly this order.
    player_features_columns = [
        f'{feature}_player{player}'
        for feature in player_features
        for player in range(total_players)
    ]

    # One grouping is reused for the play-level values and for the per-play
    # iteration; both are ordered by ascending PlayId, so rows line up.
    plays = df.groupby('PlayId')

    # Play-level columns: first non-null value within each play. PlayId is the
    # grouping key, so it is restored from the index and put back in its slot.
    play_level_cols = [col for col in non_player_features if col != 'PlayId']
    final_df = plays[play_level_cols].first().reset_index()[non_player_features]
    final_df = final_df.reindex(final_df.columns.tolist() + player_features_columns, axis=1)

    # Iterating the groups visits each row once instead of re-filtering the
    # whole frame for every play.
    play_iter = tqdm(plays, position=0, leave=True) if is_training else plays
    temp_cols = [play_rows[player_features].melt()['value'] for _, play_rows in play_iter]
    final_df[player_features_columns] = pd.DataFrame(temp_cols).values

    return final_df

In [6]:
def feature_engineering(df, is_training=True, label_encoders={}):
    """Build one engineered feature row per play from raw per-player rows.

    Steps, in order: label-encode ``NflId``; map ``Team`` and ``Season`` to
    integers; pivot to one row per play with ``groupby_playid``; fill gaps;
    expand the three time columns into cyclic sin/cos features; normalise the
    weather, wind, turf and stadium text; label-encode the categorical
    columns; and add derived features (Beaufort scale, BMI, rusher flags, ...).

    Args:
        df: Raw frame with one row per player per play. ``NflId``, ``Team``
            and ``Season`` are overwritten in this frame before the pivot, so
            pass a copy if the raw values are still needed afterwards.
        is_training: When True, new ``LabelEncoder`` objects are fitted and
            stored in ``label_encoders`` and missing values are interpolated.
            When False, the encoders already present in ``label_encoders``
            are reused and nothing is fitted.
        label_encoders: Dict mapping column name -> fitted ``LabelEncoder``.
            NOTE: the shared mutable default is load-bearing. Callers that
            omit this argument (as the training call in ``processData`` does)
            all share one dict, which is how encoders fitted during training
            can be found again by a later inference call that also omits it.
            Do not switch the default to ``None`` without making every caller
            pass the encoders explicitly.

    Returns:
        ``(df, label_encoders)`` when ``is_training`` is True, otherwise only
        the engineered ``df``.
    """
    if is_training:
        label_encoders['NflId'] = LabelEncoder()
        label_encoders['NflId'].fit(df['NflId'])
    try:
        df['NflId'] = label_encoders['NflId'].transform(df['NflId'])
    except Exception:
        # Missing encoder or unseen id: fall back to NaN for the whole column.
        df['NflId'] = np.nan

    team_dict = {
        'away': 0,
        'home': 1
    }
    df['Team'] = df['Team'].map(team_dict)
    season_dict = {
        2017: 0,
        2018: 1
    }
    df['Season'] = df['Season'].map(season_dict)
    df = groupby_playid(df, is_training)

    df = df.drop(['Season', 'Temperature', 'Humidity'], axis = 1)

    if is_training:
        # Column-wise interpolation across plays; only done on the training set.
        df = df.apply(lambda group: group.interpolate(limit_direction='both'))

    # Fill remaining gaps from the next play that has a value.
    for col in ['WindDirection', 'WindSpeed', 'GameWeather', 'StadiumType',
                'FieldPosition', 'OffenseFormation']:
        df[col] = df[col].bfill()

    time_cols = ['GameClock', 'TimeHandoff', 'TimeSnap']
    for col in time_cols:
        df[col] = pd.to_datetime(df[col])
    for col in time_cols:
        df = extract_timestamp(df, col)
    df = df.drop(time_cols, axis=1)

    # Hour/minute/second wrap around, so encode each as a point on a circle.
    for col in time_cols:
        df = encode_cyclic_feature(df, f'{col}Hour', 24)
        df = encode_cyclic_feature(df, f'{col}Minute', 60)
        df = encode_cyclic_feature(df, f'{col}Second', 60)

    def transform_game_weather(x):
        """Bucket a free-text weather description into a coarse category."""
        x = str(x).lower()
        if 'indoor' in x:
            return  'indoor'
        elif 'cloud' in x or 'coudy' in x or 'clouidy' in x:
            return 'cloudy'
        elif 'rain' in x or 'shower' in x:
            return 'rain'
        elif 'sunny' in x:
            return 'sunny'
        elif 'clear' in x:
            return 'clear'
        elif 'cold' in x or 'cool' in x:
            return 'cool'
        elif 'snow' in x:
            return 'snow'
        return x

    df['GameWeather'] = df['GameWeather'].apply(transform_game_weather)

    categorical_features = ['PossessionTeam', 'FieldPosition', 'OffenseFormation', 'PlayDirection', 'HomeTeamAbbr',
                        'VisitorTeamAbbr', 'NflId','Stadium', 'Location', 'GameWeather'] + get_player_specific_cols(['Position', 'PlayerCollegeName', 'NflIdRusher'])

    for col in get_player_specific_cols(['PlayerHeight']):
        df[col] = df[col].apply(height_to_inches)

    for col in get_player_specific_cols(['PlayerBirthDate']):
        df[col] = pd.to_datetime(df[col])
        df[col] = bdate_to_age(df[col])

    # NOTE(review): 'NflId' is part of categorical_features, so during training
    # the encoder stored under label_encoders['NflId'] at the top of this
    # function is replaced here by one fitted on the per-play, already-encoded
    # values. Confirm this double encoding is intended.
    for cat in categorical_features:
        if is_training:
            label_encoders[cat] = LabelEncoder()
            label_encoders[cat].fit(df[cat])
        try:
            df[cat] = label_encoders[cat].transform(df[cat])
        except Exception:
            df[cat] = np.nan # Put NaN in case when any unseen label is found in testing dataset.

    # The per-position personnel counts (see get_grouping_dict) are currently
    # not used as features; the raw personnel strings are simply dropped.
    df = df.drop(['OffensePersonnel', 'DefensePersonnel'], axis=1)

    # NOTE(review): after the pivot only NflIdRusher_player<k> columns exist,
    # so this lookup raises KeyError and the column ends up all-NaN.
    try:
        df['NflIdRusher'] = label_encoders['NflId'].transform(df['NflIdRusher'])
    except Exception:
        df['NflIdRusher'] = np.nan

    wind_directions = ['N', 'E', 'S', 'W', 'NE', 'SE', 'SW', 'NW', 'NNE', 'ENE', 'ESE', 'SSE', 'SSW', 'WSW', 'WNW', 'NNW']  # https://www.quora.com/What-is-the-definition-of-SSW-wind-direction

    # Some rows carry a direction in the speed column; blank those out, and
    # blank anything in the direction column that is not a compass point.
    df.loc[df['WindSpeed'].isin(wind_directions), 'WindSpeed'] = np.nan
    df.loc[~df['WindDirection'].isin(wind_directions), 'WindDirection'] = np.nan

    df['WindDirection'] = df['WindDirection'].bfill()
    df['WindSpeed'] = df['WindSpeed'].bfill()

    if is_training:
        label_encoders['WindDirection'] = LabelEncoder()
        label_encoders['WindDirection'].fit(df['WindDirection'])
    try:
        df['WindDirection'] = label_encoders['WindDirection'].transform(df['WindDirection'])
    except Exception:
        df['WindDirection'] = np.nan

    def transform_windspeed(speed):
        """Parse a wind-speed cell into a float.

        Handles a unit suffix in any letter case ("MPH", "mph", "MPh", ...),
        and ranges such as "10-20" (their midpoint). Anything unparsable
        falls back to 10.
        """
        # The earlier version replaced 'MPH' twice and never lowercase 'mph',
        # so values like "12 mph" silently became the default.
        speed = re.sub('mph', '', str(speed), flags=re.IGNORECASE).strip()
        try:
            if '-' in speed:
                low, high = speed.split('-')[:2]
                return (float(low) + float(high))/2
            return float(speed)
        except ValueError:
            return 10 # https://sciencing.com/average-daily-wind-speed-24011.html

    df['WindSpeed'] = df['WindSpeed'].apply(transform_windspeed)

    # (scale number, lower bound inclusive, upper bound exclusive)
    # NOTE(review): these bounds look like the Beaufort thresholds in m/s while
    # the parsed speeds appear to be mph -- confirm the intended unit.
    beaufort = [(0, 0, 0.3), (1, 0.3, 1.6), (2, 1.6, 3.4), (3, 3.4, 5.5), (4, 5.5, 8),
                (5, 8, 10.8), (6, 10.8, 13.9), (7, 13.9, 17.2), (8, 17.2, 20.8),
                (9, 20.8, 24.5), (10, 24.5, 28.5), (11, 28.5, 33), (12, 33, 200)]

    for item in beaufort:
        df.loc[(df['WindSpeed']>=item[1]) & (df['WindSpeed']<item[2]), 'beaufort_scale'] = item[0]

    df['DefendersInTheBox_vs_Distance'] = df['DefendersInTheBox'] / df['Distance']
    # NOTE(review): both columns were label-encoded above with separate
    # encoders, so this compares integer codes, not team abbreviations.
    df['Field_eq_Possession'] = df['FieldPosition'] == df['PossessionTeam']

    # Add BMI as a feature: formula for BMI: kg/m^2
    total_players = 22

    def get_bmi(height, weight):
        # NOTE(review): height is in inches here (see height_to_inches); the
        # usual imperial conversion factor is 703 -- confirm the 755.
        return weight / (height ** 2) * 755

    def is_rusher(x, y):
        return x == y

    for player in range(total_players):
        df[f'BMI_player{player}'] = np.vectorize(get_bmi)(df[f'PlayerHeight_player{player}'], df[f'PlayerWeight_player{player}'])
        # NOTE(review): NflId and NflIdRusher_player<k> were encoded by
        # different encoders above, so equality of codes may not mean equality
        # of the underlying ids -- confirm.
        df[f'is_rusher_player{player}'] = np.vectorize(is_rusher)(df['NflId'], df[f'NflIdRusher_player{player}'])

    # Cleaning the Turf to Natural and artificial
    # from https://www.kaggle.com/c/nfl-big-data-bowl-2020/discussion/112681#latest-649087
    Turf = {'Field Turf':'Artificial', 'A-Turf Titan':'Artificial', 'Grass':'Natural', 'UBU Sports Speed S5-M':'Artificial',
            'Artificial':'Artificial', 'DD GrassMaster':'Artificial', 'Natural Grass':'Natural',
            'UBU Speed Series-S5-M':'Artificial', 'FieldTurf':'Artificial', 'FieldTurf 360':'Artificial', 'Natural grass':'Natural', 'grass':'Natural',
            'Natural':'Natural', 'Artifical':'Artificial', 'FieldTurf360':'Artificial', 'Naturall Grass':'Natural', 'Field turf':'Artificial',
            'SISGrass':'Artificial', 'Twenty-Four/Seven Turf':'Artificial', 'natural grass':'Natural'}

    df['Turf'] = df['Turf'].map(Turf)
    df['Turf'] = df['Turf'] == 'Natural'

    def clean_StadiumType(txt):
        """Lower-case, strip punctuation and fix common misspellings."""
        if pd.isna(txt):
            return np.nan
        txt = txt.lower()
        txt = ''.join([c for c in txt if c not in punctuation])
        txt = re.sub(' +', ' ', txt)
        txt = txt.strip()
        txt = txt.replace('outside', 'outdoor')
        txt = txt.replace('outdor', 'outdoor')
        txt = txt.replace('outddors', 'outdoor')
        txt = txt.replace('outdoors', 'outdoor')
        txt = txt.replace('oudoor', 'outdoor')
        txt = txt.replace('indoors', 'indoor')
        txt = txt.replace('ourdoor', 'outdoor')
        txt = txt.replace('retractable', 'rtr.')
        return txt

    df['StadiumType'] = df['StadiumType'].apply(clean_StadiumType)

    def transform_StadiumType(txt):
        """Map a cleaned stadium type to 1 (outdoor/open), 0 (indoor/closed) or NaN."""
        if pd.isna(txt):
            return np.nan
        if 'outdoor' in txt or 'open' in txt:
            return 1
        if 'indoor' in txt or 'closed' in txt:
            return 0

        return np.nan

    df['StadiumType'] = df['StadiumType'].apply(transform_StadiumType)

    if is_training:
        return df, label_encoders
    return df

In [7]:
def processData(df, isTrain=True):
    """Run feature engineering and return model-ready NumPy arrays.

    Args:
        df: Raw frame with one row per player per play. A copy is processed,
            so the caller's frame keeps its raw values and calling this
            function twice on the same frame gives the same result.
        isTrain: True for the training set (encoders are fitted and targets
            are built), False for inference on test plays.

    Returns:
        ``(X, y)`` when ``isTrain`` is True, where ``X`` is the feature matrix
        and ``y`` is a one-hot array with 199 columns (index ``99 + Yards``).
        Only ``X`` when ``isTrain`` is False.
    """
    non_feature_cols = ['GameId', 'PlayId'] + get_player_specific_cols(['DisplayName', 'JerseyNumber','NflIdRusher'])
    target_col = ['Yards']
    # Engineered columns that are not fed to the model.
    dropped_feature_cols = ['NflId', 'StadiumType', 'NflIdRusher'] + get_player_specific_cols(['Team'])

    # feature_engineering overwrites columns of the frame it receives.
    df = df.copy()

    if isTrain:
        df, label_encoders = feature_engineering(df)
        # Keep the fitted encoders so inference can pass them explicitly
        # instead of depending on feature_engineering's shared default dict.
        processData._label_encoders = label_encoders

        X_train = df.drop(non_feature_cols + target_col + dropped_feature_cols, axis=1)
        X = X_train.to_numpy()

        yards = df[target_col].to_numpy().flatten()
        # One-hot target over the 199 possible outcomes (-99 .. +99 yards).
        y = np.zeros((yards.shape[0], 199))
        for idx, target in enumerate(yards):
            y[idx][99 + target] = 1
        return X, y

    encoders = getattr(processData, '_label_encoders', None)
    if encoders is None:
        # No training call has run through this function object yet; fall
        # back to feature_engineering's own default encoders.
        df = feature_engineering(df, False)
    else:
        df = feature_engineering(df, False, encoders)
    X_test = df.drop(non_feature_cols + dropped_feature_cols, axis=1)
    return X_test.to_numpy()
    


In [None]:
%%time
# Build the model inputs from the training frame: X is the feature matrix,
# y the one-hot yardage target returned by processData.
X,y = processData(train_df)

  0%|          | 16/23171 [00:00<02:28, 155.61it/s]

In [None]:
# Standardise the features; the fitted scaler is applied again to the test plays later.
# NOTE(review): X is overwritten with its scaled version, so re-running this
# cell alone would fit a new scaler on already-scaled data.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
from keras.layers import Dense,Input,Flatten,concatenate,Dropout,Lambda
from keras.models import Model
import keras.backend as K
import re
from keras.losses import binary_crossentropy
from  keras.callbacks import EarlyStopping,ModelCheckpoint
import codecs

from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
from sklearn.metrics import f1_score




class CRPSCallback(Callback):
    """Keras callback that reports the validation CRPS after every epoch.

    The score is written to ``logs['CRPS_score_val']`` so that callbacks
    placed *after* this one in the callback list (early stopping, model
    checkpointing) can monitor it.

    Args:
        validation: ``(X_valid, y_valid)`` with ``y_valid`` one-hot over the
            199 yardage classes. A falsy value disables scoring.
        predict_batch_size: Stored on the instance; the prediction call in
            ``on_epoch_end`` does not currently use it.
        include_on_batch: When True, a placeholder score is also written at
            the end of every batch.
    """

    def __init__(self,validation, predict_batch_size=20, include_on_batch=False):
        super(CRPSCallback, self).__init__()
        self.validation = validation
        self.predict_batch_size = predict_batch_size
        self.include_on_batch = include_on_batch

        # Report the shape of the validation features. The guard keeps a
        # missing validation set from failing here, matching on_epoch_end.
        if self.validation:
            print('validation shape', np.shape(self.validation[0]))
        else:
            print('validation shape', None)

    def on_batch_begin(self, batch, logs=None):
        pass

    def on_train_begin(self, logs=None):
        # Register the metric name where the running Keras version exposes a
        # metrics list in params; skip quietly where it does not.
        metrics = self.params.get('metrics') if self.params else None
        if metrics is not None and 'CRPS_score_val' not in metrics:
            metrics.append('CRPS_score_val')

    def on_batch_end(self, batch, logs=None):
        if self.include_on_batch and logs is not None:
            logs['CRPS_score_val'] = float('-inf')

    def on_epoch_end(self, epoch, logs=None):
        if logs is None:
            logs = {}
        # Placeholder in case no validation data was supplied.
        # NOTE(review): the monitors in get_model use mode='min', for which
        # -inf would look like a perfect score -- confirm the placeholder.
        logs['CRPS_score_val'] = float('-inf')

        if (self.validation):
            X_valid, y_valid = self.validation[0], self.validation[1]
            y_pred = self.model.predict(X_valid)
            # Turn class probabilities / one-hot targets into cumulative
            # distributions, then average the squared differences.
            y_true = np.clip(np.cumsum(y_valid, axis=1), 0, 1)
            y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
            val_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * X_valid.shape[0])
            val_s = np.round(val_s, 6)
            logs['CRPS_score_val'] = val_s

In [None]:
from keras.models import Sequential
from keras.losses import categorical_crossentropy
def get_model(x_tr,y_tr,x_val,y_val):
    """Train one dense network on a fold and score it on the held-out part.

    The network has five hidden blocks (1024, 1024, 512, 512, 256 units), each
    a Dense layer followed by LeakyReLU and Dropout. Batch normalisation
    follows the last block, and a 199-way softmax output ends the stack.
    Training stops early on the validation CRPS reported by ``CRPSCallback``.

    Args:
        x_tr, y_tr: Training features and one-hot targets.
        x_val, y_val: Validation features and one-hot targets.

    Returns:
        ``(model, crps)``: the trained model and its validation CRPS rounded
        to six decimals.
    """
    IN_DIM = x_tr.shape[1]
    hidden_units = (1024, 1024, 512, 512, 256)

    model = Sequential()
    for layer_idx, units in enumerate(hidden_units):
        if layer_idx == 0:
            model.add(Dense(units, input_dim = IN_DIM))
        else:
            model.add(Dense(units))
        model.add(LeakyReLU(alpha=0.3))
        model.add(Dropout(0.5))
    model.add(BatchNormalization())

    model.add(Dense(199, activation='softmax'))

    model.compile(optimizer='adam', loss=categorical_crossentropy,metrics=[])

    es = EarlyStopping(monitor='CRPS_score_val',
                       mode='min',
                       restore_best_weights=True,
                       verbose=1,
                       patience=2)

    mc = ModelCheckpoint('best_model.h5',monitor='CRPS_score_val',mode='min',
                                   save_best_only=True, verbose=1, save_weights_only=True)

    bsz = 1024
    # CRPSCallback must come first: it writes the 'CRPS_score_val' entry that
    # the early-stopping and checkpoint callbacks read from the same logs.
    model.fit(x_tr,y_tr,callbacks=[CRPSCallback(validation = (x_val,y_val)),es,mc], epochs=1000, batch_size=bsz)

    # Score the returned model on the validation fold (same formula as in
    # CRPSCallback.on_epoch_end).
    y_pred = model.predict(x_val)
    y_true = np.clip(np.cumsum(y_val, axis=1), 0, 1)
    y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
    val_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (199 * x_val.shape[0])
    crps = np.round(val_s, 6)

    return model,crps

In [None]:
from sklearn.model_selection import train_test_split, KFold
import time

losses, models, crps_csv = [], [], []

s_time = time.time()

# Two repetitions of shuffled 5-fold cross-validation, each repetition with
# its own seed; every fold contributes one trained model to `models`.
for k in range(2):
    kfold = KFold(n_splits=5, shuffle=True, random_state=42 + k)
    for k_fold, (tr_inds, val_inds) in enumerate(kfold.split(X)):
        print("-----------")
        print("-----------")
        tr_x, val_x = X[tr_inds], X[val_inds]
        tr_y, val_y = y[tr_inds], y[val_inds]
        model, crps = get_model(tr_x, tr_y, val_x, val_y)
        models.append(model)
        print("the %d fold crps is %f"%((k_fold+1),crps))
        crps_csv.append(crps)
 
# print("mean crps is %f"%np.mean(crps_csv))


def predict(x_te, model_list=None):
    """Average the predictions of an ensemble of models.

    Args:
        x_te: Feature matrix passed unchanged to every model's ``predict``.
        model_list: Models to average. Defaults to the notebook-level
            ``models`` list filled by the cross-validation loop.

    Returns:
        The element-wise mean of the individual model predictions.

    Raises:
        ValueError: If there is no model to predict with.
    """
    ensemble = models if model_list is None else model_list
    if len(ensemble) == 0:
        raise ValueError("predict() needs at least one trained model")

    total = None
    for m in ensemble:
        fold_pred = m.predict(x_te, batch_size=1024)
        total = fold_pred if total is None else total + fold_pred

    return total / len(ensemble)

In [None]:
# Mean validation CRPS over all trained folds.
print("mean crps is %f"%np.mean(crps_csv))

In [None]:
%%time
from kaggle.competitions import nflrush
# Create the competition environment and the iterator that yields
# (test_df, sample_prediction_df) pairs for the prediction loop.
env = nflrush.make_env()
iter_test = env.iter_test()

In [None]:
# Predict each (test_df, sample_prediction_df) pair yielded by iter_test.
for (test_df, sample_prediction_df) in iter_test:
    basetable = processData(test_df,False)
    scaled_basetable = scaler.transform(basetable)
    # Features that could not be encoded (e.g. labels unseen in training) are
    # NaN and would turn the whole prediction into NaN. After standardisation
    # 0 is the training mean, so use it as the neutral fill value.
    scaled_basetable = np.where(np.isnan(scaled_basetable), 0.0, scaled_basetable)

    y_pred = predict(scaled_basetable)
    # Class probabilities -> cumulative distribution over the 199 outcomes.
    y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1).tolist()[0]

    preds_df = pd.DataFrame(data=[y_pred], columns=sample_prediction_df.columns)
    env.predict(preds_df)

env.write_submission_file()

In [None]:
# processData returns plain NumPy arrays (no column labels), so compare the
# shapes of the test and training matrices instead of their column names.
print("test:", processData(test_df,False).shape)
print("train:", X.shape)


In [None]:
# Missing values per feature column; pd.isna works on the NumPy array
# returned by processData (ndarray has no .isna method).
pd.isna(processData(test_df,False)).sum(axis=0)

In [None]:
# X is a NumPy array (ndarray has no .head method); show its first row by slicing.
X[:1]

In [None]:
# prd = processData(test_df,False)
# print(basetable.shape)
# scaled_basetable = scaler.transform(basetable)
# print(basetable)
# predict()

# Debug cell: rebuild and scale the features for the current test_df, then
# display the unscaled feature array. The prediction steps are commented out.
basetable = processData(test_df,False)

# print(basetable.shape)
scaled_basetable = scaler.transform(basetable)
# print(basetable)

# y_pred = predict(scaled_basetable)
# y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1).tolist()[0]

# preds_df = pd.DataFrame(data=[y_pred], columns=sample_prediction_df.columns)
# preds_df
basetable

In [None]:
# Display the current sample_prediction_df (the submission template for one play).
sample_prediction_df

# Evaluate

In [None]:
%%time
from kaggle.competitions import nflrush
# NOTE(review): this repeats the environment setup done in an earlier cell and
# rebinds `env` / `iter_test`. The competition API may not allow a second
# make_env() call in the same kernel -- confirm, and keep only one of the two
# setup/prediction sections.
env = nflrush.make_env()
iter_test = env.iter_test()

In [None]:
# Predict each (test_df, sample_prediction_df) pair yielded by iter_test.
for (test_df, sample_prediction_df) in iter_test:
    basetable = processData(test_df,False)
    scaled_basetable = scaler.transform(basetable)
    # Features that could not be encoded (e.g. labels unseen in training) are
    # NaN and would turn the whole prediction into NaN. After standardisation
    # 0 is the training mean, so use it as the neutral fill value.
    scaled_basetable = np.where(np.isnan(scaled_basetable), 0.0, scaled_basetable)

    y_pred = predict(scaled_basetable)
    # Class probabilities -> cumulative distribution over the 199 outcomes.
    y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1).tolist()[0]

    preds_df = pd.DataFrame(data=[y_pred], columns=sample_prediction_df.columns)
    env.predict(preds_df)

env.write_submission_file()