In [1]:
from kaggle.competitions import nflrush
import pandas as pd
import keras
from keras import models
from keras import layers
import numpy as np
import datetime

# You can only call make_env() once, so don't lose it!
env = nflrush.make_env()
train_df = pd.read_csv('/kaggle/input/nfl-big-data-bowl-2020/train.csv', low_memory=False)

Using TensorFlow backend.


In [18]:
def standardization(k, m):
    if k not in m:
        m[k] = len(m) + 1
    return m[k]

NflId, Team, OffenseFormation, OffensePersonnel, DefensePersonnel, PlayDirection, Position, Stadium, \
GameWeather, WindDirection, PlayerCollegeName = {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}

def process(train_df, mean = None, std = None, pred = False):
    plays = int(train_df.shape[0] / 22)
    play, attr = 0, 15
    X = np.zeros((plays, attr * 22 + 25))
    Y = np.zeros((plays, 199))
    
    for index, row in train_df.iterrows():
        if play % 22 == 0:
            play = 0
        
        PlayerHeight = row['PlayerHeight'].split('-')
        TimeHandoff = datetime.datetime.strptime(row['TimeHandoff'], '%Y-%m-%dT%H:%M:%S.%fZ')
        TimeSnap = datetime.datetime.strptime(row['TimeSnap'], '%Y-%m-%dT%H:%M:%S.%fZ')
        PlayerBirthDate = datetime.datetime.strptime(row['PlayerBirthDate'], '%m/%d/%Y')
        GameClock = row['GameClock'].split(':')
        WindSpeed = row['WindSpeed']
        if isinstance(WindSpeed, str) and not WindSpeed.isnumeric():
            WindSpeed = WindSpeed.lower().replace(' ', '').replace('mph', '')
        if isinstance(WindSpeed, str) and not WindSpeed.isnumeric():
            WindSpeed = WindSpeed.split('-')
            if len(WindSpeed) == 2 and WindSpeed[0].isnumeric() and WindSpeed[1].isnumeric():
                WindSpeed = (int(WindSpeed[0]) + int(WindSpeed[1])) / 2
            else:
                WindSpeed = 0
        
        X[int(index / 22)][attr * play + 0] = standardization(row['NflId'], NflId)
        X[int(index / 22)][attr * play + 1] = standardization(row['HomeTeamAbbr'] if row['Team'] == 'home' else row['VisitorTeamAbbr'], Team)
        X[int(index / 22)][attr * play + 2] = row['X']
        X[int(index / 22)][attr * play + 3] = row['Y']
        X[int(index / 22)][attr * play + 4] = row['S']
        X[int(index / 22)][attr * play + 5] = row['A']
        X[int(index / 22)][attr * play + 6] = row['Dis']
        X[int(index / 22)][attr * play + 7] = row['Orientation']
        X[int(index / 22)][attr * play + 8] = row['Dir']
        X[int(index / 22)][attr * play + 9] = 1 if row['NflId'] == row['NflIdRusher'] else 0
        X[int(index / 22)][attr * play + 10] = int(PlayerHeight[0]) * 12 + int(PlayerHeight[1])
        X[int(index / 22)][attr * play + 11] = row['PlayerWeight']
        X[int(index / 22)][attr * play + 12] = (TimeHandoff - PlayerBirthDate).days
        X[int(index / 22)][attr * play + 13] = standardization(row['PlayerCollegeName'], PlayerCollegeName)
        X[int(index / 22)][attr * play + 14] = standardization(row['Position'], Position)
        X[int(index / 22)][attr * 22 + 0] = row['YardLine']
        X[int(index / 22)][attr * 22 + 1] = row['Quarter']
        X[int(index / 22)][attr * 22 + 2] = int(GameClock[0]) * 60 + int(GameClock[1])
        X[int(index / 22)][attr * 22 + 3] = standardization(row['PossessionTeam'], Team)
        X[int(index / 22)][attr * 22 + 4] = row['Down']
        X[int(index / 22)][attr * 22 + 5] = row['Distance']
        X[int(index / 22)][attr * 22 + 6] = standardization(row['FieldPosition'], Team)
        X[int(index / 22)][attr * 22 + 7] = row['HomeScoreBeforePlay']
        X[int(index / 22)][attr * 22 + 8] = row['VisitorScoreBeforePlay']
        X[int(index / 22)][attr * 22 + 9] = standardization(row['OffenseFormation'], OffenseFormation)
        X[int(index / 22)][attr * 22 + 10] = standardization(row['OffensePersonnel'], OffensePersonnel)
        X[int(index / 22)][attr * 22 + 11] = row['DefendersInTheBox']
        X[int(index / 22)][attr * 22 + 12] = standardization(row['DefensePersonnel'], DefensePersonnel)
        X[int(index / 22)][attr * 22 + 13] = standardization(row['PlayDirection'], PlayDirection)
        X[int(index / 22)][attr * 22 + 14] = (TimeHandoff - TimeHandoff.replace(hour=0, minute=0, second=0, microsecond=0)).total_seconds()
        X[int(index / 22)][attr * 22 + 15] = (TimeHandoff - TimeSnap).total_seconds()
        X[int(index / 22)][attr * 22 + 16] = standardization(row['HomeTeamAbbr'], Team)
        X[int(index / 22)][attr * 22 + 17] = standardization(row['VisitorTeamAbbr'], Team)
        X[int(index / 22)][attr * 22 + 18] = row['Week']
        X[int(index / 22)][attr * 22 + 19] = standardization(row['Stadium'], Stadium)
        X[int(index / 22)][attr * 22 + 20] = standardization(row['GameWeather'], GameWeather)
        X[int(index / 22)][attr * 22 + 21] = row['Temperature']
        X[int(index / 22)][attr * 22 + 22] = row['Humidity']
        X[int(index / 22)][attr * 22 + 23] = WindSpeed
        X[int(index / 22)][attr * 22 + 24] = standardization(row['WindDirection'], WindDirection)
        
        play += 1
        
        if not pred:
            Y[int(index / 22)][row['Yards'] + 99] = 1.0
            
    if mean == None:
        mean = np.mean(X, axis = 0)
    if std == None:
        std = np.std(X, axis = 0)
    X = (X - mean) / std
    train_X, train_Y, dev_X, dev_Y = X, Y, None, None
    if not pred:
        train_X = X[:int(len(X) * 0.9)]
        train_Y = Y[:int(len(Y) * 0.9)]
        dev_X = X[int(len(X) * 0.9) + 1:]
        dev_Y = Y[int(len(Y) * 0.9) + 1:]
    return train_X, train_Y, dev_X, dev_Y, mean, std

In [17]:
def train_my_model(train_df):
    train_X, train_Y, dev_X, dev_Y, mean, std = process(train_df)
    model = models.Sequential()
    model.add(layers.Dense(512, activation='relu',input_shape=(train_X.shape[1],)))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(199, activation='softmax'))
    model.compile(optimizer = 'rmsprop',loss = 'categorical_crossentropy', metrics = ['categorical_accuracy'])
    history = model.fit(train_X, train_Y, validation_data = (dev_X, dev_Y), epochs = 20, batch_size = 32)
    return model, mean, std, history
model, mean, std, history = train_my_model(train_df)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


Train on 20853 samples, validate on 2317 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
import matplotlib.pyplot as plt
val_acc = history.history['val_mean_squared_error']
acc = history.history['mean_squared_error']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

In [4]:
def make_my_predictions(test_df, sample_prediction_df):
    df, _, _, _, _, _ = process(test_df, mean = mean, std = std, pred = True)
    pdf = model.predict(df)
    predictions_df = np.zeros(200)
    predictions_df[0] = test_df[1]
    for p in range(199):
        predictions_df[p + 1] = pdf[p] if p == 0 else pdf[p - 1] + pdf[p]
    return prediction_df

In [5]:
for (test_df, sample_prediction_df) in env.iter_test():
  predictions_df = make_my_predictions(test_df, sample_prediction_df)
  env.predict(predictions_df)

env.write_submission_file()

(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(

(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(

(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(

(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(22, 48)
(