In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import average_precision_score

# keras functionality
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import Callback, EarlyStopping
from tensorflow.keras.layers import (Activation, Add, Average, AvgPool1D, AvgPool2D, BatchNormalization, Concatenate, Conv1D, Conv2D,
                                     Dense, Dropout, Flatten, Input, Lambda, LayerNormalization, MaxPool1D, MaxPool2D, Reshape)

# Read in data

In [2]:
# Load the precomputed feature tensor from disk; the path is relative to the
# notebook's working directory.
od_feats = np.load('../data/offense_vs_defensive_features.npy')

In [3]:
od_feats.shape

(31007, 11, 11, 5)

In [4]:
# Load the outcome table (GameId, PlayId, Yards) that supplies the prediction
# target. NOTE(review): rows are presumably aligned with the first axis of
# od_feats -- confirm against the code that produced both files.
Yards = pd.read_csv('../data/yard_outcomes.csv')

In [5]:
Yards.head()

Unnamed: 0,GameId,PlayId,Yards
0,2017090700,20170907000118,8
1,2017090700,20170907000139,3
2,2017090700,20170907000189,5
3,2017090700,20170907000345,2
4,2017090700,20170907000395,7


# Create Dependent Variable

Create a class index from 0 to 198 for the yards gained on each play: index 0 corresponds to a 99-yard loss and index 198 to a 99-yard gain (index = yards + 99). For reasons that are not obvious, many public kernels capped the predicted index between 71 and 150 (a 28-yard loss to a 51-yard gain). The worst rush in this data lost only 15 yards, so that lower cap would never take effect. This notebook keeps the full 0-198 range (`min_idx_y = 0`, `max_idx_y = 198`), which gives 199 classes.

In [36]:
# Shift yards so every class index is non-negative: index = yards + 99
# (index 0 is a 99-yard loss, index 198 is a 99-yard gain).
# Vectorized column arithmetic replaces the per-row lambda.
Yards['YardIndex'] = Yards['Yards'] + 99

# Inclusive bounds of the class-index range used as model targets.
min_idx_y = 0
max_idx_y = 198
num_classes = max_idx_y - min_idx_y + 1

# Clamp every index into [min_idx_y, max_idx_y] in one vectorized call.
Yards['YardIndexClipped'] = Yards['YardIndex'].clip(lower=min_idx_y, upper=max_idx_y)

print(num_classes)

199


In [37]:
Yards.head()

Unnamed: 0,GameId,PlayId,Yards,YardIndex,YardIndexClipped
0,2017090700,20170907000118,8,107,107
1,2017090700,20170907000139,3,102,102
2,2017090700,20170907000189,5,104,104
3,2017090700,20170907000345,2,101,101
4,2017090700,20170907000395,7,106,106


In [38]:
# Clipped class index of every play, as a NumPy array.
Y = Yards['YardIndexClipped'].values

# One-hot encode the targets: row i gets a single 1 in column (Y[i] - min_idx_y).
# A single fancy-index assignment replaces the Python-level loop, and the array
# is allocated as float32 directly instead of being cast afterwards.
y = np.zeros((len(Y), num_classes), dtype='float32')
y[np.arange(len(Y)), Y - min_idx_y] = 1

# Standardize Features

In [10]:
# Z-score each (player-row, player-column, feature) cell across plays (axis 0).
# NOTE(review): the mean/std are computed over ALL plays, including those later
# assigned to validation and test, so held-out data leaks into the scaling
# statistics. Fitting them on the training rows only requires moving this step
# after the split.
feat_mean = od_feats.mean(axis=0)
feat_std = od_feats.std(axis=0)

# A cell that is constant across plays has std == 0; dividing by it would give
# NaN (0/0). Use 1 there so such cells standardize to 0 instead.
feat_std[feat_std == 0] = 1

X = (od_feats - feat_mean) / feat_std

# Split data into train/val/test

In [11]:
# Split proportions: 80% of plays go to training; the remainder is divided
# evenly between validation and test.
train_perc = 0.80
val_test_perc = 0.50

N = len(Yards)

train_N = int(train_perc * N)
val_N = int(val_test_perc * (N - train_N))
# Whatever is left after train and validation becomes the test set, so the
# three sizes always sum to N.
test_N = N - train_N - val_N

train_N, val_N, test_N

(24805, 3101, 3101)

In [12]:
# Fixed seed so that Restart & Run All reproduces exactly the same split
# (the previous unseeded draws produced a different split on every run).
SPLIT_SEED = 42
split_rng = np.random.default_rng(SPLIT_SEED)

# Shuffle all row labels once and cut the permutation into three disjoint
# slices. This replaces the `x not in <ndarray>` comprehensions, which rescanned
# the whole index array for every row.
# NOTE(review): the labels are later used as positional indices into X and y,
# which assumes Yards has the default 0..N-1 index -- confirm.
shuffled_indx = split_rng.permutation(Yards.index.to_numpy())

train_indx = shuffled_indx[:train_N]
val_indx = shuffled_indx[train_N:train_N + val_N]
# Everything not used for train/validation; now an ndarray rather than a list,
# which indexes X and y the same way.
test_indx = shuffled_indx[train_N + val_N:]

print(len(train_indx), len(val_indx), len(test_indx))

24805 3101 3101


In [39]:
# Carve the standardized features and the one-hot targets into the three
# partitions, using the same row selections for both arrays.
X_train, X_val, X_test = (X[rows] for rows in (train_indx, val_indx, test_indx))
y_train, y_val, y_test = (y[rows] for rows in (train_indx, val_indx, test_indx))

# Sanity check: feature and target row counts must match within each partition.
print((X_train.shape, y_train.shape), (X_val.shape, y_val.shape), (X_test.shape, y_test.shape))

((24805, 11, 11, 5), (24805, 199)) ((3101, 11, 11, 5), (3101, 199)) ((3101, 11, 11, 5), (3101, 199))


# Define Model Architecture

In [48]:
# Reset Keras' global state before (re)building the model.
K.clear_session()

# One play = 11 offensive players x 11 defensive players x 5 features.
inputs = Input(shape=(11, 11, 5))

# Stage 1: pointwise (1x1) convolutions applied to every player-pair cell.
net = inputs
for n_filters in (128, 160, 128):
    net = Conv2D(n_filters, kernel_size=(1, 1), activation='relu')(net)

# Collapse the first player axis with a 0.7 / 0.3 blend of average and max pooling.
avg_branch = AvgPool2D(pool_size=(11, 1))(net)
avg_branch = Lambda(lambda t: t * 0.7)(avg_branch)
max_branch = MaxPool2D(pool_size=(11, 1))(net)
max_branch = Lambda(lambda t: t * 0.3)(max_branch)

net = Add()([avg_branch, max_branch])
net = Reshape((11, 128))(net)
net = BatchNormalization()(net)

# Stage 2: pointwise 1-D convolutions over the remaining player axis, each
# followed by batch normalization.
for n_filters in (160, 96, 96):
    net = Conv1D(n_filters, kernel_size=1, strides=1, activation='relu')(net)
    net = BatchNormalization()(net)

# Collapse the remaining player axis with an equal-weight mean of average and
# max pooling.
avg_branch = AvgPool1D(pool_size=11)(net)
max_branch = MaxPool1D(pool_size=11)(net)
net = Average()([avg_branch, max_branch])
net = Flatten()(net)

# Stage 3: fully connected head with dropout before the classifier.
for n_units in (96, 256):
    net = Dense(n_units, activation='relu')(net)
    net = BatchNormalization()(net)
net = Dropout(0.3)(net)

# Softmax over the yard-index classes.
output = Dense(num_classes, activation='softmax')(net)

model = Model(inputs=inputs, outputs=output)

In [49]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 11, 11, 5)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 11, 11, 128)  768         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 11, 11, 160)  20640       conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 11, 11, 128)  20608       conv2d_1[0][0]                   
______________________________________________________________________________________________

#### Define Callbacks and Early Stopping

In [50]:
def crps(y_true, y_pred):
    """Continuous Ranked Probability Score used as the training loss.

    Both inputs are turned into cumulative distributions along the class axis
    (axis 1); the score is the mean squared gap between the two CDFs.

    Args:
        y_true: one-hot targets, shape (batch, n_classes).
        y_pred: predicted class probabilities, same shape as ``y_true``.

    Returns:
        Scalar tensor: squared CDF difference averaged over classes and over
        the batch.
    """
    cdf_gap = K.cumsum(y_pred, axis=1) - K.cumsum(y_true, axis=1)
    # Averaging over the class axis normalizes by the actual number of classes
    # instead of a hard-coded 199, so the loss stays correct if the class range
    # changes.
    per_play = K.mean(cdf_gap ** 2, axis=1)
    return K.mean(per_play)

In [51]:
class Metric(Callback):
    """Compute validation CRPS after each epoch and feed it to wrapped callbacks.

    The score is written into ``logs['val_CRPS']`` and the wrapped callbacks
    (e.g. an ``EarlyStopping`` monitoring ``'val_CRPS'``) are then invoked with
    those logs, so they see the metric even though Keras did not compute it.

    Args:
        model: the Keras model being trained; used for validation predictions.
        callbacks: callbacks to forward train-begin, train-end and epoch-end
            events to. They must already be bound to the model by the caller.
        data: sequence ``[X_valid, y_valid]`` with validation features and
            one-hot targets.
    """

    def __init__(self, model, callbacks, data):
        super().__init__()
        # Attach the model via set_model() rather than assigning self.model:
        # on Keras versions where `model` is a property without a setter, a
        # direct assignment raises AttributeError.
        self.set_model(model)
        self.callbacks = callbacks
        self.data = data

    def on_train_begin(self, logs=None):
        """Forward the train-begin event to every wrapped callback."""
        for callback in self.callbacks:
            callback.on_train_begin(logs)

    def on_train_end(self, logs=None):
        """Forward the train-end event to every wrapped callback."""
        for callback in self.callbacks:
            callback.on_train_end(logs)

    def on_epoch_end(self, batch, logs=None):
        """Score the validation set and pass the result to wrapped callbacks.

        Args:
            batch: value Keras passes to ``on_epoch_end`` (the epoch index);
                forwarded unchanged. The name is kept for compatibility.
            logs: metrics dict for this epoch; ``'val_CRPS'`` is added to it.
        """
        # Keras normally supplies a dict, but guard against a direct call
        # with logs=None so the assignment below cannot fail.
        logs = {} if logs is None else logs
        X_valid, y_valid = self.data[0], self.data[1]
        # Normalize by the actual target width instead of a hard-coded 199.
        n_classes = np.shape(y_valid)[1]

        y_pred = self.model.predict(X_valid)
        # Cumulative distributions, clipped to [0, 1] to absorb rounding drift.
        y_true = np.clip(np.cumsum(y_valid, axis=1), 0, 1)
        y_pred = np.clip(np.cumsum(y_pred, axis=1), 0, 1)
        val_s = ((y_true - y_pred) ** 2).sum(axis=1).sum(axis=0) / (n_classes * X_valid.shape[0])
        logs['val_CRPS'] = val_s

        for callback in self.callbacks:
            callback.on_epoch_end(batch, logs)

In [52]:
# Optimizer handed to model.compile().
opt = Adam(learning_rate=0.001)

# Stop once 'val_CRPS' has not improved for 10 epochs and roll the model back
# to its best weights.
es = EarlyStopping(
    patience=10,
    monitor='val_CRPS',
    mode='min',
    restore_best_weights=True,
    verbose=0,
)

# The early-stopping callback is not passed to fit() itself; it is driven by the
# Metric wrapper, so it has to be bound to the model by hand.
es.set_model(model)

metric = Metric(model=model, callbacks=[es], data=[X_val, y_val])

In [53]:
# Train directly on the CRPS loss defined above, using the Adam optimizer.
model.compile(loss=crps, optimizer=opt)

In [54]:
# Training hyperparameters passed to model.fit().
n_epochs = 30  # upper bound on epochs; early stopping may end training sooner
batch_size = 64  # samples per gradient update

In [55]:
%%time
# Train the model. The `metric` callback computes 'val_CRPS' after each epoch
# and forwards it to the wrapped EarlyStopping; validation_data additionally
# makes Keras report the compiled loss on the validation set.
model.fit(X_train,
          y_train, 
          epochs=n_epochs,
          batch_size=batch_size,
          verbose=1,
          callbacks=[metric],
          validation_data=(X_val, y_val))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Wall time: 12min 26s


<keras.callbacks.History at 0x297c96ad130>

In [56]:
# Predicted class probabilities for each partition, computed in the order
# train, validation, test.
train_preds, val_preds, test_preds = (
    model.predict(split) for split in (X_train, X_val, X_test)
)

In [60]:
y_train[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [62]:
np.cumsum(train_preds[0])

array([9.5160913e-06, 2.0239346e-05, 3.3851735e-05, 4.0540428e-05,
       4.7648700e-05, 5.4460943e-05, 5.8603142e-05, 6.4378459e-05,
       6.9419373e-05, 7.7327051e-05, 8.2415718e-05, 8.8928617e-05,
       9.7560915e-05, 1.0596512e-04, 1.1286179e-04, 1.1923726e-04,
       1.2739503e-04, 1.3937705e-04, 1.4929849e-04, 1.5877988e-04,
       1.6632696e-04, 1.7679312e-04, 1.8471695e-04, 1.9585979e-04,
       2.0419930e-04, 2.1189929e-04, 2.2178219e-04, 2.2862764e-04,
       2.3792090e-04, 2.4660741e-04, 2.5293554e-04, 2.6172507e-04,
       2.6695593e-04, 2.7768739e-04, 2.8527298e-04, 2.9880766e-04,
       3.0596918e-04, 3.2741239e-04, 3.3565558e-04, 3.4112041e-04,
       3.4886069e-04, 3.5478570e-04, 3.6125057e-04, 3.6931885e-04,
       3.7335014e-04, 3.8241199e-04, 3.8895680e-04, 3.9576154e-04,
       4.0471798e-04, 4.1210846e-04, 4.1917808e-04, 4.2825751e-04,
       4.3252265e-04, 4.3724268e-04, 4.4187889e-04, 4.4837943e-04,
       4.5370750e-04, 4.7971026e-04, 4.9049221e-04, 4.9944251e

In [57]:
# CRPS on the training partition.
float(crps(y_train, train_preds))

0.013064509257674217

In [58]:
# CRPS on the validation partition (the data early stopping monitored).
float(crps(y_val, val_preds))

0.013194269500672817

In [59]:
# CRPS on the held-out test partition.
float(crps(y_test, test_preds))

0.013321706093847752