# Kaggle
## Competition NFL Big Data Bowl

In [None]:
# Carregando os pacotes
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Statistic lib
from scipy import stats
from scipy.stats import skew, norm

# Sklearn lib
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder

# Modelos de Regressao
from xgboost import XGBRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from mlxtend.regressor import StackingCVRegressor
from lightgbm import LGBMRegressor
import lightgbm as lgb
import xgboost as XGB
import tqdm

# Model Keras (NN)
import keras
from keras import backend as K
from keras.callbacks import EarlyStopping
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Embedding, Concatenate, Flatten, BatchNormalization, Dropout, Activation, PReLU, Add
from keras.callbacks import ModelCheckpoint
from keras.utils.vis_utils import plot_model
from keras import metrics

# Misc lib
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import RepeatedKFold
from functools import partial
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from IPython.display import Image

# Utils
import pandasql as ps
import re 
import math, string, os
import datetime

# Options
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the ML libraries - confirm that is intended.
warnings.filterwarnings('ignore')
# Raise the pandas display limits so long sequences and wide frames are not truncated when shown.
pd.options.display.max_seq_items = 8000
pd.options.display.max_rows = 8000
pd.set_option('display.max_columns', None)
import gc
# Make sure automatic garbage collection is switched on.
gc.enable()

In [None]:
# Load the training data from the local copy of the competition file
train = pd.read_csv('../data/train_stage2.csv', low_memory=False)
# Alternative location, kept for runs inside a Kaggle kernel:
#train = pd.read_csv('/kaggle/input/nfl-big-data-bowl-2020/train.csv', low_memory=False)
# Confirmation message (Portuguese for "Dataset loaded")
print ("Dataset carregado !!")

# Feature Engineering

In [None]:
# Feature engineering for a raw tracking dataset (train or test)
def feature_engineering(df):
    """Turn raw per-player rows into a numeric, play-ordered feature frame.

    The caller's frame is never modified: all work happens on a copy, so the
    function can safely be called more than once on the same raw data.

    Steps:
      1. Flag the rusher row (``NflId == NflIdRusher``) and convert ``Team``
         into a boolean that is True for the home team.
      2. Drop every column whose dtype is ``object``.
      3. Sort by ``PlayId``, ``Team`` and ``IsRusher`` and renumber the index.
      4. Drop the identifier/helper columns used only for ordering.
      5. Fill remaining NaNs with the per-column median of this frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``GameId``, ``PlayId``, ``Team`` (strings), ``NflId``
        and ``NflIdRusher``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the remaining non-object columns.
    """
    # Work on a copy: the original implementation overwrote `Team` in the
    # caller's frame, which made a second call crash on `x.strip()`.
    df = df.copy()

    # True for the player who carries the ball on this play
    df['IsRusher'] = df['NflId'] == df['NflIdRusher']

    # Encode the team as a boolean: True for 'home'
    df['Team'] = df['Team'].apply(lambda x: x.strip() == 'home')

    # Remove all object-typed (categorical/text) columns
    cat_features = [col for col in df.columns if df[col].dtype == 'object']
    df = df.drop(cat_features, axis=1)

    # Order rows by play, team and rusher flag, then rebuild a clean 0..n-1 index
    df = df.sort_values(by=['PlayId', 'Team', 'IsRusher']).reset_index(drop=True)

    # Drop the columns that were only needed for flagging and ordering
    df = df.drop(['GameId', 'PlayId', 'IsRusher', 'Team', 'NflId', 'NflIdRusher'], axis=1)

    # Impute missing values with the column medians
    return df.fillna(df.median())

In [None]:
# Build the model-ready frame by applying feature engineering to the training data
train_df = feature_engineering(train)
# Row count of the engineered frame; divided by 22 further down to get one row per play
ind_train = len(train_df)

# Criação e Validação dos Modelos de ML

In [None]:
# Force a garbage-collection pass to free memory
gc.collect()

In [None]:
# Build one row per play (the sort in feature_engineering puts the rusher flag last).
# A column counts as per-player when its values vary within the first 22 rows.
players_col = [
    name for name in train_df.columns
    if train_df[name][:22].std() != 0
]

# Flatten every group of 22 rows into a single feature vector
X_train = np.array(train_df[players_col]).reshape(ind_train // 22, -1)

In [None]:
# Every remaining column except the target is read once per play,
# from the first row of each 22-row group.
play_col = train_df.drop(columns=players_col + ['Yards']).columns
X_play_col = np.zeros((X_train.shape[0], len(play_col)))
for position, name in enumerate(play_col):
    X_play_col[:, position] = train_df[name].iloc[::22].values

In [None]:
# Append the per-play columns to the flattened per-player features
X_train = np.concatenate([X_train, X_play_col], axis=1)

# Target: a 199-step cumulative indicator per play, 0 before index (yards + 99) and 1 from there on
y_train = np.zeros(shape=(X_train.shape[0], 199))

for row, gained in enumerate(train_df['Yards'][::22]):
    y_train[row, gained + 99:] = 1.0

In [None]:
# Names exported if this code is star-imported as a module.
__all__ = ['RAdam']

class RAdam(keras.optimizers.Optimizer):
    """RAdam optimizer.
    # Arguments
        learning_rate: float >= 0. Learning rate.
        beta_1: float, 0 < beta < 1. Generally close to 1.
        beta_2: float, 0 < beta < 1. Generally close to 1.
        epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`.
        decay: float >= 0. Learning rate decay over each update.
        weight_decay: float >= 0. Weight decay for each param.
        amsgrad: boolean. Whether to apply the AMSGrad variant of this
            algorithm from the paper "On the Convergence of Adam and
            Beyond".
        total_steps: int >= 0. Total number of training steps. Enable warmup by setting a positive value.
            With the default of 0 the warmup/decay branch in `get_updates` is
            skipped, so `warmup_proportion` and `min_lr` have no effect.
        warmup_proportion: 0 < warmup_proportion < 1. The proportion of increasing steps.
        min_lr: float >= 0. Minimum learning rate after warmup.
    # References
        - [Adam - A Method for Stochastic Optimization](https://arxiv.org/abs/1412.6980v8)
        - [On the Convergence of Adam and Beyond](https://openreview.net/forum?id=ryQu7f-RZ)
        - [On The Variance Of The Adaptive Learning Rate And Beyond](https://arxiv.org/pdf/1908.03265v1.pdf)
    """

    def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999,
                 epsilon=None, decay=0., weight_decay=0., amsgrad=False,
                 total_steps=0, warmup_proportion=0.1, min_lr=0., **kwargs):
        # Accept the legacy `lr` keyword; when given it overrides `learning_rate`.
        learning_rate = kwargs.pop('lr', learning_rate)
        super(RAdam, self).__init__(**kwargs)
        # Hyper-parameters are stored as backend variables under a scope named after the class.
        with K.name_scope(self.__class__.__name__):
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.learning_rate = K.variable(learning_rate, name='learning_rate')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(decay, name='decay')
            self.weight_decay = K.variable(weight_decay, name='weight_decay')
            self.total_steps = K.variable(total_steps, name='total_steps')
            self.warmup_proportion = K.variable(warmup_proportion, name='warmup_proportion')
            self.min_lr = K.variable(min_lr, name='min_lr')
        if epsilon is None:
            epsilon = K.epsilon()
        self.epsilon = epsilon
        # Plain Python copies of the constructor values; `get_updates` branches on
        # these, so changing the backend variables later does not toggle those branches.
        self.initial_decay = decay
        self.initial_weight_decay = weight_decay
        self.initial_total_steps = total_steps
        self.amsgrad = amsgrad

    def get_updates(self, loss, params):
        """Build the backend update operations for one optimization step.

        # Arguments
            loss: passed to `get_gradients` together with `params`.
            params: list of variables to update.
        # Returns
            The list stored in `self.updates`: the iteration-counter increment
            followed by the updates of the moment slots, the running maxima
            (only with `amsgrad`) and the parameters themselves.
        """
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr

        # Optional decay of the learning rate based on the iteration count
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, K.dtype(self.decay))))

        # 1-based step counter as a float
        t = K.cast(self.iterations, K.floatx()) + 1

        # Optional schedule: scale lr by t / warmup_steps while t <= warmup_steps,
        # afterwards move it linearly towards `min_lr` over `decay_steps` steps.
        if self.initial_total_steps > 0:
            warmup_steps = self.total_steps * self.warmup_proportion
            decay_steps = K.maximum(self.total_steps - warmup_steps, 1)
            decay_rate = (self.min_lr - lr) / decay_steps
            lr = K.switch(
                t <= warmup_steps,
                lr * (t / warmup_steps),
                lr + decay_rate * K.minimum(t - warmup_steps, decay_steps),
            )

        # First- and second-moment accumulators, one per parameter
        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='m_' + str(i)) for (i, p) in enumerate(params)]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='v_' + str(i)) for (i, p) in enumerate(params)]

        # With amsgrad a full-size running maximum is kept per parameter;
        # otherwise a size-1 placeholder is created for each parameter.
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='vhat_' + str(i)) for (i, p) in enumerate(params)]
        else:
            vhats = [K.zeros(1, name='vhat_' + str(i)) for i in range(len(params))]

        self.weights = [self.iterations] + ms + vs + vhats

        # beta^t terms used for bias correction
        beta_1_t = K.pow(self.beta_1, t)
        beta_2_t = K.pow(self.beta_2, t)

        # Quantities that decide below whether the rectified adaptive step is used
        sma_inf = 2.0 / (1.0 - self.beta_2) - 1.0
        sma_t = sma_inf - 2.0 * t * beta_2_t / (1.0 - beta_2_t)

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # Exponential moving averages of the gradient and of its square
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            m_corr_t = m_t / (1.0 - beta_1_t)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                v_corr_t = K.sqrt(vhat_t / (1.0 - beta_2_t))
                self.updates.append(K.update(vhat, vhat_t))
            else:
                v_corr_t = K.sqrt(v_t / (1.0 - beta_2_t))

            # Rectification factor applied to the adaptive step
            r_t = K.sqrt((sma_t - 4.0) / (sma_inf - 4.0) *
                         (sma_t - 2.0) / (sma_inf - 2.0) *
                         sma_inf / sma_t)

            # Use the rectified adaptive step once sma_t >= 5; before that fall
            # back to the bias-corrected first moment alone.
            p_t = K.switch(sma_t >= 5, r_t * m_corr_t / (v_corr_t + self.epsilon), m_corr_t)

            # Weight decay is added to the step itself
            if self.initial_weight_decay > 0:
                p_t += self.weight_decay * p

            p_t = p - lr * p_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates

    @property
    def lr(self):
        """Alias for `learning_rate`."""
        return self.learning_rate

    @lr.setter
    def lr(self, learning_rate):
        self.learning_rate = learning_rate

    def get_config(self):
        """Return the base optimizer config merged with this optimizer's hyper-parameters."""
        config = {
            'learning_rate': float(K.get_value(self.learning_rate)),
            'beta_1': float(K.get_value(self.beta_1)),
            'beta_2': float(K.get_value(self.beta_2)),
            'decay': float(K.get_value(self.decay)),
            'weight_decay': float(K.get_value(self.weight_decay)),
            'epsilon': self.epsilon,
            'amsgrad': self.amsgrad,
            'total_steps': float(K.get_value(self.total_steps)),
            'warmup_proportion': float(K.get_value(self.warmup_proportion)),
            'min_lr': float(K.get_value(self.min_lr)),
        }
        base_config = super(RAdam, self).get_config()
        # Entries from `config` win over same-named entries of the base config.
        return dict(list(base_config.items()) + list(config.items()))

In [None]:
# Build the neural-network architecture
def make_nn_model():
    """Assemble the uncompiled Keras model used for every fold.

    The input width is read from the global ``X_train``. A ReLU dense layer of
    that same width plus batch normalisation feeds three stacked 256-unit
    Dense+PReLU blocks. Dropout (0.5) sits between the blocks, and the three
    block outputs are concatenated, passed through dropout (0.25) and mapped
    to 199 sigmoid outputs.
    """
    n_features = X_train.shape[1]

    def dense_prelu(tensor):
        # Linear 256-unit layer followed by a PReLU activation
        hidden = Dense(256, activation=None)(tensor)
        return PReLU()(hidden)

    numerical_inputs = Input(shape=(n_features,))
    stem = Dense(n_features, activation='relu')(numerical_inputs)
    stem = BatchNormalization()(stem)

    block_1 = dense_prelu(stem)
    block_2 = dense_prelu(Dropout(0.5)(block_1))
    block_3 = dense_prelu(Dropout(0.5)(block_2))

    # Skip connections: the deepest block first, then the two earlier ones
    merged = Concatenate()([block_3, block_2, block_1])
    merged = Dropout(0.25)(merged)

    out = Dense(199, activation='sigmoid')(merged)

    return Model(inputs=numerical_inputs, outputs=out)

In [None]:
def crps(labels, predictions):
    """Continuous Ranked Probability Score, usable as a Keras metric.

    The previous body was a NumPy evaluation callback returning a
    ``(name, value, flag)`` tuple. It called ``len()`` and
    ``scaler.inverse_transform`` on its arguments, so it raised as soon as
    Keras invoked it with symbolic tensors through
    ``model.compile(metrics=[crps])``.

    Both arguments are 199-wide cumulative vectors here (the step-function
    targets in ``y_train`` and the sigmoid outputs of the network), so the
    score is the squared difference averaged over the 199 positions. Keras
    then averages the per-sample values over the batch, which gives the same
    ``sum / (199 * n_samples)`` normalisation the old code used.

    # Arguments
        labels: tensor of target vectors, last axis of size 199.
        predictions: tensor of predicted vectors, same shape as `labels`.
    # Returns
        Tensor holding one score per sample.
    """
    return K.mean(K.square(predictions - labels), axis=-1)

In [None]:
# Train one neural-network model on a single train/validation split
def train_model(x_tr, y_tr, x_vl, y_vl, epochs=2, patience=10):
    """Build, compile and fit a fresh network.

    # Arguments
        x_tr, y_tr: training features and targets.
        x_vl, y_vl: validation features and targets, monitored through `val_loss`.
        epochs: number of training epochs (default 2, the previously hard-coded value).
        patience: early-stopping patience in epochs (default 10, as before).
            Early stopping can only trigger when `epochs` exceeds `patience`.
    # Returns
        The fitted Keras model.
    """
    model = make_nn_model()
    er = EarlyStopping(patience=patience, min_delta=1e-4, restore_best_weights=True, monitor='val_loss')
    # NOTE(review): RAdam is built without `total_steps`, so its warmup/decay branch
    # is skipped and `warmup_proportion` / `min_lr` have no effect - confirm intent.
    model.compile(optimizer=RAdam(warmup_proportion=0.1, min_lr=1e-7), loss='mse', metrics=[crps])
    # validation_data is passed as a tuple, the form documented by Keras
    model.fit(x_tr, y_tr, epochs=epochs, callbacks=[er], validation_data=(x_vl, y_vl))
    return model

In [None]:
# Setup cross validation folds
kf = 2
# RepeatedKFold yields n_splits * n_repeats (= kf * kf) train/validation splits.
# A fixed random_state keeps the splits identical across kernel restarts.
rkf = RepeatedKFold(n_splits=kf, n_repeats=kf, random_state=42)
print(str(kf) + ' Folds para treino...')

In [None]:
# Put all features on a common scale; the fitted scaler is reused at prediction time.
# Re-running this cell refits the scaler on already-scaled data, so run it once per kernel.
scaler = MinMaxScaler()
X_train = scaler.fit(X_train).transform(X_train)

In [None]:
# Train one model per cross-validation split and keep them all for averaging at prediction time
models = []

for fold_, (tr_idx, vl_idx) in enumerate(rkf.split(X_train, y_train)):
    print("fold {}".format(fold_))

    fitted = train_model(
        X_train[tr_idx], y_train[tr_idx],
        X_train[vl_idx], y_train[vl_idx],
    )
    models.append(fitted)

# REALIZANDO A SUBMISSÃO

In [None]:
# Predict on a test frame and submit the result to the competition environment
def make_pred(df, sample, env, models):
    """Predict the 199-step cumulative distribution for `df` and submit it.

    # Arguments
        df: test frame. A raw frame (still holding `NflIdRusher`) is passed
            through `feature_engineering` first; an already engineered frame
            is used as is.
        sample: sample-submission frame; only its column names are used.
        env: competition environment; `env.predict` receives the prediction frame.
        models: fitted models whose predictions are averaged.
    # Returns
        NumPy array of the submitted predictions, one row per play.
    """
    # The training features came from feature_engineering, so raw test frames need
    # the same treatment (numeric columns only, rows ordered per play).
    if 'NflIdRusher' in df.columns:
        df = feature_engineering(df)

    # 22 rows per play; the previous code used an undefined `ind_test` here
    n_plays = len(df) // 22
    X = np.array(df[players_col]).reshape(n_plays, -1)

    # Per-play columns are read from the first row of each play. The target is
    # excluded when present so the layout matches the training matrix.
    play_col = df.drop(players_col + ['Yards'], axis=1, errors='ignore').columns
    X_play_col = np.zeros(shape=(X.shape[0], len(play_col)))
    for i, col in enumerate(play_col):
        X_play_col[:, i] = df[col][::22]

    X = np.concatenate([X, X_play_col], axis=1)
    X = scaler.transform(X)

    # Average the predictions of all models
    y_pred = np.array([model.predict(X) for model in models]).mean(0)

    # A cumulative distribution must stay in [0, 1] and never decrease:
    # clip, then take the running maximum along the yard axis.
    y_pred = np.maximum.accumulate(np.clip(y_pred, 0.0, 1.0), axis=1)

    env.predict(pd.DataFrame(data=y_pred, columns=sample.columns))
    return y_pred

In [None]:
# `nflrush` is not brought into scope by the import cell, so import it here;
# otherwise this cell fails with a NameError on a fresh kernel.
from kaggle.competitions import nflrush

# Create the competition environment that serves test data and collects predictions
env = nflrush.make_env()

In [None]:
# Predict and submit for every (test, sample) pair yielded by the environment;
# tqdm wraps the iterator to show progress.
for test, sample in tqdm.tqdm(env.iter_test()):
    make_pred(test, sample, env, models)

In [None]:
# Ask the environment to write the submission file
env.write_submission_file()