In [35]:
import os
import gc
import time
import codecs
from datetime import date, datetime

random_seed = 42

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import lightgbm as lgb
import xgboost as xgb

%matplotlib inline
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
plt.style.use('ggplot')
mpl.rc('lines', linewidth=4, color='g')

import warnings
warnings.filterwarnings('ignore')

In [36]:
import keras
import tensorflow as tf
from keras import optimizers
from keras import backend as K
from keras import regularizers
from keras.models import Sequential
from keras.layers import (Dense, Activation, Dropout, Flatten, Bidirectional,
                          CuDNNGRU, Embedding, Conv1D, CuDNNLSTM,                           
                          MaxPooling1D, GlobalMaxPooling1D, LSTM) 
from keras.utils import plot_model, np_utils
from keras.wrappers.scikit_learn import KerasRegressor
from keras.preprocessing import sequence
from keras.callbacks import EarlyStopping


Using TensorFlow backend.


In [17]:
from git import Repo

# Location of the `.git` directory that git_push() opens with Repo(...).
# NOTE(review): this is an absolute, machine-specific path — confirm it is
# valid on the machine running the notebook or make it configurable.
PATH_OF_GIT_REPO = '/home/hq/Kaggle/.git'

def git_push(COMMIT_MESSAGE):
    """Stage, commit and push the repository at ``PATH_OF_GIT_REPO``.

    This is a best-effort helper: it never raises for ordinary failures.
    Problems are reported on stdout so the notebook keeps running.

    Parameters
    ----------
    COMMIT_MESSAGE : str
        Message passed to ``repo.index.commit``.

    Returns
    -------
    None
    """
    try:
        repo = Repo(PATH_OF_GIT_REPO)
        # update=True is forwarded to `git add` as --update
        # (per git's documentation: refresh entries of already-tracked files).
        repo.git.add(update=True)
        repo.index.commit(COMMIT_MESSAGE)
        origin = repo.remote(name='Kagglez')
        push_results = origin.push(refspec='master:master')
        # GitPython reports rejected/failed refs through PushInfo flags
        # instead of raising, so inspect them or failures stay invisible.
        for info in push_results:
            if info.flags & info.ERROR:
                print('Some error occured while pushing the code: {}'.format(
                    str(info.summary).strip()))
    except Exception as exc:
        # Catch Exception, not a bare `except:`, so KeyboardInterrupt and
        # SystemExit still propagate; include the cause for debugging.
        print('Some error occured while pushing the code: {!r}'.format(exc))

In [13]:
# Print the path of every file found beneath the competition input directory.
for root, _subdirs, names in os.walk('../Input/nfl-2020'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)

../Input/nfl-2020/train.csv
../Input/nfl-2020/kaggle/competitions/nflrush/sample_submission.csv.encrypted
../Input/nfl-2020/kaggle/competitions/nflrush/competition.cpython-36m-x86_64-linux-gnu.so
../Input/nfl-2020/kaggle/competitions/nflrush/__init__.py
../Input/nfl-2020/kaggle/competitions/nflrush/test.csv.encrypted


In [52]:
# Load train.csv from the input directory into `df`.
df = pd.read_csv('../Input/nfl-2020/train.csv')

In [53]:
# Show the frame's (rows, columns) shape together with its column labels.
df.shape, df.columns

((509762, 49),
 Index(['GameId', 'PlayId', 'Team', 'X', 'Y', 'S', 'A', 'Dis', 'Orientation',
        'Dir', 'NflId', 'DisplayName', 'JerseyNumber', 'Season', 'YardLine',
        'Quarter', 'GameClock', 'PossessionTeam', 'Down', 'Distance',
        'FieldPosition', 'HomeScoreBeforePlay', 'VisitorScoreBeforePlay',
        'NflIdRusher', 'OffenseFormation', 'OffensePersonnel',
        'DefendersInTheBox', 'DefensePersonnel', 'PlayDirection', 'TimeHandoff',
        'TimeSnap', 'Yards', 'PlayerHeight', 'PlayerWeight', 'PlayerBirthDate',
        'PlayerCollegeName', 'Position', 'HomeTeamAbbr', 'VisitorTeamAbbr',
        'Week', 'Stadium', 'Location', 'StadiumType', 'Turf', 'GameWeather',
        'Temperature', 'Humidity', 'WindSpeed', 'WindDirection'],
       dtype='object'))

In [54]:
# Preview the first five rows.
df.head(5)

Unnamed: 0,GameId,PlayId,Team,X,Y,S,A,Dis,Orientation,Dir,...,Week,Stadium,Location,StadiumType,Turf,GameWeather,Temperature,Humidity,WindSpeed,WindDirection
0,2017090700,20170907000118,away,73.91,34.84,1.69,1.13,0.4,81.99,177.18,...,1,Gillette Stadium,"Foxborough, MA",Outdoor,Field Turf,Clear and warm,63.0,77.0,8,SW
1,2017090700,20170907000118,away,74.67,32.64,0.42,1.35,0.01,27.61,198.7,...,1,Gillette Stadium,"Foxborough, MA",Outdoor,Field Turf,Clear and warm,63.0,77.0,8,SW
2,2017090700,20170907000118,away,74.0,33.2,1.22,0.59,0.31,3.01,202.73,...,1,Gillette Stadium,"Foxborough, MA",Outdoor,Field Turf,Clear and warm,63.0,77.0,8,SW
3,2017090700,20170907000118,away,71.46,27.7,0.42,0.54,0.02,359.77,105.64,...,1,Gillette Stadium,"Foxborough, MA",Outdoor,Field Turf,Clear and warm,63.0,77.0,8,SW
4,2017090700,20170907000118,away,69.32,35.42,1.82,2.43,0.16,12.63,164.31,...,1,Gillette Stadium,"Foxborough, MA",Outdoor,Field Turf,Clear and warm,63.0,77.0,8,SW


In [55]:
# Preview the last five rows.
df.tail(5)

Unnamed: 0,GameId,PlayId,Team,X,Y,S,A,Dis,Orientation,Dir,...,Week,Stadium,Location,StadiumType,Turf,GameWeather,Temperature,Humidity,WindSpeed,WindDirection
509757,2018123015,20181230154157,home,86.77,24.2,2.14,2.12,0.22,44.55,15.31,...,17,CenturyLink Field,"Seattle, WA",Outdoor,FieldTurf,Cloudy,45.0,76.0,5,SE
509758,2018123015,20181230154157,home,86.76,27.18,1.16,0.66,0.11,53.63,42.8,...,17,CenturyLink Field,"Seattle, WA",Outdoor,FieldTurf,Cloudy,45.0,76.0,5,SE
509759,2018123015,20181230154157,home,87.26,27.05,2.59,1.18,0.26,3.96,21.12,...,17,CenturyLink Field,"Seattle, WA",Outdoor,FieldTurf,Cloudy,45.0,76.0,5,SE
509760,2018123015,20181230154157,home,84.57,24.37,4.36,1.79,0.47,148.08,183.34,...,17,CenturyLink Field,"Seattle, WA",Outdoor,FieldTurf,Cloudy,45.0,76.0,5,SE
509761,2018123015,20181230154157,home,80.8,26.35,4.87,4.1,0.45,135.44,118.24,...,17,CenturyLink Field,"Seattle, WA",Outdoor,FieldTurf,Cloudy,45.0,76.0,5,SE


In [56]:
# Keep one row per distinct (GameId, PlayId, Yards) combination.
outcomes = (
    df.loc[:, ['GameId', 'PlayId', 'Yards']]
      .drop_duplicates()
)

In [57]:
def create_features(df, deploy=False):
    """Feature-building entry point (currently a stub).

    As written, the body only defines the nested helper ``new_X``. It never
    calls it, does not read ``df`` or ``deploy``, and returns ``None``.

    Parameters
    ----------
    df : object
        Not used by the current body.
    deploy : bool, optional
        Not used by the current body.

    Returns
    -------
    None
    """
    def new_X(x_coordinate, play_direction):
        """Return ``120.0 - x_coordinate`` when ``play_direction`` is
        ``'left'``; otherwise return ``x_coordinate`` unchanged."""
        is_left = play_direction == 'left'
        return 120.0 - x_coordinate if is_left else x_coordinate

In [None]:
git_