## Import Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn import metrics
from IPython.core.display import HTML
import matplotlib.pyplot as plt

### Load the data from CSV file

In [2]:
data_raw = pd.read_csv('Formatted_Data_Numerical\Patriots_Away_Data_Formatted_Numerical.csv')

In [3]:
data_raw.head(10)

Unnamed: 0,Team_Del,Yards_Del,Dist_To_Endzone,Play_ID,Table Names,Table Names-1,home_team,away_team,season_type,week,...,yrdln,ydstogo,home_timeouts_remaining,away_timeouts_remaining,score_differential,play_type_nfl,drive_play_count,roof,surface,temp
0,MIA,35,35,2022_01_NE_MIA44,play_by_play_2022.csv,"play_by_play_2014.csv,play_by_play_2020.csv,pl...",MIA,NE,1,1,...,MIA 35,0,3,3,0,KICK_OFF,8.0,outdoors,grass,
1,NE,25,75,2022_01_NE_MIA59,play_by_play_2022.csv,"play_by_play_2014.csv,play_by_play_2020.csv,pl...",MIA,NE,1,1,...,NE 25,10,3,3,0,PASS,8.0,outdoors,grass,
2,NE,34,84,2022_01_NE_MIA83,play_by_play_2022.csv,"play_by_play_2014.csv,play_by_play_2020.csv,pl...",MIA,NE,1,1,...,NE 34,1,3,3,0,RUSH,8.0,outdoors,grass,
3,NE,46,96,2022_01_NE_MIA109,play_by_play_2022.csv,"play_by_play_2014.csv,play_by_play_2020.csv,pl...",MIA,NE,1,1,...,NE 46,10,3,3,0,RUSH,8.0,outdoors,grass,
4,NE,49,99,2022_01_NE_MIA130,play_by_play_2022.csv,"play_by_play_2014.csv,play_by_play_2020.csv,pl...",MIA,NE,1,1,...,NE 49,7,3,3,0,PASS,8.0,outdoors,grass,
5,MIA,43,43,2022_01_NE_MIA154,play_by_play_2022.csv,"play_by_play_2014.csv,play_by_play_2020.csv,pl...",MIA,NE,1,1,...,MIA 43,10,3,3,0,RUSH,8.0,outdoors,grass,
6,MIA,35,35,2022_01_NE_MIA175,play_by_play_2022.csv,"play_by_play_2014.csv,play_by_play_2020.csv,pl...",MIA,NE,1,1,...,MIA 35,2,3,3,0,RUSH,8.0,outdoors,grass,
7,MIA,33,33,2022_01_NE_MIA196,play_by_play_2022.csv,"play_by_play_2014.csv,play_by_play_2020.csv,pl...",MIA,NE,1,1,...,MIA 33,10,3,3,0,PASS,8.0,outdoors,grass,
8,MIA,22,22,2022_01_NE_MIA236,play_by_play_2022.csv,"play_by_play_2014.csv,play_by_play_2020.csv,pl...",MIA,NE,1,1,...,MIA 22,10,3,3,0,PASS,8.0,outdoors,grass,
9,MIA,35,35,2022_01_NE_MIA571,play_by_play_2022.csv,"play_by_play_2014.csv,play_by_play_2020.csv,pl...",MIA,NE,1,1,...,MIA 35,0,3,2,-3,KICK_OFF,8.0,outdoors,grass,


### Check the Column Types

In [4]:
data_raw.dtypes

Team_Del                      object
Yards_Del                      int64
Dist_To_Endzone                int64
Play_ID                       object
Table Names                   object
Table Names-1                 object
home_team                     object
away_team                     object
season_type                    int64
week                           int64
posteam                       object
yardline_100                   int64
game_date                     object
quarter_seconds_remaining      int64
half_seconds_remaining         int64
game_seconds_remaining         int64
drive                        float64
qtr                            int64
down                         float64
goal_to_go                     int64
yrdln                         object
ydstogo                        int64
home_timeouts_remaining        int64
away_timeouts_remaining        int64
score_differential             int64
play_type_nfl                 object
drive_play_count             float64
r

### Secondary Cleaning and Filtering

In [5]:
data_clean = data_raw.drop(columns = ['Play_ID', 'Table Names', 'Table Names-1', 'away_team', 'game_date', 'roof', 'surface', 'temp', 'yrdln', 'Team_Del', 'Yards_Del', 'posteam', 'season_type', 'Dist_To_Endzone'])

In [6]:
data_clean.dtypes

home_team                     object
week                           int64
yardline_100                   int64
quarter_seconds_remaining      int64
half_seconds_remaining         int64
game_seconds_remaining         int64
drive                        float64
qtr                            int64
down                         float64
goal_to_go                     int64
ydstogo                        int64
home_timeouts_remaining        int64
away_timeouts_remaining        int64
score_differential             int64
play_type_nfl                 object
drive_play_count             float64
dtype: object

### Picking Team

In [7]:
TEAMNAME = 'ATL'
data_clean = data_clean[data_clean['home_team'] == TEAMNAME]

In [8]:
data_clean.shape

(397, 16)

### Split Data Into X and Y

In [9]:
X = data_clean.drop(columns = ['play_type_nfl', 'home_team'])

In [10]:
Y = data_clean['play_type_nfl']

### Split Data Into Test and Training Set

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size= 0.2, random_state= 150)

In [12]:
print("X Train Shape:" + str(X_train.shape))
print("X Test Shape:" + str(X_test.shape))

X Train Shape:(317, 14)
X Test Shape:(80, 14)


In [13]:
#X_train.dtypes

### Create Model

In [14]:
gbm = HistGradientBoostingClassifier(random_state= 46, learning_rate= 0.01, max_depth= 8, min_samples_leaf= 20, max_iter= 100, max_leaf_nodes= 30, verbose= 0)

In [15]:
gbm = gbm.fit(X_train, y_train)

### Test Model

In [16]:
pred_test = gbm.predict(X_test)

### Evaluate Model

In [17]:
model_accuracy_percentage = metrics.accuracy_score(y_test, pred_test)*100
model_accuracy_percentage = "{:.2f}".format(model_accuracy_percentage)
model_accuracy_percentage

'68.75'

In [18]:
print("The model accuracy for predicting Patriots own play calls away against " + str(TEAMNAME) + " is " + model_accuracy_percentage + "%")

The model accuracy for predicting Patriots own play calls away against ATL is 68.75%


### View Predictions

In [19]:
predictions = pd.DataFrame({'Y_Test': y_test, 'Model Predictions': pred_test})
predictions

Unnamed: 0,Y_Test,Model Predictions
8945,PASS,PASS
6737,PASS,PASS
6768,PASS,RUSH
8922,PASS,RUSH
7770,RUSH,RUSH
...,...,...
6815,PASS,PASS
8946,RUSH,PASS
6744,RUSH,PASS
12883,RUSH,PASS


### Predict Play Call Based On Current Game Status

In [70]:
# Enter Week Number (1 Through 22 With Weeks 18-22 Being Post Season Games)
WEEK = 5

# Enter Distance to Endzone in Yards(1-100)
DISTANCE_TO_ENDZONE = 34

# Enter Game Seconds Remaining (1-3600)
GAME_SECONDS_REMAINING = 2500

# Enter Quarter Number (1-4 or 5 for Overtime)
QUARTER = 2

# Enter Down Number (1-4)
DOWN = 3

# Enter Yards to Go (1-100)
YARDS_TO_GO = 4

# Enter if it is Goal to Go (1 Means Yes Goal to Go, 0 Means No Goal to Go)
GOAL_TO_GO = 0

# Enter Drive Number (Which Drive is it)
DRIVE_NUMBER = 3

# Enter Drive Play Count (Number of Plays in the Current Drive)
DRIVE_PLAY_COUNT = 5

# Enter Patriots Timeouts Remaining (0-3)
PATS_TIMEOUTS_REMAINING = 3

# Enter Opponent Timeouts Remaining (0-3)
OPPONENT_TIMEOUTS_REMAINING = 2

# Enter Patriots Score
PATS_SCORE = 14

# Enter Opponent Team Score
OPPONENT_SCORE = 10

### Calculate Other Fields

In [71]:
# Calculate Half Seconds Left
if GAME_SECONDS_REMAINING <= 1800:
    HALF_SECONDS_REMAINING = GAME_SECONDS_REMAINING
else:
    HALF_SECONDS_REMAINING = (GAME_SECONDS_REMAINING - 1800)

In [72]:
# Calculate Quarter Seconds Left
if GAME_SECONDS_REMAINING <= 900:
    QUARTER_SECONDS_REMAINING = GAME_SECONDS_REMAINING
elif GAME_SECONDS_REMAINING > 900 and GAME_SECONDS_REMAINING <= 1800:
    QUARTER_SECONDS_REMAINING = (GAME_SECONDS_REMAINING - 900)
elif GAME_SECONDS_REMAINING > 1800 and GAME_SECONDS_REMAINING <= 2700:
    QUARTER_SECONDS_REMAINING = (GAME_SECONDS_REMAINING - 1800)
else:
    QUARTER_SECONDS_REMAINING = (GAME_SECONDS_REMAINING - 2700)

In [73]:
# Calculate Score Differential
SCORE_DIFFERENTIAL = PATS_SCORE - OPPONENT_SCORE

In [74]:
#QUARTER_SECONDS_REMAINING

In [75]:
#HALF_SECONDS_REMAINING

In [76]:
#SCORE_DIFFERENTIAL

### Predict Play on Model

In [77]:
TEST_PLAY = {'week': [WEEK], 'yardline_100': [DISTANCE_TO_ENDZONE], 'quarter_seconds_remaining': [QUARTER_SECONDS_REMAINING], 'half_seconds_remaining': [HALF_SECONDS_REMAINING], 'game_seconds_remaining': [GAME_SECONDS_REMAINING], 'drive': [DRIVE_NUMBER], 'qtr': [QUARTER], 'down': [DOWN], 'goal_to_go': [GOAL_TO_GO], 'ydstogo': [YARDS_TO_GO], 'home_timeouts_remaining': [PATS_TIMEOUTS_REMAINING], 'away_timeouts_remaining': [OPPONENT_TIMEOUTS_REMAINING], 'score_differential': [SCORE_DIFFERENTIAL], 'drive_play_count': [DRIVE_PLAY_COUNT]}
TEST_PLAY = pd.DataFrame(TEST_PLAY)

In [78]:
#TEST_PLAY.dtypes

In [79]:
PLAY_PREDICTION = gbm.predict(TEST_PLAY)
print("The model predicts this play will be a " + PLAY_PREDICTION)

['The model predicts this play will be a RUSH']
