## Import Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn import metrics
from IPython.core.display import HTML
import matplotlib.pyplot as plt

### Load the data from CSV file

In [2]:
# Forward slash keeps the relative path portable: it resolves on Windows, macOS and
# Linux, and avoids the invalid "\P" escape sequence a backslash would put in the literal.
data_raw = pd.read_csv('Formatted_Data_Numerical/Patriots_Home_Data_Formatted_Numerical.csv')

In [3]:
#data_raw.head(10)

### Check the Column Types

In [4]:
#data_raw.dtypes

### Secondary Cleaning and Filtering

In [5]:
# Remove identifier, venue and redundant columns that are not used as model inputs
data_clean = data_raw.drop(
    [
        'Play_ID', 'Table Names', 'Table Names-1',
        'home_team', 'game_date', 'roof', 'surface', 'temp',
        'yrdln', 'Team_Del', 'Yards_Del',
        'posteam', 'season_type', 'Dist_To_Endzone',
    ],
    axis='columns',
)

In [6]:
# Sanity check: (rows, columns) of the frame after the column drop
data_clean.shape

(15569, 16)

### Picking Team

In [7]:
# Abbreviation of the visiting team whose plays the model is trained on
TEAMNAME = 'TEN'

# Build the row mask first so data_clean is only overwritten when the team actually
# has plays in it. A misspelled abbreviation, or re-running this cell with a new
# TEAMNAME after data_clean was already filtered, would otherwise leave an empty
# frame that only fails later, at the train/test split.
is_team_play = data_clean['away_team'] == TEAMNAME
if not is_team_play.any():
    raise ValueError(
        "No plays found for away_team '{}'. Check the abbreviation, or re-run the "
        "cleaning cell if data_clean was already filtered to another team.".format(TEAMNAME)
    )
data_clean = data_clean[is_team_play]

In [8]:
# Sanity check: (rows, columns) left after keeping only the selected away team's plays
data_clean.shape

(500, 16)

### Split Data Into X and Y

In [9]:
# Features: every remaining column except the target and the (now constant) team column
X = data_clean.drop(['play_type_nfl', 'away_team'], axis='columns')

In [10]:
# Target: the play type label for each row
Y = data_clean.loc[:, 'play_type_nfl']

### Split Data Into Test and Training Set

In [11]:
# Hold out 20% of the plays for testing; the fixed seed keeps the split repeatable
(X_train, X_test,
 y_train, y_test) = train_test_split(X, Y, random_state=50, test_size=0.2)

In [12]:
# Report the (rows, columns) of each feature split
print("X Train Shape:", X_train.shape, sep="")
print("X Test Shape:", X_test.shape, sep="")

X Train Shape:(400, 14)
X Test Shape:(100, 14)


In [13]:
# Show the dtype of every feature column that will be passed to the model
X_train.dtypes

week                           int64
yardline_100                 float64
quarter_seconds_remaining    float64
half_seconds_remaining       float64
game_seconds_remaining       float64
drive                        float64
qtr                            int64
down                         float64
goal_to_go                     int64
ydstogo                        int64
home_timeouts_remaining        int64
away_timeouts_remaining        int64
score_differential           float64
drive_play_count             float64
dtype: object

In [14]:
#categorical_features = ['away_team', 'season_type', 'posteam', 'side_of_field', 'end_yard_line']
#categorical_features = ['away_team']
#cat_features_int = [0,1,3,4,17]
#cat_features_bool = [True,True,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False]

### Create Model

In [15]:
# Histogram-based gradient boosting classifier; the fixed random_state keeps runs repeatable
gbm = HistGradientBoostingClassifier(
    learning_rate=0.01,
    max_iter=100,
    max_depth=8,
    max_leaf_nodes=30,
    min_samples_leaf=20,
    random_state=46,
    verbose=0,
)

In [16]:
# Train on the training split; the fitted estimator is bound back to the same name
gbm = gbm.fit(X=X_train, y=y_train)

### Test Model

In [17]:
# Predicted play type for every row of the held-out test split
pred_test = gbm.predict(X=X_test)

### Evaluate Model

In [18]:
# Accuracy on the test split, scaled to a percentage and kept as a two-decimal
# string so it can be dropped straight into the report sentence
model_accuracy_percentage = format(metrics.accuracy_score(y_test, pred_test) * 100, ".2f")
model_accuracy_percentage

'74.00'

In [19]:
# Human-readable accuracy summary for the selected opponent
print("".join([
    "The model accuracy for predicting play calls at home against ",
    str(TEAMNAME),
    " is ",
    model_accuracy_percentage,
    "%",
]))

The model accuracy for predicting play calls at home against TEN is 74.00%


### View Predictions

In [20]:
# Side-by-side table of the true labels (keeping their original row index)
# and the model's predictions for the same rows
predictions = pd.DataFrame({'Y_Test': y_test}).assign(**{'Model Predictions': pred_test})
predictions

Unnamed: 0,Y_Test,Model Predictions
10460,PUNT,PUNT
11965,KICK_OFF,KICK_OFF
14917,RUSH,RUSH
10483,PASS,PASS
10474,PASS,PASS
...,...,...
5144,RUSH,RUSH
5131,PASS,RUSH
3013,RUSH,RUSH
5116,RUSH,RUSH


### Predict Play Call Based On Current Game Status

In [21]:
# ---- Game situation to predict: edit these values by hand ----

# Schedule
WEEK = 5                         # 1-22; weeks 18-22 are post season games

# Field position and down
DISTANCE_TO_ENDZONE = 20         # yards to the endzone, 1-100

# Clock
GAME_SECONDS_REMAINING = 600     # 1-3600
QUARTER = 4                      # 1-4, or 5 for overtime

# Down and distance
DOWN = 3                         # 1-4
YARDS_TO_GO = 12                 # 1-100
GOAL_TO_GO = 0                   # 1 = goal to go, 0 = not goal to go

# Drive
DRIVE_NUMBER = 5                 # which drive this is
DRIVE_PLAY_COUNT = 10            # number of plays in the current drive

# Timeouts
PATS_TIMEOUTS_REMAINING = 3      # Patriots, 0-3
OPPONENT_TIMEOUTS_REMAINING = 3  # opponent, 0-3

# Score
PATS_SCORE = 10                  # Patriots score
OPPONENT_SCORE = 20              # opponent team score

In [31]:
# Current game status fed to the model. The values are entered by hand, so each
# one with a known valid range is checked below; a typo is reported here instead
# of silently producing a prediction for an impossible game state.
WEEK = 5                         # 1-22 (18-22 are post season games)
DISTANCE_TO_ENDZONE = 20         # yards, 1-100
GAME_SECONDS_REMAINING = 600     # 1-3600
QUARTER = 4                      # 1-4, or 5 for overtime
DOWN = 3                         # 1-4
YARDS_TO_GO = 12                 # 1-100
GOAL_TO_GO = 0                   # 1 = goal to go, 0 = not goal to go
DRIVE_NUMBER = 5
DRIVE_PLAY_COUNT = 10
PATS_TIMEOUTS_REMAINING = 3      # 0-3
OPPONENT_TIMEOUTS_REMAINING = 3  # 0-3
PATS_SCORE = 10
OPPONENT_SCORE = 20

# name -> (value, lowest allowed, highest allowed)
_allowed_ranges = {
    'WEEK': (WEEK, 1, 22),
    'DISTANCE_TO_ENDZONE': (DISTANCE_TO_ENDZONE, 1, 100),
    'GAME_SECONDS_REMAINING': (GAME_SECONDS_REMAINING, 1, 3600),
    'QUARTER': (QUARTER, 1, 5),
    'DOWN': (DOWN, 1, 4),
    'YARDS_TO_GO': (YARDS_TO_GO, 1, 100),
    'GOAL_TO_GO': (GOAL_TO_GO, 0, 1),
    'PATS_TIMEOUTS_REMAINING': (PATS_TIMEOUTS_REMAINING, 0, 3),
    'OPPONENT_TIMEOUTS_REMAINING': (OPPONENT_TIMEOUTS_REMAINING, 0, 3),
}
_out_of_range = [
    name
    for name, (value, low, high) in _allowed_ranges.items()
    if not low <= value <= high
]
if _out_of_range:
    raise ValueError("Game status value(s) out of range: " + ", ".join(_out_of_range))

### Calculate Other Fields

In [32]:
# Seconds left in the current half: with more than 1800 seconds of game time
# remaining, subtract the 1800 seconds that belong to the later half
HALF_SECONDS_REMAINING = (
    GAME_SECONDS_REMAINING
    if GAME_SECONDS_REMAINING <= 1800
    else GAME_SECONDS_REMAINING - 1800
)

In [33]:
# Seconds left in the current quarter: every 900-second mark (900, 1800, 2700)
# that the game clock is still above is one later quarter to subtract
QUARTER_SECONDS_REMAINING = GAME_SECONDS_REMAINING - 900 * sum(
    GAME_SECONDS_REMAINING > mark for mark in (900, 1800, 2700)
)

In [34]:
# Calculate Score Differential
# Opponent score minus Patriots score, so the value is positive when the opponent leads
SCORE_DIFFERENTIAL = OPPONENT_SCORE - PATS_SCORE

In [35]:
#QUARTER_SECONDS_REMAINING

In [36]:
#HALF_SECONDS_REMAINING

In [37]:
#SCORE_DIFFERENTIAL

### Predict Play on Model

In [38]:
# Single-row frame describing the play to predict; the keys are listed in the
# same order as the training feature columns
TEST_PLAY = pd.DataFrame([{
    'week': WEEK,
    'yardline_100': DISTANCE_TO_ENDZONE,
    'quarter_seconds_remaining': QUARTER_SECONDS_REMAINING,
    'half_seconds_remaining': HALF_SECONDS_REMAINING,
    'game_seconds_remaining': GAME_SECONDS_REMAINING,
    'drive': DRIVE_NUMBER,
    'qtr': QUARTER,
    'down': DOWN,
    'goal_to_go': GOAL_TO_GO,
    'ydstogo': YARDS_TO_GO,
    'home_timeouts_remaining': PATS_TIMEOUTS_REMAINING,
    'away_timeouts_remaining': OPPONENT_TIMEOUTS_REMAINING,
    'score_differential': SCORE_DIFFERENTIAL,
    'drive_play_count': DRIVE_PLAY_COUNT,
}])

In [39]:
#TEST_PLAY.dtypes

In [40]:
# predict() returns an array with one label per input row. TEST_PLAY holds a single
# row, so take element 0 for the message; concatenating the array itself would
# print an array repr (['...']) instead of a plain sentence.
PLAY_PREDICTION = gbm.predict(TEST_PLAY)
print("The model predicts this " + str(TEAMNAME) + " play will be a " + str(PLAY_PREDICTION[0]))

['The model predicts this TEN play will be a PASS']
