In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn.dummy import DummyClassifier

In [2]:
# Load the modelling table; column 0 of the CSV becomes the frame's index.
df = pd.read_csv(
    '../../datasets/model.csv',
    index_col=[0],
)

In [3]:
# Predictors: four columns selected from df, kept in this order.
X = df[
    [
        'ydstogo',
        'run',
        'td_prob',
        'goal_to_go',
    ]
]

# Target: the column the classifiers below are fitted and scored against.
y = df['4th_down_conversion']

In [4]:
# Hold out a test split using train_test_split's default proportions; the fixed
# seed keeps the partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

## Stratified

In [5]:
# Baseline 1: dummy classifier with the 'stratified' strategy, seeded so its
# random predictions are repeatable. fit() returns the estimator itself, so
# construction and fitting are chained into one statement.
dummy1 = DummyClassifier(strategy='stratified', random_state=42).fit(X_train, y_train)

# Score on the held-out split; as the last expression this is the cell's output.
dummy1.score(X_test, y_test)

0.5293132328308208

In [6]:
# Labels the stratified baseline assigns to the held-out rows.
preds = dummy1.predict(X=X_test)

In [7]:
# Display the confusion matrix: rows are actual classes, columns are predicted classes.
confusion_matrix(y_true=y_test, y_pred=preds)

array([[332, 306],
       [256, 300]])

In [8]:
# Flatten the 2x2 matrix row by row and unpack the four cells as
# true negatives, false positives, false negatives, true positives.
tn, fp, fn, tp = confusion_matrix(y_true=y_test, y_pred=preds).ravel()

In [9]:
# Specificity of the test-set predictions: the share of actual negatives that
# were predicted negative, TN / (FP + TN).
spec = tn / (fp + tn)

print(f'Specificity: {round(spec,4)}')

Specificity: 0.5204


In [10]:
# Sensitivity of the test-set predictions: the share of actual positives that
# were predicted positive, TP / (FN + TP).
sens = tp / (fn + tp)

print(f'Sensitivity: {round(sens,4)}')

Sensitivity: 0.5396


In [11]:
# Accuracy of the test-set predictions: correct predictions (TN + TP) over all
# four confusion-matrix cells.
acc = (tn + tp) / (tn + fp + fn + tp)

print(f'Accuracy: {round(acc,4)}')

Accuracy: 0.5293


In [12]:
# Precision of the test-set predictions: the share of predicted positives that
# are actual positives, TP / (FP + TP).
prec = tp / (fp + tp)

print(f'Precision: {round(prec,4)}')

Precision: 0.495


## Most Frequent

In [13]:
# Baseline 2: dummy classifier with the 'most_frequent' strategy. fit() returns
# the estimator itself, so construction and fitting are chained into one statement.
dummy2 = DummyClassifier(strategy='most_frequent', random_state=42).fit(X_train, y_train)

# Score on the held-out split; as the last expression this is the cell's output.
dummy2.score(X_test, y_test)

0.5343383584589615

In [14]:
# Labels the most-frequent baseline assigns to the held-out rows
# (rebinds `preds`, replacing the stratified baseline's predictions).
preds = dummy2.predict(X=X_test)

In [15]:
# Display the confusion matrix: rows are actual classes, columns are predicted classes.
confusion_matrix(y_true=y_test, y_pred=preds)

array([[638,   0],
       [556,   0]])

In [16]:
# Flatten the 2x2 matrix row by row and unpack the four cells as
# true negatives, false positives, false negatives, true positives.
tn, fp, fn, tp = confusion_matrix(y_true=y_test, y_pred=preds).ravel()

In [17]:
# Specificity of the test-set predictions: the share of actual negatives that
# were predicted negative, TN / (FP + TN).
spec = tn / (fp + tn)

print(f'Specificity: {round(spec,4)}')

Specificity: 1.0


In [18]:
# Sensitivity of the test-set predictions: the share of actual positives that
# were predicted positive, TP / (FN + TP).
sens = tp / (fn + tp)

print(f'Sensitivity: {round(sens,4)}')

Sensitivity: 0.0


In [19]:
# Accuracy of the test-set predictions: correct predictions (TN + TP) over all
# four confusion-matrix cells.
acc = (tn + tp) / (tn + fp + fn + tp)

print(f'Accuracy: {round(acc,4)}')

Accuracy: 0.5343


In [20]:
# What is the precision of our test set?
#
# Precision = TP / (TP + FP), the share of predicted positives that are actual
# positives. The confusion matrix for this baseline has no predicted positives,
# so TP + FP is 0 and the ratio is undefined. Dividing anyway yields nan and
# emits a RuntimeWarning, so the zero denominator is handled explicitly and NaN
# is reported without the warning.

prec = tp / (tp + fp) if (tp + fp) > 0 else float('nan')

print(f'Precision: {round(prec,4)}')

Precision: nan


  This is separate from the ipykernel package so we can avoid doing imports until
