In [1]:
import warnings
# Silence every warning for the rest of the session (no category/module filter is given).
# NOTE(review): this also hides deprecation notices from the libraries used below;
# consider narrowing the filter to the specific warning categories that are noisy.
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import timedelta
from sklearn import tree

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [4]:
# Names of the input columns of interest.
# NOTE(review): `columns` is not referenced by any later cell visible in this notebook.
columns = ["turnovers", "score", "possession", "home_advantage"]

# Name of the label column, kept as a one-element list.
target = ["winner"]

In [5]:
# Load the game data and tidy it in one pipeline; the result is bound to `df`.
file_path = Path('final_nfl_data.csv')
df = (
    pd.read_csv(file_path)
    .dropna(axis='columns', how='all')  # remove columns that contain no data at all
    .dropna()                           # then remove every row that has a missing value
    .reset_index(drop=True)             # renumber rows 0..n-1 after the drops
    .drop(columns=['date', 'team_id', 'name'])  # columns not kept for modelling
)

df.head()

Unnamed: 0,home_advantage,turnovers,possession,score,winner
0,0,1,27:32,16,1
1,1,3,32:28,13,0
2,0,1,25:36,23,0
3,0,1,30:55,34,0
4,0,1,26:09,7,0


In [6]:
# Inspect the dtype of each column to spot any that are not yet numeric.
df.dtypes

home_advantage     int64
turnovers          int64
possession        object
score              int64
winner             int64
dtype: object

In [7]:
#convert Possession into Seconds
def timefinder(x):
    """Convert a "minutes:seconds" string into a total number of seconds.

    Parameters
    ----------
    x : str
        Text with exactly two numeric fields separated by a colon, e.g. "27:32".

    Returns
    -------
    float
        Total duration in seconds (e.g. 1652.0 for "27:32").

    Raises
    ------
    ValueError
        If `x` does not split into exactly two fields or a field is not numeric.
    """
    minutes_text, seconds_text = x.split(':')
    duration = timedelta(minutes=float(minutes_text), seconds=float(seconds_text))
    return duration.total_seconds()
# Derive a numeric possession feature (in seconds) from the text `possession` column.
df['possessioninseconds'] = df['possession'].map(timefinder)

df.head()

Unnamed: 0,home_advantage,turnovers,possession,score,winner,possessioninseconds
0,0,1,27:32,16,1,1652.0
1,1,3,32:28,13,0,1948.0
2,0,1,25:36,23,0,1536.0
3,0,1,30:55,34,0,1855.0
4,0,1,26:09,7,0,1569.0


In [8]:
# Drop the text `possession` column now that `possessioninseconds` carries the same
# information in numeric form.
# errors='ignore' keeps this cell re-runnable: without it, executing the cell a second
# time raises KeyError because 'possession' has already been removed from `df`.
df = df.drop(columns=['possession'], errors='ignore')
df.head()

Unnamed: 0,home_advantage,turnovers,score,winner,possessioninseconds
0,0,1,16,1,1652.0
1,1,3,13,0,1948.0
2,0,1,23,0,1536.0
3,0,1,34,0,1855.0
4,0,1,7,0,1569.0


In [9]:
# Target vector: the `winner` column.
y = df['winner']
# Feature matrix: every remaining column of `df`.
X = df.drop('winner', axis='columns')

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for each feature column.
X.describe()

Unnamed: 0,home_advantage,turnovers,score,possessioninseconds
count,10714.0,10714.0,10714.0,10714.0
mean,0.5,1.564215,22.282341,1813.025107
std,0.500023,1.32888,10.279592,275.074953
min,0.0,0.0,0.0,885.0
25%,0.0,1.0,15.0,1624.0
50%,0.5,1.0,22.0,1814.0
75%,1.0,2.0,29.0,2000.0
max,1.0,8.0,62.0,2828.0


In [11]:
# Check the class balance of the target: number of rows per distinct `winner` value.
y.value_counts()

0    5369
1    5345
Name: winner, dtype: int64

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=1,
)
X_train.shape

(8571, 4)

In [13]:
# Standardize the features. The scaler is fitted on the training rows only and the
# same fitted transform is then applied to both the training and the test rows.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(subset) for subset in (X_train, X_test)
)

In [14]:
# Fit a decision-tree classifier on the standardized training features.
# random_state is pinned (to the same seed as the train/test split) because scikit-learn
# randomly permutes the candidate features at each split; without a fixed seed, ties
# between equally good splits can be broken differently from run to run, so the fitted
# tree and the reported metrics would not be reproducible.
model = tree.DecisionTreeClassifier(random_state=1)
model = model.fit(X_train_scaled, y_train)

In [15]:
# Predict a label for every held-out row from the scaled test features.
predictions = model.predict(X_test_scaled)

In [16]:
# Cross-tabulate true labels (rows) against predicted labels (columns) and wrap the
# result in a labelled DataFrame for display.
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    data=cm,
    columns=["Predicted 0", "Predicted 1"],
    index=["Actual 0", "Actual 1"],
)

cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,772,266
Actual 1,313,792


In [18]:
# Overall accuracy of the predictions against the true test labels.

acc_score = accuracy_score(y_test, predictions)

In [19]:
# Summarize the evaluation: confusion matrix, overall accuracy, and per-class metrics.
print("Confusion Matrix")
# NOTE(review): `display` is not imported in this notebook; it relies on the name that
# IPython/Jupyter injects, so this cell presumably needs to run inside a notebook session.
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,772,266
Actual 1,313,792


Accuracy Score : 0.7298180121325245
Classification Report
              precision    recall  f1-score   support

           0       0.71      0.74      0.73      1038
           1       0.75      0.72      0.73      1105

    accuracy                           0.73      2143
   macro avg       0.73      0.73      0.73      2143
weighted avg       0.73      0.73      0.73      2143

