In [1]:
#import required libraries
import pandas as pd
import numpy as np

In [3]:
# SQL configuratiobn
from sqlalchemy import create_engine

#Postgres DB Password
from config import password

In [4]:
# Machine learning model libraries 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

## Postgres SQL and Pandas communication set up

In [5]:
#Create engine
engine = create_engine(f'postgresql://postgres:{password}@127.0.0.1:5435/NFL_DB')

## Data Frame creation from SQL DB

In [11]:
#Import Postgres SQL table to Dummy Data Frame
champion_stats_df = pd.read_sql("Select * from sb_champion_stats", engine)
champion_stats_df

Unnamed: 0,team_year,total_tackles_def,sacks_def,safties_def,passes_defended_def,ints_def,fumbles_lost_fum,td_passes_pass,ints_pass,sacks_pass,...,year,team,fg_success_rate,extra_success_rate,pass_success_rate,avg_yards_pass,avg_yards_rush,kicks_blocked,passes_per_rushing_plays,Champion
0,arizona_cardinals_2001,670,5,0,29,13,7,0,0,0,...,2001,arizona_cardinals,0.833333,1.000000,0.000000,10.715232,3.546939,0,0.004082,0
1,arizona_cardinals_2002,547,12,0,24,7,1,0,2,5,...,2002,arizona_cardinals,0.000000,0.000000,0.388889,11.470588,4.256410,0,0.461538,0
2,arizona_cardinals_2003,548,15,0,35,8,7,5,7,25,...,2003,arizona_cardinals,0.750000,1.000000,0.568862,12.957831,4.943396,0,3.150943,0
3,arizona_cardinals_2004,511,16,0,43,7,7,13,18,39,...,2004,arizona_cardinals,0.758621,1.000000,0.560150,11.369231,3.585034,0,3.619048,0
4,arizona_cardinals_2005,483,19,0,27,9,11,21,21,45,...,2005,arizona_cardinals,0.955556,1.000000,0.625373,12.421053,3.541436,0,3.701657,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
506,washington_redskins_2012,652,22,0,54,15,5,24,8,33,...,2012,washington_redskins,0.944444,0.970588,0.659864,13.352518,5.307851,1,0.911157,0
507,washington_redskins_2013,684,21,0,42,9,13,20,19,43,...,2013,washington_redskins,0.818182,1.000000,0.581015,11.956710,4.822917,2,1.591146,0
508,washington_redskins_2014,684,13,0,35,5,12,18,18,58,...,2014,washington_redskins,0.888889,0.968750,0.665448,12.371711,4.057971,1,1.585507,0
509,washington_redskins_2015,927,33,1,59,10,11,30,11,27,...,2015,washington_redskins,0.866667,0.975610,0.695495,11.124352,3.650350,1,1.293706,0


In [12]:
#Import Postgres SQL table to Superbowl Data Frame
db_superbowl_df = pd.read_sql("Select * from superbowl", engine)
db_superbowl_df.head(10)

Unnamed: 0,team_year,year
0,kansas_city_chiefs_2020,2020
1,new_england_patriots_2019,2019
2,philadelphia_eagles_2018,2018
3,new_england_patriots_2017,2017
4,denver_broncos_2016,2016
5,new_england_patriots_2015,2015
6,seattle_seahawks_2014,2014
7,baltimore_ravens_2013,2013
8,new_york_giants_2012,2012
9,green_bay_packers_2011,2011


In [13]:
#Import Postgres SQL table to unique_teams Data Frame
db_unique_teams_df = pd.read_sql("Select * from unique_teams", engine)
db_unique_teams_df.head(10)

Unnamed: 0,team
0,Tennessee Titans
1,Denver Broncos
2,Green Bay Packers
3,Pittsburgh Steelers
4,Chicago Bears
5,Philadelphia Eagles
6,Indianapolis Colts
7,Arizona Cardinals
8,Seattle Seahawks
9,Baltimore Ravens


## Include evaluation columns to DF

In [14]:
# #Add team_year column in superbowl table to evaluate with index column fronm db_dummy_df
# db_superbowl_df["team_year"] = db_superbowl_df["winner"].astype(str) +" - " + db_superbowl_df["Date"].astype(str)
# db_superbowl_df

In [15]:
# # For dummy table, include champion column "temporary use random values to fill column"
# db_dummy_df["champion"] = np.random.random_integers(0 ,1, size= (23998,1))
# db_dummy_df

## Create the ML model 

In [19]:
# Create our features
X= champion_stats_df.drop(["team_year","team", "year","Champion"], axis=1)

# Create our target
y = champion_stats_df["Champion"]

In [20]:
# Use train_test_split to create the training and testing data.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [21]:
# Create model
rf_model = RandomForestClassifier(n_estimators=128, random_state=1)
rf_model = rf_model.fit(X_train, y_train)
y_pred = rf_model.predict(X_test)
rf_model

RandomForestClassifier(n_estimators=128, random_state=1)

In [22]:
# Display the confusion matrix
cm = confusion_matrix(y_test, y_pred)

# Create a DataFrame from the confusion matrix.
cm_df = pd.DataFrame(cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])
cm_df

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,122,0
Actual 1,6,0


In [23]:
# Determine accuary score
acc_score = accuracy_score(y_test, y_pred)

In [24]:
#Print Results 
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, y_pred))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,122,0
Actual 1,6,0


Accuracy Score : 0.953125
Classification Report
              precision    recall  f1-score   support

           0       0.95      1.00      0.98       122
           1       0.00      0.00      0.00         6

    accuracy                           0.95       128
   macro avg       0.48      0.50      0.49       128
weighted avg       0.91      0.95      0.93       128



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
