In [1]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.6-py3-none-any.whl (128 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/128.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m122.9/128.9 kB[0m [31m3.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.9/128.9 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.6 kt-legacy-1.0.5


In [2]:
# Import required libraries
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import sklearn as skl
import tensorflow as tf

# All Data

In [3]:
file_path = Path("/content/final_data.csv")
df_nfl = pd.read_csv(file_path)

df_nfl.head()

Unnamed: 0.1,Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_favorite_id,spread_favorite,over_under_line,stadium_neutral,weather_temperature,...,team_division_away,team_conference_pre2002_away,team_division_pre2002_away,stadium_name,stadium_type,stadium_weather_type,stadium_surface,stadium_latitude,stadium_longitude,stadium_elevation
0,0,921966,1966,1,0,-1,-1.0,-1.0,0,83.0,...,3,0,3,75,1,-1,-1,25.7905,-80.3163,8.8
1,1,931966,1966,1,0,-1,-1.0,-1.0,0,81.0,...,3,0,3,87,1,-1,-1,29.716389,-95.409167,-1.0
2,2,941966,1966,1,0,-1,-1.0,-1.0,0,70.0,...,2,0,2,10,1,-1,-1,37.725353,-122.445496,-1.0
3,3,991966,1966,2,0,-1,-1.0,-1.0,0,82.0,...,2,1,2,75,1,-1,-1,25.7905,-80.3163,8.8
4,4,9101966,1966,1,0,-1,-1.0,-1.0,0,64.0,...,-1,0,2,50,1,-1,-1,44.501389,-88.062222,209.4


In [4]:
df_nfl.columns

Index(['Unnamed: 0', 'schedule_date', 'schedule_season', 'schedule_week',
       'schedule_playoff', 'team_favorite_id', 'spread_favorite',
       'over_under_line', 'stadium_neutral', 'weather_temperature',
       'weather_wind_mph', 'weather_humidity', 'weather_detail',
       'winner(0_away,1_home,2_tie)', 'team_name_home', 'team_conference_home',
       'team_division_home', 'team_conference_pre2002_home',
       'team_division_pre2002_home', 'team_name_away', 'team_conference_away',
       'team_division_away', 'team_conference_pre2002_away',
       'team_division_pre2002_away', 'stadium_name', 'stadium_type',
       'stadium_weather_type', 'stadium_surface', 'stadium_latitude',
       'stadium_longitude', 'stadium_elevation'],
      dtype='object')

In [5]:
y = df_nfl['winner(0_away,1_home,2_tie)']
X = df_nfl.drop(columns = ['winner(0_away,1_home,2_tie)'])

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    random_state=1,
                                                    stratify=y)
X_train.shape

(10341, 30)

In [7]:
# Create scaler instance
X_scaler = skl.preprocessing.StandardScaler()

# Fit the scaler
X_scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=30))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))

    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return nn_model

In [9]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

In [10]:
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

Trial 60 Complete [00h 00m 22s]
val_accuracy: 0.5720916986465454

Best val_accuracy So Far: 0.6106759309768677
Total elapsed time: 00h 10m 40s


In [11]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'tanh',
 'first_units': 9,
 'num_layers': 2,
 'units_0': 5,
 'units_1': 3,
 'units_2': 5,
 'units_3': 3,
 'units_4': 7,
 'units_5': 3,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 0,
 'tuner/round': 0}

In [12]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

108/108 - 0s - loss: 0.6492 - accuracy: 0.6107 - 347ms/epoch - 3ms/step
Loss: 0.6491859555244446, Accuracy: 0.6106759309768677


# Between 2000 - 2022 Data

In [13]:
file_path = Path("/content/2000sData.csv")
df_2000snfl = pd.read_csv(file_path)

df_2000snfl.head()

Unnamed: 0.1,Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_favorite_id,spread_favorite,over_under_line,stadium_neutral,weather_temperature,...,team_division_away,team_conference_pre2002_away,team_division_pre2002_away,stadium_name,stadium_type,stadium_weather_type,stadium_surface,stadium_latitude,stadium_longitude,stadium_elevation
0,7354,932000,2000,1,0,1,-6.5,46.5,0,72.0,...,8,1,8,35,0,3,-1,33.758,-84.401,-1.0
1,7355,932000,2000,1,0,5,-1.0,40.0,0,70.0,...,1,0,4,82,1,-1,-1,42.889,-78.8901,178.0
2,7356,932000,2000,1,0,17,-10.5,38.5,0,75.0,...,1,0,4,29,1,-1,-1,41.506111,-81.699444,238.0
3,7357,932000,2000,1,0,10,-6.0,39.5,0,95.0,...,7,1,7,102,1,-1,-1,32.84,-96.911,163.4
4,7358,932000,2000,1,0,13,-2.5,44.0,0,69.0,...,2,1,2,50,1,-1,-1,44.501389,-88.062222,209.4


In [14]:
y = df_2000snfl['winner(0_away,1_home,2_tie)']
X = df_2000snfl.drop(columns = ['winner(0_away,1_home,2_tie)'])

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    random_state=1,
                                                    stratify=y)
X_train.shape

(4621, 30)

In [16]:
# Create scaler instance
X_scaler = skl.preprocessing.StandardScaler()

# Fit the scaler
X_scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [28]:
tuner2 = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2,
    overwrite=True)

In [29]:
tuner2.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

Trial 60 Complete [00h 00m 23s]
val_accuracy: 0.5639195442199707

Best val_accuracy So Far: 0.5976638793945312
Total elapsed time: 00h 07m 00s


In [30]:
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'tanh',
 'first_units': 9,
 'num_layers': 2,
 'units_0': 5,
 'units_1': 3,
 'units_2': 5,
 'units_3': 3,
 'units_4': 7,
 'units_5': 3,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 0,
 'tuner/round': 0}

In [32]:
best_model = tuner2.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

49/49 - 0s - loss: 0.6642 - accuracy: 0.5977 - 275ms/epoch - 6ms/step
Loss: 0.6641671061515808, Accuracy: 0.5976638793945312
