In [21]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Make NumPy printouts easier to read.
np.set_printoptions(precision=3, suppress=True)

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
import keras_tuner as kt

import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots
from  IPython import display

import pathlib
import shutil
import tempfile

print(tf.__version__)

2.7.0


In [192]:
from numpy.random import seed
seed(42)
tf.random.set_seed(42)

In [22]:
data = pd.read_csv("./data/nba_final_data.csv")
data = data.sample(frac=1) # Shuffle data
y = data['PLUS_MINUS_HOME']
X = data.drop(['GAME_ID', "TEAM_ID_HOME", "TEAM_ID_AWAY", "GAME_DATE", "SEASON",
               "PLUS_MINUS_HOME", "MIN_HOME", 'WL_Home_modified'], axis=1)
print(y.shape)
print(X.shape)
print(X.columns)

(16635,)
(16635, 63)
Index(['FGA_HOME', 'FG_PCT_HOME', 'FG3A_HOME', 'FG3_PCT_HOME', 'FTA_HOME',
       'FT_PCT_HOME', 'OREB_HOME', 'DREB_HOME', 'AST_HOME', 'STL_HOME',
       'BLK_HOME', 'TOV_HOME', 'PF_HOME', 'FGA_AWAY', 'FG_PCT_AWAY',
       'FG3A_AWAY', 'FG3_PCT_AWAY', 'FTA_AWAY', 'FT_PCT_AWAY', 'OREB_AWAY',
       'DREB_AWAY', 'AST_AWAY', 'STL_AWAY', 'BLK_AWAY', 'TOV_AWAY', 'PF_AWAY',
       'PTS_PAINT_HOME', 'PTS_2ND_CHANCE_HOME', 'PTS_FB_HOME',
       'TEAM_TURNOVERS_HOME', 'TOTAL_TURNOVERS_HOME', 'TEAM_REBOUNDS_HOME',
       'PTS_OFF_TO_HOME', 'PTS_PAINT_AWAY', 'PTS_2ND_CHANCE_AWAY',
       'PTS_FB_AWAY', 'TEAM_TURNOVERS_AWAY', 'TOTAL_TURNOVERS_AWAY',
       'TEAM_REBOUNDS_AWAY', 'cluster_0h', 'cluster_1h', 'cluster_2h',
       'cluster_3h', 'cluster_4h', 'cluster_5h', 'cluster_6h', 'cluster_7h',
       'cluster_8h', 'cluster_9h', 'cluster_10h', 'cluster_11h', 'cluster_0a',
       'cluster_1a', 'cluster_2a', 'cluster_3a', 'cluster_4a', 'cluster_5a',
       'cluster_6a', 'cluster

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [24]:
# Define a normalizer for the data
normalizer =tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(np.array(X_train))

first = np.array(X_train[:1])

with np.printoptions(precision=2, suppress=True):
  print('First example: \n', first)
  print()
  print('Normalized: \n', normalizer(first).numpy())

First example: 
 [[84.6   0.46 27.9   0.35 22.5   0.72 11.3  35.4  22.6   6.5   3.7  14.2
  19.9  85.8   0.44 22.3   0.35 22.8   0.73  9.6  31.7  19.7   8.4   3.8
  11.8  18.3  40.6  16.7  10.4   0.4  14.2   9.5  16.8  39.6  11.7   7.9
   0.7  11.8   8.1   0.    0.    1.    0.    1.    4.    0.    0.    0.
   3.    0.    1.    2.    0.    1.    0.    0.    1.    0.    0.    0.
   5.    1.    0.  ]]

Normalized: 
 [[ 0.5   0.13  1.12 -0.25 -0.62 -0.98  0.16  1.36  0.08 -0.98 -1.28 -0.17
  -0.36  0.79 -0.28  0.23 -0.1  -0.21 -1.09 -0.84  0.22 -0.55  0.93 -0.86
  -1.97 -1.55 -0.43  1.82 -0.81 -0.82 -0.17  0.86  0.39 -0.25 -0.72 -1.68
   0.32 -1.97 -0.28 -1.03 -0.01  0.04  0.    1.44  0.99 -1.07  0.   -0.8
  -0.75 -0.98  0.77  1.33  0.    0.04  0.   -0.6  -1.09 -1.06  0.   -0.8
   0.38  0.69 -0.79]]


In [25]:
# Just an example of a normalizer 
# fga_model = tf.keras.Sequential([fga_normalizer,
#                                 layers.Dense(units=1)])

### Hyperparameter Tuning with Hyperband


In [154]:
# Define log dir 
logdir = pathlib.Path(tempfile.mkdtemp())/"tensorboard_logs"
shutil.rmtree(logdir, ignore_errors=True)

In [155]:
def model_builder(hp):
    model = keras.Sequential()
    model.add(normalizer)
    # hyperparameter boolean for performing dropout 
    dropout = hp.Boolean("dropout") 
    # hyperparameter for percent of units to dropout 
    if dropout:
        drop_percent = hp.Choice("drop_percent", [0.1, 0.25, 0.5])
        
    kernel_regularizer= regularizers.l2(0.001)
    
    # hyperparameter for choice of regularization strength
    regularization = hp.Choice("regularization_strength", [0.0001, 0.001, 0.01])
    
    model.add(keras.layers.Flatten(input_shape=(len(X_train.columns),)))
    for i in range(hp.Int("num_layers", 1, 3)):
        hp_units = hp.Int('units', min_value=8, max_value=64, step=4)
        model.add(
            keras.layers.Dense(units=hp_units,
                               activation='elu',
                               kernel_regularizer = regularizers.l2(regularization))
        )
        # Add dropout layer if dropout hyperparameter is True
        if dropout:
            keras.layers.Dropout(drop_percent)
            
    model.add(keras.layers.Dense(1)) # output layer
    
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
            loss='mse',
            metrics=['mse'])
    return model

Bayesian optimization for parameter selection, used below, provides better performance than kt.Hyperband()

In [159]:
# tuner = kt.Hyperband(model_builder,
#                      objective='val_mse',
#                      max_epochs=5000,
#                      factor=3,
#                      directory='HP_Tuner_log',
#                      project_name='NBAPredict',
#                      overwrite=True)

In [170]:
def get_callbacks(name):
    return [
    tfdocs.modeling.EpochDots(),
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=50),
    tf.keras.callbacks.TensorBoard(logdir/name),
    ]

In [161]:
# tuner.search(X_train, y_train, validation_split=0.2, callbacks=get_callbacks('t'))

Trial 1805 Complete [00h 00m 02s]
val_mse: 174.9766845703125

Best val_mse So Far: 150.0090789794922
Total elapsed time: 03h 31m 57s
INFO:tensorflow:Oracle triggered exit
INFO:tensorflow:Assets written to: best_model\assets


In [177]:
# # Get the optimal hyperparameter model
# best_hps=tuner.get_best_hyperparameters()[0]

# # Build and save it for future use 
# best_model = tuner.hypermodel.build(best_hps)
# best_model.save('best_model')

INFO:tensorflow:Assets written to: best_model\assets


In [178]:
# print(f"""
# The hyperparameter search is complete.
# Num_layers: {best_hps.get('num_layers')}
# Num_units: {best_hps.get('units')}
# Dropout: {best_hps.get('dropout')}
# Dropout rate: {best_hps.get('drop_percent')}
# Regularization strength: {best_hps.get("regularization_strength")}
# Learning Rate: {best_hps.get('learning_rate')}
# Epochs: {best_hps.get('tuner/epochs')}
# """)


The hyperparameter search is complete.
Num_layers: 2
Num_units: 28
Dropout: False
Dropout rate: 0.1
Regularization strength: 0.01
Learning Rate: 0.01
Epochs: 3



In [179]:
# best_model.fit(X_train, y_train, epochs=100, validation_split=0.2, callbacks=get_callbacks('best_model_hyperband'))

Epoch 1/100
Epoch: 0, loss:163.4794,  mse:162.7029,  val_loss:155.6715,  val_mse:154.8058,  
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100


<keras.callbacks.History at 0x228d86ba850>

### Hyperparameter Tuning With Bayesian Optimization

In [194]:
BO_tuner = kt.BayesianOptimization(model_builder,
                                  objective='val_mse',
                                  max_trials = 100,
                                  seed=42,
                                  overwrite=True
                                  )

In [195]:
BO_tuner.search(X_train, y_train, epochs=10, validation_split=0.2, callbacks=get_callbacks('bo_tuner'))

Trial 100 Complete [00h 00m 06s]
val_mse: 151.0582275390625

Best val_mse So Far: 148.90658569335938
Total elapsed time: 00h 12m 45s
INFO:tensorflow:Oracle triggered exit


In [217]:
# Get the optimal hyperparameter model
best_hps=BO_tuner.get_best_hyperparameters()[0]

# Build and save it for future use 
best_bo_model = BO_tuner.hypermodel.build(best_hps)
best_bo_model.save('bo_best_model')

INFO:tensorflow:Assets written to: bo_best_model\assets


In [218]:
print(f"""
The hyperparameter search is complete.
Num_layers: {best_hps.get('num_layers')}
Num_units: {best_hps.get('units')}
Dropout: {best_hps.get('dropout')}
Dropout rate: {best_hps.get('drop_percent')}
Regularization strength: {best_hps.get("regularization_strength")}
Learning Rate: {best_hps.get('learning_rate')}
""")


The hyperparameter search is complete.
Num_layers: 2
Num_units: 16
Dropout: False
Dropout rate: 0.5
Regularization strength: 0.01
Learning Rate: 0.01



In [220]:
history = best_bo_model.fit(X_train, y_train, epochs=50, validation_split=0.2, callbacks=get_callbacks('best_model_hyperband'))

Epoch 1/50
Epoch: 0, loss:162.9655,  mse:162.4525,  val_loss:153.0750,  val_mse:152.4841,  
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [224]:
validation_mse = history.history['val_mse']
min_mse = min(validation_mse)
min_idx = validation_mse.index(min_mse)
num_epochs = min_idx

In [225]:
best_bo_model = BO_tuner.hypermodel.build(best_hps)
history=best_bo_model.fit(X_train, y_train, epochs=num_epochs, callbacks=get_callbacks('best_model_hyperband'))
best_bo_model.save('final_model')

Epoch 1/9
Epoch: 0, loss:161.5488,  mse:161.0164,  
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9
INFO:tensorflow:Assets written to: final_model\assets


In [228]:
test_results = best_bo_model.evaluate(X_test, y_test)
dict(zip(best_bo_model.metrics_names, test_results))



{'loss': 150.54806518554688, 'mse': 147.68775939941406}