In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import sklearn.preprocessing as skp
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
from sklearn.svm import SVR
from sklearn.decomposition import PCA
import sklearn.ensemble
import sklearn.linear_model
from sklearn import neighbors
from sklearn.model_selection import GridSearchCV
import sklearn.metrics as metrics
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dropout
import keras_tuner as kt
from tensorflow.keras.layers import Dropout
from tensorflow import keras

import os

# `api_helpers` is imported by temporarily switching the working directory to the
# parent of the current one (assumes that parent is the project root and that the
# kernel resolves imports relative to the working directory -- TODO confirm).
# The starting directory is remembered and restored in `finally`, so the kernel
# ends up where it started even if the import fails, and the notebook no longer
# depends on living in a folder literally named 'selected_models'.
_notebook_dir = os.getcwd()
os.chdir(os.path.dirname(_notebook_dir))
try:
    from api_helpers.game_stats_helpers import load_past_n_games
finally:
    # change os directory back
    os.chdir(_notebook_dir)


ModuleNotFoundError: No module named 'keras_tuner'

### Data Preparation

In [2]:
def get_data(csv_path="../data/all_games.csv"):
    """Load the game log CSV and derive the feature frame and encoded labels.

    Args:
        csv_path: Location of the games CSV. Defaults to the path the notebook
            has always used, so existing ``get_data()`` calls are unaffected.

    Returns:
        A tuple ``(all_games_df, X, y)`` where
        - ``all_games_df`` is the loaded frame without the ``"Unnamed: 0"``
          column, without rows containing missing values and without rows
          whose ``WL_A`` is a single space;
        - ``X`` is ``all_games_df`` minus the ``WL_A`` / ``WL_B`` columns;
        - ``y`` is ``WL_A`` passed through ``LabelEncoder.fit_transform``.
    """
    raw_games_df = pd.read_csv(csv_path, encoding="utf-8")

    # "Unnamed: 0" is only present when the CSV was written together with its
    # index; errors="ignore" keeps the load working for files saved without it.
    all_games_df = raw_games_df.drop(columns="Unnamed: 0", errors="ignore").dropna()

    # Rows whose outcome is a blank placeholder carry no usable label.
    all_games_df = all_games_df[all_games_df["WL_A"] != " "]

    X = all_games_df.drop(columns=["WL_A", "WL_B"])
    y = skp.LabelEncoder().fit_transform(all_games_df["WL_A"])
    return all_games_df, X, y

def data_prep(all_games_df, columns=["FG_PCT","FT_PCT", "OREB", "TOV", "DREB", "AST"],
              n=20, test_size=0.2, random_state=42):
    """Build a standardized train/test split from past-n-games features.

    Args:
        all_games_df: Game log frame handed to ``load_past_n_games``.
        columns: Base stat names; each is used with an ``_A`` and a ``_B``
            suffix to form the feature columns.
        n: Number of past games forwarded to ``load_past_n_games``
            (presumably a rolling window -- confirm against api_helpers).
        test_size: Fraction of rows held out for testing.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_test, y_train, y_test)``. The feature frames hold the
        ``_A`` columns followed by the ``_B`` columns, standardized with
        statistics computed on the training rows only; the targets are the
        ``WL_A`` column of the helper's output.
    """
    # Work on a copy so the shared default list can never be mutated downstream.
    columns = list(columns)
    x_columns = [column + "_A" for column in columns] + [column + "_B" for column in columns]
    y_column = "WL_A"

    n_games_df = load_past_n_games(all_games_df=all_games_df, columns=columns, n=n)
    n_games_df = n_games_df.dropna()

    # Split BEFORE scaling: fitting the scaler on all rows would leak the test
    # set's mean/variance into the features the model is trained on.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        n_games_df[x_columns],
        n_games_df[y_column],
        test_size=test_size,
        random_state=random_state,
    )

    # normalize x_data using training statistics only
    scaler = skp.StandardScaler()
    X_train = pd.DataFrame(
        scaler.fit_transform(X_train_raw), columns=x_columns, index=X_train_raw.index
    )
    X_test = pd.DataFrame(
        scaler.transform(X_test_raw), columns=x_columns, index=X_test_raw.index
    )
    # save scaler
    # joblib.dump(scaler, 'last20_scaler.bin')

    return X_train, X_test, y_train, y_test

In [3]:
# Load the cleaned game log together with its feature frame and encoded labels.
loaded_games = get_data()
all_games_df, X, y = loaded_games
print(all_games_df)

# Derive the train/test split used by the model search further down.
split_parts = data_prep(all_games_df)
X_train, X_test, y_train, y_test = split_parts

       SEASON_ID   TEAM_ID_A TEAM_ABBREVIATION_A        TEAM_NAME_A   GAME_ID  \
0          22023  1610612737                 ATL      Atlanta Hawks  22301159   
1          22023  1610612737                 ATL      Atlanta Hawks  22301147   
2          22023  1610612737                 ATL      Atlanta Hawks  22301104   
3          22023  1610612737                 ATL      Atlanta Hawks  22301076   
4          22023  1610612737                 ATL      Atlanta Hawks  22301060   
...          ...         ...                 ...                ...       ...   
52468      21996  1610612766                 CHH  Charlotte Hornets  29600141   
52469      21996  1610612766                 CHH  Charlotte Hornets  29600107   
52470      21996  1610612766                 CHH  Charlotte Hornets  29600064   
52471      21996  1610612766                 CHH  Charlotte Hornets  29600044   
52472      21996  1610612766                 CHH  Charlotte Hornets  29600016   

        GAME_DATE    MATCHU

In [5]:


def model_builder(hp, input_dim=12):
    """Build and compile the tunable feed-forward classifier for Keras Tuner.

    Args:
        hp: Hyperparameter container supplied by the tuner; the search space
            is declared through its ``Int`` / ``Choice`` / ``Float`` methods.
        input_dim: Number of input features. Defaults to 12, the width that
            ``data_prep`` yields with its default six stats (an ``_A`` and a
            ``_B`` column each); the previous hard-coded 14 did not match it.
            Pass a different value (e.g. via ``functools.partial``) when
            training on another column set.

    Returns:
        A compiled ``Sequential`` model: batch norm, then two hidden Dense
        layers of equal width, each surrounded by dropout, then a 2-unit
        output layer.
    """
    hp_units = hp.Int("units", min_value=16, max_value=128, step=32)
    hp_learning_rate = hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])

    hidden_activation = hp.Choice("activation", values=["relu", "tanh"])
    # Needs its own hyperparameter name: it previously reused "activation",
    # clashing with the hidden-layer choice, and its value was never applied
    # to the output layer.
    final_activation = hp.Choice("final_activation", values=["sigmoid", "softmax"])

    # One shared dropout rate for all three dropout layers, declared once
    # instead of re-declaring the same "dropout" hyperparameter per layer.
    dropout_rate = hp.Float("dropout", min_value=0.0, max_value=0.5, step=0.1)

    model = Sequential()
    model.add(keras.Input(shape=(input_dim,)))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))
    model.add(Dense(units=hp_units, activation=hidden_activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(units=hp_units, activation=hidden_activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(2, activation=final_activation))

    # NOTE(review): categorical_crossentropy expects one-hot targets; if the
    # labels are plain 0/1 integers, sparse_categorical_crossentropy is
    # probably what is wanted -- TODO confirm against the label encoding.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    return model




# Callback handed to every trial: monitors validation loss with a patience of 5.
stop_early = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5)

# Hyperband search over the space declared in model_builder, ranked by
# validation accuracy (higher is better).
search_objective = kt.Objective("val_accuracy", direction="max")
tuner = kt.Hyperband(
    model_builder,
    objective=search_objective,
    max_epochs=10,
    factor=3,
    directory="my_dir",
    project_name="third",
)

print(tuner)

# 20% of the training rows are held out for validation during the search.
tuner.search(
    X_train,
    y_train,
    epochs=5,
    validation_split=0.2,
    callbacks=[stop_early],
)

# Keep the single best configuration and rebuild a fresh model from it.
top_candidates = tuner.get_best_hyperparameters(num_trials=1)
best_hps = top_candidates[0]
model = tuner.hypermodel.build(best_hps)

print(best_hps)

search_summary = f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
"""
print(search_summary)

NameError: name 'kt' is not defined