In [1]:
import tensorflow as tf
import keras_tuner as kt
from sklearn.model_selection import train_test_split
import pandas as pd
tf.__version__

Using TensorFlow backend


'2.14.0'

In [2]:
data = pd.read_csv("../input_data/CSVs/connect-4.csv")
data.head()

Unnamed: 0,a1,a2,a3,a4,a5,a6,b1,b2,b3,b4,...,f4,f5,f6,g1,g2,g3,g4,g5,g6,class
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
2,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,2
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2


In [3]:
target = data.pop('class')
target.head()

0    2
1    2
2    2
3    2
4    2
Name: class, dtype: int64

In [4]:
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=42)

In [5]:
tf.random.set_seed(8)

# Model builder for the tuner to find optimal hyperparams with - in this case the amount of units within the first layer and learning rate to use
def model_builder(hp):
    model = tf.keras.Sequential()
    hp_units = hp.Int('units', min_value=128, max_value=512, step=64)

    reg_fc1 = tf.keras.layers.Dense(hp_units, input_shape=(42,), activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l=0.0001))
    reg_fc2 = tf.keras.layers.Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l=0.0001))
    reg_fc3 = tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l=0.0001))
    reg_fc4 = tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l=0.0001))
    reg_fc5 = tf.keras.layers.Dense(3, activation='softmax')
    
    model.add(reg_fc1)
    model.add(reg_fc2)
    model.add(reg_fc3)
    model.add(reg_fc4)
    model.add(reg_fc5)

    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    hp_learning_rate = hp.Choice('learning_rate', values = [0.01, 0.001, 0.0001])
    optimizer = tf.keras.optimizers.Adam(hp_learning_rate)
    model.compile(optimizer = optimizer, loss = loss, metrics = ['accuracy'])

    return model

In [7]:
# Use Hyperband tuner, which still randomly samples values, but will automatically cut poorly performing parameters short to save time and improve efficiency
tuner = kt.Hyperband(model_builder, objective='val_accuracy', max_epochs=5, project_name="Connect 4 Hyperband")

# Search for and extract the best parameters to use
tuner.search(X_train, y_train, validation_data=(X_test, y_test))
best_hps = tuner.get_best_hyperparameters()[0]

Trial 10 Complete [00h 00m 49s]
val_accuracy: 0.7520722150802612

Best val_accuracy So Far: 0.8099467158317566
Total elapsed time: 00h 04m 48s


In [8]:
# Select the best values found
best_units = best_hps.get('units')
best_units

384

In [9]:
best_lr = best_hps.get('learning_rate')
best_lr

0.001

In [10]:
# Build and train the model based on the optimal hyperparameters
model = tuner.hypermodel.build(best_hps)
model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x25cc68f08d0>

The time reduction of the *Hyperband* tuner shows how more thorough random testing can be done to test a variety of model structures while remaining viable for relatively short time constraints, with performance being very high (~81% validation accuracy with that remaining very close to the ~82% training accuracy)