# Tuning hyperparameters automatically.

Design of a Convolutional Neural Network (CNN) trained to classify the digits of the MNIST dataset, using Keras Tuner to randomly explore the following hyperparameters:


*   The network may have between 1 and 3 layer blocks (Conv2D+MaxPooling).
*   The Conv2D layers may have filters of size 3, 5 or 7 to choose from.
*   Conv2D layers may have a sigmoid or relu activation function.
*   The learning rate is chosen from the list [0.5, 0.1, 0.05, 0.01, 0.005, 0.001].

EarlyStopping is configured to monitor val_acc and stop training once it stops improving.


In [None]:
# Install the Keras Tuner package
!pip install keras_tuner -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m169.6/169.6 KB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import numpy as np
import keras_tuner as kt
from tensorflow                  import keras
from tensorflow.keras.layers     import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.utils      import to_categorical
from tensorflow.keras            import Sequential
from sklearn.model_selection     import train_test_split

In [None]:
# Load the MNIST subset: one sample per row, with the label in the first
# column and the pixel values in the remaining columns.
mnist = np.genfromtxt('./sample_data/mnist_train_small.csv', delimiter=',')

# Separate the labels from the images
X = mnist[:, 1:]
Y = mnist[:, 0:1]

# Scale the pixel values (assumed to be in 0-255) and one-hot encode the
# labels. num_classes is fixed to the 10 digits so the encoding width does
# not depend on which labels happen to be present in the file.
Xn = X / 255
Yn = to_categorical(Y, num_classes=10)

# We use Sklearn's data splitter to train/test
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples to monitor overfitting. A fixed random_state
# makes the partition (and therefore the tuning results) reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    Xn, Yn, test_size=0.3, random_state=42)

In [None]:
# Organize the flat pixel rows as (samples, height, width, channels) tensors.
# -1 lets NumPy infer the number of samples, so this keeps working if the
# dataset size or the train/test ratio changes.
X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

In [None]:
def build_model(hp):
    """Build and compile a small CNN whose hyperparameters are drawn from `hp`.

    Hyperparameters registered on `hp`:
      * "Red_Neuronal_Convolucional": number of Conv2D + MaxPooling2D
        blocks, an integer between 1 and 3.
      * "filters": number of filters of each Conv2D layer (3, 5 or 7).
      * "activation": Conv2D activation, "sigmoid" or "relu".
      * "learning_rate": learning rate passed to the Adam optimizer.

    "filters" and "activation" are registered under a single name each, so
    all convolutional blocks of one model share the same value.

    Args:
        hp: hyperparameter container exposing `Int` and `Choice`
            (a `keras_tuner.HyperParameters` instance in this notebook).

    Returns:
        The compiled `Sequential` model for 28x28x1 inputs, ending in a
        10-unit softmax layer.
    """
    model = Sequential()

    # Stack as many Conv2D + MaxPooling2D blocks as the tuner decides.
    n_blocks = hp.Int("Red_Neuronal_Convolucional", 1, 3)
    for block_idx in range(n_blocks):
        # Only the first layer of a Sequential model needs the input shape.
        first_layer_kwargs = {"input_shape": (28, 28, 1)} if block_idx == 0 else {}

        # NOTE(review): the notebook header speaks of "filters of size 3, 5
        # or 7", which reads like a kernel size; here the choice drives the
        # number of filters and kernel_size stays at 3 - confirm the intent.
        model.add(Conv2D(filters=hp.Choice('filters', [3, 5, 7]),
                         kernel_size=3,
                         padding="same",
                         activation=hp.Choice('activation', ["sigmoid", "relu"]),
                         **first_layer_kwargs))

        # Halve the spatial resolution after each convolution.
        model.add(MaxPooling2D())

    # Convert the feature maps to a vector
    model.add(Flatten())

    # Output layer: one softmax probability per digit class.
    model.add(Dense(units=10, activation='softmax'))

    # Categorical cross-entropy is the loss matching a softmax output with
    # one-hot targets; the learning rate is picked from a fixed list.
    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', [0.5, 0.1, 0.05, 0.01, 0.005, 0.001])),
                  loss="categorical_crossentropy",
                  metrics=["acc"])

    return model


In [None]:
# Sanity check before launching the search: build one model from a fresh
# HyperParameters container and make sure it accepts our data.
hp = kt.HyperParameters()
model = build_model(hp)

# Run a small batch through the model first, then print its layer summary.
model(X_train[:100])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 3)         30        
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 3)        0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 588)               0         
                                                                 
 dense (Dense)               (None, 10)                5890      
                                                                 
Total params: 5,920
Trainable params: 5,920
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Configure Keras Tuner to run a random search of up to 100 trials
# (hyperparameter combinations), keeping the combination that
# maximizes the validation accuracy.
# The objective direction is stated explicitly instead of being
# inferred from the metric name.

tuner = kt.RandomSearch(
    build_model,
    max_trials=100,
    overwrite=True,
    objective=kt.Objective("val_acc", direction="max"),
    directory="/tmp/tb")

# Run the search on our training data. Only a subset of the
# training set is used to keep the per-trial training time low
# in this phase; 20% of that subset is held out for validation.

tuner.search(
    X_train[:5000],
    Y_train[:5000],
    validation_split=0.2,
    epochs=25,
)

Trial 100 Complete [00h 01m 24s]
val_acc: 0.9330000281333923

Best val_acc So Far: 0.9710000157356262
Total elapsed time: 01h 52m 48s


In [None]:
# Retrieve the best model found across all the trials and keep
# working with it like with any other Keras model: here we train it
# on the full training set, validating against the held-out test set.

best_model = tuner.get_best_models(num_models=1)[0]
best_model.fit(
    X_train,
    Y_train,
    epochs=5,
    validation_data=(X_test, Y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fce35ce94c0>

In [None]:
# Show a detailed summary of the model with the hyperparameters tuned by Keras Tuner.
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line).
best_model.summary()

# Print the hyperparameter values selected as the best by Keras Tuner during the random search.
print(tuner.get_best_hyperparameters()[0].values)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 7)         70        
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 7)        0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 7)         448       
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 7, 7, 7)          0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 343)               0         
                                                                 
 dense (Dense)               (None, 10)                3

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once the validation accuracy has not improved for 3
# consecutive epochs. mode="max" states that higher is better, and
# restore_best_weights=True rolls the model back to its best epoch instead
# of leaving it with the weights of the last (worse) epochs.
early_stop = EarlyStopping(monitor="val_acc", mode="max", patience=3,
                           restore_best_weights=True)

# Train the model; 100 is only an upper bound on the number of epochs.
best_model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
               callbacks=[early_stop], epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100


<keras.callbacks.History at 0x7fce35ff1fd0>