# Jupyter Notebook to tune the hyperparameters of a model

In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import kerastuner as kt

np.random.seed(42)
tf.random.set_seed(42)

2021-12-24 06:59:20.808314: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
  import kerastuner as kt


### lists possible devices (CPU, GPU), used to check if GPU is recognized/exists

In [2]:
tf.config.get_visible_devices()

2021-12-24 06:59:22.338947: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-12-24 06:59:22.339769: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-12-24 06:59:22.385257: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-24 06:59:22.385385: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1080 computeCapability: 6.1
coreClock: 1.8225GHz coreCount: 20 deviceMemorySize: 7.93GiB deviceMemoryBandwidth: 298.32GiB/s
2021-12-24 06:59:22.385400: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2021-12-24 06:59:22.386576: I tensorflow/stream_executor/platform/d

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

#### preprocessing of the images applied when loading image data set from disk with tensorflows flow_from_directory

In [7]:
image_gen = keras.preprocessing.image.ImageDataGenerator(rotation_range=20,  # rotate the image 20 degrees
                                                         width_shift_range=0.2,
                                                         height_shift_range=0.2,
                                                         rescale=1 / 255,  # Rescale the image by normalzing it.
                                                         shear_range=0.15,
                                                         # Shear means cutting away part of the image (max 20%)
                                                         zoom_range=0.15,  # Zoom in by 15% max
                                                         horizontal_flip=True,  # Allow horizontal flipping
                                                         fill_mode='nearest'
                                                         # Fill in missing pixels with the nearest filled value
                                                         )
t_image_gen = keras.preprocessing.image.ImageDataGenerator()

In [8]:
# path to the image data set

train_data_path = './data/data_full/train'  #local notebook
test_data_path = './data/data_full/test'  #local notebook
validation_data_path = './data/data_full/val'  #local notebook


In [9]:
# generate training set by loading the images from their directories with flow_from_directory
# important: the folder structure has to match! i.e {train} -> {ok,def}
# at the "same time" the data augmentation is applied on the images through the ImageDataGenerator
train_image_gen = image_gen.flow_from_directory(train_data_path,
                                                target_size=(224, 224),
                                                class_mode='binary')

Found 48271 images belonging to 2 classes.


In [10]:
# generate validation set by loading the images from their directories with flow_from_directory
# important: the folder structure has to match! i.e {validation} -> {ok,def}
# at the "same time" the data augmentation is applied on the images through the ImageDataGenerator

valid_image_gen = t_image_gen.flow_from_directory(validation_data_path,
                                                target_size=(224, 224),
                                                class_mode='binary')

Found 6033 images belonging to 2 classes.


In [11]:


test_image_gen = t_image_gen.flow_from_directory(test_data_path,
                                               target_size=(224, 224),
                                               class_mode='binary')

Found 6036 images belonging to 2 classes.


### used to load saved model with its weights

In [12]:
def load_model(model_path, weight_path):
    # load json and create model
    file = open(model_path, 'r')
    model_json = file.read()
    file.close()
    loaded_model = keras.models.model_from_json(model_json)
    # load weights
    loaded_model.load_weights(weight_path)
    optimizer = keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999)
    loaded_model.compile(loss="binary_crossentropy", optimizer=optimizer,
                         metrics=['accuracy', 'Recall', 'Precision', 'AUC'])
    return loaded_model

### used to build the base model using predefined architectures
currently: vgg16, xception, resnet

In [13]:
def build_base_model(architecture, weights):
    input = tf.keras.Input(shape=(224, 224, 3))
    if architecture == 'vgg16':
        return tf.keras.applications.vgg16.VGG16(weights=weights, include_top=False, input_tensor=input)
    if architecture == 'xception':
        return tf.keras.applications.xception.Xception(weights=weights, include_top=False, input_tensor=input)
    if architecture == 'resnet':
        return tf.keras.applications.resnet.ResNet50(weights=weights, include_top=False, input_tensor=input)

### builds the model for hyperparameter tuning of the first pretraining step

In [14]:
def build_model(hp):
    base_model = build_base_model('vgg16', 'imagenet')
    flat = keras.layers.Flatten(name='flatten')(base_model.output)
    dense_1 = keras.layers.Dense(units=hp.Int('units', min_value=1000, max_value=3000, step=400))(flat)
    dropout = keras.layers.Dropout(rate=hp.Float('dropout_1', min_value=0.0, max_value=0.5, default=0.25, step=0.05, ))(
        dense_1)
    batch = keras.layers.BatchNormalization()(dropout)
    output = keras.layers.Dense(1, activation='sigmoid')(batch)
    model = tf.keras.Model(base_model.input, output)
    for layer in base_model.layers:
        layer.trainable = False

    learning_rate = hp.Float('learning_rate', min_value=1e-5, max_value=1e-2, sampling='LOG', default=1e-3)
    beta_1 = hp.Float("beta_1", min_value=0.5, max_value=0.9, step=0.1, default=0.9)
    beta_2 = hp.Float("beta_2", min_value=0.800, max_value=0.999, step=0.001, default=0.999)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate,beta_1=beta_1,beta_2=beta_2),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return model

### defines the tuner

In [15]:
tuner = kt.Hyperband(
    hypermodel=build_model,
    hyperband_iterations=2,
    objective='val_accuracy',
    seed=42,
    max_epochs=5,
    executions_per_trial=1,
    directory='vgg16_tuner_v2',
    project_name='first_training_step'
)

2021-12-24 07:00:05.580367: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-12-24 07:00:05.580825: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-24 07:00:05.580994: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1080 computeCapability: 6.1
coreClock: 1.8225GHz coreCount: 20 deviceMemorySize: 7.93GiB deviceMemoryBandwidth: 298.32GiB/s
2021-12-24 07:00:05.581029: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library li

### uses the tuner with the defined parameters and defined search spaces to find the best parameter

In [None]:
tuner.search(train_image_gen, epochs=5, validation_data=valid_image_gen)


Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
units             |2200              |?                 
dropout_1         |0                 |?                 
learning_rate     |0.00016812        |?                 
beta_1            |0.6               |?                 
beta_2            |0.977             |?                 
tuner/epochs      |2                 |?                 
tuner/initial_e...|0                 |?                 
tuner/bracket     |1                 |?                 
tuner/round       |0                 |?                 



2021-12-24 07:00:10.178544: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-12-24 07:00:10.197084: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 3999980000 Hz


Epoch 1/2


2021-12-24 07:00:10.754236: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2021-12-24 07:00:11.088069: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7
2021-12-24 07:00:11.599398: W tensorflow/stream_executor/gpu/asm_compiler.cc:63] Running ptxas --version returned 256
2021-12-24 07:00:11.620887: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: ptxas exited with non-zero error code 256, output: 
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.




### get the best hyperparameters found by the tuner

In [15]:
# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The best number of units is {best_hps.get('units')}, the best value for the learning rate is {best_hps.get('learning_rate')} and the best rate for dropout is {best_hps.get('dropout_1')} and the best rate for beta1 is {best_hps.get('beta_1')} and the best rate for beta2 is {best_hps.get('beta_2')}
""")


The best number of units is 1400, the best value for the learning rate is 6.89841179756804e-05 and the best rate for dropout is 0.05 and the best rate for beta1 is 0.8999999999999999 and the best rate for beta2 is 0.9650000000000002



## build model for finding best hyperparameters for second pretraining step

In [11]:
def call_existing_model(lr, beta_1, beta_2):
    model = load_model('saved_models/first_train_step/tuned_params/full_data/vgg16_first_train_step.json', 'saved_models/first_train_step/tuned_params/full_data/vgg16_first_train_step_weights.h5')
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr, beta_1=beta_1, beta_2=beta_2),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return model


def build_model_for_second_step(hp):
    lr = hp.Float('learning_rate', min_value=1e-5, max_value=1e-2, sampling='LOG', default=1e-3)
    beta_1 = hp.Float("beta_1", min_value=0.5, max_value=0.9, step=0.1, default=0.9)
    beta_2 = hp.Float("beta_2", min_value=0.800, max_value=0.999, step=0.001, default=0.999)
    model = call_existing_model(lr=lr, beta_1=beta_1, beta_2=beta_2)
    return model


build_model_for_second_step(kt.HyperParameters())

2021-12-23 08:02:36.526571: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-12-23 08:02:36.526961: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-23 08:02:36.527094: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1080 computeCapability: 6.1
coreClock: 1.8225GHz coreCount: 20 deviceMemorySize: 7.93GiB deviceMemoryBandwidth: 298.32GiB/s
2021-12-23 08:02:36.527121: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library li

<tensorflow.python.keras.engine.functional.Functional at 0x7f0f940f3d30>

### define tuner

In [12]:
tuner = kt.Hyperband(
    hypermodel=build_model_for_second_step,
    objective="val_accuracy",
    hyperband_iterations=2,
    executions_per_trial=1,
    directory="vgg16_tuner",
    project_name="second_training_step",
)

### define early stoping callback

In [13]:
custom_early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)

### look for the best hyperparameters

In [14]:
tuner.search(train_image_gen, epochs=100, validation_data=valid_image_gen, callbacks=[custom_early_stopping])

Trial 43 Complete [00h 20m 10s]
val_accuracy: 0.8652411699295044

Best val_accuracy So Far: 0.9996684789657593
Total elapsed time: 15h 07m 02s

Search: Running Trial #44

Hyperparameter    |Value             |Best Value So Far 
learning_rate     |0.00031788        |1.9508e-05        
beta_1            |0.5               |0.7               
beta_2            |0.944             |0.851             
tuner/epochs      |2                 |2                 
tuner/initial_e...|0                 |0                 
tuner/bracket     |4                 |4                 
tuner/round       |0                 |0                 

Epoch 1/2
 150/1509 [=>............................] - ETA: 8:31 - loss: 0.1775 - accuracy: 0.9453

KeyboardInterrupt: 

In [16]:
# Get the top 2 models.
# models = tuner.get_best_models(num_models=2)
# best_model = models[0]
# Build the model.
# Needed for `Sequential` without specified `input_shape`.
# best_model.build(input_shape=(None, 224, 224))
# best_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [29]:
tuner.results_summary()

Results summary
Results in hp_tuner/run3
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
lr: 0.00035094611635709755
beta_1: 0.7999999999999999
beta_2: 0.8440000000000001
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 4
tuner/round: 0
Score: 0.9949949979782104
Trial summary
Hyperparameters:
lr: 0.00036937225014755195
beta_1: 0.8999999999999999
beta_2: 0.9590000000000002
tuner/epochs: 34
tuner/initial_epoch: 12
tuner/bracket: 4
tuner/round: 3
tuner/trial_id: 55a43167e971016004cb478df12e1b69
Score: 0.9939939975738525
Trial summary
Hyperparameters:
lr: 0.0005154994394291512
beta_1: 0.8999999999999999
beta_2: 0.9150000000000001
tuner/epochs: 34
tuner/initial_epoch: 12
tuner/bracket: 4
tuner/round: 3
tuner/trial_id: ef2e139c3a403f908682720e74f63ac1
Score: 0.9929929971694946
Trial summary
Hyperparameters:
lr: 0.00036937225014755195
beta_1: 0.8999999999999999
beta_2: 0.9590000000000002
tuner/epochs: 100
tuner/initial_epoch: 34
tuner/

In [11]:
# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The value for the learning rate is {best_hps.get('learning_rate')}, the best value for beta_1 is {best_hps.get('beta_1')} and the best rate beta_2 is {best_hps.get('beta_2')}
""")