# L09 05/04/2024

# Hyperparameter tuning: Keras Tuner implementation

Hyperparameter tuning looks for the best parameters in your CNN implementation. This is done by optimizing a metric based on the validation sample.

With a dynamic model we can search for the best values of the following kinds of hyperparameters:
- Integer hyperparameters (e.g. the number of filters or the kernel size) with `hp.Int()`
- Categorical choices (e.g. which activation function to use) with `hp.Choice()`
- Float hyperparameters (e.g. the learning rate) with `hp.Float()`
- Boolean switches (e.g. whether to add or remove a layer) with `hp.Boolean()`

In [None]:
import keras_tuner as kt # the new boy in town

from keras.layers import Conv2D, MaxPooling2D, Input, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from keras.callbacks import EarlyStopping
from astropy.io import fits
from sklearn.model_selection import train_test_split
from astropy.utils.data import download_file
import numpy as np

In [None]:
%%time
# Build the download URL for the FITS file selected by `version`.
version = 'pristine'
file_url = (
    'https://archive.stsci.edu/hlsps/deepmerge/'
    'hlsp_deepmerge_hst-jwst_acs-wfc3-nircam_illustris-z2_f814w-f160w-f356w_v1_sim-'
    + version + '.fits'
)

# Open the (cached) download inside a context manager so the FITS file handle
# is closed once the arrays are extracted. astype() returns new arrays, so X
# and y remain valid after the file is closed.
with fits.open(download_file(file_url, cache=True, show_progress=True)) as hdu:
    X = np.asarray(hdu[0].data).astype('float32')  # HDU 0: model inputs
    y = np.asarray(hdu[1].data).astype('float32')  # HDU 1: targets

# Hold out 30% of the samples, then split that hold-out evenly into the test
# and validation sets. The fixed seed keeps the split reproducible.
random_state = 42
X_train, X_valtest, y_train, y_valtest = train_test_split(
    X, y, test_size=0.3, random_state=random_state)
X_test, X_val, y_test, y_val = train_test_split(
    X_valtest, y_valtest, test_size=0.5, random_state=random_state)

# Image side length, read from axis 2 of the training array.
imsize = np.shape(X_train)[2]

# NOTE(review): reshape() reinterprets the existing element order; it does not
# move axes. If the cube is stored channel-first (N, 3, H, W), this mixes the
# channels into the spatial axes and np.moveaxis(X, 1, -1) may be what is
# intended -- confirm against the data layout. Behaviour is left unchanged here.
X_train = X_train.reshape(-1, imsize, imsize, 3)
X_valtest = X_valtest.reshape(-1, imsize, imsize, 3)

X_val = X_val.reshape(-1, imsize, imsize, 3)
X_test = X_test.reshape(-1, imsize, imsize, 3)

### First we define a dynamic model:

In [None]:
def _add_conv_block(model, hp, index, min_filters, max_filters):
    """Append one tunable Conv2D -> BatchNorm -> MaxPool -> Dropout stage to *model*.

    Registers two integer hyperparameters on *hp*:
    ``kernel_<index>`` (number of filters, searched in
    [min_filters, max_filters] with step 2) and ``kernel_size_<index>``
    (side of the square kernel, searched in [3, 11] with step 2).
    """
    filters = hp.Int(f'kernel_{index}',
                     min_value=min_filters,
                     max_value=max_filters,
                     step=2)
    kernel_size = hp.Int(f'kernel_size_{index}',
                         min_value=3,
                         max_value=11,
                         step=2)
    model.add(Conv2D(filters, (kernel_size, kernel_size), strides=(1, 1), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=None, padding='valid'))
    model.add(Dropout(0.25))


def build_model(hp):
    """Build and compile a CNN whose architecture is driven by *hp*.

    The model stacks three convolutional stages (filter-count ranges 4-10,
    8-20 and 16-40), followed by Dense(64), Dense(32) and a single sigmoid
    output unit. The Adam learning rate is chosen from {1e-2, 1e-3, 1e-4}.

    Hyperparameters are registered in this order: kernel_1, kernel_size_1,
    kernel_2, kernel_size_2, kernel_3, kernel_size_3, learning_rate.

    Args:
        hp: hyperparameter container providing ``Int`` and ``Choice``.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, accuracy metric).
    """
    model = Sequential()

    # (min_filters, max_filters) for convolutional stages 1, 2 and 3.
    filter_ranges = [(4, 10), (8, 20), (16, 40)]
    for index, (min_filters, max_filters) in enumerate(filter_ranges, start=1):
        _add_conv_block(model, hp, index, min_filters, max_filters)

    # Fully connected head.
    model.add(Flatten())
    # NOTE(review): softmax on hidden Dense layers is unusual; kept as in the
    # original -- confirm it is intentional.
    model.add(Dense(64, activation='softmax'))
    model.add(Dense(32, activation='softmax'))
    model.add(Dense(1, activation='sigmoid'))  # output layer

    lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    opt = Adam(learning_rate=lr)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

    return model
        
        
    

In [None]:
# Training settings applied to every trial.
nb_epochs, batch_size, shuffle = 100, 128, True

# Callback that watches the validation loss with a patience of 5 epochs.
stop_early = EarlyStopping(patience=5, monitor='val_loss')

# Names of the tunable hyperparameters: two per convolutional layer
# (filter count and kernel size), plus the learning rate.
hyperpar_names = [
    name
    for layer in (1, 2, 3)
    for name in (f'kernel_{layer}', f'kernel_size_{layer}')
] + ['learning_rate']

# Random-search tuner: 25 trials, each executed 3 times, scored on val_loss.
# Results are written under my_dir/hypeparameter_optimization.
tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective='val_loss',
    max_trials=25,
    executions_per_trial=3,
    directory='my_dir',
    project_name='hypeparameter_optimization',
)

# Print the registered search space.
tuner.search_space_summary()

In [None]:
# Run the hyperparameter search. The EarlyStopping callback is passed through
# to every training run, so a run stops once val_loss has not improved for
# `patience` epochs instead of always training for the full nb_epochs.
tuner.search(
    X_train,
    y_train,
    epochs=nb_epochs,
    batch_size=batch_size,
    shuffle=shuffle,
    validation_data=(X_val, y_val),
    callbacks=[stop_early],
)

## Types of tuners:

- **RandomSearch**, it doesn't learn from previously tested parameter combinations, and samples parameter combinations from a search space randomly
- **BayesianOptimization**, doesn't sample hyperparameter combinations randomly, it follows a probabilistic approach under the hood. This approach takes into account already tested combinations and uses this information to sample the next combination for a test.
- **Hyperband**, an optimized version of RandomSearch. The algorithm trains a large number of models for a few epochs and carries forward only the top-performing **half** of models to the next round. Hyperband determines the number of models to train in a bracket by computing $1 + \log_{\text{factor}}(\text{max\_epochs})$ and rounding it up to the next integer. It's like a tournament: round of 32, round of 16, ..., semifinals, final, winner.