This notebook demonstrates how to use OptKeras, a Python package to optimize hyperparameters of Keras Deep Learning Models using Optuna.

Please see the GitHub repository of OptKeras for details:
https://github.com/Minyus/optkeras



In [1]:
try:
    from google import colab
    !pip3 install optuna==0.14.0
    !pip3 install optkeras==0.0.7
    # Alternatively you can install from the GitHub repository
    # !pip install git+https://github.com/Minyus/optkeras.git
except:
    print('Run in non-Colab environment.')

Run in non-Colab environment.


## Import modules

In [2]:
import numpy as np
import pandas as pd

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Activation, Flatten, Dense, Conv2D
from keras.layers import MaxPooling2D, Dropout, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD, Adagrad, RMSprop, Adam, Adadelta, Adamax, Nadam
import keras.backend as K

import keras
print('Keras', keras.__version__)

%tensorflow_version 1.x
import tensorflow as tf
print('TensorFlow', tf.__version__)

# import Optuna and OptKeras after Keras
import optuna 
print('Optuna', optuna.__version__)

from optkeras.optkeras import OptKeras
import optkeras
print('OptKeras', optkeras.__version__)

# (Optional) Disable messages from Optuna below WARN level.
optuna.logging.set_verbosity(optuna.logging.WARN) 

Using TensorFlow backend.


Keras 2.2.4
TensorFlow 1.14.0
Optuna 0.14.0
OptKeras 0.0.7


## Set up Dataset

In [3]:
dataset_name = 'MNIST'

if dataset_name in ('MNIST',):
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Spatial size is taken from axes 1 and 2 of the training images.
    img_x, img_y = x_train.shape[1:3]
    input_shape = (img_x, img_y, 1)
    num_classes = 10
    # Add a single channel axis, convert to float32 and divide by 255.
    x_train = x_train.reshape((-1,) + input_shape).astype('float32') / 255
    x_test = x_test.reshape((-1,) + input_shape).astype('float32') / 255

In [4]:
# Report the shapes of the prepared arrays and the model input shape.
for label, shape in (
        ('x_train: ', x_train.shape),
        ('y_train', y_train.shape),
        ('x_test: ', x_test.shape),
        ('y_test', y_test.shape),
        ('input_shape: ', input_shape),
):
    print(label, shape)

x_train:  (60000, 28, 28, 1)
y_train (60000,)
x_test:  (10000, 28, 28, 1)
y_test (10000,)
input_shape:  (28, 28, 1)


## A simple Keras model

In [5]:
# Baseline network: one convolution layer feeding a softmax classifier.
model = Sequential([
    Conv2D(filters=32, kernel_size=3, strides=1, activation='relu',
           input_shape=input_shape),
    Flatten(),
    Dense(num_classes, activation='softmax'),
])
model.compile(
    optimizer=Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Kept as the last expression so the returned History object is shown.
model.fit(
    x_train, y_train,
    batch_size=512, epochs=2, shuffle=True,
    validation_data=(x_test, y_test),
)

W0801 15:40:08.024789 139680493655872 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0801 15:40:08.071118 139680493655872 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0801 15:40:08.076984 139680493655872 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0801 15:40:08.142308 139680493655872 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0801 15:40:08.153276 139680493655872 deprecation_wrappe

Train on 60000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f0997767ac8>

## Optimization of a simple Keras model without pruning

In [6]:
study_name = '{}_Simple'.format(dataset_name)

# Step 1. Instantiate OptKeras class
# You can specify arguments for Optuna's create_study method and other
# arguments for OptKeras such as enable_pruning.
ok = OptKeras(
    study_name=study_name,
    monitor='val_acc',
    direction='maximize',
)


# Step 2. Define objective function for Optuna

def objective(trial):
    """Build, train and score one candidate model for an Optuna trial.

    Step 2.1. Parameters to try are drawn with methods of optuna.trial such
    as suggest_categorical. This simple demo covers 2*2*2*2 = 16 parameter
    sets: 2 candidate values for each of the 4 convolution parameters
    (filters, kernel_size, strides, and activation).
    """
    # Keyword arguments are evaluated left to right, so the suggest calls
    # run in the order filters, kernel_size, strides, activation.
    conv_params = dict(
        filters=trial.suggest_categorical('filters', [32, 64]),
        kernel_size=trial.suggest_categorical('kernel_size', [3, 5]),
        strides=trial.suggest_categorical('strides', [1, 2]),
        activation=trial.suggest_categorical('activation', ['relu', 'linear']),
    )

    model = Sequential([
        Conv2D(input_shape=input_shape, **conv_params),
        Flatten(),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Step 2.2. Specify callbacks(trial) and keras_verbose in fit
    # (or fit_generator) method of Keras model
    model.fit(
        x_train, y_train,
        batch_size=512, epochs=2, shuffle=True,
        validation_data=(x_test, y_test),
        callbacks=ok.callbacks(trial),
        verbose=ok.keras_verbose,
    )

    # Step 2.3. Return trial_best_value (or latest_value)
    return ok.trial_best_value

""" Step 3. Run optimize. 
Set n_trials and/or timeout (in sec) for optimization by Optuna
"""
# Start the search with the objective defined above; timeout is in seconds.
ok.optimize(objective, timeout = 3*60) # Run for 3 minutes for demo


[2019-08-01 15:40:48.302738] [OptKeras] Ready for optimization. (message printed as verbose is set to 1+)
[2019-08-01 15:40:48.489332]  (None) 
[2019-08-01 15:41:51.471535] Trial#: 0, value: 9.808000e-01| Best trial#: 0, value: 9.808000e-01, params: {'filters': 64, 'kernel_size': 5, 'strides': 1, 'activation': 'relu'}
[2019-08-01 15:42:28.326205] Trial#: 1, value: 9.718000e-01| Best trial#: 0, value: 9.808000e-01, params: {'filters': 64, 'kernel_size': 5, 'strides': 1, 'activation': 'relu'}
[2019-08-01 15:43:25.273919] Trial#: 2, value: 9.250000e-01| Best trial#: 0, value: 9.808000e-01, params: {'filters': 64, 'kernel_size': 5, 'strides': 1, 'activation': 'relu'}
[2019-08-01 15:43:37.728497] Trial#: 3, value: 9.187000e-01| Best trial#: 0, value: 9.808000e-01, params: {'filters': 64, 'kernel_size': 5, 'strides': 1, 'activation': 'relu'}
[2019-08-01 15:43:50.947701] Trial#: 4, value: 9.530000e-01| Best trial#: 0, value: 9.808000e-01, params: {'filters': 64, 'kernel_size': 5, 'strides': 1

In [7]:
""" Show Results """
best_trial = ok.best_trial
print('Best trial number: ', best_trial.number)
print('Best value:', best_trial.value)
print('Best parameters: \n', best_trial.params)

# Alternatively, you can access Optuna's study object to, for example,
# get the best parameters as well.
# Please note that study.best_trial returns error if optimization trials
# were not completed (e.g. if you interrupt execution) as of Optuna 0.7.0,
# so usage of OptKeras is recommended.
print("Best parameters (retrieved directly from Optuna)", ok.study.best_trial.params)

# Check the Optuna CSV log file, then the Keras CSV log file.
pd.options.display.max_rows = 8 # limit rows to display
for log_file_path in (ok.optuna_log_file_path, ok.keras_log_file_path):
    print('Data Frame read from', log_file_path, '\n')
    display(pd.read_csv(log_file_path))

Best trial number:  0
Best value: 0.9808000007629395
Best parameters: 
 {'filters': 64, 'kernel_size': 5, 'strides': 1, 'activation': 'relu'}
Best parameters (retrieved directly from Optuna) {'filters': 64, 'kernel_size': 5, 'strides': 1, 'activation': 'relu'}
Data Frame read from MNIST_Simple_Optuna.csv 



Unnamed: 0,number,state,value,datetime_start,datetime_complete,activation,filters,kernel_size,strides,_Datetime_epoch_begin,_Datetime_epoch_end,_Trial_num,acc,loss,val_acc,val_loss,system_attrs__number
0,0,TrialState.COMPLETE,0.9808,2019-08-01 15:40:48.313516,2019-08-01 15:41:51.362694,relu,64,5,1,2019-08-01 15:41:19.722212,2019-08-01 15:41:51.198223,0,0.972383,0.101373,0.9808,0.07121,0
1,1,TrialState.COMPLETE,0.9718,2019-08-01 15:41:51.363248,2019-08-01 15:42:28.214143,relu,32,5,1,2019-08-01 15:42:09.938641,2019-08-01 15:42:28.067890,1,0.9572,0.151473,0.9718,0.106681,1
2,2,TrialState.COMPLETE,0.925,2019-08-01 15:42:28.215066,2019-08-01 15:43:25.158421,linear,64,5,1,2019-08-01 15:42:56.725928,2019-08-01 15:43:24.981940,2,0.9153,0.295586,0.925,0.279367,2
3,3,TrialState.COMPLETE,0.9187,2019-08-01 15:43:25.159711,2019-08-01 15:43:37.611357,linear,64,5,2,2019-08-01 15:43:31.647896,2019-08-01 15:43:37.471011,3,0.913883,0.303336,0.9187,0.281933,3
4,4,TrialState.COMPLETE,0.953,2019-08-01 15:43:37.613132,2019-08-01 15:43:50.925040,relu,64,5,2,2019-08-01 15:43:44.556349,2019-08-01 15:43:50.782537,4,0.9377,0.22076,0.953,0.168444,4


Data Frame read from MNIST_Simple_Keras.csv 



Unnamed: 0,epoch,_Datetime_epoch_begin,_Datetime_epoch_end,_Trial_num,acc,loss,val_acc,val_loss
0,0,2019-05-10 07:32:28.812548,2019-05-10 07:32:29.642939,0,0.858783,0.532120,0.9315,0.232515
1,1,2019-05-10 07:32:29.699403,2019-05-10 07:32:30.322589,0,0.946100,0.191616,0.9623,0.141296
2,0,2019-05-10 07:32:30.764731,2019-05-10 07:32:31.711724,1,0.876700,0.438188,0.9179,0.289129
3,1,2019-05-10 07:32:31.764485,2019-05-10 07:32:32.542477,1,0.918517,0.288177,0.9218,0.280232
...,...,...,...,...,...,...,...,...
184,0,2019-08-01 15:43:25.579514,2019-08-01 15:43:31.557402,3,0.857333,0.532061,0.9161,0.302102
185,1,2019-08-01 15:43:31.647896,2019-08-01 15:43:37.471011,3,0.913883,0.303336,0.9187,0.281933
186,0,2019-08-01 15:43:37.989403,2019-08-01 15:43:44.466711,4,0.858100,0.536923,0.9282,0.252586
187,1,2019-08-01 15:43:44.556349,2019-08-01 15:43:50.782537,4,0.937700,0.220760,0.9530,0.168444


## Optimization of a Keras model using more of Optuna's features, such as pruning

In [8]:
study_name = '{}_Optimized'.format(dataset_name)

# Optuna components are built first to keep the OptKeras call readable.
tpe_sampler = optuna.samplers.TPESampler(
    consider_prior=True,
    prior_weight=1.0,
    consider_magic_clip=True,
    consider_endpoints=False,
    n_startup_trials=10,
    n_ei_candidates=24,
    seed=None,
)
halving_pruner = optuna.pruners.SuccessiveHalvingPruner(
    min_resource=1,
    reduction_factor=4,
    min_early_stopping_rate=0,
)

ok = OptKeras(
    # parameters for optuna.create_study
    storage='sqlite:///{}_Optuna.db'.format(study_name),
    sampler=tpe_sampler,
    pruner=halving_pruner,
    study_name=study_name,
    load_if_exists=True,
    # parameters for OptKeras
    monitor='val_acc',
    direction='maximize',
    enable_pruning=True,
    models_to_keep=1,  # Either 1, 0, or -1 (save all models)
    verbose=1,
)

def objective(trial):
    """Build, train and score one candidate network for an Optuna trial.

    The trial decides whether a convolution block is used, how the two fully
    connected layers are configured, and whether training runs on augmented
    images.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        ok.trial_best_value once training has finished.
    """
    epochs = 2

    model = Sequential()

    if trial.suggest_int('Conv', 0, 1):
        # 1 Convolution layer
        i = 1
        model.add(Conv2D(
            filters=int(trial.suggest_discrete_uniform(
                'Conv_{}_num_filters'.format(i), 32, 64, 32)),
            kernel_size=tuple([trial.suggest_int(
                'Conv_{}_kernel_size'.format(i), 2, 3)] * 2),
            activation='relu',
            input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=tuple([trial.suggest_int(
            'Conv_{}_max_pooling_size'.format(i), 2, 3)] * 2)))
        model.add(Dropout(trial.suggest_discrete_uniform(
            'Conv_{}_dropout_rate'.format(i), 0, 0.5, 0.25)))
        model.add(Flatten())
    else:
        model.add(Flatten(input_shape=input_shape))

    # 2 Fully connected layers
    for i in range(1, 3):
        model.add(Dense(int(trial.suggest_discrete_uniform(
            'FC_{}_num_hidden_units'.format(i), 256, 512, 256))))
        if trial.suggest_int('FC_{}_batch_normalization'.format(i), 0, 1):
            model.add(BatchNormalization())
        # NOTE: the parameter name is misspelled ('acivation'); it is left
        # unchanged so trials already saved in the study storage keep
        # matching parameter names.
        model.add(Activation(trial.suggest_categorical(
            'FC_{}_acivation'.format(i), ['relu'])))
        model.add(Dropout(trial.suggest_discrete_uniform(
            'FC_{}_dropout_rate'.format(i), 0, 0.5, 0.25)))

    # Output layer
    model.add(Dense(num_classes, activation='softmax'))

    optimizer_dict = {
        # 'Adagrad': Adagrad(),
        'Adam': Adam(),
    }
    optimizer_name = trial.suggest_categorical(
        'Optimizer', list(optimizer_dict.keys()))
    model.compile(optimizer=optimizer_dict[optimizer_name],
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    if ok.verbose >= 2:
        model.summary()

    batch_size = trial.suggest_int('Batch_size', 256, 256)
    data_augmentation = trial.suggest_int('Data_augmentation', 0, 1)

    if data_augmentation:
        # Realtime data augmentation: small shifts and zooms, no flips.
        datagen = ImageDataGenerator(
            width_shift_range=[-1, 0, +1],  # 1 pixel
            height_shift_range=[-1, 0, +1],  # 1 pixel
            zoom_range=[0.95, 1.05],  # set range for random zoom
            horizontal_flip=False,  # disable horizontal flip
            vertical_flip=False)  # disable vertical flip
        # datagen.fit(x_train) is intentionally not called: it only computes
        # statistics for featurewise_center, featurewise_std_normalization
        # and zca_whitening, none of which is enabled here, so it would just
        # copy the whole training set on every trial.
        # [Required] Specify callbacks(trial) in fit_generator method
        model.fit_generator(
            datagen.flow(x_train, y_train, batch_size=batch_size),
            epochs=epochs, validation_data=(x_test, y_test),
            steps_per_epoch=len(x_train) // batch_size,
            callbacks=ok.callbacks(trial),
            verbose=ok.keras_verbose)
    else:
        # [Required] Specify callbacks(trial) in fit method
        model.fit(x_train, y_train, batch_size=batch_size,
                  epochs=epochs, validation_data=(x_test, y_test),
                  shuffle=True,
                  callbacks=ok.callbacks(trial),
                  verbose=ok.keras_verbose)

    # [Required] return trial_best_value (recommended) or latest_value
    return ok.trial_best_value

# Start the search with the objective defined above.
# Set n_trials and/or timeout (in seconds) to bound the optimization by Optuna.
ok.optimize(objective, timeout=60) # Run for 1 minute for demo


[2019-08-01 15:43:51.160028] [OptKeras] Ready for optimization. (message printed as verbose is set to 1+)


W0801 15:43:51.506155 139680493655872 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
W0801 15:44:23.189004 139680493655872 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.



## Randomized Grid Search of a simple Keras model

In [9]:
study_name = '{}_GridSearch'.format(dataset_name)

# To run randomized grid search, set random_grid_search_mode True
ok = OptKeras(
    study_name=study_name,
    random_grid_search_mode=True,
)


def objective(trial):
    """Train one grid-search candidate and return its OptKeras trial value.

    Every Conv2D setting is drawn with trial.suggest_categorical; strides
    has a single candidate value.
    """
    n_filters = trial.suggest_categorical('filters', [32, 64])
    kernel = trial.suggest_categorical('kernel_size', [3, 5])
    stride = trial.suggest_categorical('strides', [1])
    act = trial.suggest_categorical('activation', ['relu', 'linear'])

    model = Sequential([
        Conv2D(filters=n_filters, kernel_size=kernel, strides=stride,
               activation=act, input_shape=input_shape),
        Flatten(),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    # The OptKeras callbacks must be passed to fit for every trial.
    model.fit(
        x_train, y_train,
        batch_size=512, epochs=2, shuffle=True,
        validation_data=(x_test, y_test),
        callbacks=ok.callbacks(trial),
        verbose=ok.keras_verbose,
    )

    return ok.trial_best_value

""" Set the number of parameter sets as n_trials for complete grid search """
# n_trials equals the grid size defined in objective:
# 2 filters x 2 kernel sizes x 1 stride x 2 activations.
ok.random_grid_search(objective, n_trials=2*2*2)  # 2*2*2 = 8 param sets


[2019-08-01 15:46:20.279641] [OptKeras] Ready for optimization. (message printed as verbose is set to 1+)
[2019-08-01 15:46:20.376395]  (None) 
[2019-08-01 15:46:54.680899] Completed:  12% (    1 /     8)
[2019-08-01 15:46:54.783971] Trial#: 0, value: 1.156144e-01| Best trial#: 0, value: 1.156144e-01, params: {'filters': 32, 'kernel_size': 5, 'strides': 1, 'activation': 'relu'}
[2019-08-01 15:47:26.458536] Completed:  25% (    2 /     8)
[2019-08-01 15:47:26.580928] Trial#: 1, value: 2.880034e-01| Best trial#: 0, value: 1.156144e-01, params: {'filters': 32, 'kernel_size': 5, 'strides': 1, 'activation': 'relu'}
[2019-08-01 15:47:55.503160] Completed:  38% (    3 /     8)
[2019-08-01 15:47:55.644526] Trial#: 2, value: 2.756525e-01| Best trial#: 0, value: 1.156144e-01, params: {'filters': 32, 'kernel_size': 5, 'strides': 1, 'activation': 'relu'}
[2019-08-01 15:48:56.419806] Completed:  50% (    4 /     8)
[2019-08-01 15:48:56.579812] Trial#: 3, value: 2.845126e-01| Best trial#: 0, value: 

In [10]:
""" ## The end of code. """

' ## The end of code. '

Please feel free to post questions or feedback [here](https://github.com/Minyus/optkeras/issues).
