This notebook demonstrates how to use OptKeras, a wrapper around Keras and Optuna.

##  Set up Google Colab environment

To run in Google Colab, specify a directory in Google Drive. (GPU is recommended.)

To run in an environment other than Google Colab, just skip this cell.

In [0]:
try:
    # Mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')
    import os
    # Specify a directory in Google Drive
    dir = '/content/drive/My Drive/Colab Notebooks/OptKeras'
    os.chdir(dir)    
    # Check the environment info
    print('## Current working directory: ', os.getcwd())
    print('## Check the uptime. (Google Colab reboots every 12 hours)')
    !cat /proc/uptime | awk '{print "Uptime is " $1 /60 /60 " hours (" $1 " sec)"}'
    print('## Check the GPU info')
    !nvidia-smi
    print('## Check the OS') 
    !cat /etc/issue
    print('## Check the Python version') 
    !python --version
    print('## Check the memory')
    !free -h
    print('## Check the disk')
    !df -h
except:
    print('Run the code assuming the environment is not Google Colab.')

## Install Optuna 0.7.0

In [0]:
# Install a pinned Optuna release (0.7.0) so the notebook runs against a known version.
!pip install optuna==0.7.0

## Install the latest version of OptKeras

In [0]:
# Install OptKeras directly from its GitHub repository.
# NOTE(review): no tag or commit is specified in the URL, so this install is
# not pinned -- consider appending @<tag-or-commit> for reproducible runs.
!pip install git+https://github.com/Minyus/optkeras.git

## Import modules

In [0]:
# import Keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Activation, Flatten, Dense, Conv2D
from keras.layers import MaxPooling2D, Dropout, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD, Adagrad, RMSprop, Adam, Adadelta, Adamax, Nadam
import keras.backend as K

import keras
print('Keras', keras.__version__)

import tensorflow as tf
print('TensorFlow', tf.__version__)

import numpy as np
import pandas as pd

# import Optuna and OptKeras after Keras
import optuna 
print('Optuna', optuna.__version__)

from optkeras.optkeras import OptKeras
import optkeras
print('OptKeras', optkeras.__version__)

## Set up Dataset

In [0]:
dataset_name = 'MNIST'

# NOTE(review): 'MNIST_1000samples' is accepted below, but nothing in this
# cell subsamples the data -- confirm whether a 1000-sample subset was intended.
if dataset_name in ['MNIST', 'MNIST_1000samples']:
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Image height/width are read from the loaded array rather than hard-coded
    img_x, img_y = x_train.shape[1], x_train.shape[2]
    # Append a single channel axis, convert to float32 and divide by 255
    x_train = x_train.reshape(-1, img_x, img_y, 1).astype('float32') / 255
    x_test = x_test.reshape(-1, img_x, img_y, 1).astype('float32') / 255
    num_classes = 10
    input_shape = (img_x, img_y, 1)
else:
    # Fail here with a clear message instead of a NameError in a later cell
    raise ValueError('Unsupported dataset_name: {!r}'.format(dataset_name))

In [6]:
# Report the array shapes; the labels are kept exactly as they are printed
for label, array in (('x_train: ', x_train),
                     ('y_train', y_train),
                     ('x_test: ', x_test),
                     ('y_test', y_test)):
    print(label, array.shape)
print('input_shape: ', input_shape)

x_train:  (60000, 28, 28, 1)
y_train (60000,)
x_test:  (10000, 28, 28, 1)
y_test (10000,)
input_shape:  (28, 28, 1)


## Try a simple Convolutional Neural Network model

In [7]:
# Baseline model: one convolution layer feeding a 10-way softmax classifier
baseline_layers = [
    Conv2D(128, kernel_size=(3, 3), strides=(1, 1),
           activation='relu', input_shape=input_shape),
    Flatten(),
    Dense(10, activation='softmax'),
]
model = Sequential(baseline_layers)
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])
# Kept as the last expression so the returned History object is displayed
model.fit(x_train, y_train,
          batch_size=512,
          epochs=2,
          shuffle=True,
          validation_data=(x_test, y_test))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Train on 60000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f20e1ac4a20>

## Define a Keras model to optimize

In [0]:
def objective(trial):
    """Build and train one candidate Keras model for a single Optuna trial.

    The architecture and the training options are sampled from ``trial``:
    an optional convolution block, two fully connected blocks, the optimizer,
    the batch size and whether data augmentation is used. The parameter names
    passed to ``trial.suggest_*`` (including their spelling) and the order of
    the suggest calls are deliberately left untouched, since a stored study
    may already contain trials recorded under these names.

    Relies on notebook-level names: ``x_train``, ``y_train``, ``x_test``,
    ``y_test``, ``input_shape``, ``num_classes`` and the OptKeras instance
    ``ok``. They only need to exist by the time ``ok.optimize`` calls this
    function.

    Args:
        trial: the trial object handed to the objective by Optuna.

    Returns:
        ``ok.trial_best_value`` after training has finished.
    """
    epochs = 10

    # Clear the Keras backend session before building this trial's model
    K.clear_session()
    model = Sequential()

    if trial.suggest_int('Conv', 0, 1):
        # 1 Convolution layer
        i = 1
        num_filters = int(trial.suggest_discrete_uniform(
            'Conv_{}_num_filters_'.format(i), 32, 64, 32))
        kernel_size = trial.suggest_int(
            'Conv_{}_kernel_size'.format(i), 2, 3)
        model.add(Conv2D(filters=num_filters,
                         kernel_size=(kernel_size, kernel_size),
                         activation='relu',
                         input_shape=input_shape))
        pool_size = trial.suggest_int(
            'Conv_{}_max_pooling_size'.format(i), 2, 3)
        model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))
        model.add(Dropout(trial.suggest_discrete_uniform(
            'Conv_{}_dropout_rate'.format(i), 0, 0.5, 0.25)))
        model.add(Flatten())
    else:
        # No convolution block: feed the flattened image to the dense layers
        model.add(Flatten(input_shape=input_shape))

    # 2 Fully connected layers (blocks are numbered 1 and 2)
    for i in range(1, 3):
        num_hidden_units = int(trial.suggest_discrete_uniform(
            'FC_{}_num_hidden_units'.format(i), 256, 512, 256))
        model.add(Dense(num_hidden_units))
        if trial.suggest_int('FC_{}_batch_normalization'.format(i), 0, 1):
            model.add(BatchNormalization())
        model.add(Activation(trial.suggest_categorical(
            'FC_{}_acivation'.format(i), ['relu'])))
        model.add(Dropout(trial.suggest_discrete_uniform(
            'FC_{}_dropout_rate'.format(i), 0, 0.5, 0.25)))

    # Output layer
    model.add(Dense(num_classes, activation='softmax'))

    # Candidate optimizers keyed by the name recorded in the trial.
    # Further entries (e.g. 'Adagrad': Adagrad()) can be added here.
    optimizer_dict = {'Adam': Adam()}
    optimizer_name = trial.suggest_categorical(
        'Optimizer', list(optimizer_dict.keys()))
    model.compile(optimizer=optimizer_dict[optimizer_name],
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    if ok.verbose >= 2:
        model.summary()

    # Fixed at 256 for now; widen the range (or switch to
    # suggest_discrete_uniform) to search over the batch size as well.
    batch_size = trial.suggest_int('Batch_size', 256, 256)
    data_augmentation = trial.suggest_int('Data_augmentation', 0, 1)

    if data_augmentation:
        # Real-time data augmentation: shifts of up to 1 pixel and a small zoom
        datagen = ImageDataGenerator(
            width_shift_range=[-1, 0, +1],  # 1 pixel
            height_shift_range=[-1, 0, +1],  # 1 pixel
            zoom_range=[0.95, 1.05],  # set range for random zoom
            horizontal_flip=False,  # disable horizontal flip
            vertical_flip=False)  # disable vertical flip
        # datagen.fit(x_train) is intentionally not called: it is only needed
        # when feature-wise centering/normalization or ZCA whitening is
        # enabled, and none of them is, so it would just copy the training
        # set on every trial.
        # [Required] Specify callbacks(trial) in fit_generator method
        model.fit_generator(datagen.flow(x_train, y_train,
                                         batch_size=batch_size),
                            epochs=epochs, validation_data=(x_test, y_test),
                            steps_per_epoch=len(x_train) // batch_size,
                            callbacks=ok.callbacks(trial),
                            verbose=ok.keras_verbose)
    else:
        # [Required] Specify callbacks(trial) in fit method
        model.fit(x_train, y_train, batch_size=batch_size,
                  epochs=epochs, validation_data=(x_test, y_test),
                  shuffle=True,
                  callbacks=ok.callbacks(trial),
                  verbose=ok.keras_verbose)

    # [Required] return trial_best_value (recommended) or latest_value
    return ok.trial_best_value

## Run optimization trials

In [0]:
study_name = dataset_name + '_Optimized'

# The study is stored in a SQLite file whose name is derived from the study name
storage_url = 'sqlite:///' + study_name + '_Optuna.db'
median_pruner = optuna.pruners.MedianPruner(n_startup_trials=5,
                                            n_warmup_steps=0)

ok = OptKeras(
    # --- parameters for optuna.create_study ---
    storage=storage_url,
    sampler=None,
    pruner=median_pruner,
    study_name=study_name,
    direction='minimize',
    load_if_exists=True,
    # --- parameters for OptKeras ---
    monitor='val_error',  # Either 'val_error' (1 - val_acc) or 'val_loss'
    enable_pruning=False,
    num_models_to_save=1,  # Either 1, 0, or -1 (save all models)
    verbose=1)

# Limits for the optimization by Optuna: 10 trials and/or 12 hours (in sec)
timeout_sec = 12 * 60 * 60
ok.optimize(objective, n_trials=10, timeout=timeout_sec)

## Check the results

In [10]:
# OptKeras best_trial returns 
print('Best trial id: ', ok.best_trial.trial_id)
print('Best value:', ok.best_trial.value)
print('Best params: ')
ok.best_trial.params

Best trial id:  8
Best value: 0.008600000000000052
Best params: 


{'Batch_size': 256,
 'Conv': 1,
 'Conv_1_dropout_rate': 0.5,
 'Conv_1_kernel_size': 3,
 'Conv_1_max_pooling_size': 3,
 'Conv_1_num_filters_': 32.0,
 'Data_augmentation': 0,
 'FC_1_acivation': 'relu',
 'FC_1_batch_normalization': 1,
 'FC_1_dropout_rate': 0.0,
 'FC_1_num_hidden_units': 256.0,
 'FC_2_acivation': 'relu',
 'FC_2_batch_normalization': 1,
 'FC_2_dropout_rate': 0.5,
 'FC_2_num_hidden_units': 512.0,
 'Optimizer': 'Adam'}

In [0]:
"""
Alternatively, you can access Optuna's study object directly to, for 
example, get the best parameters.
Please note that, as of Optuna 0.7.0, study.best_trial raises an error if 
the optimization trials were not completed (e.g. if you interrupt 
execution), so using OptKeras's best_trial is recommended.
"""
# Underlying Optuna study object exposed by the OptKeras instance
study = ok.study
# Last expression of the cell, so the best trial's parameters are displayed
study.best_trial.params 

In [0]:
## Inspect the CSV log file written for the Optuna trials
pd.set_option('display.max_rows', 8)  # limit rows to display
optuna_log_path = ok.optuna_log_file_path
print('Data Frame read from', optuna_log_path, '\n')
pd.read_csv(optuna_log_path)

In [0]:
## Inspect the CSV log file written for the Keras training runs
pd.set_option('display.max_rows', 8)  # limit rows to display
keras_log_path = ok.keras_log_file_path
print('Data Frame read from', keras_log_path, '\n')
pd.read_csv(keras_log_path)