# Bayesian Optimization on Keras

### MNIST training on Keras with Bayesian optimization
* This notebook runs MNIST training on Keras using Bayesian optimization to find the best hyper parameters.
* The MNIST model here is just a simple one with one input layer, one hidden layer and one output layer, without convolution.
* Hyperparameters of the model include the followings:
* - output shape of the first layer
* - dropout rate of the first layer
* - output shape of the second layer
* - dropout rate of the second layer
* - batch size
* - number of epochs
* - validation rate
* I used GPy and GPyOpt to run Bayesian optimization.

#### Import libraries

In [2]:
import GPy, GPyOpt
import numpy as np
import pandas as pds
import random
from keras.layers import Activation, Dropout, BatchNormalization, Dense
from keras.models import Sequential
from keras.datasets import mnist
from keras.metrics import categorical_crossentropy
from keras.utils import np_utils
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

#### Define MNIST model
* includes data loading function, training function, fit function and evaluation function 

In [3]:
# MNIST class
class MNIST():
    def __init__(self, first_input=784, last_output=10,
                 l1_out=512, 
                 l2_out=512, 
                 l1_drop=0.2, 
                 l2_drop=0.2, 
                 batch_size=100, 
                 epochs=10, 
                 validation_split=0.1):
        self.__first_input = first_input
        self.__last_output = last_output
        self.l1_out = l1_out
        self.l2_out = l2_out
        self.l1_drop = l1_drop
        self.l2_drop = l2_drop
        self.batch_size = batch_size
        self.epochs = epochs
        self.validation_split = validation_split
        self.__x_train, self.__x_test, self.__y_train, self.__y_test = self.mnist_data()
        self.__model = self.mnist_model()
        
    # load mnist data from keras dataset
    def mnist_data(self):
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        X_train = X_train.reshape(60000, 784)
        X_test = X_test.reshape(10000, 784)

        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')
        X_train /= 255
        X_test /= 255

        Y_train = np_utils.to_categorical(y_train, 10)
        Y_test = np_utils.to_categorical(y_test, 10)
        return X_train, X_test, Y_train, Y_test
    
    # mnist model
    def mnist_model(self):
        model = Sequential()
        model.add(Dense(self.l1_out, input_shape=(self.__first_input,)))
        model.add(Activation('relu'))
        model.add(Dropout(self.l1_drop))
        model.add(Dense(self.l2_out))
        model.add(Activation('relu'))
        model.add(Dropout(self.l2_drop))
        model.add(Dense(self.__last_output))
        model.add(Activation('softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(),
                      metrics=['accuracy'])

        return model
    
    # fit mnist model
    def mnist_fit(self):
        early_stopping = EarlyStopping(patience=0, verbose=1)
        
        self.__model.fit(self.__x_train, self.__y_train,
                       batch_size=self.batch_size,
                       epochs=self.epochs,
                       verbose=0,
                       validation_split=self.validation_split,
                       callbacks=[early_stopping])
    
    # evaluate mnist model
    def mnist_evaluate(self):
        self.mnist_fit()
        
        evaluation = self.__model.evaluate(self.__x_test, self.__y_test, batch_size=self.batch_size, verbose=0)
        return evaluation

#### Runner function for the MNIST model

In [4]:
# function to run mnist class
def run_mnist(first_input=784, last_output=10,
              l1_out=512, l2_out=512, 
              l1_drop=0.2, l2_drop=0.2, 
              batch_size=100, epochs=10, validation_split=0.1):
    
    _mnist = MNIST(first_input=first_input, last_output=last_output,
               l1_out=l1_out, l2_out=l2_out, 
               l1_drop=l1_drop, l2_drop=l2_drop, 
               batch_size=batch_size, epochs=epochs, 
               validation_split=validation_split)
    mnist_evaluation = _mnist.mnist_evaluate()
    return mnist_evaluation

## Bayesian Optimization
#### bounds for hyper parameters

In [5]:
# bounds for hyper-parameters in mnist model
# the bounds dict should be in order of continuous type and then discrete type
bounds = [{'name': 'validation_split', 'type': 'continuous',  'domain': (0.0, 0.3)},
          {'name': 'l1_drop',          'type': 'continuous',  'domain': (0.0, 0.3)},
          {'name': 'l2_drop',          'type': 'continuous',  'domain': (0.0, 0.3)},
          {'name': 'l1_out',           'type': 'discrete',    'domain': (64, 128, 256, 512, 1024)},
          {'name': 'l2_out',           'type': 'discrete',    'domain': (64, 128, 256, 512, 1024)},
          {'name': 'batch_size',       'type': 'discrete',    'domain': (10, 100, 500)},
          {'name': 'epochs',           'type': 'discrete',    'domain': (5, 10, 20)}]

#### Bayesian Optimization

In [6]:
# function to optimize mnist model
def f(x):
    print(x)
    evaluation = run_mnist(
        l1_drop = int(x[:,1]), 
        l2_drop = int(x[:,2]), 
        l1_out = float(x[:,3]),
        l2_out = float(x[:,4]), 
        batch_size = int(x[:,5]), 
        epochs = int(x[:,6]), 
        validation_split = float(x[:,0]))
    print("loss:{0} \t\t accuracy:{1}".format(evaluation[0], evaluation[1]))
    print(evaluation)
    return evaluation[0]

#### Optimizer instance

In [7]:
# optimizer
opt_mnist = GPyOpt.methods.BayesianOptimization(f=f, domain=bounds)

[[   0.29676309    0.17885343    0.2227852    64.          128.          100.
     5.        ]]
Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
Epoch 00001: early stopping
loss:0.09619285663531628 		 accuracy:0.9688000053167343
[0.096192856635316279, 0.96880000531673427]
[[  2.89676156e-01   1.34951510e-01   1.94380078e-01   5.12000000e+02
    2.56000000e+02   1.00000000e+02   5.00000000e+00]]
Epoch 00003: early stopping
loss:0.08108302017499228 		 accuracy:0.9760000091791153
[0.081083020174992276, 0.97600000917911534]
[[  1.51259432e-01   2.29184280e-01   1.37943174e-01   2.56000000e+02
    2.56000000e+02   5.00000000e+02   1.00000000e+01]]
Epoch 00002: early stopping
loss:0.08585127568105236 		 accuracy:0.972700007557869
[0.085851275681052358, 0.97270000755786901]
[[  3.52710110e-02   2.56120828e-01   7.82379775e-02   1.02400000e+03
    2.56000000e+02   5.00000000e+02   5.00000000e+00]]
Epoch 00005: early stopping
loss:0.0714341328185401 		 accuracy:0.9789000087

#### Running optimization

In [8]:
# optimize mnist model
opt_mnist.run_optimization(max_iter=10)

[[  8.93574957e-02   8.64991953e-02   2.19945487e-01   1.28000000e+02
    2.56000000e+02   1.00000000e+01   1.00000000e+01]]
Epoch 00002: early stopping
loss:0.07749225622072117 		 accuracy:0.9744000029563904
[0.077492256220721173, 0.9744000029563904]
[[  3.52903196e-02   2.56028660e-01   7.82641139e-02   1.02400000e+03
    2.56000000e+02   5.00000000e+02   5.00000000e+00]]
Epoch 00004: early stopping
loss:0.07497146660884027 		 accuracy:0.9781000065803528
[0.07497146660884027, 0.97810000658035279]
[[  3.00000000e-01   0.00000000e+00   3.00000000e-01   1.02400000e+03
    2.56000000e+02   5.00000000e+02   5.00000000e+00]]
Epoch 00004: early stopping
loss:0.06450482949090657 		 accuracy:0.9805000054836274
[0.064504829490906565, 0.98050000548362737]
[[  3.00000000e-01   0.00000000e+00   3.00000000e-01   1.02400000e+03
    2.56000000e+02   5.00000000e+02   5.00000000e+00]]
Epoch 00003: early stopping
loss:0.0665446940700349 		 accuracy:0.9791000044345856
[0.066544694070034896, 0.9791000044

#### The output

In [15]:
# print optimized mnist model
print("optimized parameters: {0}".format(opt_mnist.x_opt))
print("optimized loss: {0}".format(opt_mnist.fx_opt))

optimized parameters: [  3.00000000e-01   0.00000000e+00   3.00000000e-01   1.02400000e+03
   2.56000000e+02   5.00000000e+02   5.00000000e+00]
optimized loss: [ 0.06450483]
