## Import packages

In [1]:
import keras
from keras.optimizers import Adam
from keras.datasets import mnist
from keras.callbacks import TensorBoard
from keras.layers import Dense, Dropout, Flatten, AlphaDropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.models import Sequential, load_model
from keras import backend as K 
from keras.wrappers.scikit_learn import KerasClassifier

from sklearn.preprocessing import Normalizer, QuantileTransformer
from sklearn.metrics import classification_report
from time import time
import pandas as pd

Using TensorFlow backend.


In [2]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer

### Load MNIST Data Set

In [3]:
img_rows, img_cols = 28, 28
num_classes = 10

(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

## Normalize Data

In [4]:
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


## Convert Labels to Categorical data

In [5]:
#y_train = keras.utils.to_categorical(y_train, num_classes)
#y_test = keras.utils.to_categorical(y_test, num_classes)

## Neural Network

In [6]:
def build_model(optimizer=Adam(amsgrad=True),
                input_shape=input_shape,
                num_classes=num_classes,
                activation1='elu',
                activation2='elu',
                activation3='elu',
                activation4='elu',
                units1=1,
                units2=1,
                units3=1,
                units4=1,
                dropout1=0.3,
                dropout2=0.3,
                dropout3=0.3,
                dropout4=0.3,
                k1=8,
                k2=7,
                p1=2,
                p2=2,
                op_activation='softmax',
                loadprevmodel=False,
                modelname='Keras-MNIST'
               ):
    if loadprevmodel:
        try:
            model = load_model(modelname + '.h5')
            print('Model loaded successfully')
        except IOError:
            print('Loading previous model failed, Building a new model')       
    model = Sequential()
    model.add(Conv2D(units1, kernel_size=(k1, k1),
                     activation=activation1,
                     input_shape=input_shape,
                     padding='same'))
    model.add(MaxPooling2D(pool_size=(p1, p1), strides=2, padding='same'))
    if activation1 == 'selu':
        model.add(AlphaDropout(dropout1))
    else:
        model.add(BatchNormalization())
    
    model.add(Conv2D(units2, (k2, k2), activation=activation2, padding='same'))
    model.add(MaxPooling2D(pool_size=(p2, p2), strides=2, padding='same'))
    if activation2 == 'selu':
        model.add(AlphaDropout(dropout2))
    else:
        model.add(BatchNormalization())
    
    model.add(Flatten())
    model.add(Dense(units3, activation=activation3))
    if activation3 == 'selu':
        model.add(AlphaDropout(dropout3))
    else:
        model.add(BatchNormalization())
    model.add(Dense(units4, activation=activation4))
    if activation4 == 'selu':
        model.add(AlphaDropout(dropout4))
    else:
        model.add(BatchNormalization())
    model.add(Dense(num_classes, activation=op_activation))
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [7]:
search = True
modelh5 = 'Keras-MNIST'
batch_size = 256
epochs = 1

In [8]:
params_dict ={'dropout1': Real(0.01, 1.0, 'log-uniform'),
              'dropout2': Real(0.01, 1.0, 'log-uniform'),
              'dropout3': Real(0.01, 1.0, 'log-uniform'),
              'dropout4': Real(0.01, 1.0, 'log-uniform'),
              'units1': Integer(16,128),
              'units2': Integer(16,128),
              'units3': Integer(128,1024),
              'units4': Integer(32,64),
              'activation1': Categorical(['elu', 'relu', 'selu']),
              'activation2': Categorical(['elu', 'relu', 'selu']),
              'activation3': Categorical(['elu', 'relu', 'selu']),
              'activation4': Categorical(['elu', 'relu', 'selu']),
              'k1': Integer(3,img_rows),
              'k2': Integer(3,img_rows),
              'p1': Integer(2,8),
              'p2': Integer(2,8),
        }
if search:
    random_search = BayesSearchCV(estimator=KerasClassifier(build_model,
                                                            input_shape=input_shape,
                                                            num_classes=num_classes,
                                                            batch_size=batch_size,
                                                            epochs=epochs,
                                                            verbose=1
                                                           ),
                                  search_spaces=params_dict,
                                  scoring='accuracy',
                                  n_iter=2,
                                  cv=2,
                                  verbose=0
                                 )

    start = time()
    random_search.fit(x_train, y_train)
    print("BayesSearchCV took %.2f seconds for %d candidates"
          " parameter settings." % ((time() - start), random_search.total_iterations))

    print("val. score: %s" % random_search.best_score_)
    print("test score: %s" % random_search.score(x_train, y_train))
    print(random_search.best_params_)

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
BayesSearchCV took 416.96 seconds for 32 candidates parameter settings.
val. score: 0.9612666666666667
test score: 0.9836
{'activation1': 'selu', 'activation2': 'relu', 'activation3': 'selu', 'activation4': 'relu', 'dropout1': 0.03054859373253639, 'dropout2': 0.01821491262823199, 'dropout3': 0.22391179253754676, 'dropout4': 0.04238128630404382, 'k1': 28, 'k2': 14, 'p1': 4, 'p2': 3, 'units1': 23, 'units2': 116, 'units3': 972, 'units4': 62}


In [9]:
print(random_search.best_score_)
print(random_search.best_params_)

0.9612666666666667
{'activation1': 'selu', 'activation2': 'relu', 'activation3': 'selu', 'activation4': 'relu', 'dropout1': 0.03054859373253639, 'dropout2': 0.01821491262823199, 'dropout3': 0.22391179253754676, 'dropout4': 0.04238128630404382, 'k1': 28, 'k2': 14, 'p1': 4, 'p2': 3, 'units1': 23, 'units2': 116, 'units3': 972, 'units4': 62}


### Preprocessing Docs
#### http://scikit-learn.org/stable/auto_examples/preprocessing/plot_all_scaling.html#sphx-glr-auto-examples-preprocessing-plot-all-scaling-py
### Bayes Search CV docs
#### https://github.com/scikit-optimize/scikit-optimize/blob/master/skopt/searchcv.py

In [11]:
params = {'activation1': 'selu', 'activation2': 'relu', 'activation3': 'selu', 'activation4': 'relu', 'dropout1': 0.03054859373253639, 'dropout2': 0.01821491262823199, 'dropout3': 0.22391179253754676, 'dropout4': 0.04238128630404382, 'k1': 28, 'k2': 14, 'p1': 4, 'p2': 3, 'units1': 23, 'units2': 116, 'units3': 972, 'units4': 62}
clf = KerasClassifier(build_model,
                      input_shape=input_shape,
                      num_classes=num_classes,
                      batch_size=batch_size,
                      epochs=10,
                      verbose=1,
                     **params)
clf.fit(x_train, y_train)
y_true, y_pred = y_train, clf.predict(x_train)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
print(classification_report(y_true, y_pred))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00      5923
          1       1.00      1.00      1.00      6742
          2       1.00      1.00      1.00      5958
          3       1.00      1.00      1.00      6131
          4       1.00      1.00      1.00      5842
          5       1.00      1.00      1.00      5421
          6       1.00      1.00      1.00      5918
          7       1.00      1.00      1.00      6265
          8       1.00      1.00      1.00      5851
          9       1.00      1.00      1.00      5949

avg / total       1.00      1.00      1.00     60000



In [14]:
y_true, y_pred = y_test, clf.predict(x_test)
print(classification_report(y_true, y_pred))

             precision    recall  f1-score   support

          0       0.98      0.96      0.97       980
          1       1.00      0.97      0.98      1135
          2       0.65      1.00      0.79      1032
          3       0.93      0.85      0.89      1010
          4       0.98      0.90      0.94       982
          5       0.96      0.90      0.93       892
          6       0.97      0.94      0.95       958
          7       0.99      0.82      0.90      1028
          8       0.98      0.77      0.86       974
          9       0.83      0.95      0.88      1009

avg / total       0.93      0.91      0.91     10000

