Our problem is an image recognition problem, to identify digits from a given 28 x 28 image. We have a subset of images for training and the rest for testing our model. So first, download the train and test files. 

In [27]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from sklearn.model_selection import GridSearchCV

import tensorflow as tf
import keras
from keras.datasets import mnist
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils.np_utils import to_categorical 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from keras import backend as K

In [28]:
# To stop potential randomness
seed = 128

In [29]:
# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [30]:
#to increase the speed of calculations look only at 2s and 7s
train_picks = np.logical_or(y_train==2,y_train==7)
test_picks = np.logical_or(y_test==2,y_test==7)

x_train = x_train[train_picks]
x_test = x_test[test_picks]
y_train = np.array(y_train[train_picks]==7,dtype=int)
y_test = np.array(y_test[test_picks]==7,dtype=int)

CNN converg faster on [0..1] data than on [0..255].

In [31]:
# Normalize the data
x_train = x_train / 255.0
x_test = x_test / 255.0

In [32]:
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, 28, 28)
    x_test = x_test.reshape(x_test.shape[0], 1, 28, 28)
    input_shape = (1, 28, 28)
else:
    x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
    x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
    input_shape = (28, 28, 1)

In [33]:
# Encode labels to one hot vectors 
y_train = to_categorical(y_train, num_classes = 2)
y_test = to_categorical(y_test, num_classes = 2)

We want to build simple CNN models to classify the MNIST dataset and uses sklearn's GridSearchCV to find the best hyperparameter model

In [13]:
##### BATCH SIZE #####

def create_model():
    
    model = Sequential()
    # a basic feed-forward model
    model.add(Flatten()) 
    # takes our 28x28 and makes it 1x784
    model.add(Dense(128, activation='relu')) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(128, activation='relu')) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(2, activation='softmax'))  
    # our output layer. 2 units for 2 classes. Softmax for probability distribution

    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])
    
    return model


np.random.seed(seed)

# Test all combinations of the following parameters:
param_grid = {'epochs': [6],
              'batch_size': [10, 20, 40, 60, 80, 100]}

my_classifier = KerasClassifier(create_model)

grid = GridSearchCV(my_classifier, param_grid, cv=3, n_jobs=1, verbose=1)

grid_result = grid.fit(x_train, y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
    
# Print Results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Print Test Accuracy
print("Test Accuracy", grid_result.score(x_test, y_test))

Fitting 3 folds for each of 6 candidates, totalling 18 fits
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6


Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:  3.2min finished


Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Best: 0.992800 using {'batch_size': 10, 'epochs': 6}
0.992800 (0.001335) with: {'batch_size': 10, 'epochs': 6}
0.992228 (0.001478) with: {'batch_size': 20, 'epochs': 6}
0.992637 (0.001591) with: {'batch_size': 40, 'epochs': 6}
0.992555 (0.000989) with: {'batch_size': 60, 'epochs': 6}
0.992637 (0.001219) with: {'batch_size': 80, 'epochs': 6}
0.991491 (0.001104) with: {'batch_size': 100, 'epochs': 6}
Best: 0.992800 using {'batch_size': 10, 'epochs': 6}
Test Accuracy 0.9927184451552271


In [14]:
##### OPTIMIZATION ALGORITHM #####

def create_model(optimizer):
    
    model = Sequential()
    # a basic feed-forward model
    model.add(Flatten()) 
    # takes our 28x28 and makes it 1x784
    model.add(Dense(128, activation='relu')) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(128, activation='relu')) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(2, activation='softmax'))  
    # our output layer. 2 units for 2 classes. Softmax for probability distribution

    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])
    
    return model

# fix random seed for reproducibility
np.random.seed(seed)

# Test all combinations of the following parameters:
param_grid = {'epochs': [6],
              'batch_size': [10],
              'optimizer': ['SGD', 'Adam']
              }

my_classifier = KerasClassifier(create_model)

grid = GridSearchCV(my_classifier, param_grid, cv=3, n_jobs=1, verbose=1)

grid_result = grid.fit(x_train, y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
    
# Print Results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Print Test Accuracy
print("Test Accuracy", grid_result.score(x_test, y_test))

Fitting 3 folds for each of 2 candidates, totalling 6 fits
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:  2.9min finished


Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Best: 0.993210 using {'batch_size': 10, 'epochs': 6, 'optimizer': 'SGD'}
0.993210 (0.001633) with: {'batch_size': 10, 'epochs': 6, 'optimizer': 'SGD'}
0.992964 (0.001728) with: {'batch_size': 10, 'epochs': 6, 'optimizer': 'Adam'}
Best: 0.993210 using {'batch_size': 10, 'epochs': 6, 'optimizer': 'SGD'}
Test Accuracy 0.9917475711373449


In [21]:
##### LOSS FUNCTION #####

def create_model(optimizer, loss):
    
    model = Sequential()
    # a basic feed-forward model
    model.add(Flatten()) 
    # takes our 28x28 and makes it 1x784
    model.add(Dense(128, activation='relu')) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(128, activation='relu')) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(2, activation='softmax'))  
    # our output layer. 2 units for 2 classes. Softmax for probability distribution        

    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    
    return model

# fix random seed for reproducibility
np.random.seed(seed)

# Test all combinations of the following parameters:
param_grid = {
              'epochs': [6],
              'batch_size': [10],
              'optimizer': ['SGD'],
              'loss': ['categorical_crossentropy', 'binary_crossentropy']
              }

my_classifier = KerasClassifier(create_model)

grid = GridSearchCV(my_classifier, param_grid, cv=3, n_jobs=1, verbose=1)

grid_result = grid.fit(x_train, y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
    
# Print Results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Print Test Accuracy
print("Test Accuracy", grid_result.score(x_test, y_test))

Fitting 3 folds for each of 2 candidates, totalling 6 fits
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:  2.3min finished


Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Best: 0.989282 using {'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.988464 (0.000696) with: {'batch_size': 10, 'epochs': 6, 'loss': 'categorical_crossentropy', 'optimizer': 'SGD'}
0.989282 (0.001335) with: {'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
Best: 0.989282 using {'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
Test Accuracy 0.98543689088914


In [24]:
##### NEURON ACTIVATION FUNCTION 1 #####

def create_model(optimizer, loss, activation_1, activation_2, activation_3):
    
    model = Sequential()
    # a basic feed-forward model
    model.add(Flatten()) 
    # takes our 28x28 and makes it 1x784
    model.add(Dense(128, activation=activation_1)) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(128, activation=activation_2)) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(2, activation=activation_3))  
    # our output layer. 2 units for 2 classes. Softmax for probability distribution

    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    
    return model

# fix random seed for reproducibility
np.random.seed(seed)

# Test all combinations of the following parameters:
param_grid = {'epochs': [6],
              'batch_size': [10],
              'optimizer': ['SGD'],
              'loss': ['binary_crossentropy'],
              'activation_1': ['softmax', 'relu', 'tanh', 'sigmoid'],
              'activation_2': ['relu'],
              'activation_3': ['softmax']
              }

my_classifier = KerasClassifier(create_model)

grid = GridSearchCV(my_classifier, param_grid, cv=3, n_jobs=1, verbose=1)

grid_result = grid.fit(x_train, y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
    
# Print Results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Print Test Accuracy
print("Test Accuracy", grid_result.score(x_test, y_test))

Fitting 3 folds for each of 4 candidates, totalling 12 fits
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:  6.4min finished


Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Best: 0.989282 using {'activation_1': 'relu', 'activation_2': 'relu', 'activation_3': 'softmax', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.977747 (0.002806) with: {'activation_1': 'softmax', 'activation_2': 'relu', 'activation_3': 'softmax', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.989282 (0.001335) with: {'activation_1': 'relu', 'activation_2': 'relu', 'activation_3': 'softmax', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.987646 (0.000614) with: {'activation_1': 'tanh', 'activation_2': 'relu', 'activation_3': 'softmax', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.983965 (0.000810) with: {'activation_1': 'sigmoid', 'activation_2': 'relu', 'activation_3': 'softmax', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
Best: 0.989282 usin

In [25]:
##### NEURON ACTIVATION FUNCTION 2 #####

def create_model(optimizer, loss, activation_1, activation_2, activation_3):
    
    model = Sequential()
    # a basic feed-forward model
    model.add(Flatten()) 
    # takes our 28x28 and makes it 1x784
    model.add(Dense(128, activation=activation_1)) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(128, activation=activation_2)) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(2, activation=activation_3))  
    # our output layer. 2 units for 2 classes. Softmax for probability distribution


    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    
    return model

# fix random seed for reproducibility
np.random.seed(seed)

# Test all combinations of the following parameters:
param_grid = {'epochs': [6],
              'batch_size': [10],
              'optimizer': ['SGD'],
              'loss': ['binary_crossentropy'],
              'activation_1': ['relu'],
              'activation_2': ['softmax', 'relu', 'tanh', 'sigmoid'],
              'activation_3': ['softmax']
              }

my_classifier = KerasClassifier(create_model)

grid = GridSearchCV(my_classifier, param_grid, cv=3, n_jobs=1, verbose=1)

grid_result = grid.fit(x_train, y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
    
# Print Results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Print Test Accuracy
print("Test Accuracy", grid_result.score(x_test, y_test))

Fitting 3 folds for each of 4 candidates, totalling 12 fits
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6


Epoch 5/6
Epoch 6/6


[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:  6.5min finished


Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Best: 0.989364 using {'activation_1': 'relu', 'activation_2': 'relu', 'activation_3': 'softmax', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.972347 (0.008485) with: {'activation_1': 'relu', 'activation_2': 'softmax', 'activation_3': 'softmax', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.989364 (0.001423) with: {'activation_1': 'relu', 'activation_2': 'relu', 'activation_3': 'softmax', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.988710 (0.000401) with: {'activation_1': 'relu', 'activation_2': 'tanh', 'activation_3': 'softmax', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.984701 (0.001206) with: {'activation_1': 'relu', 'activation_2': 'sigmoid', 'activation_3': 'softmax', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
Best: 0.989364 usin

In [26]:
##### NEURON ACTIVATION FUNCTION 3 #####

def create_model(optimizer, loss, activation_1, activation_2, activation_3):
    
    model = Sequential()
    # a basic feed-forward model
    model.add(Flatten()) 
    # takes our 28x28 and makes it 1x784
    model.add(Dense(128, activation=activation_1)) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(128, activation=activation_2)) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(2, activation=activation_3))  
    # our output layer. 2 units for 2 classes. Softmax for probability distribution

    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    
    return model

# fix random seed for reproducibility
np.random.seed(seed)

# Test all combinations of the following parameters:
param_grid = {'epochs': [6],
              'batch_size': [10],
              'optimizer': ['SGD'],
              'loss': ['binary_crossentropy'],
              'activation_1': ['relu'],
              'activation_2': ['relu'],
              'activation_3': ['softmax', 'relu', 'tanh', 'sigmoid']
              }

my_classifier = KerasClassifier(create_model)

grid = GridSearchCV(my_classifier, param_grid, cv=3, n_jobs=1, verbose=1)

grid_result = grid.fit(x_train, y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
    
# Print Results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Print Test Accuracy
print("Test Accuracy", grid_result.score(x_test, y_test))

Fitting 3 folds for each of 4 candidates, totalling 12 fits
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:  7.1min finished


Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Best: 0.988464 using {'activation_1': 'relu', 'activation_2': 'relu', 'activation_3': 'softmax', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.988464 (0.000696) with: {'activation_1': 'relu', 'activation_2': 'relu', 'activation_3': 'softmax', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.369631 (0.265103) with: {'activation_1': 'relu', 'activation_2': 'relu', 'activation_3': 'relu', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.438681 (0.134332) with: {'activation_1': 'relu', 'activation_2': 'relu', 'activation_3': 'tanh', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
0.986746 (0.000362) with: {'activation_1': 'relu', 'activation_2': 'relu', 'activation_3': 'sigmoid', 'batch_size': 10, 'epochs': 6, 'loss': 'binary_crossentropy', 'optimizer': 'SGD'}
Best: 0.988464 using {'activati

In [34]:
##### Final Model #####
##### Epochs - The more the better usually. #####

def create_model():
    
    model = Sequential()
    # a basic feed-forward model
    model.add(Flatten()) 
    # takes our 28x28 and makes it 1x784
    model.add(Dense(128, activation='relu')) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(128, activation='relu')) 
    # a simple fully-connected layer, 128 units, relu activation
    model.add(Dense(2, activation='softmax'))  
    # our output layer. 2 units for 2 classes. Softmax for probability distribution

    model.compile(loss=keras.losses.binary_crossentropy, 
                  optimizer=keras.optimizers.SGD(), 
                  metrics=['accuracy'])
    
    return model

# fix random seed for reproducibility
np.random.seed(seed)

# Test all combinations of the following parameters:
param_grid = {'epochs': [20],
              'batch_size': [20, 40, 60]
              }

my_classifier = KerasClassifier(create_model)

grid = GridSearchCV(my_classifier, param_grid, cv=3, n_jobs=1, verbose=1)

grid_result = grid.fit(x_train, y_train)

# summarize results
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
    
# Print Results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Print Test Accuracy
print(" Test Accuracy", grid_result.score(x_test, y_test))

Fitting 3 folds for each of 3 candidates, totalling 9 fits
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 

Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:  5.8min finished


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
0.989282 (0.001140) with: {'batch_size': 20, 'epochs': 20}
0.988792 (0.001273) with: {'batch_size': 40, 'epochs': 20}
0.987564 (0.001224) with: {'batch_size': 60, 'epochs': 20}
Best: 0.989282 using {'batch_size': 20, 'epochs': 20}
 Test Accuracy 0.9898058233909237
