In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.optimizers import Adam
from sklearn.model_selection import KFold
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D

In [2]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train.shape

(60000, 28, 28)

In [3]:
## reshaping format to (batch, height, width, channels). As all the images are in grayscale, the number of channels is 1
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)

## Convert Datatype of pixels to float 
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

## normalize to range 0-1 by Dividing each image by 255
X_train/=255
X_test/=255

X_train.shape

(60000, 28, 28, 1)

In [4]:
#converting output into a layer output
number_of_classes = 10

Y_train = np_utils.to_categorical(y_train, number_of_classes)
Y_test = np_utils.to_categorical(y_test, number_of_classes)

y_train[0], Y_train[0]

(5, array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32))

In [5]:
# Architecture One
def defineModelArch1():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=(28,28,1)))    #first layer
    model.add(Activation('relu'))
    BatchNormalization(axis=-1)

    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    BatchNormalization(axis=-1)     #normalizes the matrix

    model.add(Conv2D(64,(3, 3)))
    model.add(Activation('relu'))
    BatchNormalization(axis=-1)

    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))

    model.add(Flatten())

    BatchNormalization()
    model.add(Dense(512))
    model.add(Activation('relu'))
    BatchNormalization()
    model.add(Dropout(0.2))     #to reduce overfitting
    model.add(Dense(10))
    model.add(Activation('softmax'))
    #model.summary()
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    return model

In [6]:
# Architecture Two 
def defineModelArch2():
    model2 = Sequential()
    model2.add(Conv2D(32, (3, 3), input_shape=(28,28,1)))
    model2.add(Activation('relu'))
    BatchNormalization(axis=-1)
    model2.add(MaxPooling2D(pool_size=(2,2)))

    model2.add(Flatten())
    # Fully connected layer
    BatchNormalization()
    model2.add(Dense(512))
    model2.add(Activation('relu'))
    BatchNormalization()
    model2.add(Dropout(0.2))     #to reduce overfitting
    model2.add(Dense(10))
    model2.add(Activation('softmax'))
    model2.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    return model2

In [14]:
# Architecture Three 
def defineModelArch3():
    model3 = Sequential()
    model3.add(Conv2D(64, (3, 3), input_shape=(28,28,1)))
    model3.add(Activation('relu'))
    BatchNormalization(axis=-1)
    model3.add(MaxPooling2D(pool_size=(4,4)))

    model3.add(Flatten())
    # Fully connected layer
    BatchNormalization()
    model3.add(Dense(512))
    model3.add(Activation('relu'))
    BatchNormalization()
    model3.add(Dropout(0.2))     #to reduce overfitting
    model3.add(Dense(10))
    model3.add(Activation('softmax'))

    model3.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    return model3

In [8]:
# Architecture Four 
def defineModelArch4():
    model4 = Sequential()
    model4.add(Conv2D(32, (3, 3), input_shape=(28,28,1)))    #first layer
    model4.add(Activation('relu'))
    BatchNormalization(axis=-1)

    model4.add(Conv2D(32, (3, 3)))
    model4.add(Activation('relu'))
    model4.add(MaxPooling2D(pool_size=(4,4)))
    BatchNormalization(axis=-1)     #normalizes the matrix

    model4.add(Conv2D(64,(3, 3)))
    model4.add(Activation('relu'))
    BatchNormalization(axis=-1)

    model4.add(Conv2D(64, (3, 3)))
    model4.add(Activation('relu'))
    model4.add(MaxPooling2D(pool_size=(2,2)))

    model4.add(Flatten())
    # Fully connected layer
    BatchNormalization()
    model4.add(Dense(512))
    model4.add(Activation('relu'))
    BatchNormalization()
    model4.add(Dropout(0.2))     #to reduce overfitting
    model4.add(Dense(10))
    model4.add(Activation('softmax'))

    model4.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    return model4

In [9]:
# evaluate a model using k-fold cross-validation
def evaluate_Summerize_model(dataX, dataY, model, n_folds=5):
	# prepare cross validation
	kfold = KFold(n_folds, shuffle=True, random_state=1)
	# enumerate splits
	for train_ix, test_ix in kfold.split(dataX):
        # select rows for train and test
		trainX, trainY, testX, testY = dataX[train_ix], dataY[train_ix], dataX[test_ix], dataY[test_ix]
		# fit model
		model.fit(trainX, trainY, epochs=1, steps_per_epoch=trainX.shape[0])
		# evaluate model
		result = model.evaluate(testX, testY, steps = testX.shape[0])
		print('Accurcy :','> %.3f' % (result[1] * 100.0))

In [10]:
# Architecture One
model = defineModelArch1()
evaluate_Summerize_model(X_train, Y_train, model, n_folds=5)

Accurcy : > 97.517
Accurcy : > 97.983
Accurcy : > 97.833
Accurcy : > 96.625
Accurcy : > 98.350


In [11]:
# Architecture Two
model = defineModelArch2()
evaluate_Summerize_model(X_train, Y_train, model, n_folds=5)

Accurcy : > 97.733
Accurcy : > 98.417
Accurcy : > 98.442
Accurcy : > 99.192
Accurcy : > 99.058


In [17]:
# Architecture Three
model = defineModelArch3()
evaluate_Summerize_model(X_train, Y_train, model, n_folds=3)

Accurcy : > 95.880
Accurcy : > 98.465
Accurcy : > 98.795


In [13]:
# Architecture Four
model = defineModelArch4()
evaluate_Summerize_model(X_train, Y_train, model, n_folds=2)

Accurcy : > 97.090
Accurcy : > 97.797
