**Building a "Deep" Convolutional Neural Network**

Prerequisite

In [1]:
# importing necessary tools
%matplotlib inline
import numpy as np                      #advanced math library
import matplotlib.pyplot as plt         #MATLAB like plotting routine
import random                           #For generating random numbers
# importing some additional tools also
from keras.datasets import mnist        # MNIST dataset is included in Keras    
from keras.models import Sequential     # Model type to be used


from keras.layers.core import Dense, Dropout, Activation  # Types of layers to import
from keras.utils import np_utils                          # NumPy related tool

Image generator
Importing libraries

In [2]:
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D,GlobalAveragePooling2D,Flatten
from keras.layers.normalization.batch_normalization import BatchNormalization

In [3]:
#The MNIST data is split between 60,000 images (28 X 28 pixel)
#training images and 10,000 (28 X 28 pixel) images
(X_train,y_train),(X_test,y_test)=mnist.load_data()

print("X_train_shape", X_train.shape)
print("X_test_shape", X_test.shape)
print("y_train_shape", y_train.shape)
print("y_test_shape", y_test.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
X_train_shape (60000, 28, 28)
X_test_shape (10000, 28, 28)
y_train_shape (60000,)
y_test_shape (10000,)


In [4]:
X_train=X_train.reshape(60000,28,28,1)         # reshape 60,000 28 X 28 matrices into
X_test=X_test.reshape(10000,28,28,1)           # reshape 10,000 28 X 28 matrices into

X_train=X_train.astype('float32')              #change integers to 32-bit floating point
X_test=X_test.astype('float32')

X_train /=255                                  #normalize each value for each pixel
X_test /= 255

In [5]:
# one hot format classes
nb_classes=10    # number of unique digits

Y_train=np_utils.to_categorical(y_train,nb_classes)
Y_test=np_utils.to_categorical(y_test,nb_classes)


In [6]:
from keras.engine import sequential
# The Sequential model is a linear stack of layers and is very common
model= Sequential()

In [7]:
#Convolution Layer 1
model.add(Conv2D(32,(3,3),input_shape=(28,28,1)))  #32 different 3X3 Kernels, so 32 feature maps

When we compute a BatchNormalization along an axis, we preserve the dimensions of the array, and we normalize with respect to the mean and standard deviation over every other axis. So in your 2D example BatchNormalization with axis=1 is subtracting the mean for axis=0, just as you expect. 

In [8]:
model.add(BatchNormalization(axis=-1))       # normalize each feature map before activation

In [9]:
convLayer01 = Activation('relu')             # activation

In [10]:
#ConvolutionLayer2
model.add(Conv2D(32,(3,3)))                  #32 different 3X3 Kernels, so 32 feature maps

In [11]:
model.add(BatchNormalization(axis=-1))       # normalize each feature map before activation

In [12]:
convLayer02 = Activation('relu')             # activation

In [13]:
convLayer02=MaxPooling2D(pool_size=(2,2))    # Pool the max values over a 2X2 Kernel

In [14]:
model.add(convLayer02)

In [15]:
#convolutionLayer3
model.add(Conv2D(64,(3,3)))                     # 64 different 3X3 Kernels, so 64 feature maps
model.add(BatchNormalization(axis=-1))          # normalize each feature map before activation
convLayer03=Activation('relu')                  # activation
model.add(convLayer03)

In [16]:
#convolutionLayer4
model.add(Conv2D(64,(3,3)))                     # 64 different 3X3 Kernels, so 64 feature maps
model.add(BatchNormalization(axis=-1))          # normalize each feature map before activation
convLayer03=Activation('relu')                  # activation
convLayer04=MaxPooling2D(pool_size=(2,2))       # Pool the max values over a 2X2 Kernel
model.add(convLayer04)
model.add(Flatten())                            # flatten final 4X4X64 

In [17]:
#Fully Connected Layer 5
model.add(Dense(512))                           # 512 FCN nodes
model.add(BatchNormalization())                 # normalisation
model.add(Activation('relu'))                   # activation

In [18]:
#Fully Connected layer 6
model.add(Dropout(0.2))                         # 20% dropout of random 
model.add(Dense(10))                            # final 10 FCN nodes
model.add(Activation('softmax'))                # softmax activation

In [19]:
#Summarize the built model
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 batch_normalization (BatchN  (None, 26, 26, 32)       128       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 32)        9248      
                                                                 
 batch_normalization_1 (Batc  (None, 24, 24, 32)       128       
 hNormalization)                                                 
                                                                 
 max_pooling2d (MaxPooling2D  (None, 12, 12, 32)       0         
 )                                                               
                                                        

In [20]:
# let's use the Adam optimizer for learning 
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])

In [21]:
# data augmentation prevents overfitting by slightly changing the data randomly
# Keras has a great built-in feature to do automatic augmentation
gen = ImageDataGenerator(rotation_range=8, width_shift_range=0.08,
                         shear_range=0.3,
                         height_shift_range=0.08, zoom_range=0.08)
test_gen = ImageDataGenerator()

In [22]:
# We can then feed our augmented data in batches
# Besides loss function consideration as before, this method actually results in significant memory savings because we are actually
# LOADING the data into the network in batches before processing each batch

train_generator= gen.flow(X_train,Y_train,batch_size=128)
test_generator= test_gen.flow(X_test,Y_test,batch_size=128)

In [23]:
# We can now train our model which is fed data by our batch loader steps per epoch should always be total
# size of the set divided by the batch size

# SIGNIFICANT MEMORY SAVINGS (important for larger, deeper networks)
model.fit_generator(train_generator,steps_per_epoch=60000//128,epochs=5,verbose=1,
                    validation_data=test_generator,
                    validation_steps=10000//128)

  import sys


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f0fa363b650>

In [24]:
score=model.evaluate(X_test,Y_test)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Test score: 0.0313723124563694
Test accuracy: 0.9904999732971191
