In [1]:
from keras.utils import np_utils
from keras.datasets import mnist
from keras.initializers import RandomNormal

Using TensorFlow backend.


In [2]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import time
# https://gist.github.com/greydanus/f6eee59eaf1d90fcb3b534a25362cea4
# https://stackoverflow.com/a/14434334
# this function is used to update the plots for each epoch and error
def plt_dynamic(x, vy, ty, ax, colors=['b']):
    ax.plot(x, vy, 'b', label="Validation Loss")
    ax.plot(x, ty, 'r', label="Train Loss")
    plt.legend()
    plt.grid()
    fig.canvas.draw()

In [3]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [4]:
print("Number of training examples :", X_train.shape[0], "and each image is of shape (%d, %d)"%(X_train.shape[1], X_train.shape[2]))
print("Number of training examples :", X_test.shape[0], "and each image is of shape (%d, %d)"%(X_test.shape[1], X_test.shape[2]))

Number of training examples : 60000 and each image is of shape (28, 28)
Number of training examples : 10000 and each image is of shape (28, 28)


In [5]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1]*X_train.shape[2])
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1]*X_test.shape[2])

In [6]:
X_train = X_train/255
X_test = X_test/255

In [7]:
print(X_train[0])

[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         

In [8]:
print("After reshaping the training sample :", X_train.shape[0], "and each image is of shape (%d)"%(X_train.shape[1]))
print("After reshaping the testing sample :", X_test.shape[0], "and each image is of shape (%d)"%(X_test.shape[1]))

After reshaping the training sample : 60000 and each image is of shape (784)
After reshaping the testing sample : 10000 and each image is of shape (784)


In [9]:
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

In [10]:
print("After converting the output into a vector : ",y_train[0])
print("After converting the output into a vector : ",y_test[0])

After converting the output into a vector :  [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
After converting the output into a vector :  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]


In [11]:
from keras.models import Sequential
from keras.layers import Activation, Dropout, Dense, BatchNormalization

# 2 layer Architecture

In [13]:
output_dim = 10
input_dim = X_train.shape[1]
batch_size = 128
epochs = 20

In [16]:
model = Sequential()

hidden_layer_1 = Dense(512,input_shape=(input_dim,),activation='relu',name='hidden_layer_1')
hidden_layer_2 = Dense(128,activation='relu',name='hidden_layer_2')
output = Dense(output_dim, input_dim=input_dim, activation='softmax', name="output_layer")

model.add(hidden_layer_1)
model.add(hidden_layer_2)
model.add(output)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

























In [17]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_layer_1 (Dense)       (None, 512)               401920    
_________________________________________________________________
hidden_layer_2 (Dense)       (None, 128)               65664     
_________________________________________________________________
output_layer (Dense)         (None, 10)                1290      
Total params: 468,874
Trainable params: 468,874
Non-trainable params: 0
_________________________________________________________________


In [50]:
history = model.fit(X_train, y_train, batch_size=batch_size,
                    epochs=epochs, validation_data=(X_test, y_test),
                    verbose=1)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [51]:
score = model.evaluate(X_test, y_test, verbose=0) 

In [52]:
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

Test score: 0.10698660328163441
Test accuracy: 0.9806


In [53]:
fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,epochs+1))
vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

<IPython.core.display.Javascript object>

# 2 Layers with batch Normalization

In [19]:
model = Sequential()

hidden_layer_1 = Dense(512,input_shape=(input_dim,),activation='relu',name='hidden_layer_1')
batch_1 = BatchNormalization()
hidden_layer_2 = Dense(128,activation='relu',name='hidden_layer_2')
batch_2 = BatchNormalization()
output = Dense(output_dim, input_dim=input_dim, activation='softmax', name="output_layer")

model.add(hidden_layer_1)
model.add(batch_1)
model.add(hidden_layer_2)
model.add(batch_2)
model.add(output)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [20]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_layer_1 (Dense)       (None, 512)               401920    
_________________________________________________________________
batch_normalization_2 (Batch (None, 512)               2048      
_________________________________________________________________
hidden_layer_2 (Dense)       (None, 128)               65664     
_________________________________________________________________
batch_normalization_3 (Batch (None, 128)               512       
_________________________________________________________________
output_layer (Dense)         (None, 10)                1290      
Total params: 471,434
Trainable params: 470,154
Non-trainable params: 1,280
_________________________________________________________________


In [21]:
epochs=10
history = model.fit(X_train, y_train, batch_size=batch_size,
                    epochs=epochs, validation_data=(X_test, y_test),
                    verbose=1)



















Train on 60000 samples, validate on 10000 samples
Epoch 1/10






























Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [22]:
score = model.evaluate(X_test, y_test, verbose=0)
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

Test score: 0.07202633481275988
Test accuracy: 0.9796


In [24]:
fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,epochs+1))
vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

<IPython.core.display.Javascript object>

# 2 layer architecture with drop out

In [25]:
model = Sequential()

hidden_layer_1 = Dense(512,input_shape=(input_dim,),activation='relu',name='hidden_layer_1')
batch_1 = BatchNormalization()
drop_out_1 = Dropout(rate=0.50)
hidden_layer_2 = Dense(128,activation='relu',name='hidden_layer_2')
drop_out_2 = Dropout(rate=0.25)
batch_2 = BatchNormalization()
output = Dense(output_dim, input_dim=input_dim, activation='softmax', name="output_layer")

model.add(hidden_layer_1)
model.add(batch_1)
model.add(drop_out_1)
model.add(hidden_layer_2)
model.add(batch_2)
model.add(drop_out_2)
model.add(output)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [26]:
epochs=10
history = model.fit(X_train, y_train, batch_size=batch_size,
                    epochs=epochs, validation_data=(X_test, y_test),
                    verbose=1)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [27]:
score = model.evaluate(X_test, y_test, verbose=0)
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

Test score: 0.05805287803456886
Test accuracy: 0.9819


In [28]:
fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,epochs+1))
vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

<IPython.core.display.Javascript object>

# 3. Layer Architecture

In [36]:
model = Sequential()
hidden_layer_1 = Dense(500,input_dim=input_dimension,activation='relu',name='hidden_layer_1')
hidden_layer_2 = Dense(200,activation='relu',name='hidden_layer_2')
hidden_layer_3 = Dense(100,activation='relu',name='hidden_layer_3')
output = Dense(output_dimension, input_dim=input_dimension, activation='softmax', name="output_layer")

model.add(hidden_layer_1)
model.add(hidden_layer_2)
model.add(hidden_layer_3)
model.add(output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [37]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_layer_1 (Dense)       (None, 500)               392500    
_________________________________________________________________
hidden_layer_2 (Dense)       (None, 200)               100200    
_________________________________________________________________
hidden_layer_3 (Dense)       (None, 100)               20100     
_________________________________________________________________
output_layer (Dense)         (None, 10)                1010      
Total params: 513,810
Trainable params: 513,810
Non-trainable params: 0
_________________________________________________________________


In [39]:
epochs=20
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test), verbose=1)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [40]:
score = model.evaluate(X_test, y_test, verbose=0) 

In [41]:
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

Test score: 0.10319470737203341
Test accuracy: 0.9843


In [42]:
fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,epochs+1))
vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

<IPython.core.display.Javascript object>

# 3.Layers with batch normalization

In [30]:
model = Sequential()
hidden_layer_1 = Dense(500,input_dim=input_dim,activation='relu',name='hidden_layer_1')
batch_1 = BatchNormalization()
hidden_layer_2 = Dense(200,activation='relu',name='hidden_layer_2')
hidden_layer_3 = Dense(100,activation='relu',name='hidden_layer_3')
output = Dense(output_dim, input_dim=input_dim, activation='softmax', name="output_layer")

model.add(hidden_layer_1)
model.add(batch_1)
model.add(hidden_layer_2)
model.add(hidden_layer_3)
model.add(output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [31]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_layer_1 (Dense)       (None, 500)               392500    
_________________________________________________________________
batch_normalization_6 (Batch (None, 500)               2000      
_________________________________________________________________
hidden_layer_2 (Dense)       (None, 200)               100200    
_________________________________________________________________
hidden_layer_3 (Dense)       (None, 100)               20100     
_________________________________________________________________
output_layer (Dense)         (None, 10)                1010      
Total params: 515,810
Trainable params: 514,810
Non-trainable params: 1,000
_________________________________________________________________


In [32]:
epochs=5
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test), verbose=1)

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [33]:
score = model.evaluate(X_test, y_test, verbose=0)
print('Test score:', score[0]) 
print('Test accuracy:', score[1])


fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,epochs+1))
vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.07970767500349903
Test accuracy: 0.9764


<IPython.core.display.Javascript object>

# 3 Layers with Dropouts

In [35]:
model = Sequential()
hidden_layer_1 = Dense(500,input_dim=input_dim,activation='relu',name='hidden_layer_1')
batch_1 = BatchNormalization()
drop_out = Dropout(rate=0.5)
hidden_layer_2 = Dense(200,activation='relu',name='hidden_layer_2')
hidden_layer_3 = Dense(100,activation='relu',name='hidden_layer_3')
output = Dense(output_dim, input_dim=input_dim, activation='softmax', name="output_layer")

model.add(hidden_layer_1)
model.add(batch_1)
model.add(drop_out)
model.add(hidden_layer_2)
model.add(hidden_layer_3)
model.add(output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_layer_1 (Dense)       (None, 500)               392500    
_________________________________________________________________
batch_normalization_8 (Batch (None, 500)               2000      
_________________________________________________________________
dropout_4 (Dropout)          (None, 500)               0         
_________________________________________________________________
hidden_layer_2 (Dense)       (None, 200)               100200    
_________________________________________________________________
hidden_layer_3 (Dense)       (None, 100)               20100     
_________________________________________________________________
output_layer (Dense)         (None, 10)                1010      
Total params: 515,810
Trainable params: 514,810
Non-trainable params: 1,000
_________________________________________________________________


In [36]:
epochs=5
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test), verbose=1)

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [37]:
score = model.evaluate(X_test, y_test, verbose=0)
print('Test score:', score[0]) 
print('Test accuracy:', score[1])


fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,epochs+1))
vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.07950529584407341
Test accuracy: 0.9752


<IPython.core.display.Javascript object>

# 5 Layer architecture

In [44]:
model = Sequential()
hidden_layer_1 = Dense(700,input_dim=input_dimension,activation='relu',name='hidden_layer_1')
hidden_layer_2 = Dense(500,activation='relu',name='hidden_layer_2')
hidden_layer_3 = Dense(300,activation='relu',name='hidden_layer_3')
hidden_layer_4 = Dense(150,activation='relu',name='hidden_layer_4')
hidden_layer_5 = Dense(50,activation='relu',name='hidden_layer_5')
output = Dense(output_dimension, input_dim=input_dimension, activation='softmax', name="output_layer")

model.add(hidden_layer_1)
model.add(hidden_layer_2)
model.add(hidden_layer_3)
model.add(hidden_layer_4)
model.add(hidden_layer_5)
model.add(output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_layer_1 (Dense)       (None, 700)               549500    
_________________________________________________________________
hidden_layer_2 (Dense)       (None, 500)               350500    
_________________________________________________________________
hidden_layer_3 (Dense)       (None, 300)               150300    
_________________________________________________________________
hidden_layer_4 (Dense)       (None, 150)               45150     
_________________________________________________________________
hidden_layer_5 (Dense)       (None, 50)                7550      
_________________________________________________________________
output_layer (Dense)         (None, 10)                510       
Total params: 1,103,510
Trainable params: 1,103,510
Non-trainable params: 0
_________________________________________________________________


In [46]:
epochs=5
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test), verbose=1)

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [47]:
score = model.evaluate(X_test, y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,epochs+1))
vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.08003212646024621
Test accuracy: 0.9821


<IPython.core.display.Javascript object>

# 5 layers with batch normalization

In [39]:
model = Sequential()
hidden_layer_1 = Dense(700,input_dim=input_dim,activation='relu',name='hidden_layer_1')
batch_1 = BatchNormalization()
hidden_layer_2 = Dense(500,activation='relu',name='hidden_layer_2')
batch_2 = BatchNormalization()
hidden_layer_3 = Dense(300,activation='relu',name='hidden_layer_3')
hidden_layer_4 = Dense(150,activation='relu',name='hidden_layer_4')
hidden_layer_5 = Dense(50,activation='relu',name='hidden_layer_5')
output = Dense(output_dim, input_dim=input_dim, activation='softmax', name="output_layer")

model.add(hidden_layer_1)
model.add(batch_1)
model.add(hidden_layer_2)
model.add(batch_2)
model.add(hidden_layer_3)
model.add(hidden_layer_4)
model.add(hidden_layer_5)
model.add(output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_layer_1 (Dense)       (None, 700)               549500    
_________________________________________________________________
batch_normalization_9 (Batch (None, 700)               2800      
_________________________________________________________________
hidden_layer_2 (Dense)       (None, 500)               350500    
_________________________________________________________________
batch_normalization_10 (Batc (None, 500)               2000      
_________________________________________________________________
hidden_layer_3 (Dense)       (None, 300)               150300    
_________________________________________________________________
hidden_layer_4 (Dense)       (None, 150)               45150     
_________________________________________________________________
hidden_layer_5 (Dense)       (None, 50)                7550      
__________

In [40]:
epochs=5
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test), verbose=1)

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [41]:
score = model.evaluate(X_test, y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,epochs+1))
vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.09106961331287167
Test accuracy: 0.9761


<IPython.core.display.Javascript object>

# 5 layer architecture with Dropouts

In [42]:
model = Sequential()
hidden_layer_1 = Dense(700,input_dim=input_dim,activation='relu',name='hidden_layer_1')
batch_1 = BatchNormalization()
droput_out_1 = Dropout(rate=0.5)
hidden_layer_2 = Dense(500,activation='relu',name='hidden_layer_2')
batch_2 = BatchNormalization()
droput_out_2 = Dropout(rate=0.5)
hidden_layer_3 = Dense(300,activation='relu',name='hidden_layer_3')
hidden_layer_4 = Dense(150,activation='relu',name='hidden_layer_4')
hidden_layer_5 = Dense(50,activation='relu',name='hidden_layer_5')
output = Dense(output_dim, input_dim=input_dim, activation='softmax', name="output_layer")

model.add(hidden_layer_1)
model.add(batch_1)
model.add(drop_out_1)
model.add(hidden_layer_2)
model.add(batch_2)
model.add(drop_out_2)
model.add(hidden_layer_3)
model.add(hidden_layer_4)
model.add(hidden_layer_5)
model.add(output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden_layer_1 (Dense)       (None, 700)               549500    
_________________________________________________________________
batch_normalization_11 (Batc (None, 700)               2800      
_________________________________________________________________
dropout_1 (Dropout)          multiple                  0         
_________________________________________________________________
hidden_layer_2 (Dense)       (None, 500)               350500    
_________________________________________________________________
batch_normalization_12 (Batc (None, 500)               2000      
_________________________________________________________________
dropout_2 (Dropout)          multiple                  0         
_________________________________________________________________
hidden_layer_3 (Dense)       (None, 300)               150300    
__________

In [43]:
epochs=5
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test), verbose=1)

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [44]:
score = model.evaluate(X_test, y_test, verbose=0) 
print('Test score:', score[0]) 
print('Test accuracy:', score[1])

fig,ax = plt.subplots(1,1)
ax.set_xlabel('epoch') ; ax.set_ylabel('Categorical Crossentropy Loss')

# list of epoch numbers
x = list(range(1,epochs+1))
vy = history.history['val_loss']
ty = history.history['loss']
plt_dynamic(x, vy, ty, ax)

Test score: 0.07919207404989284
Test accuracy: 0.9747


<IPython.core.display.Javascript object>