### Importing the Libraries

In [36]:
## importing libraries
import tensorflow as tf
from tensorflow.keras import utils
from tensorflow.keras.datasets import mnist 
import seaborn as sns
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Activation 
import keras
from keras.layers.normalization import BatchNormalization
from tensorflow.keras.layers import Dropout

### Loading the data

In [2]:
## Loading the data directly to train and test
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [3]:
print("Total Number of training images", X_train.shape[0], " Dimensions:", X_train.shape[1], '*', X_train.shape[2])
print('Total Number of testing images', X_test.shape[0], " Dimensions:", X_test.shape[1], '*', X_test.shape[2])

Total Number of training images 60000  Dimensions: 28 * 28
Total Number of testing images 10000  Dimensions: 28 * 28


### Data Preparation Steps

In [4]:
## Converting the 28*28 pixel image in a 1D vector of 1*784

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1]*X_train.shape[2])
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1]*X_test.shape[2])

In [5]:
print(X_train.shape)
print(X_test.shape)

(60000, 784)
(10000, 784)


In [6]:
print(X_train[5])

[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0  13  25 100 122   7   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0  33 151 208 252 252 252 146   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0  40 152 244 252 253 224 211 252 232
  40   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  15
 152 239 252 252 252 216  31  37 252 252  60   0   

In [7]:
## Data Normalization
X_train = X_train/255  ## As the value ranges from 0 to 255
X_test = X_test/255

In [43]:
## Converting the target variable into a 10D vector/ Like a one hot encoding for the target variable
Y_train = utils.to_categorical(Y_train,10)
Y_test = utils.to_categorical(Y_test,10)

### Training Parameters

In [13]:
## Model Parmaters
output = 10
input_dim = X_train.shape[1]
batch = 128
epochs = 20

### Model 1: - MLP with Sigmoid activations and Adam Optimizer

In [14]:
## Building an MLP with Sigmoid activation using Adam Optimizer
model_sig = Sequential()
model_sig.add(Dense(512, activation='sigmoid', input_shape=(input_dim,)))
model_sig.add(Dense(128, activation='sigmoid'))
model_sig.add(Dense(output, activation = 'softmax'))

model_sig.summary()
model_sig.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model_sig.fit(X_train, Y_train, batch_size=batch, epochs=epochs, verbose=1, validation_data=(X_test, Y_test))

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 512)               401920    
_________________________________________________________________
dense_4 (Dense)              (None, 128)               65664     
_________________________________________________________________
dense_5 (Dense)              (None, 10)                1290      
Total params: 468,874
Trainable params: 468,874
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [19]:
## Model Evaluation
prediction = model_sig.evaluate(X_test, Y_test, verbose=0)
print("Test Loss:", prediction[0],  " Test Accuracy:", prediction[1])

Test Loss: 0.07478052209437083  Test Accuracy: 0.9811


### Model 2: - MLP With Relu activations and Adam Optimizer

In [21]:
## Model 2:- MLP with relu activations using adam optimizer
model_relu = Sequential()
model_relu.add(Dense(512, activation='relu', input_shape=(input_dim,), kernel_initializer=RandomNormal(mean=0.0, stddev=0.062, seed=None)))
model_relu.add(Dense(128, activation='relu', kernel_initializer=RandomNormal(mean=0.0, stddev=0.125, seed=None)) )
model_relu.add(Dense(output, activation='softmax'))

print(model_relu.summary())

model_relu.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model_relu.fit(X_train, Y_train, batch_size=batch, epochs=epochs, verbose=1, validation_data=(X_test, Y_test))

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 512)               401920    
_________________________________________________________________
dense_9 (Dense)              (None, 128)               65664     
_________________________________________________________________
dense_10 (Dense)             (None, 10)                1290      
Total params: 468,874
Trainable params: 468,874
Non-trainable params: 0
_________________________________________________________________
None
Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [23]:
## Model Evaluation
prediction = model_relu.evaluate(X_test, Y_test, verbose=0)
print("Test Loss:", prediction[0],  " Test Accuracy:", prediction[1])

Test Loss: 0.09018854618649179  Test Accuracy: 0.9823


### Model 3:- MLP with Sigmoid activations and Dropout Layers

In [41]:
## Model 3: - with sigmoid activations and some dropout layers

model_dropout = Sequential()

model_dropout.add(Dense(512, activation='sigmoid', input_shape=(input_dim,), kernel_initializer=RandomNormal(mean=0.0, stddev=0.039, seed=None)))
model_dropout.add(Dropout(0.5))

model_dropout.add(Dense(128, activation='sigmoid', kernel_initializer=RandomNormal(mean=0.0, stddev=0.55, seed=None)) )
model_dropout.add(Dropout(0.5))

model_dropout.add(Dense(output, activation='softmax'))

model_dropout.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model_dropout.fit(X_train, Y_train, batch_size=batch, epochs=epochs, verbose=1, validation_data=(X_test, Y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [42]:
## Model evaluation
prediction = model_dropout.evaluate(X_test, Y_test, verbose=0) 
print('Test loss:', prediction[0]) 
print('Test accuracy:', prediction[1])

Test loss: 0.06742319069547811
Test accuracy: 0.9802
