# importing necessary libraries

In [2]:
from tensorflow.keras.datasets import mnist
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib qt

# loading dataset

In [3]:
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [4]:
# we have to convert the labels into categorical data for processing
from tensorflow.keras.utils import to_categorical

In [5]:
print('Training data shape:',train_images.shape, train_labels.shape)
print('Test data shape:',test_images.shape, test_labels.shape)

Training data shape: (60000, 28, 28) (60000,)
Test data shape: (10000, 28, 28) (10000,)


In [6]:
# Number of unique labels in training data

In [7]:
%matplotlib qt
classes = np.unique(train_labels)
classes_num = len(classes)
print('total no of outputs:',classes_num)
print('output classes:',classes)

plt.figure(figsize=[10,5])

# Display the first image in training data

plt.subplot(1,2,1)
plt.imshow(train_images[0,:,:],cmap='gray')
plt.title('Ground truth : {}'.format(train_labels[0]))

# Display the first image in test data

plt.subplot(1,2,2)
plt.imshow(test_images[0,:,:],cmap='gray')
plt.title('Ground Truth : {}'.format(test_labels[0]))

total no of outputs: 10
output classes: [0 1 2 3 4 5 6 7 8 9]


Text(0.5, 1.0, 'Ground Truth : 7')

<h1>Process the data</h1>

In [8]:
# Change from matrix to array of dimension 28x28 to array of dimension 784
# it will be fed to the network as a single feature
dim_data = np.prod(train_images.shape[1:])
train_data = train_images.reshape(train_images.shape[0], dim_data)
test_data = test_images.reshape(test_images.shape[0], dim_data)

In [9]:
# Change to float datatype and scale values b/w 0 to 1
train_data = train_data.astype('float32')
test_data = test_data.astype('float32')

In [10]:
"""
Convert the labels from integer to categorical ( one-hot ) encoding
since that is the format required by Keras to perform multiclass
classification. One-hot encoding is a type of boolean representation of 
integer data. It converts the integer to an array of all zeros except a 
1 at the index of the integer.
For example, using a one-hot encoding for 10 classes, 
the integer 5 will be encoded as 0000010000.
"""
# Change the labels from integer to categorical data
train_labels_one_hot = to_categorical(train_labels)
test_labels_one_hot = to_categorical(test_labels)

<h1> Creating the network

In [11]:
"""
we will be using a network with 2 hidden layers and an output layer
with 10 units. The number of units in the hidden layers is 
kept to be 512. The input to the network is the 784-dimensional array 
converted from the 28×28 image.

We will use the Sequential model for building the network.
In the Sequential model, we can just stack up layers by adding the desired 
layer one by one. We use the Dense layer, also called fully connected layer
since we are building a feedforward network in which all the neurons from one layer
are connected to the neurons in the previous layer. Apart from the Dense layer,
we add the ReLU activation function which is required to introduce 
non-linearity to the model. This will help the network learn non-linear
decision boundaries. The last layer is a softmax layer as it is a multiclass
classification problem. For binary classification, we can use sigmoid
"""

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(512, activation='relu',input_shape = (dim_data,)))
model.add(Dense(512, activation='relu'))
model.add(Dense(classes_num, activation='softmax'))

<h1> Configure the network

In [12]:
"""
configure the optimizer to be rmsprop. We also specify 
the loss type which is categorical cross entropy which is used 
for multiclass classification. We also specify the metrics 
( accuracy in this case ) which we want to track during the 
training process. You can also try using any other optimizer
such as adam or SGD.
"""

model.compile(optimizer='rmsprop',loss='categorical_crossentropy',
              metrics=['accuracy'])

<h1>Training the model

In [13]:
"""
The network is ready to get trained. This is done using the fit()
function in Keras. We specify the number of epochs as 20.
This means that the whole dataset will be fed to the network 20 times.
We will be using the test data for validation.
"""

history = model.fit(train_data, train_labels_one_hot, batch_size=256,
                    epochs=20,verbose=1,
                    validation_data=(test_data,test_labels_one_hot))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<h1>Evaluate the trained model

In [14]:
# we check the performance on the whole test data
# using the evaluate() method

[test_loss, test_acc] = model.evaluate(test_data,test_labels_one_hot)
print("Evaluation result on test data : Loss = {}, accuracy = {}"
      .format(test_loss, test_acc))

Evaluation result on test data : Loss = 0.6483175158500671, accuracy = 0.9760000109672546


<h1> check for overfitting

In [15]:
"""
The fit() function returns a history object which has a
dictionary of all the metrics which were required to be tracked
during training. We can use the data in the history object to
plot the loss and accuracy curves to check how the training process went.
You can use the history.history.keys() function to check what
metrics are present in the history. It should look like the following:

['accuracy', 'loss', 'val_accuracy', 'val_loss']
"""

# plot the loss curves

plt.figure(figsize=[8,6])
plt.plot(history.history['loss'],'r',linewidth=3.0)
plt.plot(history.history['val_loss'],'b',linewidth=3.0)
plt.legend(['Training loss','Validation loss'],fontsize=18)
plt.xlabel('Epochs',fontsize=16)
plt.ylabel('Loss',fontsize=16)
plt.title('Loss curves',fontsize=16)

#plot th accuracy curves
plt.figure(figsize=[8,6])
plt.plot(history.history['accuracy'],'r',linewidth=3.0)
plt.plot(history.history['val_accuracy'],'b',linewidth=3.0)
plt.legend(['Training Accuracy','Validation Accuracy'],fontsize=18)
plt.xlabel('Epochs',fontsize=16)
plt.ylabel('Accuracy',fontsize=16)
plt.title('Accuracy curves',fontsize=16)

Text(0.5, 1.0, 'Accuracy curves')

In [65]:
"""
Although the accuracy obtained above is very good,
if you see the loss and accuracy curves in the above figures,
you’ll notice that the validation loss initially decrease,
but then it starts increasing gradually.
Also, there is a substantial difference between the training
and test accuracy. This is a clear sign of Overfitting
which means that the network has memorized the training 
data very well, but is not guaranteed to work on unseen data.
Thus, the difference in the training and test accuracy.
"""
print('Overfitting')

Overfitting


# Adding regularization to the model

<h1>Overfitting occurs mainly because the network parameters are getting too biased towards the training data. We can add a dropout layer to overcome this problem to a certain extent. In case of dropout, a fraction of neurons is randomly turned off during the training process, reducing the dependency on the training set by some amount.

In [34]:
from tensorflow.keras.layers import Dropout

model_reg = Sequential()
model_reg.add(Dense(512, activation='relu', input_shape=(dim_data,)))
model_reg.add(Dropout(0.4))
model_reg.add(Dense(512, activation='relu'))
model_reg.add(Dropout(0.4))
model_reg.add(Dense(classes_num, activation='softmax'))

# check performance after regularization

In [35]:
model_reg.compile(optimizer='rmsprop',loss='categorical_crossentropy',
              metrics=['accuracy'])

In [36]:
history_reg = model_reg.fit(train_data, train_labels_one_hot, batch_size=256,
                    epochs=20,verbose=1,
                    validation_data=(test_data,test_labels_one_hot))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [30]:
#Plot the Loss Curves
plt.figure(figsize=[8,6])
plt.plot(history_reg.history['loss'],'r',linewidth=3.0)
plt.plot(history_reg.history['val_loss'],'b',linewidth=3.0)
plt.legend(['Training loss', 'Validation Loss'],fontsize=18)
plt.xlabel('Epochs ',fontsize=16)
plt.ylabel('Loss',fontsize=16)
plt.title('Loss Curves',fontsize=16)

#Plot the Accuracy Curves
plt.figure(figsize=[8,6])
plt.plot(history_reg.history['accuracy'],'r',linewidth=3.0)
plt.plot(history_reg.history['val_accuracy'],'b',linewidth=3.0)
plt.legend(['Training Accuracy', 'Validation Accuracy'],fontsize=18)
plt.xlabel('Epochs ',fontsize=16)
plt.ylabel('Accuracy',fontsize=16)
plt.title('Accuracy Curves',fontsize=16)

Text(0.5, 1.0, 'Accuracy Curves')

In [20]:
# our first image in the test set is number 7
# let's see what our model predicts

In [21]:
# predict the image
print('Model prediction:{}'.format(model_reg.predict_classes
                                  (test_data[[4],:])[0]))

# display the predicted image
plt.imshow(test_images[4],cmap='gray')
plt.title('Ground truth: {}'.format(test_labels[4]))

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
Model prediction:4


Text(0.5, 1, 'Ground truth: 4')

In [107]:
#Prediction score
model_reg.predict(test_data[[4],:])

array([[2.2336685e-24, 2.6785913e-14, 4.8821407e-13, 3.7296501e-23,
        1.0000000e+00, 2.4403984e-17, 1.1423391e-18, 1.1998451e-10,
        4.2371769e-15, 5.1824118e-09]], dtype=float32)

In [136]:
# above we can see that the 5th index predicts a 
# score of 1 which means the confidence of being 
# digit 4

# I tried this model using relu, sigmoid and tanh in which relu gave best accuracy on training data but also resulted into overfitting.
# After regularization it showed the best results