In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [2]:
mnist_data,mnist_info = tfds.load(name='mnist',as_supervised=True,with_info=True)

local data directory. If you'd instead prefer to read directly from our public
GCS bucket (recommended if you're running on GCP), you can instead set
data_dir=gs://tfds-data/datasets.



[1mDownloading and preparing dataset mnist/3.0.0 (download: 11.06 MiB, generated: Unknown size, total: 11.06 MiB) to /root/tensorflow_datasets/mnist/3.0.0...[0m


HBox(children=(FloatProgress(value=0.0, description='Dl Completed...', max=4.0, style=ProgressStyle(descriptio…



[1mDataset mnist downloaded and prepared to /root/tensorflow_datasets/mnist/3.0.0. Subsequent calls will reuse this data.[0m


**Pre Processing the data**

In [3]:
#Split the data into train and test.
mnist_train,mnist_test = mnist_data['train'],mnist_data['test']

#Defining the percentage of the training and validation sets.
num_validation = 0.1 * mnist_info.splits['train'].num_examples #10% of the training will be taken for validation.
num_validation = tf.cast(num_validation, tf.int64)  #10% could be a non whole number. Let's cast into to a whole number.

num_test = mnist_info.splits['test'].num_examples
num_test = tf.cast(num_test, tf.int64)

#We have to scale the image data
def scale(image,label):
  image = tf.cast(image, tf.float32)
  image /= 255 #Since the images are grayscale with values between 0 and 255, let's covert the values between 0 and 1
  return image,label

#Let's use the function scale on train and test datasets
mnist_train_scaled = mnist_train.map(scale)
mnist_test_scaled = mnist_test.map(scale)

#We have to shuffle the data to ensure it is randomly distributed throughout.

Buffer_size = 10000    #A practice for when datasets are huge and all data can't be loaded into the memory.

mnist_train_scaled_shuffled = mnist_train_scaled.shuffle(Buffer_size)

#Splitting train and validation after shuffling
validation_set = mnist_train_scaled_shuffled.take(num_validation)
train_set = mnist_train_scaled_shuffled.skip(num_validation)

# Defining batch size for batch gradient descent.
batch_size = 100

train_set = train_set.batch(batch_size) #Training set needs both FW prop and Back prop. Hence divide it into batches of 100
validation_set = validation_set.batch(num_validation) #Take the entire set as 1 batch
test_set = mnist_test_scaled.batch(num_test) #Take entire test set as 1 batch since it only needs fw prop.

#Extracting and converting validation inputs and targets into seperate variables.
validation_ips,validation_targets = next(iter(validation_set)) #iter makes the data iterable but doesn't load it. Next loads the data.



**Modelling**

In [4]:
#Although CNNs are more efficient for learning image data, we will use a MLP for the Mnist dataaset.

input_size = 784
hidden_layer_size = 200
output_size = 10

model = tf.keras.Sequential([
                             tf.keras.layers.Flatten(input_shape=(28,28,1)),
                             tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
                             tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
                             tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
                             tf.keras.layers.Dense(output_size,activation='softmax')
])

In [5]:
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy']) #For multiple classes we can either use categorical or 
                                                                                            #sparse categorical.Since we haven't one hot encoded our targets,
                                                                                            #we use sparse categorical which automatically one hot encodes.

In [29]:
epochs = 5
model.fit(train_set,epochs=epochs,verbose=2,validation_data=(validation_ips,validation_targets))

Epoch 1/5
540/540 - 9s - loss: 0.4245 - accuracy: 0.8811 - val_loss: 0.2090 - val_accuracy: 0.9407
Epoch 2/5
540/540 - 9s - loss: 0.1931 - accuracy: 0.9429 - val_loss: 0.1503 - val_accuracy: 0.9568
Epoch 3/5
540/540 - 9s - loss: 0.1440 - accuracy: 0.9575 - val_loss: 0.1297 - val_accuracy: 0.9622
Epoch 4/5
540/540 - 9s - loss: 0.1186 - accuracy: 0.9650 - val_loss: 0.1034 - val_accuracy: 0.9700
Epoch 5/5
540/540 - 9s - loss: 0.1005 - accuracy: 0.9704 - val_loss: 0.0921 - val_accuracy: 0.9735


<tensorflow.python.keras.callbacks.History at 0x7f820b1a47f0>

The first model used the hyperparameters : hidden_layer_size = 50, depth : 2 hidden layers, Relu activation function,  adam optimizer, batch_size : 100

validation accuracy : 97.35%

In [33]:
epochs = 5
model.fit(train_set,epochs=epochs,verbose=2,validation_data=(validation_ips,validation_targets))

Epoch 1/5
540/540 - 10s - loss: 0.2748 - accuracy: 0.9200 - val_loss: 0.1360 - val_accuracy: 0.9615
Epoch 2/5
540/540 - 10s - loss: 0.1027 - accuracy: 0.9694 - val_loss: 0.0817 - val_accuracy: 0.9768
Epoch 3/5
540/540 - 10s - loss: 0.0688 - accuracy: 0.9791 - val_loss: 0.0609 - val_accuracy: 0.9820
Epoch 4/5
540/540 - 9s - loss: 0.0504 - accuracy: 0.9845 - val_loss: 0.0506 - val_accuracy: 0.9847
Epoch 5/5
540/540 - 10s - loss: 0.0391 - accuracy: 0.9875 - val_loss: 0.0436 - val_accuracy: 0.9852


<tensorflow.python.keras.callbacks.History at 0x7f81d4e57978>

The second model used the hyperparameters : hidden_layer_size = 200, depth : 2 hidden layers, Relu activation function, adam optimizer, batch_size : 100

validation accuracy : 98.5%

In [36]:
epochs = 5
model.fit(train_set,epochs=epochs,verbose=2,validation_data=(validation_ips,validation_targets))

Epoch 1/5
540/540 - 10s - loss: 0.2640 - accuracy: 0.9214 - val_loss: 0.1151 - val_accuracy: 0.9673
Epoch 2/5
540/540 - 10s - loss: 0.1037 - accuracy: 0.9668 - val_loss: 0.0831 - val_accuracy: 0.9723
Epoch 3/5
540/540 - 10s - loss: 0.0679 - accuracy: 0.9786 - val_loss: 0.0551 - val_accuracy: 0.9828
Epoch 4/5
540/540 - 10s - loss: 0.0515 - accuracy: 0.9837 - val_loss: 0.0715 - val_accuracy: 0.9768
Epoch 5/5
540/540 - 10s - loss: 0.0441 - accuracy: 0.9856 - val_loss: 0.0446 - val_accuracy: 0.9863


<tensorflow.python.keras.callbacks.History at 0x7f820b207da0>

The third model used the hyperparameters : hidden_layer_size = 200, depth : 3 hidden layers, Relu activation function, adam optimizer, batch_size : 100

validation accuracy : 98.63%

In [40]:
epochs = 5
model.fit(train_set,epochs=epochs,verbose=2,validation_data=(validation_ips,validation_targets))

Epoch 1/5
540/540 - 10s - loss: 0.2805 - accuracy: 0.9166 - val_loss: 0.1561 - val_accuracy: 0.9532
Epoch 2/5
540/540 - 10s - loss: 0.1314 - accuracy: 0.9598 - val_loss: 0.1033 - val_accuracy: 0.9687
Epoch 3/5
540/540 - 10s - loss: 0.0906 - accuracy: 0.9719 - val_loss: 0.0847 - val_accuracy: 0.9743
Epoch 4/5
540/540 - 10s - loss: 0.0658 - accuracy: 0.9786 - val_loss: 0.0610 - val_accuracy: 0.9817
Epoch 5/5
540/540 - 10s - loss: 0.0510 - accuracy: 0.9839 - val_loss: 0.0508 - val_accuracy: 0.9833


<tensorflow.python.keras.callbacks.History at 0x7f820ac6b4e0>

The fourth model used the hyperparameters : hidden_layer_size = 200, depth : 3 hidden layers, tanh activation function, adam optimizer, batch_size : 100

validation accuracy : 98.33%

In [45]:
epochs = 5
model.fit(train_set,epochs=epochs,verbose=2,validation_data=(validation_ips,validation_targets))

Epoch 1/5
540/540 - 11s - loss: 0.2763 - accuracy: 0.9155 - val_loss: 0.1199 - val_accuracy: 0.9655
Epoch 2/5
540/540 - 11s - loss: 0.1049 - accuracy: 0.9673 - val_loss: 0.1006 - val_accuracy: 0.9678
Epoch 3/5
540/540 - 11s - loss: 0.0803 - accuracy: 0.9750 - val_loss: 0.0726 - val_accuracy: 0.9773
Epoch 4/5
540/540 - 11s - loss: 0.0613 - accuracy: 0.9815 - val_loss: 0.0644 - val_accuracy: 0.9798
Epoch 5/5
540/540 - 11s - loss: 0.0504 - accuracy: 0.9843 - val_loss: 0.0544 - val_accuracy: 0.9837


<tensorflow.python.keras.callbacks.History at 0x7f820031db00>

The fifth model used the hyperparameters : hidden_layer_size = 200, depth : 5 hidden layers, relu activation function, adam optimizer, batch_size : 100

validation accuracy : 98.37%

In [6]:
epochs = 10
model.fit(train_set,epochs=epochs,verbose=2,validation_data=(validation_ips,validation_targets))

Epoch 1/10
540/540 - 9s - loss: 0.2635 - accuracy: 0.9204 - val_loss: 0.1256 - val_accuracy: 0.9608
Epoch 2/10
540/540 - 9s - loss: 0.1002 - accuracy: 0.9692 - val_loss: 0.0805 - val_accuracy: 0.9768
Epoch 3/10
540/540 - 9s - loss: 0.0696 - accuracy: 0.9789 - val_loss: 0.0758 - val_accuracy: 0.9770
Epoch 4/10
540/540 - 9s - loss: 0.0500 - accuracy: 0.9844 - val_loss: 0.0602 - val_accuracy: 0.9830
Epoch 5/10
540/540 - 9s - loss: 0.0421 - accuracy: 0.9869 - val_loss: 0.0461 - val_accuracy: 0.9868
Epoch 6/10
540/540 - 9s - loss: 0.0348 - accuracy: 0.9885 - val_loss: 0.0365 - val_accuracy: 0.9903
Epoch 7/10
540/540 - 9s - loss: 0.0289 - accuracy: 0.9909 - val_loss: 0.0409 - val_accuracy: 0.9878
Epoch 8/10
540/540 - 9s - loss: 0.0237 - accuracy: 0.9929 - val_loss: 0.0269 - val_accuracy: 0.9918
Epoch 9/10
540/540 - 9s - loss: 0.0221 - accuracy: 0.9923 - val_loss: 0.0300 - val_accuracy: 0.9907
Epoch 10/10
540/540 - 9s - loss: 0.0218 - accuracy: 0.9926 - val_loss: 0.0301 - val_accuracy: 0.9905

<tensorflow.python.keras.callbacks.History at 0x7fc70250fcf8>

The sixth model used the hyperparameters : hidden_layer_size = 200, depth : 3 hidden layers, relu activation function, adam optimizer, batch_size : 100

validation accuracy : 99.38%

In [9]:
##Testing the model on the test data.
test_loss,test_accuracy = model.evaluate(train_set)

print('Test loss : {loss:.2f}, Test accuracy : {accuracy:.2f}%'.format(loss=test_loss,accuracy=test_accuracy*100))

Test loss : 0.02, Test accuracy : 99.48%
