In [1]:
##Import relevant packages

import numpy as np
import tensorflow as tf

In [2]:
!pip install tensorflow-datasets




In [3]:
import tensorflow_datasets as tfds


In [4]:
## Load MNIST through TensorFlow Datasets.
## as_supervised=True yields (image, label) pairs; with_info=True additionally returns the dataset metadata.
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    as_supervised=True,
    with_info=True,
)

In [5]:
## Grab the two splits that come with the dataset.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

## The dataset has no validation split of its own, so 10% of the training examples are held out for validation.
## mnist_info.splits[...] exposes per-split metadata such as the number of examples.
train_example_count = mnist_info.splits['train'].num_examples

## 0.1 * count is a float; cast it to an int64 tensor so it can be used as an element count.
num_validation_samples = tf.cast(0.1 * train_example_count, tf.int64)

## Same treatment for the size of the test split.
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

## Scale inputs
def scale(image, label):
    """Convert `image` to float32 and divide it by 255; `label` is returned unchanged."""
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label


## Apply the scaling to every (image, label) pair of both splits.
test_data = mnist_test.map(scale)
scaled_train_and_validation_data = mnist_train.map(scale)

## Shuffle once, then carve the validation set off the front of the shuffled stream.
BUFFER_SIZE = 10000

## reshuffle_each_iteration=False is essential here: by default shuffle() produces a new
## order every time the dataset is iterated, so take() and skip() would cut a different
## 10% on each pass and validation examples would leak into the training epochs.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

## First num_validation_samples elements -> validation, everything after -> training.
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

## Re-shuffle only the training portion so each epoch still sees it in a different order.
train_data = train_data.shuffle(BUFFER_SIZE)

## Mini-batch the training data; validation and test are each served as a single batch holding the whole split.
BATCH_SIZE = 100

test_data = test_data.batch(num_test_samples)
validation_data = validation_data.batch(num_validation_samples)
train_data = train_data.batch(BATCH_SIZE)

## Pull the single validation batch out as an (inputs, targets) pair.
validation_iterator = iter(validation_data)
validation_inputs, validation_targets = next(validation_iterator)


In [6]:
##Outline the model

input_size = 784
output_size = 10
hidden_layer_size = 200

## The input is declared with an explicit Input object rather than an input_shape
## argument on the first layer: Keras 3 emits a warning for input_shape on layers
## inside a Sequential model.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(28, 28, 1)),
    ## Flatten each 28x28x1 image into a vector of 28*28*1 = 784 (= input_size) values.
    tf.keras.layers.Flatten(),
    ## Three hidden layers of equal width with ReLU activations.
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    ## Softmax output: one probability per class (output_size = 10).
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

  super().__init__(**kwargs)


In [7]:
## Configure training: Adam optimizer, sparse categorical cross-entropy loss, accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
## Train the model; the held-out validation pair is scored after every epoch.
## verbose=2 logs one summary line per epoch.
NUM_EPOCHS = 5

model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 6s - 11ms/step - accuracy: 0.9204 - loss: 0.2672 - val_accuracy: 0.9628 - val_loss: 0.1249
Epoch 2/5
540/540 - 4s - 8ms/step - accuracy: 0.9689 - loss: 0.1004 - val_accuracy: 0.9765 - val_loss: 0.0808
Epoch 3/5
540/540 - 4s - 8ms/step - accuracy: 0.9783 - loss: 0.0695 - val_accuracy: 0.9727 - val_loss: 0.0904
Epoch 4/5
540/540 - 4s - 7ms/step - accuracy: 0.9837 - loss: 0.0520 - val_accuracy: 0.9843 - val_loss: 0.0548
Epoch 5/5
540/540 - 4s - 7ms/step - accuracy: 0.9862 - loss: 0.0422 - val_accuracy: 0.9895 - val_loss: 0.0413


<keras.src.callbacks.history.History at 0x27c1bb4ecf0>

To improve accuracy on the validation dataset (which is the deciding factor here), I increased the hidden layer size from 50 to 100 to 200 and then added one more hidden layer, and found higher accuracy than in the previous training runs. This shows how important these hyperparameters and activations are to a model.

In [9]:
## Final check on the held-out test set; the logged output shows the returned list as [loss, accuracy].
model.evaluate(test_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 351ms/step - accuracy: 0.9799 - loss: 0.0674


[0.06740030646324158, 0.9799000024795532]

About 98% accuracy on the test data (0.9799)! Not bad!