In [1]:
# Importing the relevant packages
import tensorflow as tf
import tensorflow_datasets as tfds

## Downloading and preprocessing the data

In [None]:
# Before continuing with our model and training, our first job is to preprocess the dataset
# This is a very important step in all of machine learning

# The MNIST dataset is, in general, highly processed already - after all its 28x28 grayscale images of clearly visible digits
# Thus, our preprocessing will be limited to scaling the pixel values, shuffling the data and creating a validation set

# NOTE: When finally deploying a model in practice, it might be a good idea to include the prerpocessing as initial layers
# In that way, the users could just plug the data (images) directly, instead of being required to resize/rescale it before

In [2]:
# Defining some constants/hyperparameters
BUFFER_SIZE = 70_000 # for reshuffling
BATCH_SIZE = 128
NUM_EPOCHS = 20

In [3]:
# Downloading the MNIST dataset

# When 'with_info' is set to True, tfds.load() returns two variables:
# - the dataset (including the train and test sets) is like a dictionary
# - meta info regarding the dataset itself

mnist_dataset, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)


Downloading and preparing dataset 11.06 MiB (download: 11.06 MiB, generated: 21.00 MiB, total: 32.06 MiB) to /root/tensorflow_datasets/mnist/3.0.1...


Dl Completed...:   0%|          | 0/5 [00:00<?, ? file/s]

Dataset mnist downloaded and prepared to /root/tensorflow_datasets/mnist/3.0.1. Subsequent calls will reuse this data.


In [4]:
# Extracting the train and test datasets
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']


In [5]:
# Creating a function to scale our image data (it is recommended to scale the pixel values in the range [0,1] )
def scale(image, label):
    image = tf.cast(image, tf.float32) # to load the image for better and no error
    image /= 255.

    return image, label

In [6]:
# Scaling the data from 0, 1 instead of  0, 255, using the map function.
train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)


In [7]:
# Defining the size of the validation set
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples, tf.int64)
num_validation_samples

<tf.Tensor: shape=(), dtype=int64, numpy=6000>

In [8]:
# Defining the size of the test set
num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

In [9]:
# Reshuffling the dataset
train_and_validation_data = train_and_validation_data.shuffle(BUFFER_SIZE)

In [10]:
# Splitting the dataset into training + validation
train_data = train_and_validation_data.skip(num_validation_samples)
validation_data = train_and_validation_data.take(num_validation_samples)

In [11]:
# Batching the data
# NOTE: For proper functioning of the model, we need to create one big batch for the validation and test sets
train_data = train_data.batch(BATCH_SIZE)
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

## Creating the model and training it

In [12]:
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(50, 5, activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(50, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
model.summary()

In [14]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

In [15]:
early_stoping=tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0.001,
    patience=2,
    verbose=1,
    mode="auto",
    baseline=None,
    restore_best_weights=False,
)

In [16]:
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    callbacks=[early_stoping],
    validation_data=validation_data,
    verbose=2
)

Epoch 1/20
422/422 - 68s - 162ms/step - accuracy: 0.9227 - loss: 0.2760 - val_accuracy: 0.9797 - val_loss: 0.0753
Epoch 2/20
422/422 - 76s - 181ms/step - accuracy: 0.9782 - loss: 0.0731 - val_accuracy: 0.9838 - val_loss: 0.0539
Epoch 3/20
422/422 - 63s - 148ms/step - accuracy: 0.9830 - loss: 0.0548 - val_accuracy: 0.9847 - val_loss: 0.0566
Epoch 4/20
422/422 - 83s - 196ms/step - accuracy: 0.9861 - loss: 0.0451 - val_accuracy: 0.9872 - val_loss: 0.0454
Epoch 5/20
422/422 - 63s - 148ms/step - accuracy: 0.9883 - loss: 0.0377 - val_accuracy: 0.9895 - val_loss: 0.0325
Epoch 6/20
422/422 - 85s - 202ms/step - accuracy: 0.9896 - loss: 0.0333 - val_accuracy: 0.9907 - val_loss: 0.0339
Epoch 7/20
422/422 - 67s - 160ms/step - accuracy: 0.9911 - loss: 0.0286 - val_accuracy: 0.9918 - val_loss: 0.0254
Epoch 8/20
422/422 - 67s - 159ms/step - accuracy: 0.9922 - loss: 0.0249 - val_accuracy: 0.9918 - val_loss: 0.0287
Epoch 9/20
422/422 - 68s - 160ms/step - accuracy: 0.9935 - loss: 0.0215 - val_accuracy: 

<keras.src.callbacks.history.History at 0x7991884d2230>

In [17]:
test_loss, test_accuracy = model.evaluate(test_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.9904 - loss: 0.0304


## Testing our model

In [19]:
print('Test loss: {0:.4f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))

Test loss: 0.0304. Test accuracy: 99.04%


In [20]:
import matplotlib.pyplot as plt
import numpy as np

### Plotting images and the results

In [None]:
for images, labels in test_data.take(1):
    test_images = images.numpy()
    test_labels = labels.numpy()
images_plot =