In [1]:
# Importing the relevant packages
import datetime
import os

import tensorflow as tf
import tensorflow_datasets as tfds

## Downloading and preprocessing the data

In [2]:
# Defining some constants/hyperparameters used by the cells below
BUFFER_SIZE = 70_000 # shuffle buffer size handed to Dataset.shuffle
BATCH_SIZE = 128 # number of training examples per mini-batch
NUM_EPOCHS = 20 # upper bound on training epochs; early stopping may end training sooner

In [3]:
# Fetch MNIST through TensorFlow Datasets as (image, label) pairs, together
# with the dataset metadata (used further down to read the split sizes)
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    as_supervised=True,
    with_info=True,
)

In [4]:
# Pull the 'train' and 'test' splits out of the loaded dataset
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

In [5]:
# Helper that rescales the pixel values of a single example
def scale(image, label):
    """Cast `image` to float32 and divide it by 255; `label` is returned unchanged."""
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

In [6]:
# Apply the scaling helper to every example of both splits
test_data = mnist_test.map(map_func=scale)
train_and_validation_data = mnist_train.map(map_func=scale)

In [7]:
# Reserve 10% of the training split for validation; the count is kept as an
# int64 tensor
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples,
    tf.int64,
)

In [8]:
# Number of examples in the test split, kept as an int64 tensor
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

In [9]:
# Shuffling the dataset once, into a fixed order.
# The train/validation split below is made with skip()/take() on this shuffled
# dataset. By default shuffle() draws a new order every time the dataset is
# iterated, so the "first N" and the "rest" would be different examples on each
# pass: validation examples would leak into training and the validation set
# itself would change between epochs. A fixed seed together with
# reshuffle_each_iteration=False keeps the order identical on every pass, so
# the two subsets stay disjoint and stable.
# Trade-off: the training examples are then visited in the same order every
# epoch; shuffle the training subset separately (after the split) if per-epoch
# reshuffling is wanted.
train_and_validation_data = train_and_validation_data.shuffle(
    BUFFER_SIZE,
    seed=42,
    reshuffle_each_iteration=False,
)

In [10]:
# Carve the shuffled data into two parts: the first `num_validation_samples`
# examples go to validation, everything after them goes to training
validation_data = train_and_validation_data.take(count=num_validation_samples)
train_data = train_and_validation_data.skip(count=num_validation_samples)

In [11]:
# Group the examples into batches: mini-batches of BATCH_SIZE for training,
# and a single batch sized to the whole subset for validation and for test
test_data = test_data.batch(batch_size=num_test_samples)
validation_data = validation_data.batch(batch_size=num_validation_samples)
train_data = train_data.batch(batch_size=BATCH_SIZE)

## Creating the model and training it

In [12]:
# Now that we have preprocessed the dataset, we can define our CNN and train it

In [13]:
# CNN architecture: two convolution + max-pooling stages, then a flatten and
# a 10-unit dense layer that outputs raw logits (no activation)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(filters=50, kernel_size=5, activation='relu', input_shape=(28, 28, 1)))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.Conv2D(filters=50, kernel_size=3, activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(units=10))

In [14]:
# Print a layer-by-layer overview (output shapes and parameter counts),
# formatted 75 characters wide
model.summary(line_length=75)

Model: "sequential"
___________________________________________________________________________
Layer (type)                     Output Shape                  Param #     
conv2d (Conv2D)                  (None, 24, 24, 50)            1300        
___________________________________________________________________________
max_pooling2d (MaxPooling2D)     (None, 12, 12, 50)            0           
___________________________________________________________________________
conv2d_1 (Conv2D)                (None, 10, 10, 50)            22550       
___________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)   (None, 5, 5, 50)              0           
___________________________________________________________________________
flatten (Flatten)                (None, 1250)                  0           
___________________________________________________________________________
dense (Dense)                    (None, 10)                    12510

In [15]:
# Loss: sparse categorical cross-entropy, computed directly on the logits
# produced by the model
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
)

In [16]:
# Configure training: Adam optimizer, the sparse categorical cross-entropy
# loss object `loss_fn`, and accuracy as the reported metric
model.compile(
    loss=loss_fn,
    optimizer='adam',
    metrics=['accuracy'],
)

In [17]:
# Early stopping: end training once the validation loss has gone 2 epochs
# without improving, then restore the best weights seen so far
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='auto', min_delta=0,
    patience=2, restore_best_weights=True, verbose=0,
)

In [18]:
# Logging the training process data to use later in TensorBoard.
# Each run gets its own timestamped sub-directory under logs/fit. The path is
# assembled with os.path.join so the separator matches the host OS: hard-coded
# backslashes only form a directory tree on Windows, while elsewhere they end
# up inside a single folder name that `--logdir "logs/fit"` cannot find.
run_timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = os.path.join("logs", "fit", run_timestamp)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [19]:
# Fit the model on the training batches and evaluate on the validation data
# every epoch; TensorBoard logging and early stopping run as callbacks
model.fit(
    train_data,
    validation_data=validation_data,
    epochs=NUM_EPOCHS,
    callbacks=[tensorboard_callback, early_stopping],
    verbose=2,
)

Epoch 1/20
422/422 - 46s - loss: 0.2689 - accuracy: 0.9208 - val_loss: 0.0808 - val_accuracy: 0.9743
Epoch 2/20
422/422 - 19s - loss: 0.0716 - accuracy: 0.9781 - val_loss: 0.0583 - val_accuracy: 0.9820
Epoch 3/20
422/422 - 20s - loss: 0.0548 - accuracy: 0.9835 - val_loss: 0.0472 - val_accuracy: 0.9860
Epoch 4/20
422/422 - 20s - loss: 0.0443 - accuracy: 0.9861 - val_loss: 0.0342 - val_accuracy: 0.9893
Epoch 5/20
422/422 - 20s - loss: 0.0366 - accuracy: 0.9886 - val_loss: 0.0249 - val_accuracy: 0.9922
Epoch 6/20
422/422 - 20s - loss: 0.0322 - accuracy: 0.9897 - val_loss: 0.0339 - val_accuracy: 0.9893
Epoch 7/20
422/422 - 20s - loss: 0.0279 - accuracy: 0.9914 - val_loss: 0.0223 - val_accuracy: 0.9923
Epoch 8/20
422/422 - 20s - loss: 0.0257 - accuracy: 0.9921 - val_loss: 0.0225 - val_accuracy: 0.9945
Epoch 9/20
422/422 - 20s - loss: 0.0210 - accuracy: 0.9934 - val_loss: 0.0197 - val_accuracy: 0.9943
Epoch 10/20
422/422 - 20s - loss: 0.0187 - accuracy: 0.9946 - val_loss: 0.0151 - val_accura

<tensorflow.python.keras.callbacks.History at 0x21ea984cd68>

## Testing our model

In [20]:
# Evaluate the trained model on the test set; the result unpacks into the
# loss followed by the accuracy metric
test_loss, test_accuracy = model.evaluate(x=test_data)

      1/Unknown - 3s 3s/step - loss: 0.0333 - accuracy: 0.98 - 3s 3s/step - loss: 0.0333 - accuracy: 0.9895

In [21]:
# Report the test loss (4 decimals) and the test accuracy as a percentage
# (2 decimals)
report_template = 'Test loss: {0:.4f}. Test accuracy: {1:.2f}%'
print(report_template.format(test_loss, test_accuracy*100.))

Test loss: 0.0333. Test accuracy: 98.95%


## Visualizing in Tensorboard

In [23]:
# Loading the TensorBoard notebook extension, then starting (or reusing)
# TensorBoard pointed at the "logs/fit" directory
%load_ext tensorboard
%tensorboard --logdir "logs/fit"

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 21472), started 0:00:54 ago. (Use '!kill 21472' to kill it.)