In [1]:
import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds

In [2]:
# Load MNIST through TensorFlow Datasets.
# with_info=True additionally returns the dataset metadata object (used later for
# the split sizes); as_supervised=True makes each element an (image, label) pair.
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)

In [3]:
# Pull the two predefined splits out of the loaded dataset mapping.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

In [4]:
# Reserve 10% of the training split for validation. The product is a float, so it
# is cast to an int64 tensor before being used as a sample count.
num_validtion_samples = tf.cast(0.1 * mnist_info.splits['train'].num_examples, tf.int64)

# Size of the test split, also as an int64 tensor.
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

In [5]:
def scale(image, label):
    """Cast `image` to float32 and divide it by 255; `label` is returned unchanged.

    Returns:
        A tuple `(scaled_image, label)`.
    """
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

# Apply the same scaling to every element of both splits. The training split is
# still undivided here; validation samples are carved out of it later.
test_data = mnist_test.map(map_func=scale)
scaled_train_and_validation_data = mnist_train.map(map_func=scale)

In [6]:
BUFFER_SIZE = 10000

# Shuffle ONCE with a fixed order before splitting. With the default
# reshuffle_each_iteration=True, every new iteration over the dataset draws a new
# permutation, so take() and skip() would cut a different permutation each time:
# samples held out for validation would reappear in the training stream on later
# epochs (data leakage), inflating the validation metrics.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

# First num_validtion_samples elements of the fixed permutation -> validation set.
validation_data = shuffled_train_and_validation_data.take(num_validtion_samples)

# Everything after them -> training set. A second shuffle on the training portion
# only restores per-epoch reshuffling without touching the validation samples.
train_data = shuffled_train_and_validation_data.skip(num_validtion_samples).shuffle(BUFFER_SIZE)

In [7]:
BATCH_SIZE = 100

# The validation and test sets are batched with a batch size equal to their full
# sample count, so each of them yields a single batch; only the training set is
# split into mini-batches of BATCH_SIZE.
test_data = test_data.batch(batch_size=num_test_samples)
validation_data = validation_data.batch(batch_size=num_validtion_samples)
train_data = train_data.batch(batch_size=BATCH_SIZE)

# iter() creates an iterator over the batched validation dataset and next() pulls
# its first batch. Since there is only one batch, this yields all validation
# inputs and targets at once.
validation_inputs, validation_targets = next(iter(validation_data))

In [8]:
# Baseline network dimensions: 784 inputs, 10 outputs, 50 units per hidden layer.
# NOTE: input_size is not referenced by the model cells visible in this notebook;
# the Flatten layer is given input_shape directly.
input_size, output_size, hidden_layer_size = 784, 10, 50

In [9]:
# Baseline model: flatten the (28, 28, 1) image, pass it through two ReLU hidden
# layers of hidden_layer_size units, and finish with a softmax output layer.
model = tf.keras.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28, 28, 1))]
    + [tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(2)]
    + [tf.keras.layers.Dense(output_size, activation='softmax')]
)

In [10]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [11]:
NUM_EPOCHS = 5

# Train on the batched training set and score the single validation batch after
# every epoch. verbose=2 prints one summary line per epoch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 7s - loss: 0.4077 - accuracy: 0.8847 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 7s - loss: 0.1841 - accuracy: 0.9461 - val_loss: 0.1803 - val_accuracy: 0.9522
Epoch 3/5
540/540 - 8s - loss: 0.1375 - accuracy: 0.9602 - val_loss: 0.1618 - val_accuracy: 0.9587
Epoch 4/5
540/540 - 9s - loss: 0.1127 - accuracy: 0.9666 - val_loss: 0.1254 - val_accuracy: 0.9652
Epoch 5/5
540/540 - 7s - loss: 0.0961 - accuracy: 0.9713 - val_loss: 0.1092 - val_accuracy: 0.9697


<tensorflow.python.keras.callbacks.History at 0x135bcc780>

In [12]:
# Second run: same input/output sizes, hidden layers widened to 100 units.
hidden_layer_size = 100
output_size = 10
input_size = 784  # not referenced by the model cells visible in this notebook

In [13]:
# Same architecture as the baseline (Flatten -> 2 x ReLU Dense -> softmax Dense),
# rebuilt from scratch with the current hidden_layer_size.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    *[tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(2)],
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

In [14]:
# Compile the freshly built model with the same optimizer, loss and metric as before.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [15]:
NUM_EPOCHS = 5

# Fit the 100-unit model; validation uses the pre-extracted validation batch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 8s - loss: 0.3350 - accuracy: 0.9037 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 7s - loss: 0.1381 - accuracy: 0.9594 - val_loss: 0.1441 - val_accuracy: 0.9602
Epoch 3/5
540/540 - 7s - loss: 0.0948 - accuracy: 0.9713 - val_loss: 0.1042 - val_accuracy: 0.9712
Epoch 4/5
540/540 - 7s - loss: 0.0736 - accuracy: 0.9779 - val_loss: 0.0867 - val_accuracy: 0.9748
Epoch 5/5
540/540 - 7s - loss: 0.0582 - accuracy: 0.9823 - val_loss: 0.0716 - val_accuracy: 0.9787


<tensorflow.python.keras.callbacks.History at 0x141a7cd30>

## Exercises

1. The *width* (the hidden layer size) of the algorithm. Try a hidden layer size of 200. How does the validation accuracy of the model change? What about the time it took the algorithm to train? Can you find a hidden layer size that does better?

In [16]:
# Exercise 1: widen the hidden layers to 200 units.
input_size, output_size = 784, 10
hidden_layer_size = 200

In [17]:
# Exercise 1 model: unchanged depth (two ReLU hidden layers), width taken from
# hidden_layer_size.
model = tf.keras.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28, 28, 1))]
    + [tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(2)]
    + [tf.keras.layers.Dense(output_size, activation='softmax')]
)

In [18]:
# Same training configuration as the earlier runs, so only the width differs.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [19]:
NUM_EPOCHS = 5

# Train the 200-unit model for NUM_EPOCHS epochs with per-epoch validation.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 8s - loss: 0.2787 - accuracy: 0.9186 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 7s - loss: 0.1059 - accuracy: 0.9686 - val_loss: 0.1007 - val_accuracy: 0.9702
Epoch 3/5
540/540 - 7s - loss: 0.0696 - accuracy: 0.9789 - val_loss: 0.0696 - val_accuracy: 0.9790
Epoch 4/5
540/540 - 8s - loss: 0.0524 - accuracy: 0.9836 - val_loss: 0.0557 - val_accuracy: 0.9837
Epoch 5/5
540/540 - 7s - loss: 0.0401 - accuracy: 0.9874 - val_loss: 0.0483 - val_accuracy: 0.9858


<tensorflow.python.keras.callbacks.History at 0x13bb8fc50>

2. The *depth* of the algorithm. Add another hidden layer to the algorithm. This is an extremely important exercise! How does the validation accuracy change? What about the time it took the algorithm to train? Hint: Be careful with the shapes of the weights and the biases.

In [20]:
# Exercise 2: keep the 200-unit width; the extra depth is added in the model cell.
hidden_layer_size = 200
input_size = 784  # not referenced by the model cells visible in this notebook
output_size = 10

In [21]:
# Exercise 2 model: three ReLU hidden layers instead of two, each with
# hidden_layer_size units, followed by the softmax output layer.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    *[tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(3)],
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

In [22]:
# Compile the deeper model with the unchanged optimizer / loss / metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [23]:
NUM_EPOCHS = 5

# Train the three-hidden-layer model; one log line per epoch (verbose=2).
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 8s - loss: 0.2621 - accuracy: 0.9228 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 8s - loss: 0.1041 - accuracy: 0.9677 - val_loss: 0.1126 - val_accuracy: 0.9683
Epoch 3/5
540/540 - 9s - loss: 0.0695 - accuracy: 0.9789 - val_loss: 0.0748 - val_accuracy: 0.9763
Epoch 4/5
540/540 - 8s - loss: 0.0524 - accuracy: 0.9835 - val_loss: 0.0599 - val_accuracy: 0.9818
Epoch 5/5
540/540 - 8s - loss: 0.0432 - accuracy: 0.9864 - val_loss: 0.0433 - val_accuracy: 0.9852


<tensorflow.python.keras.callbacks.History at 0x137e16048>

3. The *width and depth* of the algorithm. Add as many additional layers as you need to reach 5 hidden layers. Moreover, adjust the width of the algorithm as you find suitable. How does the validation accuracy change? What about the time it took the algorithm to train?

In [24]:
# Exercise 3: a much wider network, 1000 units in every hidden layer.
input_size, output_size, hidden_layer_size = 784, 10, 1000

In [25]:
# Exercise 3 model: five ReLU hidden layers of hidden_layer_size units each,
# between the Flatten input layer and the softmax output layer.
model = tf.keras.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28, 28, 1))]
    + [tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(5)]
    + [tf.keras.layers.Dense(output_size, activation='softmax')]
)

In [26]:
# Compile the wide-and-deep model; training configuration is unchanged.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [27]:
NUM_EPOCHS = 5

# Train the five-hidden-layer model with per-epoch validation on the held-out batch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 21s - loss: 0.2354 - accuracy: 0.9299 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 20s - loss: 0.1102 - accuracy: 0.9691 - val_loss: 0.1139 - val_accuracy: 0.9688
Epoch 3/5
540/540 - 20s - loss: 0.0793 - accuracy: 0.9779 - val_loss: 0.0688 - val_accuracy: 0.9807
Epoch 4/5
540/540 - 21s - loss: 0.0664 - accuracy: 0.9816 - val_loss: 0.1113 - val_accuracy: 0.9728
Epoch 5/5
540/540 - 21s - loss: 0.0549 - accuracy: 0.9846 - val_loss: 0.0626 - val_accuracy: 0.9822


<tensorflow.python.keras.callbacks.History at 0x1366ef748>

4. Fiddle with the activation functions. Try applying a sigmoid transformation to both hidden layers. The sigmoid activation is given by the string 'sigmoid'.

In [28]:
# Exercise 4: back to 100-unit hidden layers so that only the activation changes
# relative to the earlier 100-unit ReLU run.
output_size = 10
hidden_layer_size = 100
input_size = 784  # not referenced by the model cells visible in this notebook

In [29]:
# Exercise 4 model: both hidden layers use the sigmoid activation; the output
# layer is still softmax.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    *[tf.keras.layers.Dense(hidden_layer_size, activation='sigmoid') for _ in range(2)],
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

In [30]:
# Compile the sigmoid model with the same optimizer, loss and metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [31]:
NUM_EPOCHS = 5

# Train the sigmoid-activated model for NUM_EPOCHS epochs.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 8s - loss: 0.7357 - accuracy: 0.8230 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 8s - loss: 0.2574 - accuracy: 0.9254 - val_loss: 0.2370 - val_accuracy: 0.9357
Epoch 3/5
540/540 - 8s - loss: 0.1952 - accuracy: 0.9430 - val_loss: 0.1990 - val_accuracy: 0.9475
Epoch 4/5
540/540 - 8s - loss: 0.1558 - accuracy: 0.9541 - val_loss: 0.1661 - val_accuracy: 0.9552
Epoch 5/5
540/540 - 8s - loss: 0.1304 - accuracy: 0.9615 - val_loss: 0.1417 - val_accuracy: 0.9612


<tensorflow.python.keras.callbacks.History at 0x138da93c8>

5. Fiddle with the activation functions. Try applying a ReLU to the first hidden layer and tanh to the second one. The tanh activation is given by the string 'tanh'.

In [32]:
# Exercise 5: 100-unit hidden layers; the activations are mixed in the model cell.
input_size, output_size = 784, 10
hidden_layer_size = 100

In [33]:
# Exercise 5 model: first hidden layer uses ReLU, second uses tanh; the output
# layer is softmax.
model = tf.keras.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28, 28, 1))]
    + [
        tf.keras.layers.Dense(hidden_layer_size, activation=hidden_activation)
        for hidden_activation in ('relu', 'tanh')
    ]
    + [tf.keras.layers.Dense(output_size, activation='softmax')]
)

In [34]:
# Compile the ReLU/tanh model with the same optimizer, loss and metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [35]:
NUM_EPOCHS = 5

# Train the ReLU/tanh model for NUM_EPOCHS epochs with per-epoch validation.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 8s - loss: 0.3195 - accuracy: 0.9093 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 8s - loss: 0.1304 - accuracy: 0.9610 - val_loss: 0.1221 - val_accuracy: 0.9657
Epoch 3/5
540/540 - 8s - loss: 0.0906 - accuracy: 0.9729 - val_loss: 0.1001 - val_accuracy: 0.9712
Epoch 4/5
540/540 - 8s - loss: 0.0688 - accuracy: 0.9790 - val_loss: 0.0702 - val_accuracy: 0.9798
Epoch 5/5
540/540 - 8s - loss: 0.0547 - accuracy: 0.9836 - val_loss: 0.0625 - val_accuracy: 0.9808


<tensorflow.python.keras.callbacks.History at 0x138d26710>

In [36]:
# Final configuration: 100-unit hidden layers. The model built from these values
# is the one that gets evaluated on the test set below.
input_size, output_size, hidden_layer_size = 784, 10, 100

In [37]:
# Final model: Flatten -> two ReLU hidden layers -> softmax output. Rebinding
# `model` here means the later evaluate() call scores this network, not the
# exercise variants above.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    *[tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(2)],
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

In [38]:
# Compile the final model: Adam, sparse categorical cross-entropy, accuracy metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [39]:
NUM_EPOCHS = 5

# Train the final model; it is evaluated on the test set afterwards.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 8s - loss: 0.3349 - accuracy: 0.9029 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 7s - loss: 0.1374 - accuracy: 0.9599 - val_loss: 0.1356 - val_accuracy: 0.9613
Epoch 3/5
540/540 - 7s - loss: 0.0983 - accuracy: 0.9707 - val_loss: 0.0982 - val_accuracy: 0.9723
Epoch 4/5
540/540 - 7s - loss: 0.0747 - accuracy: 0.9778 - val_loss: 0.0864 - val_accuracy: 0.9737
Epoch 5/5
540/540 - 7s - loss: 0.0604 - accuracy: 0.9811 - val_loss: 0.0701 - val_accuracy: 0.9797


<tensorflow.python.keras.callbacks.History at 0x139056908>

# Test the model

In [40]:
# Score the most recently trained model on the test set, which was batched into a
# single batch earlier. evaluate() returns the loss followed by the compiled
# metric (accuracy).
test_loss, test_accuracy = model.evaluate(x=test_data)

      1/Unknown - 1s 1s/step - loss: 0.0835 - accuracy: 0.9746

In [41]:
# Report the test loss and the accuracy as a percentage, both to two decimals.
print(
    'Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(
        test_loss,
        test_accuracy * 100.,
    )
)

Test loss: 0.08. Test accuracy: 97.46%
