In [1]:
import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds

In [2]:
# Load MNIST through TensorFlow Datasets.
# as_supervised=True yields (image, label) pairs; with_info=True additionally
# returns the dataset metadata object used later for the split sizes.
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)

In [3]:
# Pull the two splits out of the loaded dataset dictionary.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

In [4]:
# Reserve 10% of the training examples for validation. The product is a float,
# so it is cast to an int64 tensor before being used as a count.
# (The spelling of the variable name is kept: later cells reference it.)
num_validtion_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples,
    tf.int64,
)

# Size of the test split, also as an int64 tensor.
num_test_samples = tf.cast(
    mnist_info.splits['test'].num_examples,
    tf.int64,
)

In [5]:
def scale(image, label):
    """Cast the image to float32 and divide by 255; the label passes through unchanged."""
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

# Apply the same scaling to both splits.
scaled_train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

In [6]:
BUFFER_SIZE = 10000

# Shuffle ONCE with a fixed order before splitting. By default `shuffle`
# reshuffles on every iteration, so `take`/`skip` would select a different
# 10% each time the dataset is iterated and the validation samples would
# end up in the training stream on later epochs.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

# First num_validtion_samples elements of the fixed order -> validation set.
validation_data = shuffled_train_and_validation_data.take(num_validtion_samples)

# Everything after them -> training set. A second shuffle on the training
# portion only keeps a fresh sample order for every epoch.
train_data = (
    shuffled_train_and_validation_data
    .skip(num_validtion_samples)
    .shuffle(BUFFER_SIZE)
)

In [7]:
BATCH_SIZE = 100

# Only the training data is split into mini-batches.
train_data = train_data.batch(BATCH_SIZE)

# The test set is batched as one single batch holding every sample.
test_data = test_data.batch(num_test_samples)

# The validation set is likewise a single batch. iter() makes the dataset
# iterable and next() fetches its first batch; because there is only one
# batch, this yields all validation inputs and targets at once.
validation_data = validation_data.batch(num_validtion_samples)
validation_inputs, validation_targets = next(iter(validation_data))

In [8]:
# Network dimensions for the baseline model.
hidden_layer_size = 50   # units in each hidden layer
input_size = 784         # 28 * 28 pixels per flattened image
output_size = 10         # one output per digit class

In [9]:
# Flatten each 28x28x1 image, pass it through two ReLU hidden layers of
# hidden_layer_size units, and finish with a softmax over the output classes.
model = tf.keras.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28,28,1))]
    + [tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(2)]
    + [tf.keras.layers.Dense(output_size, activation='softmax')]
)

In [10]:
# Adam optimizer; the sparse categorical cross-entropy loss takes the targets
# as class indices rather than one-hot vectors. Accuracy is tracked as a metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [11]:
NUM_EPOCHS = 5

# Train on the batched training set and evaluate on the single validation
# batch after every epoch; verbose=2 prints one summary line per epoch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 7s - loss: 0.4119 - accuracy: 0.8814 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 8s - loss: 0.1849 - accuracy: 0.9453 - val_loss: 0.1788 - val_accuracy: 0.9497
Epoch 3/5
540/540 - 7s - loss: 0.1422 - accuracy: 0.9575 - val_loss: 0.1470 - val_accuracy: 0.9545
Epoch 4/5
540/540 - 7s - loss: 0.1183 - accuracy: 0.9640 - val_loss: 0.1170 - val_accuracy: 0.9645
Epoch 5/5
540/540 - 7s - loss: 0.0979 - accuracy: 0.9701 - val_loss: 0.1048 - val_accuracy: 0.9677


<tensorflow.python.keras.callbacks.History at 0x144870d30>

In [12]:
# Same network as before, but with wider hidden layers (100 units).
hidden_layer_size = 100  # units in each hidden layer
input_size = 784         # 28 * 28 pixels per flattened image
output_size = 10         # one output per digit class

In [13]:
# Rebuild the two-hidden-layer network using the current hidden_layer_size.
model = tf.keras.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28,28,1))]
    + [tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(2)]
    + [tf.keras.layers.Dense(output_size, activation='softmax')]
)

In [14]:
# Same training setup as the baseline: Adam, sparse categorical
# cross-entropy, accuracy metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [15]:
NUM_EPOCHS = 5

# Fit the wider model, validating on the held-out batch after each epoch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 7s - loss: 0.3320 - accuracy: 0.9073 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 7s - loss: 0.1338 - accuracy: 0.9610 - val_loss: 0.1227 - val_accuracy: 0.9653
Epoch 3/5
540/540 - 7s - loss: 0.0936 - accuracy: 0.9721 - val_loss: 0.0919 - val_accuracy: 0.9743
Epoch 4/5
540/540 - 8s - loss: 0.0718 - accuracy: 0.9783 - val_loss: 0.0706 - val_accuracy: 0.9797
Epoch 5/5
540/540 - 7s - loss: 0.0580 - accuracy: 0.9821 - val_loss: 0.0588 - val_accuracy: 0.9810


<tensorflow.python.keras.callbacks.History at 0x143355048>

## Exercises

1. The *width* (the hidden layer size) of the algorithm. Try a hidden layer size of 200. How does the validation accuracy of the model change? What about the time it took the algorithm to train? Can you find a hidden layer size that does better?

In [17]:
# Exercise 1: hidden layer width of 200.
hidden_layer_size = 200  # units in each hidden layer
input_size = 784         # 28 * 28 pixels per flattened image
output_size = 10         # one output per digit class

In [18]:
# Two ReLU hidden layers of hidden_layer_size units between the flatten
# step and the softmax output layer.
model = tf.keras.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28,28,1))]
    + [tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(2)]
    + [tf.keras.layers.Dense(output_size, activation='softmax')]
)

In [19]:
# Adam + sparse categorical cross-entropy, reporting accuracy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [20]:
NUM_EPOCHS = 5

# Train the 200-unit model; one log line per epoch (verbose=2).
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 8s - loss: 0.2729 - accuracy: 0.9209 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 7s - loss: 0.1065 - accuracy: 0.9680 - val_loss: 0.0899 - val_accuracy: 0.9730
Epoch 3/5
540/540 - 7s - loss: 0.0700 - accuracy: 0.9786 - val_loss: 0.0685 - val_accuracy: 0.9788
Epoch 4/5
540/540 - 7s - loss: 0.0515 - accuracy: 0.9840 - val_loss: 0.0567 - val_accuracy: 0.9818
Epoch 5/5
540/540 - 7s - loss: 0.0400 - accuracy: 0.9872 - val_loss: 0.0473 - val_accuracy: 0.9853


<tensorflow.python.keras.callbacks.History at 0x1433b25f8>

2. The *depth* of the algorithm. Add another hidden layer to the algorithm. This is an extremely important exercise! How does the validation accuracy change? What about the time it took the algorithm to train? Hint: Be careful with the shapes of the weights and the biases.

In [21]:
# Exercise 2: keep the width at 200 while the depth is increased.
hidden_layer_size = 200  # units in each hidden layer
input_size = 784         # 28 * 28 pixels per flattened image
output_size = 10         # one output per digit class

In [23]:
# Three ReLU hidden layers this time (one more than the earlier models),
# followed by the softmax output layer.
model = tf.keras.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28,28,1))]
    + [tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(3)]
    + [tf.keras.layers.Dense(output_size, activation='softmax')]
)

In [24]:
# Unchanged training configuration: Adam, sparse categorical cross-entropy,
# accuracy metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [25]:
NUM_EPOCHS = 5

# Train the three-hidden-layer model and validate after every epoch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 8s - loss: 0.2617 - accuracy: 0.9230 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 7s - loss: 0.1016 - accuracy: 0.9686 - val_loss: 0.1001 - val_accuracy: 0.9690
Epoch 3/5
540/540 - 8s - loss: 0.0716 - accuracy: 0.9771 - val_loss: 0.0731 - val_accuracy: 0.9758
Epoch 4/5
540/540 - 7s - loss: 0.0533 - accuracy: 0.9829 - val_loss: 0.0644 - val_accuracy: 0.9810
Epoch 5/5
540/540 - 7s - loss: 0.0420 - accuracy: 0.9864 - val_loss: 0.0520 - val_accuracy: 0.9848


<tensorflow.python.keras.callbacks.History at 0x14480ec50>

3. The *width and depth* of the algorithm. Add as many additional layers as you need to reach 5 hidden layers. Moreover, adjust the width of the algorithm as you find suitable. How does the validation accuracy change? What about the time it took the algorithm to train?

In [26]:
# Exercise 3: much wider hidden layers (1000 units) for the deep model.
hidden_layer_size = 1000  # units in each hidden layer
input_size = 784          # 28 * 28 pixels per flattened image
output_size = 10          # one output per digit class

In [27]:
# Five ReLU hidden layers of hidden_layer_size units, then the softmax output.
model = tf.keras.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28,28,1))]
    + [tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(5)]
    + [tf.keras.layers.Dense(output_size, activation='softmax')]
)

In [28]:
# Adam optimizer, sparse categorical cross-entropy loss, accuracy metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [29]:
NUM_EPOCHS = 5

# Train the five-hidden-layer model; validation runs after each epoch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 17s - loss: 0.2353 - accuracy: 0.9292 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 18s - loss: 0.1082 - accuracy: 0.9694 - val_loss: 0.0901 - val_accuracy: 0.9748
Epoch 3/5
540/540 - 19s - loss: 0.0803 - accuracy: 0.9768 - val_loss: 0.0942 - val_accuracy: 0.9750
Epoch 4/5
540/540 - 18s - loss: 0.0644 - accuracy: 0.9817 - val_loss: 0.0605 - val_accuracy: 0.9828
Epoch 5/5
540/540 - 18s - loss: 0.0558 - accuracy: 0.9844 - val_loss: 0.0510 - val_accuracy: 0.9848


<tensorflow.python.keras.callbacks.History at 0x13d06bc88>

4. Fiddle with the activation functions. Try applying a sigmoid transformation to both hidden layers. The sigmoid activation is given by the string 'sigmoid'.

In [30]:
# Exercise 4: back to 100-unit hidden layers for the activation experiment.
hidden_layer_size = 100  # units in each hidden layer
input_size = 784         # 28 * 28 pixels per flattened image
output_size = 10         # one output per digit class

In [None]:
# Exercise 4: same two-hidden-layer architecture, but both hidden layers use
# the sigmoid activation instead of ReLU. The output layer stays softmax so
# the model still produces a distribution over the output classes.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28,28,1)),
    tf.keras.layers.Dense(hidden_layer_size, activation='sigmoid'),
    tf.keras.layers.Dense(hidden_layer_size, activation='sigmoid'),
    tf.keras.layers.Dense(output_size, activation='softmax')
])