In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

Exercise 13.

In [2]:
from tensorflow.keras.datasets import fashion_mnist

# Load the Fashion MNIST train and test splits as (images, labels) pairs.
# The test labels are bound to `y_test`; this name was previously misspelled
# as `y_teset`, which left `y_test` undefined for any later use.
(x_train_full, y_train_full), (x_test, y_test) = fashion_mnist.load_data()

# Cast the images to float32 and scale them by 1/255
# (presumably mapping 8-bit pixel intensities into [0, 1]).
x_train_full = x_train_full.astype(np.float32) / 255.
x_test = x_test.astype(np.float32) / 255.

# Hold out the first 5,000 training samples for validation; train on the rest.
x_train, x_valid = x_train_full[5000:], x_train_full[:5000]
y_train, y_valid = y_train_full[5000:], y_train_full[:5000]

a.

In [3]:
# Baseline classifier: flatten each 28x28 input, apply one 100-unit ReLU
# hidden layer, then a 10-unit softmax output layer.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(100, activation='relu'))
model.add(keras.layers.Dense(10, activation='softmax'))

In [4]:
def random_batch(x, y, batch_size=32):
    """Draw a random mini-batch of aligned samples from `x` and `y`.

    Positions are sampled uniformly, with replacement, from ``range(len(x))``
    using NumPy's global random state, so a sample may appear more than once
    in the same batch.

    Args:
        x: array of inputs supporting integer-array indexing.
        y: array of targets, indexed with the same positions as `x`.
        batch_size: number of samples to draw.

    Returns:
        A tuple ``(x_batch, y_batch)`` selected with the same random positions.
    """
    picks = np.random.randint(0, len(x), size=batch_size)
    x_batch = x[picks]
    y_batch = y[picks]
    return x_batch, y_batch


def progress_bar(iteration, total, size=30):
    """Render a text progress bar such as ``' 50/100 [====>.....]'``.

    Args:
        iteration: number of items processed so far.
        total: total number of items; the counter is right-aligned to the
            number of digits in `total`.
        size: number of characters between the brackets (for
            ``0 <= iteration <= total``).

    Returns:
        The string ``'<iteration>/<total> [<bar>]'``.
    """
    # The head of the bar is '>' while still running and '=' once finished.
    head = '>' if iteration < total else '='
    filled = (size - 1) * iteration // total
    remaining = size - filled - 1
    bar = '=' * filled + head + '.' * remaining
    width = len(str(total))
    return f'{iteration:-{width}d}/{total} [{bar}]'


def print_status_bar(iteration, total, loss, metrics=None, size=30):
    """Print a one-line training status: progress bar followed by metric values.

    The line starts with a carriage return so successive calls overwrite each
    other; a newline is emitted only once ``iteration`` reaches ``total``.

    Args:
        iteration: number of items processed so far.
        total: total number of items.
        loss: metric-like object exposing ``.name`` and ``.result()``.
        metrics: optional list of further metric-like objects, shown after
            `loss`.
        size: width of the progress bar. It is now forwarded to
            ``progress_bar``; previously it was accepted but ignored.
    """
    tracked = [loss] + (metrics or [])
    # Use a separate local instead of rebinding the `metrics` parameter.
    summary = ' - '.join('{}: {:.4f}'.format(m.name, m.result()) for m in tracked)
    end = '' if iteration < total else '\n'
    print('\r{} - {}'.format(progress_bar(iteration, total, size), summary), end=end)

In [5]:
# Hyperparameters and shared training state used by the custom training loops.
n_epochs = 5
batch_size = 32
# Number of mini-batches drawn per epoch (integer division drops any remainder).
n_steps = len(x_train) // batch_size
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
# Loss function; the training loops reduce its output with tf.reduce_mean.
loss_fn = keras.losses.sparse_categorical_crossentropy
# Running mean of the training loss; the loops reset it at the end of each epoch.
mean_loss = keras.metrics.Mean()
# Additional streaming metrics updated on every training batch.
metrics = [keras.metrics.SparseCategoricalAccuracy()]

In [6]:
# Custom training loop: each epoch performs n_steps gradient updates on
# randomly drawn mini-batches, then reports loss/accuracy on the validation set.
for epoch in range(1, n_epochs + 1):
    print('Epoch {}/{}'.format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        # Pass batch_size explicitly so the drawn batch always matches the size
        # that n_steps and the progress counter are derived from (previously
        # this relied on random_batch's default happening to equal batch_size).
        x_batch, y_batch = random_batch(x_train, y_train, batch_size)
        with tf.GradientTape() as tape:
            # Run the forward pass in training mode; this matters as soon as
            # the model contains layers that behave differently at train time.
            y_pred = model(x_batch, training=True)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            # Include any extra losses the model has registered (model.losses).
            loss = tf.add_n([main_loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        # Re-apply weight constraints, if any, after the optimizer step.
        for variable in model.variables:
            if variable.constraint is not None:
                variable.assign(variable.constraint(variable))
        # Update the running statistics and redraw the status line.
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
    # n_steps * batch_size may fall short of len(y_train); print once more with
    # iteration == total so the bar completes and the line is terminated.
    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
    # End-of-epoch evaluation on the held-out validation set.
    y_pred = model(x_valid)
    val_loss = np.mean(loss_fn(y_valid, y_pred))
    val_acc = np.mean(keras.metrics.sparse_categorical_accuracy(
        tf.constant(y_valid, dtype=np.float32), y_pred))
    print(f'\t\t\t\t\t\tval_loss: {val_loss:.4f} - val_accuracy: {val_acc:.4f}')
    # Start the next epoch with fresh running statistics.
    for metric in [mean_loss] + metrics:
        metric.reset_states()

Epoch 1/5
						val_loss: 0.4200 - val_accuracy: 0.8516
Epoch 2/5
						val_loss: 0.3973 - val_accuracy: 0.8596
Epoch 3/5
						val_loss: 0.4346 - val_accuracy: 0.8558
Epoch 4/5
						val_loss: 0.4037 - val_accuracy: 0.8654
Epoch 5/5
						val_loss: 0.4282 - val_accuracy: 0.8548


In [7]:
# Same training loop as above, but with tqdm progress bars.
# Only the optional tqdm import is guarded: wrapping the whole loop in the
# try block would report any ImportError raised during training as a missing
# tqdm installation.
try:
    from tqdm.notebook import trange
except ImportError:
    print('To run this cell, please install tqdm, ipywidgets and restart jupyter')
else:
    from collections import OrderedDict
    with trange(1, n_epochs + 1, desc='All epochs') as epochs:
        for epoch in epochs:
            with trange(1, n_steps + 1, desc='Epoch {}/{}'.format(epoch, n_epochs)) as steps:
                for step in steps:
                    # Pass batch_size explicitly so the drawn batch matches the
                    # size n_steps was computed from.
                    x_batch, y_batch = random_batch(x_train, y_train, batch_size)
                    with tf.GradientTape() as tape:
                        # Forward pass in training mode.
                        y_pred = model(x_batch, training=True)
                        main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
                        # Include any extra losses registered on the model.
                        loss = tf.add_n([main_loss] + model.losses)
                    gradients = tape.gradient(loss, model.trainable_variables)
                    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
                    # Re-apply weight constraints, if any, after the update.
                    for variable in model.variables:
                        if variable.constraint is not None:
                            variable.assign(variable.constraint(variable))
                    # Collect the running statistics shown next to the bar.
                    status = OrderedDict()
                    mean_loss(loss)
                    status['loss'] = mean_loss.result().numpy()
                    for metric in metrics:
                        metric(y_batch, y_pred)
                        status[metric.name] = metric.result().numpy()
                    steps.set_postfix(status)
                # End-of-epoch evaluation on the validation set, appended to
                # the last training status before the bar is closed.
                y_pred = model(x_valid)
                status['val_loss'] = np.mean(loss_fn(y_valid, y_pred))
                status['val_accuracy'] = np.mean(keras.metrics.sparse_categorical_accuracy(
                    tf.constant(y_valid, dtype=np.float32), y_pred))
                steps.set_postfix(status)
            # Start the next epoch with fresh running statistics.
            for metric in [mean_loss] + metrics:
                metric.reset_states()

All epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/1718 [00:00<?, ?it/s]

b.

In [8]:
# Lower half of the network: flatten the 28x28 input and apply the
# 100-unit ReLU hidden layer.
lower_layers = keras.models.Sequential()
lower_layers.add(keras.layers.Flatten(input_shape=[28, 28]))
lower_layers.add(keras.layers.Dense(100, activation='relu'))

# Upper half of the network: the 10-unit softmax output layer.
upper_layers = keras.models.Sequential()
upper_layers.add(keras.layers.Dense(10, activation='softmax'))

# The full classifier chains the lower half into the upper half.
model = keras.models.Sequential()
model.add(lower_layers)
model.add(upper_layers)

In [9]:
# One optimizer per half of the network: SGD with learning rate 1e-4 for the
# lower layers, Nadam with learning rate 1e-3 for the upper layers.
lower_optimizer = keras.optimizers.SGD(learning_rate=1e-4)
upper_optimizer = keras.optimizers.Nadam(learning_rate=1e-3)

In [10]:
# Custom training loop with two optimizers: the lower and upper halves of the
# model are each updated by their own optimizer from the same loss.
for epoch in range(1, n_epochs + 1):
    print('Epoch {}/{}'.format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        # Pass batch_size explicitly so the drawn batch always matches the size
        # that n_steps and the progress counter are derived from.
        x_batch, y_batch = random_batch(x_train, y_train, batch_size)
        # persistent=True because tape.gradient() is called once per sub-model.
        with tf.GradientTape(persistent=True) as tape:
            # Forward pass in training mode.
            y_pred = model(x_batch, training=True)
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            # Include any extra losses registered on the model.
            loss = tf.add_n([main_loss] + model.losses)
        # Loop variables are deliberately not called `layers`/`optimizer`, so
        # the notebook-level `optimizer` is not overwritten by this cell.
        for sub_model, sub_optimizer in ((lower_layers, lower_optimizer),
                                         (upper_layers, upper_optimizer)):
            gradients = tape.gradient(loss, sub_model.trainable_variables)
            sub_optimizer.apply_gradients(zip(gradients, sub_model.trainable_variables))
        # A persistent tape must be released explicitly.
        del tape
        # Re-apply weight constraints, if any, after the optimizer steps.
        for variable in model.variables:
            if variable.constraint is not None:
                variable.assign(variable.constraint(variable))
        # Update the running statistics and redraw the status line.
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
    # Print once more with iteration == total so the bar completes and the
    # status line is terminated with a newline.
    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
    # End-of-epoch evaluation on the held-out validation set.
    y_pred = model(x_valid)
    val_loss = np.mean(loss_fn(y_valid, y_pred))
    val_acc = np.mean(keras.metrics.sparse_categorical_accuracy(
        tf.constant(y_valid, dtype=np.float32), y_pred))
    print(f'\t\t\t\t\t\tval_loss: {val_loss:.4f} - val_accuracy: {val_acc:.4f}')
    # Start the next epoch with fresh running statistics.
    for metric in [mean_loss] + metrics:
        metric.reset_states()

Epoch 1/5
						val_loss: 0.6891 - val_accuracy: 0.7828
Epoch 2/5
						val_loss: 0.5901 - val_accuracy: 0.8016
Epoch 3/5
						val_loss: 0.5508 - val_accuracy: 0.8144
Epoch 4/5
						val_loss: 0.5264 - val_accuracy: 0.8204
Epoch 5/5
						val_loss: 0.5117 - val_accuracy: 0.8254


In [11]:
# Two-optimizer training loop with tqdm progress bars.
# Only the optional tqdm import is guarded: wrapping the whole loop in the
# try block would report any ImportError raised during training as a missing
# tqdm installation.
try:
    from tqdm.notebook import trange
except ImportError:
    print('To run this cell, please install tqdm, ipywidgets and restart jupyter')
else:
    from collections import OrderedDict
    with trange(1, n_epochs + 1, desc='All epochs') as epochs:
        for epoch in epochs:
            with trange(1, n_steps + 1, desc='Epoch {}/{}'.format(epoch, n_epochs)) as steps:
                for step in steps:
                    # Pass batch_size explicitly so the drawn batch matches the
                    # size n_steps was computed from.
                    x_batch, y_batch = random_batch(x_train, y_train, batch_size)
                    # persistent=True because tape.gradient() is called once
                    # per sub-model.
                    with tf.GradientTape(persistent=True) as tape:
                        # Forward pass in training mode.
                        y_pred = model(x_batch, training=True)
                        main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
                        # Include any extra losses registered on the model.
                        loss = tf.add_n([main_loss] + model.losses)
                    # Loop variables are deliberately not called
                    # `layers`/`optimizer`, so the notebook-level `optimizer`
                    # is not overwritten by this cell.
                    for sub_model, sub_optimizer in ((lower_layers, lower_optimizer),
                                                     (upper_layers, upper_optimizer)):
                        gradients = tape.gradient(loss, sub_model.trainable_variables)
                        sub_optimizer.apply_gradients(
                            zip(gradients, sub_model.trainable_variables))
                    # A persistent tape must be released explicitly.
                    del tape
                    # Re-apply weight constraints, if any, after the updates.
                    for variable in model.variables:
                        if variable.constraint is not None:
                            variable.assign(variable.constraint(variable))
                    # Collect the running statistics shown next to the bar.
                    status = OrderedDict()
                    mean_loss(loss)
                    status['loss'] = mean_loss.result().numpy()
                    for metric in metrics:
                        metric(y_batch, y_pred)
                        status[metric.name] = metric.result().numpy()
                    steps.set_postfix(status)
                # End-of-epoch evaluation on the validation set, appended to
                # the last training status before the bar is closed.
                y_pred = model(x_valid)
                status['val_loss'] = np.mean(loss_fn(y_valid, y_pred))
                status['val_accuracy'] = np.mean(keras.metrics.sparse_categorical_accuracy(
                    tf.constant(y_valid, dtype=np.float32), y_pred))
                steps.set_postfix(status)
            # Start the next epoch with fresh running statistics.
            for metric in [mean_loss] + metrics:
                metric.reset_states()

All epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/1718 [00:00<?, ?it/s]