## Train a model using a custom training loop to tackle the Fashion MNIST dataset
### Géron, 2nd edition, chapter 12, exercise 13

In [3]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

In [4]:
# Fetch Fashion MNIST through the Keras dataset helper. load_data() hands back
# a (train, test) pair, each of which is an (images, labels) pair.
fashion_mnist = keras.datasets.fashion_mnist
train_split, test_split = fashion_mnist.load_data()
X_train_full, y_train_full = train_split
X_test, y_test = test_split

In [5]:
# Hold out the first 5000 training samples as a validation set and keep the
# remainder for training. Image arrays are divided by 255.0; labels are only sliced.
X_valid = X_train_full[:5000] / 255.0
X_train = X_train_full[5000:] / 255.0
y_valid = y_train_full[:5000]
y_train = y_train_full[5000:]
# NOTE: X_test is rebound to its scaled version, so re-running this cell
# without reloading the data divides the test images by 255 a second time.
X_test = X_test / 255.0

In [6]:
# One L2 regularizer object (factor 0.05) is shared by both hidden layers; each
# regularized layer contributes its penalty term to model.losses.
l2_reg = keras.regularizers.l2(0.05)

# Flatten 28x28 inputs, then two He-initialised ReLU hidden layers (300 and 100
# units) followed by a 10-unit softmax output layer.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    *[
        keras.layers.Dense(
            n_units,
            activation="relu",
            kernel_initializer="he_normal",
            kernel_regularizer=l2_reg,
        )
        for n_units in (300, 100)
    ],
    keras.layers.Dense(10, activation="softmax"),
])
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
dense_1 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1010      
Total params: 266,610
Trainable params: 266,610
Non-trainable params: 0
_________________________________________________________________


In [7]:
def random_batch(X, y, batch_size=32):
    """Return a random mini-batch drawn from ``X`` and ``y``.

    ``batch_size`` row indices are sampled uniformly from ``range(len(X))``
    with replacement (the same row may appear more than once), and the same
    indices are used to select from both arrays so samples stay aligned with
    their labels.

    Args:
        X: Indexable array of inputs (must support integer-array indexing).
        y: Array of targets aligned with ``X`` along the first axis.
        batch_size: Number of rows to draw.

    Returns:
        Tuple ``(X_batch, y_batch)``.
    """
    n_rows = len(X)
    picked = np.random.randint(0, n_rows, batch_size)
    X_batch = X[picked]
    y_batch = y[picked]
    return X_batch, y_batch

In [8]:
def print_status_bar(iteration, total, loss, metrics=None):
    """Print a one-line, self-overwriting progress report.

    The line has the form ``"<iteration>/<total> - name: value - ..."`` and
    starts with a carriage return so successive calls overwrite each other.
    A newline is emitted only once ``iteration`` reaches ``total``.

    Args:
        iteration: Number of items processed so far.
        total: Number of items in the full pass.
        loss: Object exposing ``.name`` and ``.result()``; reported first.
        metrics: Optional list of further objects with the same interface.
    """
    tracked = [loss] + (metrics or [])
    summary = " - ".join(
        "{}: {:.4f}".format(item.name, item.result()) for item in tracked
    )
    # Stay on the same line until the pass is complete.
    line_end = "\n" if iteration >= total else ""
    print("\r{}/{} - ".format(iteration, total) + summary, end=line_end)

In [14]:
# Training hyper-parameters and the objects shared by the custom training loop.
n_epochs = 5
batch_size = 32
# Number of mini-batches drawn per epoch (any remainder is dropped).
n_steps = len(X_train) // batch_size
# The optimizer must be an *instance*: the training loop calls
# optimizer.apply_gradients(...), which fails on the bare SGD class.
# 0.01 is the Keras default learning rate for SGD, spelled out for clarity.
optimizer = keras.optimizers.SGD(learning_rate=0.01)
# Functional form of the loss: returns one value per sample, averaged in the loop.
loss_fn = keras.losses.sparse_categorical_crossentropy
# Running mean of the total (main + regularization) loss within an epoch.
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

### Custom Training Loop starts here

In [12]:
# Custom training loop: each epoch draws n_steps random mini-batches, takes one
# gradient step per batch, and reports the running loss and metrics.
for epoch in range(1, n_epochs + 1):
    print("Epoch {}/{}".format(epoch, n_epochs))
    for step in range(1, n_steps + 1):
        # Pass batch_size explicitly so the sampled batch always matches the
        # configured size used for n_steps and for the progress counter below,
        # instead of silently relying on random_batch's default.
        X_batch, y_batch = random_batch(X_train, y_train, batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(X_batch, training=True)
            # loss_fn yields one value per sample; average it over the batch.
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            # Add the regularization penalties collected in model.losses.
            loss = tf.add_n([main_loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        # Update the running statistics shown in the status bar.
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)
    # Final call with iteration == total so the status line ends with a newline.
    print_status_bar(len(y_train), len(y_train), mean_loss, metrics)
    # Start every epoch with fresh running statistics.
    for metric in [mean_loss] + metrics:
        metric.reset_states()


Epoch 1/10
55000/55000 - mean: 1.7295 - sparse_categorical_accuracy: 0.5807
Epoch 2/10
55000/55000 - mean: 1.2150 - sparse_categorical_accuracy: 0.6606
Epoch 3/10
55000/55000 - mean: 1.0805 - sparse_categorical_accuracy: 0.6818
Epoch 4/10
55000/55000 - mean: 0.9912 - sparse_categorical_accuracy: 0.7109
Epoch 5/10
55000/55000 - mean: 0.9597 - sparse_categorical_accuracy: 0.7189
Epoch 6/10
55000/55000 - mean: 0.9348 - sparse_categorical_accuracy: 0.7268
Epoch 7/10
55000/55000 - mean: 0.9188 - sparse_categorical_accuracy: 0.7313
Epoch 8/10
55000/55000 - mean: 0.9061 - sparse_categorical_accuracy: 0.7331
Epoch 9/10
55000/55000 - mean: 0.8938 - sparse_categorical_accuracy: 0.7349
Epoch 10/10
55000/55000 - mean: 0.8916 - sparse_categorical_accuracy: 0.7389
