# Developing a custom training routine for training a NN on the Fashion MNIST data

## Importing libraries

In [1]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
from ipynb.fs.full.Useful_funcs import pre_model
from tqdm.notebook import trange
from collections import OrderedDict

## Loading and preprocessing dataset

In [2]:
# Load Fashion MNIST through keras.datasets; the call returns a
# (train, test) pair of (images, labels) tuples.
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

In [3]:
# Convert the images to float32 and divide by 255, then hold out the first
# 5000 training samples (images and labels) as a validation set.
x_train_full = x_train_full.astype(np.float32) / 255
x_test = x_test.astype(np.float32) / 255
x_valid, y_valid = x_train_full[:5000], y_train_full[:5000]
x_train, y_train = x_train_full[5000:], y_train_full[5000:]

## Building the model

In [4]:
# Inspect the shape of the training split left after holding out the
# first 5000 samples for validation.
x_train.shape

(55000, 28, 28)

In [5]:
# NOTE(review): pre_model is imported from the local Useful_funcs notebook;
# presumably it resets Keras/random state before a model is built - confirm.
pre_model()

In [6]:
# Classifier: flatten each 28x28 input, one 100-unit ReLU hidden layer,
# then a 10-unit softmax output layer.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape = [28, 28]),
    keras.layers.Dense(100, activation = 'relu'),
    keras.layers.Dense(10, activation = 'softmax'),
])

In [7]:
# Hyperparameters and bookkeeping objects for the custom training loop.
n_epochs = 5
batch_size = 32
# Mini-batches per epoch; the floor division drops any partial batch.
n_steps = len(x_train) // batch_size
# Use the `learning_rate` keyword: the `lr` alias is deprecated and is not
# honoured by every Keras release.
optimizer = keras.optimizers.Nadam(learning_rate = 0.01)
loss_fn = keras.losses.sparse_categorical_crossentropy
# Running mean of the training loss, reported on the progress bar.
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

In [8]:
def random_batch(x, y, batch_size = 32):
    """Return a random mini-batch of matching entries from x and y.

    batch_size indices are drawn with np.random.randint over range(len(x)),
    so indices may repeat within a batch. The same indices are used to
    index both x and y.
    """
    n_samples = len(x)
    picks = np.random.randint(n_samples, size = batch_size)
    x_picked = x[picks]
    y_picked = y[picks]
    return x_picked, y_picked

In [9]:
# Custom training loop: n_epochs epochs of n_steps randomly sampled
# mini-batches each, with one progress bar for the whole run and one per epoch.
with trange(1, n_epochs + 1, desc = 'All epochs') as epochs:
    for epoch in epochs:
        with trange(1, n_steps + 1, desc = 'Epoch {}/{}'.format(epoch, n_epochs)) as steps:
            for step in steps:
                # Pass batch_size explicitly so the sampled batches match the
                # size that n_steps was derived from.
                x_batch, y_batch = random_batch(x_train, y_train, batch_size)
                with tf.GradientTape() as tape:
                    # training = True so that any mode-dependent layers run
                    # in training mode during the forward pass.
                    y_pred = model(x_batch, training = True)
                    main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
                    # Total loss = batch loss plus any extra losses the model tracks.
                    loss = tf.add_n([main_loss] + model.losses)
                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(gradients, model.trainable_variables))
                # Re-apply any variable constraints after the optimizer step.
                for variable in model.variables:
                    if variable.constraint is not None:
                        variable.assign(variable.constraint(variable))
                # Update the running loss/metrics and show them on the bar.
                status = OrderedDict()
                mean_loss(loss)
                status['loss'] = mean_loss.result().numpy()
                for metric in metrics:
                    metric(y_batch, y_pred)
                    status[metric.name] = metric.result().numpy()
                steps.set_postfix(status)
            # End of epoch: evaluate on the validation set and append the
            # results to the last training status.
            y_pred = model(x_valid)
            status['val_loss'] = np.mean(loss_fn(y_valid, y_pred))
            status['val_accuracy'] = np.mean(keras.metrics.sparse_categorical_accuracy(tf.constant(y_valid, dtype = np.float32), y_pred))
            steps.set_postfix(status)
        # Start the next epoch with fresh running statistics.
        for metric in [mean_loss] + metrics:
            metric.reset_states()

HBox(children=(FloatProgress(value=0.0, description='All epochs', max=5.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='Epoch 1/5', max=1718.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 2/5', max=1718.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 3/5', max=1718.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 4/5', max=1718.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 5/5', max=1718.0, style=ProgressStyle(description_w…





- Now we will train the lower and upper layers with different optimizers, each with its own learning rate.

In [10]:
# NOTE(review): pre_model is imported from the local Useful_funcs notebook;
# presumably it resets Keras/random state before the new models are built - confirm.
pre_model()

In [11]:
# Lower part of the network: flatten the 28x28 input, then a 100-unit
# ReLU hidden layer.
lower_model = keras.models.Sequential([
    keras.layers.Flatten(input_shape = [28, 28]),
    keras.layers.Dense(100, activation = 'relu'),
])

In [12]:
# Upper part of the network: the 10-unit softmax output layer.
upper_model = keras.models.Sequential([
    keras.layers.Dense(10, activation = 'softmax'),
])

In [13]:
# Full model: the lower sub-model followed by the upper sub-model.
model = keras.models.Sequential([lower_model, upper_model])

In [14]:
# Print the summary of the stacked model (one row per sub-model).
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential (Sequential)      (None, 100)               78500     
_________________________________________________________________
sequential_1 (Sequential)    (None, 10)                1010      
Total params: 79,510
Trainable params: 79,510
Non-trainable params: 0
_________________________________________________________________


In [15]:
# One optimizer per sub-model, each with its own learning rate.
# Use the `learning_rate` keyword: the `lr` alias is deprecated and is not
# honoured by every Keras release.
lower_optimizer = keras.optimizers.SGD(learning_rate = 1e-4)
upper_optimizer = keras.optimizers.Nadam(learning_rate = 1e-3)

In [17]:
# Hyperparameters and running metrics for the two-optimizer training loop.
n_epochs, batch_size = 5, 32
# Mini-batches per epoch; the floor division drops any partial batch.
n_steps = len(x_train) // batch_size
loss_fn = keras.losses.sparse_categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

In [19]:
# Custom training loop in which lower_model and upper_model are each updated
# by their own optimizer.
with trange(1, n_epochs + 1, desc = 'All epochs') as epochs:
    for epoch in epochs:
        with trange(1, n_steps + 1, desc = 'Epoch {}/{}'.format(epoch, n_epochs)) as steps:
            for step in steps:
                # Pass batch_size explicitly so the sampled batches match the
                # size that n_steps was derived from.
                x_batch, y_batch = random_batch(x_train, y_train, batch_size)
                # persistent = True because tape.gradient() is called once
                # per sub-model below.
                with tf.GradientTape(persistent = True) as tape:
                    # training = True so that any mode-dependent layers run
                    # in training mode during the forward pass.
                    y_pred = model(x_batch, training = True)
                    main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
                    # Total loss = batch loss plus any extra losses the model tracks.
                    loss = tf.add_n([main_loss] + model.losses)
                for layers, optimizer in ((lower_model, lower_optimizer), (upper_model, upper_optimizer)):
                    # Each optimizer must only see its own sub-model's
                    # variables; using model.trainable_variables here would
                    # make both optimizers update every weight.
                    gradients = tape.gradient(loss, layers.trainable_variables)
                    optimizer.apply_gradients(zip(gradients, layers.trainable_variables))
                # Drop the persistent tape now that both gradient calls are done.
                del tape
                # Re-apply any variable constraints after the optimizer steps.
                for variable in model.variables:
                    if variable.constraint is not None:
                        variable.assign(variable.constraint(variable))
                # Update the running loss/metrics and show them on the bar.
                status = OrderedDict()
                mean_loss(loss)
                status['loss'] = mean_loss.result().numpy()
                for metric in metrics:
                    metric(y_batch, y_pred)
                    status[metric.name] = metric.result().numpy()
                steps.set_postfix(status)
            # End of epoch: evaluate on the validation set and append the
            # results to the last training status.
            y_pred = model(x_valid)
            status['val_loss'] = np.mean(loss_fn(y_valid, y_pred))
            status['val_accuracy'] = np.mean(keras.metrics.sparse_categorical_accuracy(tf.constant(y_valid, dtype = np.float32), y_pred))
            steps.set_postfix(status)
        # Start the next epoch with fresh running statistics.
        for metric in [mean_loss] + metrics:
            metric.reset_states()

HBox(children=(FloatProgress(value=0.0, description='All epochs', max=5.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='Epoch 1/5', max=1718.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 2/5', max=1718.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 3/5', max=1718.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 4/5', max=1718.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Epoch 5/5', max=1718.0, style=ProgressStyle(description_w…



