## Description

##### Implementation of "Deep Residual Learning for Image Recognition" paper - https://ieeexplore.ieee.org/document/7780459

##### Training of the residual network for ImageNet

### Libraries

In [1]:
import tensorflow as tf
import numpy as np
import typing

In [2]:
from ipynb.fs.full.residual_learning_imagenet import ResidualImageNet

In [3]:
# Raise the TensorFlow logging threshold to ERROR so lower-severity log messages are not printed.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

### Training Parameters

In [4]:
EPOCHS = 10 # kept small for this dummy run; the paper trains for up to 60 x 10^4 iterations (mini-batch steps, not epochs)

In [5]:
TRAIN_PORTN = 0.8 # fraction of the samples used for training; the remaining samples form the test set

In [6]:
NUM_CLASSES = 1000 # depth of the one-hot labels; also passed to ResidualImageNet as num_classes

In [7]:
NUM_SAMPLES = 100 # number of dummy samples generated (train and test together)

In [8]:
BATCH_SIZE = 10 # batch size applied to every train/test tf.data.Dataset

### Training Functions

In [9]:
def loss_fun(y_true, y_pred):
    """
        Categorical cross-entropy between one-hot labels and predictions.

        A Keras CategoricalCrossentropy object is built with its default
        arguments on every call and applied to the batch.
        NOTE(review): the Keras default is from_logits=False, i.e. y_pred is
        treated as probabilities -- confirm the model ends in a softmax.

        :param y_true: one-hot encoded target labels
        :param y_pred: model predictions for the same batch
        :return: loss tensor for the batch
    """
    cross_entropy = tf.keras.losses.CategoricalCrossentropy()
    return cross_entropy(y_true, y_pred)

In [10]:
# NOTE: need to upgrade to tf 2.11 for weight decay parameter
def sgd_optimizer(learning_rate):
    """
        Build the SGD optimizer used for training.

        :param learning_rate: step size handed to the optimizer
        :return: tf.keras.optimizers.SGD instance with momentum 0.9, named 'SGD'
    """
    sgd_settings = {
        "learning_rate": learning_rate,
        "momentum": 0.9,
        "name": "SGD",
    }
    return tf.keras.optimizers.SGD(**sgd_settings)

In [11]:
def train_step(model, optimizer, x_batch, y_batch):
    """
        Run one optimisation step on a single training batch.

        The forward pass and the loss are recorded on a gradient tape, the
        gradients w.r.t. the model's trainable variables are computed and
        handed to the optimizer.

        :param model: callable model, invoked as model(x_in=...)
        :param optimizer: optimizer exposing apply_gradients
        :param x_batch: input batch
        :param y_batch: target batch matching x_batch
        :return: loss tensor of this batch (computed before the update)
    """
    with tf.GradientTape() as tape:
        predictions = model(x_in=x_batch)
        batch_loss = loss_fun(y_true=y_batch, y_pred=predictions)

    trainable_vars = model.trainable_variables
    gradients = tape.gradient(batch_loss, trainable_vars)
    optimizer.apply_gradients(zip(gradients, trainable_vars))
    return batch_loss

In [12]:
def test_step(model, x_batch, y_batch):
    """
        Evaluate the loss on a single test batch (no gradient update).

        :param model: callable model, invoked as model(x_in=...)
        :param x_batch: input batch
        :param y_batch: target batch matching x_batch
        :return: loss tensor of this batch
    """
    predictions = model(x_in=x_batch)
    batch_loss = loss_fun(y_true=y_batch, y_pred=predictions)
    return batch_loss

In [13]:
def test_seq_desc_order(seq):
    return all(earlier >= later for earlier, later in zip(seq, seq[1:]))

In [14]:
def all_same(seq):
    """
        Check that a sequence holds exactly one distinct value.

        :param seq: iterable of hashable values
        :return: True if the elements collapse to a single distinct value,
                 False otherwise (including for an empty sequence)
    """
    distinct_values = {item for item in seq}
    return len(distinct_values) == 1

In [15]:
def _loss_plateaued(epoch_losses, window):
    """
        Tell whether the mean epoch loss has stopped decreasing.

        Looks at the last `window` entries of `epoch_losses` and returns True
        when none of them is lower than the first entry of that window. A
        window of identical losses therefore still counts as a plateau.
    """
    recent = epoch_losses[-window:]
    return all(loss >= recent[0] for loss in recent[1:])


def train_model(model, learning_rate: float, lr_factor: float, nr_epochs_for_lr_change: int):
    """
        Train `model` for EPOCHS epochs and report the test loss after each epoch.

        Reads the module-level datasets train_data_x / train_data_y and
        test_data_x / test_data_y as well as the EPOCHS constant. Progress and
        losses are printed; nothing is returned.

        After every epoch (once more than `nr_epochs_for_lr_change` epochs have
        been recorded) the last `nr_epochs_for_lr_change` mean training losses
        are inspected; if none of them improved on the first loss of that
        window, the learning rate is multiplied by `lr_factor`. The check is
        repeated every epoch, so the rate can drop in consecutive epochs.

        :param model: model passed on to train_step / test_step
        :param learning_rate: initial SGD learning rate
        :param lr_factor: multiplier applied to the learning rate on a plateau (e.g. 0.1)
        :param nr_epochs_for_lr_change: number of most recent epochs inspected for a plateau
    """
    mean_train_epoch_losses = []

    # Build the optimizer once. Re-creating it for every batch would throw away
    # the SGD momentum accumulators after each step, making momentum a no-op.
    optimizer = sgd_optimizer(learning_rate=learning_rate)

    for epoch in range(EPOCHS):
        print(f"working on epoch: {epoch}...")
        losses_train = []
        for train_idx, (x_batch_train, y_batch_train) in enumerate(zip(train_data_x, train_data_y)):
            loss_train_value = train_step(model=model, optimizer=optimizer, x_batch=x_batch_train, y_batch=y_batch_train)
            loss_train_scalar = loss_train_value.numpy()
            losses_train.append(loss_train_scalar)
            print(f"Training step is {train_idx}")

            if train_idx % 100 == 0:
                print(f"Loss at step {train_idx} is {loss_train_scalar:.2f}.")

        mean_train_epoch_losses.append(np.mean(losses_train))
        print(f"Finished epoch training nr: {epoch}. Running test inference...")

        if len(mean_train_epoch_losses) > nr_epochs_for_lr_change:
            # most recent nr_epochs_for_lr_change mean training losses
            recent_losses = mean_train_epoch_losses[-nr_epochs_for_lr_change:]
            print(f"last {nr_epochs_for_lr_change} mean epoch losses train results are: {recent_losses}")
            # "no decrease" rather than exact float equality: a stagnating or rising loss also counts
            if _loss_plateaued(epoch_losses=mean_train_epoch_losses, window=nr_epochs_for_lr_change):
                learning_rate = learning_rate*lr_factor # reduce by learning factor
                # update the existing optimizer in place so its momentum state is kept
                optimizer.learning_rate = learning_rate
                print(f"updated learning rate is: {learning_rate}")

        losses_test = []
        for test_idx, (x_batch_test, y_batch_test) in enumerate(zip(test_data_x, test_data_y)):
            loss_test_value = test_step(model=model, x_batch=x_batch_test, y_batch=y_batch_test)
            losses_test.append(loss_test_value.numpy())
            print(f"Test step is {test_idx}")

        print(f"Average (mean) epoch test set loss: {np.mean(losses_test):.2f}.")

### Residual Image Net Dummy Training

#### Create dummy data

In [16]:
# Shapes of the dummy inputs (NUM_SAMPLES images of 500x500x3) and of the one-hot labels.
# input_shape_y is not referenced again in the visible cells.
input_shape_x = (NUM_SAMPLES, 500, 500, 3)
input_shape_y = (NUM_SAMPLES, NUM_CLASSES)

# Random-normal noise stands in for images; no seed is set in the visible cells,
# so the values differ between runs. 100 x 500 x 500 x 3 float32 values take ~300 MB.
data_x = tf.random.normal(input_shape_x)
data_x = tf.constant(data_x, dtype=tf.float32)

data_y = tf.one_hot(np.arange(1, NUM_SAMPLES+1, 1) , NUM_CLASSES, dtype=tf.int32) # dummy 1-hot labels: sample i gets class index i+1 (deterministic, not random); indices must stay below NUM_CLASSES
data_y = tf.constant(data_y, dtype=tf.int32)

#### Create train and test sets

In [17]:
def _to_batched_dataset(samples):
    """Wrap a tensor slice in a tf.data.Dataset and split it into batches of BATCH_SIZE."""
    return tf.data.Dataset.from_tensor_slices(samples).batch(BATCH_SIZE)


# The first TRAIN_PORTN share of the samples is used for training, the rest for testing.
train_size = int(input_shape_x[0]*TRAIN_PORTN)

train_data_x = _to_batched_dataset(data_x[:train_size])
train_data_y = _to_batched_dataset(data_y[:train_size])

test_data_x = _to_batched_dataset(data_x[train_size:])
test_data_y = _to_batched_dataset(data_y[train_size:])

#### Build and train model

In [18]:
# Instantiate the imported ResidualImageNet, passing NUM_CLASSES as num_classes.
residual_image_net = ResidualImageNet(num_classes=NUM_CLASSES)

In [19]:
# Start at learning rate 0.1; train_model multiplies it by lr_factor (0.1) whenever its
# check over the last 3 mean epoch training losses fires.
train_model(model=residual_image_net, learning_rate=0.1, lr_factor=0.1, nr_epochs_for_lr_change=3)

working on epoch: 0...
Training step is 0
Loss at step 0 is 15.32.
Training step is 1
Training step is 2
Training step is 3
Training step is 4
Training step is 5
Training step is 6
Training step is 7
Finished epoch training nr: 0. Running test inference...
Test step is 0
Test step is 1
Average (mean) epoch test set loss: 16.12.
working on epoch: 1...
Training step is 0
Loss at step 0 is 14.51.
Training step is 1
Training step is 2
Training step is 3
Training step is 4
Training step is 5
Training step is 6
Training step is 7
Finished epoch training nr: 1. Running test inference...
Test step is 0
Test step is 1
Average (mean) epoch test set loss: 16.12.
working on epoch: 2...
Training step is 0
Loss at step 0 is 14.51.
Training step is 1
Training step is 2
Training step is 3
Training step is 4
Training step is 5
Training step is 6
Training step is 7
Finished epoch training nr: 2. Running test inference...
Test step is 0
Test step is 1
Average (mean) epoch test set loss: 16.12.
working on