# Transfer Learning with TensorFlow Part 2

Fine-tuning within transfer learning.

In [None]:
# Check gpu
!nvidia-smi

In [None]:
### Helper Functions

from _helper_functions import create_tensorboard_callback, plot_loss_curves, unzip_data, walk_through_dir


## Get some data

link: https://www.tensorflow.org/api_docs/python/tf/keras/applications

In [None]:
# Get 10% of training data of 10 classes of food 101 data.
# Download data
!wget -nc -P ../Downloads/ https://storage.googleapis.com/ztm_tf_course/food_vision/10_food_classes_10_percent.zip

# Unzip
unzip_data('../Downloads/10_food_classes_10_percent.zip', '../Downloads')

# Check number of images and subdirectories in the dataset
walk_through_dir('../Downloads/10_food_classes_10_percent')

In [None]:
import tensorflow as tf
# Locations of the 10% training split and its matching test split
train_dir = '../Downloads/10_food_classes_10_percent/train'
test_dir = '../Downloads/10_food_classes_10_percent/test'

# Image size and batch size used by the data loaders
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Batched image datasets read from the class sub-folders, labels in 'categorical' form
train_data_10_percent = tf.keras.preprocessing.image_dataset_from_directory(
    directory=train_dir,
    label_mode='categorical',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

test_data = tf.keras.preprocessing.image_dataset_from_directory(
    directory=test_dir,
    label_mode='categorical',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

In [None]:
print(train_data_10_percent.class_names)
# see a batch of data
for images, labels in train_data_10_percent.take(1):
    print(images, labels)

## Creating a model with the Functional API

In [None]:
from tensorflow.keras import layers, Model, applications
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy

# Build a feature-extraction model with the Functional API
base_model = applications.EfficientNetB0(include_top=False)

# Freeze the base model (underlying pre-trained patterns aren't updated during training)
base_model.trainable = False

# Create inputs into our model, sized to match the images produced by the data loaders
inputs = layers.Input(shape=IMG_SIZE + (3,), name="input_layer")

# Optional: if using ResNet50V2 you will need to normalize inputs.
# Not necessary for EfficientNet models from `applications`; they have rescaling built in.
# x = layers.experimental.preprocessing.Rescaling(1./255)(inputs)

# Pass the inputs through the frozen base model
x = base_model(inputs)
print(f'shape after passing inputs through base model: {x.shape}')

# Average pool the outputs of the base model
# (aggregate all the most important pieces of information, reduce number of computations)
x = layers.GlobalAveragePooling2D(name="global_avg_pooling_layer")(x)
print(f'shape after GlobalAveragePooling2D: {x.shape}')

# Create the output activation layer (one unit per food class)
outputs = layers.Dense(10, activation='softmax', name="output_layer")(x)

# Combine inputs and outputs into a model
model_0 = Model(inputs=inputs, outputs=outputs, name="model_0")

# Compile
model_0.compile(optimizer=Adam(),
                loss=CategoricalCrossentropy(),
                metrics=['accuracy'])

# Fit, validating on only 25% of the test batches each epoch to keep epochs short.
# `callbacks` is given a list, matching the other fit() calls in this notebook.
model_0_history = model_0.fit(train_data_10_percent,
                            epochs=5,
                            steps_per_epoch=len(train_data_10_percent),
                            validation_data=test_data,
                            validation_steps=int(0.25 * len(test_data)),
                            callbacks=[create_tensorboard_callback(dir_name="../tensorflow_hub",
                                                                experiment_name="10_percent_feature_extraction")]
                            )

In [None]:
# Evaluate on the full test dataset
model_0.evaluate(test_data)

In [None]:
# Check the layers in the base model
for layers_number, layer in enumerate(base_model.layers):
    print(f'{layers_number}: {layer.name}')

In [None]:
# print summary of the base model
base_model.summary()

In [None]:
# how about a summary of our whole model?
model_0.summary()

In [None]:
# Check out the models training curves
plot_loss_curves(model_0_history)

## Getting a feature vector from a trained model

Demonstrate the Global Average Pooling 2D layer:

After our inputs pass through `base_model`, we have a tensor of shape (None, 7, 7, 1280).

Then when it passes through `GlobalAveragePooling2D`, it turns into (None, 1280).

Let's use a similar shaped tensor of (1, 4, 4, 3) and then pass it to `GlobalAveragePooling2D`.


In [None]:
# Define the input shape
input_shape = (1, 4, 4, 3)

# Create a random tensor
tf.random.set_seed(42)
input_tensor = tf.random.normal(input_shape)
print(f'Random input tensor:\n {input_tensor}\n')

# Pass the random tensor through a global average pooling 2D layer
global_avg_pooled_tensor = layers.GlobalAveragePooling2D()(input_tensor)
print(f'Global average pooling 2D layer:\n {global_avg_pooled_tensor}\n')

# Check the shape of the different tensors
print(f'Shape of input tensor: {input_tensor.shape}')
print(f'Shape of global average pooled tensor: {global_avg_pooled_tensor.shape}')

In [None]:
# Replicate the Global Average Pooling 2D layer
tf.reduce_mean(input_tensor, axis=[1, 2])

In [None]:
# Pass the random tensor through a global max pooling 2D layer
global_max_pooled_tensor = layers.GlobalMaxPooling2D()(input_tensor)
print(f'Global max pooling 2D layer:\n {global_max_pooled_tensor}\n')
print(f'Shape of global max pooled tensor: {global_max_pooled_tensor.shape}')

## Running a series of transfer learning experiments

How does transfer learning work with 1% of the training data?

0. `model_0` - baseline model

1. `model_1` - use feature extraction transfer learning with 1% of the training data with data augmentation
2. `model_2` - use feature extraction transfer learning with 10% of the training data with data augmentation
3. `model_3` - use fine-tuning transfer learning with 10% of the training data with data augmentation
4. `model_4` - use fine-tuning transfer learning with 100% of the training data with data augmentation

**Note:** Throughout all experiments, the same test dataset will be used to evaluate our model. This ensures consistency across evaluation metrics.

#### Getting & preprocessing data for 1%

In [None]:
# Download and unzip data - preprocessed from Food101
!wget -nc -P ../Downloads/ https://storage.googleapis.com/ztm_tf_course/food_vision/10_food_classes_1_percent.zip
# Unzip
unzip_data('../Downloads/10_food_classes_1_percent.zip', '../Downloads')

# Check number of images and subdirectories in the dataset
walk_through_dir('../Downloads/10_food_classes_1_percent')

In [None]:
# Locations of the 1% training split and its test split
train_dir_1_percent = '../Downloads/10_food_classes_1_percent/train'
test_dir_1_percent = '../Downloads/10_food_classes_1_percent/test'

# Build the batched datasets with the same image size, batch size and label mode as before
train_data_1_percent = tf.keras.preprocessing.image_dataset_from_directory(
    directory=train_dir_1_percent,
    label_mode='categorical',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

test_data_1_percent = tf.keras.preprocessing.image_dataset_from_directory(
    directory=test_dir_1_percent,
    label_mode='categorical',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

#### Adding data augmentation

To add data augmentation into a model, we can use the layers inside:

* `tf.keras.layers.experimental.preprocessing`

When passed as a layer to a model, data augmentation is automatically turned on during training but turned off during inference (does not augment testing data or new unseen data)

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers.experimental import preprocessing

# Create data augmentation stage with horizontal flipping, rotations, zooms, etc.
# The stage is a Sequential model so it can be used as a single layer inside other models.

# NOTE(review): the stage is built under an explicit CPU device scope; the reason is not
# stated in this notebook -- confirm whether it is still required.
with tf.device('/cpu:0'):
    data_augmentation = Sequential([
        preprocessing.RandomFlip('horizontal'),
        preprocessing.RandomRotation(0.2),
        preprocessing.RandomZoom(0.2),
        preprocessing.RandomHeight(0.2),
        preprocessing.RandomWidth(0.2),
        # preprocessing.Rescaling(1./255) # Keep for models like ResNet50V2; EfficientNet models have rescaling built in
    ], name="data_augmentation")

#### Visualize the data augmentation layer
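**Note:** Calling the data augmentation model directly, without `training=True`, can leave images unaugmented. This is because of a recent update to how augmentation layers work in TensorFlow 2.8.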

A fix should be on the way from the TensorFlow team but for now, one way to fix it is to make sure the parameter training=True is passed to a data augmentation model.

This is because data augmentation is only intended to work during training and not testing.

Code before:

This code appears at 5:46 in the next video.

`augmented_img = data_augmentation(img)`

Doing this would result in images sometimes not being augmented (changed).

Code after the fix:

`augmented_img = data_augmentation(img, training=True)`

In [None]:
# View a random image and compare it to its augmented version

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import random

# Choose a class at random, then one image file from that class's training folder
target_class = random.choice(train_data_1_percent.class_names)
target_dir = os.path.join('../Downloads/10_food_classes_1_percent/train/', target_class)
random_image = random.choice(os.listdir(target_dir))
random_image_path = os.path.join(target_dir, random_image)

# Show the image exactly as stored on disk
img = mpimg.imread(random_image_path)
plt.imshow(img)
plt.title(f'Original random image | Class: {target_class}')
plt.axis(False)

# Augment a one-image batch; training=True makes sure the random layers are applied
augmented_img = data_augmentation(tf.expand_dims(img, axis=0), training=True)

# Show the augmented image in its own figure, scaled by 1/255 for display
plt.figure()
plt.imshow(tf.squeeze(augmented_img)/255.)
plt.title(f'Augmented random image | Class: {target_class}')
plt.axis(False)



### Model 1 - Feature extraction 1% data augmentation
#### Feature extraction transfer learning on 1% of the data with augmentation

In [None]:
# Model 1: feature extraction on the 1% training split, with data augmentation.
# Setup input shape and base model, freeze base model layers

input_shape = (IMG_SIZE + (3,))
base_model = tf.keras.applications.EfficientNetB0(include_top=False)
base_model.trainable = False

# Create input layer
inputs = layers.Input(shape=input_shape, name="input_layer")

# Add data augmentation Sequential model as a layer
x = data_augmentation(inputs) # Augment our training images (augmentation doesn't occur on test data)

# Give base_model the inputs (after augmentation) and don't train it.
# training=False keeps base_model in inference mode, which ensures its
# batchnorm layers don't update.
x = base_model(x, training=False) 

# Pool output features of the base_model
x = layers.GlobalAveragePooling2D(name="global_average_pooling_layer")(x)

# Put a dense layer on as the output
outputs = layers.Dense(10, activation='softmax', name="output_layer")(x)

# Make a model using the inputs and outputs
model_1 = tf.keras.Model(inputs, outputs, name="model_1")

# Compile the model
model_1.compile(loss=CategoricalCrossentropy(),
                optimizer=Adam(),
                metrics=['accuracy'])

# Fit the model on the 1% split; validation uses 25% of the batches of the shared
# test_data so results stay comparable with the other experiments
history_model_1 = model_1.fit(train_data_1_percent,
                                epochs=5,
                                steps_per_epoch=len(train_data_1_percent),
                                validation_data=test_data, 
                                validation_steps=int(0.25 * len(test_data)),
                                # Track model training logs
                                callbacks=[create_tensorboard_callback(dir_name="../tensorflow_hub",
                                                                experiment_name="1_percent_data_aug")]
                                )


#### Evaluate and check loss curves

In [None]:
# Check out the model's summary
model_1.summary()

In [None]:
# Evaluate on the full test dataset
results_1_percent_data_aug = model_1.evaluate(test_data, steps=len(test_data))
results_1_percent_data_aug

In [None]:
plot_loss_curves(history_model_1)

### Model 2 - Feature extraction 10% data augmentation
#### Feature extraction transfer learning on 10% of the data with augmentation

In [None]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model, applications
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.layers import Dense, Flatten, Conv2D, GlobalAveragePooling2D, Input
# Model 2: same architecture as model 1, to be trained on the 10% training split
IMG_SIZE = (224, 224)
input_shape = (IMG_SIZE + (3,))
base_model = tf.keras.applications.EfficientNetB0(include_top=False)
base_model.trainable = False

# Create input layer
inputs = Input(shape=input_shape, name="input_layer")

# Add data augmentation Sequential model as a layer
x = data_augmentation(inputs) # Augment our training images (augmentation doesn't occur on test data)

# Give base_model the inputs (after augmentation) and don't train it.
# training=False keeps base_model in inference mode, which ensures its
# batchnorm layers don't update.
x = base_model(x, training=False) 

# Pool output features of the base_model
x = GlobalAveragePooling2D(name="global_average_pooling_layer")(x)

# Put a dense layer on as the output
outputs = Dense(10, activation='softmax', name="output_layer")(x)


# Make a model using the inputs and outputs
model_2 = tf.keras.Model(inputs, outputs, name="model_2")

# Compile the model (default Adam learning rate for the feature-extraction stage)
model_2.compile(loss=CategoricalCrossentropy(),
                optimizer=Adam(),
                metrics=['accuracy'])

In [None]:
model_2.summary()

#### ModelCheckpoint callback

**Model Checkpointing:** Save your model as it trains so you can stop training if needed and come back to continue where you left off.  Helpful if training takes a long time and can't be done in one sitting.

`tf.keras.callbacks.ModelCheckpoint`

In [None]:
# Set checkpoint path
checkpoint_path = "../checkpoints/10_percent_model_checkpoint_weights/checpoint.ckpt"

# Create a ModelCheckpoint callback that saves the model's weights only
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                                save_weights_only=True,
                                                                save_best_only=False,
                                                                save_freq="epoch", #default is save every epoch,
                                                                verbose=1
)

#### Fit model 2 passing in the ModelCheckpoint callback

In [None]:
# Fit the model saving checkpoints every epoch
initial_epochs = 5
# Fit the model
history_model_2 = model_2.fit(train_data_10_percent,
                                epochs=initial_epochs,
                                steps_per_epoch=len(train_data_10_percent),
                                validation_data=test_data, 
                                validation_steps=int(0.25 * len(test_data)),
                                # Track model training logs
                                callbacks=[create_tensorboard_callback(dir_name="../tensorflow_hub",
                                                                experiment_name="10_percent_data_aug"), 
                                            model_checkpoint_callback
                                    ]
                                )

In [None]:
# Model 0 results
results_model_0 = model_0.evaluate(test_data)


In [None]:
# Check 10 percent results
results_10_percent_data_aug = model_2.evaluate(test_data)
results_10_percent_data_aug

In [None]:
# Plot model loss curves
plot_loss_curves(history_model_2)

#### Load in checkpointed weights

Returns a model to a specific checkpoint

In [None]:
# Load checkpoint
model_2.load_weights(checkpoint_path)

# Evaluate loaded weights model
results_loaded_weights = model_2.evaluate(test_data)



#### Comparing loaded weights

If the results from our previously evaluated model_2 match the loaded weights, everything has worked.
However, to check equality, the comparison has to be approximate.

Use `np.isclose()` to compare the results.

<u>**Parameters:**</u>

    a, b (array_like):

Input arrays to compare.

    rtol (float):

The relative tolerance parameter (see Notes).

    atol (float):

The absolute tolerance parameter (see Notes).

    equal_nan (bool):

Whether to compare NaN’s as equal. If True, NaN’s in a will be considered equal to NaN’s in b in the output array.

<u>**Returns:**</u>

    y (array_like):

Returns a boolean array of where a and b are equal within the given tolerance. If both a and b are scalars, returns a single boolean value.

In [None]:
import numpy as np
comparison = np.isclose(results_10_percent_data_aug, results_loaded_weights, atol=0.0001)
print(comparison)

# Actual difference 
print(np.array(results_10_percent_data_aug)-np.array(results_loaded_weights))

### Model 3 -  Fine Tuning

Use fine-tuning transfer learning with 10% of the training data with data augmentation

**Note:** Fine tuning usually works best *after* training a feature extraction model for a few epochs with large amounts of custom data.

In [None]:
# Layers in our loaded model
model_2.layers


In [None]:
# Check which layers (if any) are trainable

for layer in model_2.layers:
    print(layer, layer.trainable)

In [None]:
# print layer info for base model (EfficientNetB0)

for i, layer in enumerate(model_2.layers[2].layers):
    print(i, layer.name, f'\n\tTrainable: {layer.trainable}')

In [None]:
# How many trainable variables are in the base model?
print(len(model_2.layers[2].trainable_variables))

In [None]:
# Make the top 10 layers of the base model trainable:
# unfreeze everything first, then re-freeze all layers except the last 10.
base_model.trainable = True

for layer in base_model.layers[:-10]:
    layer.trainable = False

# Recompile the model that is actually trained. model_2 wraps base_model, and it is
# model_2.fit() that runs next, so model_2 must be recompiled for the new trainable
# settings and the 10x lower fine-tuning learning rate to take effect.
model_2.compile(loss=CategoricalCrossentropy(), optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])

#### Enable training on specified layers
**Note:** When fine-tuning, lower the learning rate to avoid overfitting. A typical choice is 10x lower than the original learning rate, though different sources recommend other values.

A good resource for info on this is the ULMFiT paper:
https://arxiv.org/pdf/1801.06146v5.pdf

Alternatively, dynamically change the learning rate

In [None]:
# Count the trainable variables of layer 2 of model_2 (the EfficientNetB0 base model)
print(f'Trainable Variables: {len(model_2.layers[2].trainable_variables)}')

# List only the base-model layers that are currently unfrozen
for i, layer in enumerate(base_model.layers):
    if not layer.trainable:
        continue
    print(f'Index: {i}\n\tName: {layer.name}\n\tTrainable: {layer.trainable}')


#### Fine tuning the model

In [None]:
# Fine tune for another 5 epochs on top of the feature-extraction run
initial_epochs = 5 # epochs already completed by the feature-extraction fit
fine_tune_epochs = initial_epochs + 5

# Checkpoint the fine-tuned weights to their own location. Reusing
# model_checkpoint_callback here would overwrite the feature-extraction weights stored
# at checkpoint_path, which a later cell reloads to revert model_2.
fine_tune_checkpoint_path = "../checkpoints/10_percent_fine_tune_checkpoint_weights/checkpoint.ckpt"
fine_tune_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=fine_tune_checkpoint_path,
                                                                   save_weights_only=True,
                                                                   save_best_only=False,
                                                                   save_freq="epoch",
                                                                   verbose=1)

# Refit the model (same as model_2 except with more trainable layers)
history_fine_10_percent = model_2.fit(train_data_10_percent,
    epochs=fine_tune_epochs,
    # Epoch indices are 0-based: history_model_2.epoch[-1] is the last epoch already
    # trained, so resume at the next index to train exactly 5 additional epochs.
    initial_epoch=history_model_2.epoch[-1] + 1,
    steps_per_epoch=len(train_data_10_percent),
    validation_data=test_data,
    validation_steps=int(0.25 * len(test_data)),
    callbacks=[
        create_tensorboard_callback(dir_name="../tensorflow_hub", experiment_name="10_percent_fine_tune_last_10"),
        fine_tune_checkpoint_callback]
    )

In [None]:
# Evaluate the fine-tuned model (model_3 which is actually model_2 fine-tuned for another 5 epochs)

results_fine_tune_10_percent = model_2.evaluate(test_data)
results_fine_tune_10_percent

In [None]:
plot_loss_curves(history_fine_10_percent)

#### Compare histories
The `plot_loss_curves` function works great with models which have only been fit once, however, we want something to compare one series of running `fit()` with another.

e.g. before and after fine-tuning (compare history)

In [None]:
def compare_histories(original_history, new_history, initial_epochs=5):
    """
    Plots the metrics of two consecutive training runs as one continuous curve.

    The accuracy and loss lists of `new_history` are appended to those of
    `original_history`, and both are drawn in a single figure with accuracy
    in the top panel and loss in the bottom panel.

    Args:
        original_history (tf history object):
            History returned by the first `fit()` call (e.g. feature extraction).
        new_history (tf history object):
            History returned by the follow-up `fit()` call (e.g. fine-tuning).
        initial_epochs (int):
            Number of epochs trained in the original run. A vertical marker is
            drawn at the last of those epochs (x = initial_epochs - 1).

    Returns:
        None. The figure is drawn with matplotlib as a side effect.

    """
    first = original_history.history
    second = new_history.history

    # x position of the last epoch of the original run
    boundary = initial_epochs - 1

    # One figure, two stacked panels (previously each metric opened its own figure)
    fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

    panels = (
        (acc_ax, "accuracy", "Accuracy", "lower right"),
        (loss_ax, "loss", "Loss", "upper right"),
    )
    for ax, key, label, legend_loc in panels:
        # Original history followed by the follow-up history, for train and validation
        ax.plot(first[key] + second[key], label=f"Training {label}")
        ax.plot(first[f"val_{key}"] + second[f"val_{key}"], label=f"Val {label}")
        # Vertical marker spanning the current y-range of the panel
        ax.plot([boundary, boundary], ax.get_ylim(), label="Start Fine-tuning")
        ax.legend(loc=legend_loc)
        ax.set_title(f"Training and Validation {label}")

    loss_ax.set_xlabel("Epoch")
    fig.tight_layout()

In [None]:
compare_histories(history_model_2, history_fine_10_percent, 5)

### Model 4 - Fine Tuning with 100% of the training data with augmentation

In [None]:
# Download data
!wget -nc -P ../Downloads/ https://storage.googleapis.com/ztm_tf_course/food_vision/10_food_classes_all_data.zip

# Unzip
unzip_data('../Downloads/10_food_classes_all_data.zip', '../Downloads')

# Check number of images and subdirectories in the dataset
walk_through_dir('../Downloads/10_food_classes_all_data')

In [None]:
import tensorflow as tf
# Locations of the full (100%) training split and its test split
all_data_train_dir = '../Downloads/10_food_classes_all_data/train'
all_data_test_dir = '../Downloads/10_food_classes_all_data/test'

# Image size and batch size for the loaders (same values as the earlier cells)
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Batched image datasets with labels in 'categorical' form
train_data_all = tf.keras.preprocessing.image_dataset_from_directory(
    directory=all_data_train_dir,
    label_mode='categorical',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

test_data_all = tf.keras.preprocessing.image_dataset_from_directory(
    directory=all_data_test_dir,
    label_mode='categorical',
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
)

# Epoch bookkeeping for the full-data fine-tuning run further down:
# 5 feature-extraction epochs already trained, plus 5 more epochs of fine-tuning.
initial_epochs = 5
fine_tune_epochs = initial_epochs + 5

In [None]:
results_model_2 = model_2.evaluate(test_data)
np.isclose(results_model_2, results_fine_tune_10_percent, atol=0.0001)

In [None]:
# Revert model_2 (technically model 3) back to checkpoint version of model_2
model_2.load_weights(checkpoint_path)

In [None]:
# Evaluate loaded weights model
model_2.evaluate(test_data)

In [None]:
results_10_percent_data_aug

Here's what happened:

1. Trained a feature extraction transfer learning model for 5 epochs on 10% of the data (with all base model layers frozen) and saved the model's weights using `ModelCheckpoint`.
2. Fine-tuned the same model on the same 10% of the data for a further 5 epochs with the top 10 layers of the base model unfrozen.
3. Saved the results and training logs each time.
4. Reloaded the model from 1 to do the same steps as 2 but with all of the data.

In [None]:
print(f'Trainable Variables: {len(model_2.layers[2].trainable_variables)}')
for i, layer in enumerate(base_model.layers):
    if (layer.trainable):
        print(f'Index: {i}\n\tName: {layer.name}\n\tTrainable: {layer.trainable}')


In [None]:
# Compile 
model_2.compile(optimizer=Adam(learning_rate=0.0001),
                loss='categorical_crossentropy',
                metrics=['accuracy'])
                

In [None]:
# Continue training model_2 with 100% of the data.
fine_tune_epochs = initial_epochs + 5
history_fine_10_classes_full = model_2.fit(train_data_all,
                                            epochs=fine_tune_epochs,
                                            initial_epoch=history_fine_10_percent.epoch[-1],
                                            steps_per_epoch=len(train_data_all),
                                            validation_data=test_data_all,
                                            validation_steps=int(0.25 * len(test_data_all)),
                                            callbacks=[
                                                create_tensorboard_callback(dir_name="../tensorflow_hub", experiment_name="full_10_classes_fine_tune_last_10"),
                                                model_checkpoint_callback]
                                            )



In [None]:
# Evaluate on all test data
results_fine_tune_full_data = model_2.evaluate(test_data_all)
results_fine_tune_full_data

In [None]:
# model_2 was reverted to the feature-extraction checkpoint before the full-data run,
# so the full-data history is the continuation of history_model_2.
compare_histories(history_model_2, history_fine_10_classes_full, initial_epochs)

## Viewing experiment data on TensorBoard

**Note:** TensorBoard is a visualization tool for TensorFlow. Any data uploaded to TensorBoard.dev will be public.

### Uploading to TensorBoard

In [None]:
# View tensorboard logs of transfer learning modelling experiments
# Upload TensorBoard dev records.

!tensorboard dev upload --logdir=../tensorflow_hub \
--name "Transfer Learning Experiment with 10 Food101 Classes" \
--description "Series of transfer learning experiments with varying amounts of data and fine-tuning" \
--one_shot # Exits the uploader once it has finished uploading

TensorBoard experiments available at: https://tensorboard.dev/experiment/kNcqp4oSQlGAAbALH07NZA/

### To delete an experiment
`!tensorboard dev delete --experiment_id kNcqp4oSQlGAAbALH07NZA`