# Transfer Learning and Fine Tuning
See https://keras.io/guides/transfer_learning/

In [6]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds

## The Cats vs Dogs Data Set

In this example, a CNN classifier pre-trained on ImageNet data is adapted via transfer learning into a binary classifier and then fine-tuned to distinguish between cats and dogs. As can be seen below, a few hundred images already suffice to reach high accuracy using this technique.

In [5]:
# Keep the cell output free of download progress bars.
tfds.disable_progress_bar()

# Three non-overlapping 2% slices of the "train" split serve as the
# training, validation and test subsets.
split_slices = ["train[:2%]", "train[40%:42%]", "train[50%:52%]"]
train_ds, validation_ds, test_ds = tfds.load(
    "cats_vs_dogs",
    split=split_slices,
    as_supervised=True,  # Include labels
)

# Report the number of samples in each subset, in the same order as above.
for message, dataset in (
    ("Number of training samples: %d", train_ds),
    ("Number of validation samples: %d", validation_ds),
    ("Number of test samples: %d", test_ds),
):
    print(message % tf.data.experimental.cardinality(dataset))


[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\Chinonso Agbo\tensorflow_datasets\cats_vs_dogs\4.0.1...[0m


KeyError: "There is no item named 'PetImages\\\\Cat\\\\0.jpg' in the archive"

#### Visualizing the data

In [None]:
import matplotlib.pyplot as plt

# Show the first nine training images in a 3x3 grid, each titled with its
# integer label.
fig = plt.figure(figsize=(10, 10))
for idx, (image, label) in enumerate(train_ds.take(9)):
    ax = fig.add_subplot(3, 3, idx + 1)
    ax.imshow(image)
    ax.set_title(int(label))
    ax.axis("off")

#### Setting up the preprocessing pipeline

In [None]:
# Target height and width for every image; labels pass through untouched.
size = (224, 224)


def resize_example(image, label):
    """Resize `image` to `size` and return it together with its `label`."""
    return tf.image.resize(image, size), label


train_ds = train_ds.map(resize_example)
validation_ds = validation_ds.map(resize_example)
test_ds = test_ds.map(resize_example)

In [None]:
batch_size = 32


def cache_batch_prefetch(dataset):
    """Cache the elements, group them into batches and prefetch the batches."""
    return dataset.cache().batch(batch_size).prefetch(buffer_size=10)


train_ds = cache_batch_prefetch(train_ds)
validation_ds = cache_batch_prefetch(validation_ds)
test_ds = cache_batch_prefetch(test_ds)

## Data Augmentation

Another popular method to avoid overfitting on small datasets is data augmentation. Images are randomly transformed using selected operations, so that no image passes through training exactly the same way twice.

In [None]:
# Augmentation pipeline: random horizontal flip followed by a random
# rotation (factor 0.1).
data_augmentation = keras.Sequential(
    [layers.RandomFlip("horizontal"), layers.RandomRotation(0.1)]
)

# Preview: run the first image of one training batch through the pipeline
# nine times and plot the results in a 3x3 grid.
for images, labels in train_ds.take(1):
    fig = plt.figure(figsize=(10, 10))
    sample_batch = tf.expand_dims(images[0], 0)  # add a leading batch axis
    sample_title = int(labels[0])
    for slot in range(1, 10):
        ax = fig.add_subplot(3, 3, slot)
        # Called with training=True so the random transformations are applied.
        augmented = data_augmentation(sample_batch, training=True)
        # Cast to integers so imshow treats the values as 0-255 pixels.
        ax.imshow(augmented[0].numpy().astype("int32"))
        ax.set_title(sample_title)
        ax.axis("off")


## Pretrained Models

Many well-known pretrained models are available for Keras at https://keras.io/api/applications/.

In [None]:
# MobileNet feature extractor for 224x224 three-channel inputs, loaded with
# ImageNet weights and without the ImageNet classifier at the top.
base_model = keras.applications.MobileNet(
    include_top=False,
    input_shape=(224, 224, 3),
    weights="imagenet",
)

# Freeze the whole base model: as the summary below shows, none of its
# weights will be adapted during training.
base_model.trainable = False
base_model.summary()

We add the preprocessing and the scaling to the model and add a new Dense layer on top of the pretrained model to adapt the feature extractor to the new task during transfer learning.

In [None]:
# Create new model on top
inputs = keras.Input(shape=(224, 224, 3))
x = data_augmentation(inputs)  # Apply random data augmentation

# The pre-trained MobileNet weights expect inputs scaled from (0, 255)
# to the range (-1., +1.); the rescaling layer outputs:
# `(inputs * scale) + offset`
scale_layer = keras.layers.Rescaling(scale=1 / 127.5, offset=-1)
x = scale_layer(x)

# The base model contains batchnorm layers. We want to keep them in inference mode
# when we unfreeze the base model for fine-tuning, so we make sure that the
# base_model is running in inference mode here.
x = base_model(x, training=False)

# Classification head on top of the feature extractor:
# - global average pooling collapses the spatial feature map into one
#   feature vector per image,
# - dropout regularizes the small head,
# - a single Dense unit without activation emits one logit per image, which
#   is what a binary cross-entropy loss with `from_logits=True` expects.
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dropout(0.2)(x)
outputs = keras.layers.Dense(1)(x)
model = keras.Model(inputs, outputs)

model.summary()

Training only the few parameters in the newly added top layer

In [None]:
# Number of passes over the training data for this training stage.
epochs = 10

# Binary classification on raw logits, tracked with binary accuracy.
model.compile(
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[keras.metrics.BinaryAccuracy()],
    optimizer=keras.optimizers.Adam(learning_rate=0.01),
)
model.fit(train_ds, validation_data=validation_ds, epochs=epochs)

In [None]:
# Make the weights of the base model trainable for fine-tuning. Because the
# base model was called with `training=False`, it still runs in inference
# mode: its batchnorm layers do not update their batch statistics, which
# would otherwise undo the training done so far.
base_model.trainable = True
model.summary()

# Compile again, this time with a low learning rate for fine-tuning.
model.compile(
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[keras.metrics.BinaryAccuracy()],
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),
)

In [None]:
# Train for 12 epochs on the same training and validation datasets.
epochs = 12
model.fit(train_ds, validation_data=validation_ds, epochs=epochs)