<a href="https://colab.research.google.com/github/NoeDebrois/Artificial-NN-and-Deep-Learning/blob/main/Transfer_Learning_and_Fine_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Artificial Neural Networks and Deep Learning

---

## Lecture 4b: Transfer Learning and Fine Tuning

<img src="https://drive.google.com/uc?export=view&id=1gsJ4h701PWou3B_JsjbfhIZnefsn2mGN" width="500"/>

## 🌐 Connect Colab to Google Drive

In [None]:
# Mount Google Drive at /gdrive (only works inside Google Colab)
from google.colab import drive
drive.mount('/gdrive')
# NOTE(review): hard-coded, author-specific Drive folder — change it to your own path.
# Later cells save and load `.keras` checkpoints relative to this working directory.
%cd /gdrive/My Drive/[2024-2025] AN2DL/Lecture 4

## ⚙️ Import Libraries

In [None]:
# Environment setup: fix the random seeds, silence library logging, then import
# the libraries used throughout the notebook.

# Set seed for reproducibility
seed = 42

# Import necessary libraries
import os

# Set environment variables before importing modules
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['PYTHONHASHSEED'] = str(seed)
# Point MPLCONFIGDIR at a `configs/` folder under the current working directory
os.environ['MPLCONFIGDIR'] = os.getcwd() + '/configs/'

# Suppress warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# NOTE(review): `Warning` is the base class of every warning category, so this filter
# already covers FutureWarning and hides all warnings for the rest of the session.
warnings.simplefilter(action='ignore', category=Warning)

# Import necessary modules
import logging
import random
import numpy as np

# Set seeds for random number generators in NumPy and Python
np.random.seed(seed)
random.seed(seed)

# Import TensorFlow and Keras
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl

# Set seed for TensorFlow
tf.random.set_seed(seed)
# NOTE(review): presumably kept for TF1-compat code paths; likely redundant with the line above — confirm
tf.compat.v1.set_random_seed(seed)

# Reduce TensorFlow verbosity
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Print TensorFlow version
print(tf.__version__)

# Import other libraries
import requests
from io import BytesIO
import cv2
from PIL import Image
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns

# Configure plot display settings
sns.set(font_scale=1.4)
sns.set_style('white')
plt.rc('font', size=14)
%matplotlib inline

## ⏳ Load Data

In [None]:
def load_oxford_pets(image_size=128, batch_size=32, log_every=500):
    """Load the Oxford-IIIT Pet dataset into memory as NumPy arrays.

    Every image is centre-cropped to a square, resized to
    ``image_size x image_size`` and scaled to ``[0, 1]``.

    Args:
        image_size: Side length, in pixels, of the square output images.
        batch_size: Number of images converted to NumPy at a time.
        log_every: Print a progress line each time another ``log_every``
            images have been processed (and once more when a split is
            complete). Pass ``0`` or ``None`` to disable progress output.

    Returns:
        ``((X_train, y_train), (X_test, y_test), class_names)`` where the
        ``X_*`` arrays are float32 with shape
        ``(n, image_size, image_size, 3)``, the ``y_*`` arrays hold int64
        class indices with shape ``(n,)`` and ``class_names`` is the list of
        lower-cased label names.
    """
    # Load dataset with TensorFlow Datasets, obtaining dataset info
    dataset, info = tfds.load(
        'oxford_iiit_pet',
        with_info=True,
        as_supervised=True
    )

    def preprocess(image, label):
        """Centre-crop to a square, resize and normalise pixel values to [0, 1]."""
        image = tf.cast(image, tf.float32)
        height = tf.shape(image)[0]
        width = tf.shape(image)[1]
        crop_size = tf.minimum(height, width)
        height_offset = (height - crop_size) // 2
        width_offset = (width - crop_size) // 2

        image = tf.image.crop_to_bounding_box(
            image,
            height_offset,
            width_offset,
            crop_size,
            crop_size
        )
        image = tf.image.resize(image, [image_size, image_size])
        image = image / 255.0
        return image, label

    def split_to_numpy(split_name, description):
        """Materialise one dataset split into pre-allocated NumPy arrays."""
        n_examples = info.splits[split_name].num_examples
        images = np.zeros((n_examples, image_size, image_size, 3), dtype=np.float32)
        labels = np.zeros(n_examples, dtype=np.int64)

        idx = 0
        for batch_images, batch_labels in dataset[split_name].map(preprocess).batch(batch_size):
            n_batch = batch_images.shape[0]
            images[idx:idx + n_batch] = batch_images.numpy()
            labels[idx:idx + n_batch] = batch_labels.numpy()
            idx += n_batch

            # Report whenever this batch crossed a multiple of `log_every`.
            # An exact `idx % log_every == 0` test almost never fires because
            # `idx` advances in steps of `batch_size`.
            crossed = bool(log_every) and idx // log_every > (idx - n_batch) // log_every
            if log_every and (crossed or idx == n_examples):
                print(f"Processed {idx}/{n_examples} {description} images")

        return images, labels

    X_train, y_train = split_to_numpy('train', 'training')
    X_test, y_test = split_to_numpy('test', 'test')

    # Retrieve and format class names
    class_names = list(map(str.lower, info.features['label'].names))
    return (X_train, y_train), (X_test, y_test), class_names

# Execute function and load data; the returned arrays become the notebook-wide
# training and test sets used by every later cell
(X_train, y_train), (X_test, y_test), class_names = load_oxford_pets()

# Display data shapes for training and test sets
print("Training set shape (images):", X_train.shape)
print("Training set shape (labels):", y_train.shape)
print("Test set shape (images):", X_test.shape)
print("Test set shape (labels):", y_test.shape)

## 🔎 Inspect Data

In [None]:
# Number of images to display
num_img = 10

# Grid layout: two rows and as many columns as needed to hold every image
# (ceil division, so an odd `num_img` still gets enough axes)
n_rows = 2
n_cols = (num_img + n_rows - 1) // n_rows

# Select random indices from the training set
random_indices = random.sample(range(len(X_train)), num_img)

# Create subplot layout for images
fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 9))

# Remove axis lines everywhere up front so unused grid cells stay blank
for ax in axes.flat:
    ax.axis('off')

# Fill the grid row by row; iterating over `axes.flat` keeps the indexing
# consistent with the grid size instead of hard-coding the number of columns
for ax, idx in zip(axes.flat, random_indices):
    # Display the image at the selected index
    ax.imshow(X_train[idx])

    # Add class name as title, replacing underscores with spaces and capitalising
    class_name = class_names[y_train[idx]].replace('_', ' ').title()
    ax.set_title(class_name, pad=5)

# Adjust layout for better spacing
plt.tight_layout()
plt.show()

In [None]:
def plot_class_distribution(y_train, y_test, class_names):
    """Plot per-class image counts of the training and test sets side by side.

    Args:
        y_train: 1-D array of non-negative integer class indices (training set).
        y_test: 1-D array of non-negative integer class indices (test set).
        class_names: Sequence of class names; position ``k`` labels class ``k``.

    Raises:
        ValueError: If a label index is not covered by ``class_names``.

    Note:
        Calls ``sns.set_style("whitegrid")``, which changes the seaborn style
        for subsequent plots as well.
    """
    n_classes = len(class_names)

    # Set seaborn style for the plot
    sns.set_style("whitegrid")
    plt.figure(figsize=(18, 6))

    # Count images per class. `minlength` pads with zeros so the result always
    # has one entry per class, even when the highest classes are absent.
    train_dist = np.bincount(y_train, minlength=n_classes)
    test_dist = np.bincount(y_test, minlength=n_classes)
    if len(train_dist) > n_classes or len(test_dist) > n_classes:
        raise ValueError(
            f"Found label indices outside the {n_classes} classes in class_names"
        )

    # Create x positions and set bar width
    x = np.arange(n_classes)
    width = 0.35

    # Plot bars for training and test distributions
    plt.bar(x - width / 2, train_dist, width, label='Training', color='#2ecc71', alpha=0.7)
    plt.bar(x + width / 2, test_dist, width, label='Test', color='#3498db', alpha=0.7)

    # Customise plot title and labels
    plt.title('Class Distribution', pad=20, fontsize=14)
    plt.xlabel('Classes')
    plt.ylabel('Number of Images')

    # Set class names as x-axis labels with rotation
    plt.xticks(x, class_names, rotation=45, ha='right')

    # Add legend for training and test distributions
    plt.legend(loc='lower right')

    # Adjust layout for optimal spacing
    plt.tight_layout()
    plt.show()

# Execute function to plot class distribution (labels are still integer class
# indices here; they are one-hot encoded in a later cell)
plot_class_distribution(y_train, y_test, class_names)

In [None]:
# Guard against re-running this cell: after the first run `y_test` is one-hot
# encoded (2-D). Splitting and encoding a second time would halve the test set
# again and corrupt the label arrays.
if y_test.ndim == 1:
    # Split test set into validation and test sets with stratification
    X_val, X_test, y_val, y_test = train_test_split(
        X_test, y_test, random_state=seed, test_size=0.5, stratify=y_test
    )

    # Convert class labels to categorical format for training, validation, and test sets
    y_train = tfk.utils.to_categorical(y_train, num_classes=len(class_names))
    y_val = tfk.utils.to_categorical(y_val, num_classes=len(class_names))
    y_test = tfk.utils.to_categorical(y_test, num_classes=len(class_names))

# Print shapes of the datasets
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

## 🛠️ Train MobileNetV3Small from scratch

In [None]:
# Initialise MobileNetV3Small model without pretrained weights, for custom training.
# `include_top=False` leaves the backbone without a classifier; the notebook attaches
# its own Dropout + Dense head in the next cell.
# NOTE(review): `include_preprocessing=True` presumably makes the backbone expect raw
# [0, 255] pixel values, which is why the training cell feeds `X_train*255` — confirm.
mobilenet = tfk.applications.MobileNetV3Small(
    input_shape=(128, 128, 3),
    include_top=False,
    weights=None,
    pooling='avg',
    include_preprocessing=True
)

# Display a summary of the model architecture
mobilenet.summary(expand_nested=True)

# Display model architecture with layer shapes and trainable parameters
tfk.utils.plot_model(mobilenet, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)

In [None]:
# Symbolic input matching the 128x128 RGB images
inputs = tfk.Input(shape=(128, 128, 3), name='input_layer')

# On-the-fly augmentation: random horizontal flips and random translations
augmentation = tfk.Sequential(
    [
        tfkl.RandomFlip("horizontal"),
        tfkl.RandomTranslation(0.2, 0.2),
    ],
    name='preprocessing',
)

# Augment, extract features with the MobileNetV3Small backbone, then apply dropout
x = augmentation(inputs)
x = mobilenet(x)
x = tfkl.Dropout(0.3, name='dropout')(x)

# Softmax classifier with one unit per class (the width of the one-hot labels)
outputs = tfkl.Dense(y_train.shape[-1], activation='softmax', name='dense')(x)

# Assemble the end-to-end model from input to class probabilities
model = tfk.Model(inputs=inputs, outputs=outputs, name='model')

# Categorical cross-entropy loss, Adam with default settings, accuracy as metric
model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(),
    metrics=['accuracy'],
)

# Textual overview of the architecture, nested layers included
model.summary(expand_nested=True)

# Graphical overview with layer shapes and trainable flags
tfk.utils.plot_model(model, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)

In [None]:
# Train the model. Images are stored in [0, 1], so they are rescaled to [0, 255]
# before being fed to the network. Early stopping watches validation accuracy and
# restores the best weights once it has not improved for 10 epochs.
history = model.fit(
    x = X_train*255,
    y = y_train,
    batch_size = 64,
    epochs = 200,
    validation_data = (X_val*255, y_val),
    callbacks = [tfk.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=10, restore_best_weights=True)]
).history

# Calculate and print the best validation accuracy reached during training
final_val_accuracy = round(max(history['val_accuracy'])* 100, 2)
print(f'Final validation accuracy: {final_val_accuracy}%')

# Save the trained model to a file with the accuracy included in the filename.
# The prefix names the architecture actually trained here (MobileNetV3Small from
# scratch); the previous 'EfficientNetV2B0' prefix mislabelled the checkpoint.
model_filename = 'Pets_MobileNetV3S_Scratch_'+str(final_val_accuracy)+'.keras'
model.save(model_filename)

# Delete the model to free up resources
del model

In [None]:
# Two stacked panels: loss on top, accuracy below
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(15, 6))

# Loss panel — faint dashed line: training, solid line: validation
ax1.plot(history['loss'], color='#ff7f0e', linestyle='--', alpha=0.3)
ax1.plot(history['val_loss'], color='#ff7f0e', alpha=0.8, label='Re-trained')
ax1.set_title('Categorical Crossentropy')
ax1.legend(loc='upper left')
ax1.grid(alpha=0.3)

# Accuracy panel — same line convention as the loss panel
ax2.plot(history['accuracy'], color='#ff7f0e', linestyle='--', alpha=0.3)
ax2.plot(history['val_accuracy'], color='#ff7f0e', alpha=0.8, label='Re-trained')
ax2.set_title('Accuracy')
ax2.grid(alpha=0.3)

# Tighten spacing so titles and tick labels do not overlap, then render
fig.tight_layout()
plt.show()

## 🛠️ Transfer Learning

<img src="https://radekosmulski.com/content/images/2021/08/imagenet_banner.jpeg" width="700"/>



In [None]:
# Initialise MobileNetV3Small model with pretrained weights, for transfer learning.
# Same configuration as the from-scratch backbone except for `weights='imagenet'`;
# `include_top=False` leaves out the classifier so a new head can be attached below.
# NOTE(review): `include_preprocessing=True` presumably makes the backbone expect raw
# [0, 255] pixel values, which is why the training cell feeds `X_train * 255` — confirm.
mobilenet = tfk.applications.MobileNetV3Small(
    input_shape=(128, 128, 3),
    include_top=False,
    weights='imagenet',
    pooling='avg',
    include_preprocessing=True
)

# Display a summary of the model architecture
mobilenet.summary(expand_nested=True)

# Display model architecture with layer shapes and trainable parameters
tfk.utils.plot_model(mobilenet, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)

<img src="https://drive.google.com/uc?export=view&id=1OYCCe7eQSSiscBZC1gc1TmIMOjN8R-Qk" width="800"/>

In [None]:
# Use the pretrained backbone purely as a fixed feature extractor:
# none of its weights are updated during this training stage
mobilenet.trainable = False

# Symbolic input matching the 128x128 RGB images
inputs = tfk.Input(shape=(128, 128, 3), name='input_layer')

# On-the-fly augmentation: random horizontal flips and random translations
augmentation = tfk.Sequential(
    [
        tfkl.RandomFlip("horizontal"),
        tfkl.RandomTranslation(0.2, 0.2),
    ],
    name='preprocessing',
)

# Augment, extract features with the frozen backbone, then apply dropout
x = augmentation(inputs)
x = mobilenet(x)
x = tfkl.Dropout(0.3, name='dropout')(x)

# Softmax classifier with one unit per class (the width of the one-hot labels)
outputs = tfkl.Dense(y_train.shape[-1], activation='softmax', name='dense')(x)

# Assemble the end-to-end transfer-learning model
tl_model = tfk.Model(inputs=inputs, outputs=outputs, name='model')

# Categorical cross-entropy loss, Adam with default settings, accuracy as metric
tl_model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(),
    metrics=['accuracy'],
)

# Textual overview of the architecture, nested layers included
tl_model.summary(expand_nested=True)

# Graphical overview with layer shapes and trainable flags
tfk.utils.plot_model(tl_model, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)

In [None]:
# Train the model. Images are stored in [0, 1], so they are rescaled to [0, 255]
# before being fed to the network. Early stopping watches validation accuracy and
# restores the best weights once it has not improved for 20 epochs.
tl_history = tl_model.fit(
    x=X_train * 255,
    y=y_train,
    batch_size=64,
    epochs=200,
    validation_data=(X_val * 255, y_val),
    callbacks=[tfk.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=20, restore_best_weights=True)]
).history

# Calculate and print the best validation accuracy achieved
final_val_accuracy = round(max(tl_history['val_accuracy']) * 100, 2)
print(f'Final validation accuracy: {final_val_accuracy}%')

# Save the trained model to a file, including final accuracy in the filename.
# The 'TL_' tag matches the 'Pets_MobileNetV3S_TL_*.keras' naming that the
# fine-tuning section expects and mirrors the 'FT_' tag used for fine-tuned models.
model_filename = 'Pets_MobileNetV3S_TL_' + str(final_val_accuracy) + '.keras'
tl_model.save(model_filename)

# Free memory by deleting the model instance
del tl_model

In [None]:
# Two stacked panels: loss on top, accuracy below
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(15, 6))

# Loss panel — orange: trained from scratch, blue: transfer learning;
# faint dashed lines are training curves, solid lines are validation curves
ax1.plot(history['loss'], color='#ff7f0e', linestyle='--', alpha=0.3)
ax1.plot(history['val_loss'], color='#ff7f0e', alpha=0.8, label='Re-trained')
ax1.plot(tl_history['loss'], color='#4D61E2', linestyle='--', alpha=0.3)
ax1.plot(tl_history['val_loss'], color='#4D61E2', alpha=0.8, label='Transfer Learning')
ax1.set_title('Categorical Crossentropy')
ax1.legend(loc='upper left')
ax1.grid(alpha=0.3)

# Accuracy panel — same colour and line convention as the loss panel
ax2.plot(history['accuracy'], color='#ff7f0e', linestyle='--', alpha=0.3)
ax2.plot(history['val_accuracy'], color='#ff7f0e', alpha=0.8, label='Re-trained')
ax2.plot(tl_history['accuracy'], color='#4D61E2', linestyle='--', alpha=0.3)
ax2.plot(tl_history['val_accuracy'], color='#4D61E2', alpha=0.8, label='Transfer Learning')
ax2.set_title('Accuracy')
ax2.grid(alpha=0.3)

# Tighten spacing so titles and tick labels do not overlap, then render
fig.tight_layout()
plt.show()

## 🛠️ Fine Tuning

<img src="https://www.researchgate.net/publication/359405075/figure/fig2/AS:1182999492214798@1659060466845/Concept-of-fine-tuning-and-feature-extraction_W640.jpg" width="700"/>




In [None]:
# Re-load the model after transfer learning.
# `model_filename` still holds the path written by the transfer-learning training
# cell above. Loading from it, rather than from a hard-coded name containing one
# particular run's accuracy, keeps the notebook runnable from a fresh kernel.
# To resume from an older checkpoint, assign its path to `model_filename` first.
ft_model = tfk.models.load_model(model_filename)

# Display a summary of the model architecture
ft_model.summary(expand_nested=True)

# Display model architecture with layer shapes and trainable parameters
tfk.utils.plot_model(ft_model, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)

In [None]:
# Mark the nested MobileNetV3Small backbone as trainable as a whole;
# the per-layer flags are then set individually below
ft_model.get_layer('MobileNetV3Small').trainable = True

# Single pass over the backbone: convolutional layers (standard and depthwise)
# become trainable and are listed; every other layer is frozen
for i, layer in enumerate(ft_model.get_layer('MobileNetV3Small').layers):
    if isinstance(layer, (tfkl.Conv2D, tfkl.DepthwiseConv2D)):
        layer.trainable = True
        print(i, layer.name, type(layer).__name__, layer.trainable)
    else:
        layer.trainable = False

In [None]:
# Number of leading backbone layers that stay frozen during fine-tuning
N = 124

# Freeze backbone layers 0 .. N-1; layers from index N onward keep
# whatever trainable flag they already have
for layer in ft_model.get_layer('MobileNetV3Small').layers[:N]:
    layer.trainable = False

# List every backbone layer with its index, name and final trainable flag
for i, layer in enumerate(ft_model.get_layer('MobileNetV3Small').layers):
    print(i, layer.name, layer.trainable)

# Textual overview of the architecture, nested layers included
ft_model.summary(expand_nested=True)

# Graphical overview with layer shapes and trainable flags
tfk.utils.plot_model(ft_model, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)

In [None]:
# Re-compile so the updated trainable flags take effect, using Adam with a
# learning rate of 1e-4 for the fine-tuning stage
ft_model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

In [None]:
# Fine-tune the partially unfrozen model. Images are stored in [0, 1] and are
# rescaled to [0, 255] before being fed to the network. Early stopping watches
# validation accuracy and restores the best weights after 20 stagnant epochs.
ft_history = ft_model.fit(
    x=X_train * 255,
    y=y_train,
    batch_size=64,
    epochs=200,
    validation_data=(X_val * 255, y_val),
    callbacks=[
        tfk.callbacks.EarlyStopping(
            monitor='val_accuracy',
            mode='max',
            patience=20,
            restore_best_weights=True,
        )
    ],
).history

# Best validation accuracy reached during fine-tuning, as a percentage
final_val_accuracy = round(max(ft_history['val_accuracy']) * 100, 2)
print(f'Final validation accuracy: {final_val_accuracy}%')

# Persist the fine-tuned model; the accuracy is embedded in the filename
model_filename = 'Pets_MobileNetV3S_FT_' + str(final_val_accuracy) + '.keras'
ft_model.save(model_filename)

# Drop the in-memory model to release resources
del ft_model

In [None]:
# Two stacked panels: loss on top, accuracy below
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(15, 6))

# Loss panel — blue: transfer learning, green: fine tuning;
# faint dashed lines are training curves, solid lines are validation curves
ax1.plot(tl_history['loss'], color='#4D61E2', linestyle='--', alpha=0.3)
ax1.plot(tl_history['val_loss'], color='#4D61E2', alpha=0.8, label='Transfer Learning')
ax1.plot(ft_history['loss'], color='#408537', linestyle='--', alpha=0.3)
ax1.plot(ft_history['val_loss'], color='#408537', alpha=0.8, label='Fine Tuning')
ax1.set_title('Categorical Crossentropy')
ax1.legend(loc='upper left')
ax1.grid(alpha=0.3)

# Accuracy panel — same colour and line convention as the loss panel
ax2.plot(tl_history['accuracy'], color='#4D61E2', linestyle='--', alpha=0.3)
ax2.plot(tl_history['val_accuracy'], color='#4D61E2', alpha=0.8, label='Transfer Learning')
ax2.plot(ft_history['accuracy'], color='#408537', linestyle='--', alpha=0.3)
ax2.plot(ft_history['val_accuracy'], color='#408537', alpha=0.8, label='Fine Tuning')
ax2.set_title('Accuracy')
# Zoom the y-axis on the validation range: from 5% below the lowest
# transfer-learning value to 5% above the highest fine-tuning value
ax2.set_ylim([min(tl_history['val_accuracy']) * 0.95, max(ft_history['val_accuracy']) * 1.05])
ax2.grid(alpha=0.3)

# Tighten spacing so titles and tick labels do not overlap, then render
fig.tight_layout()
plt.show()

## 🕹️ Use the Model - Make Inference

In [None]:
# Load the fine-tuned MobileNetV3 model for prediction on external images.
# `model_filename` still holds the path written by the fine-tuning training cell.
# Loading from it, rather than from a hard-coded name containing one particular
# run's accuracy, keeps the notebook runnable from a fresh kernel.
# To use an older checkpoint, assign its path to `model_filename` first.
ft_bn_model = tfk.models.load_model(model_filename)

# Display a summary of the model architecture.
# The loaded model is `ft_bn_model`; `ft_model` was deleted after fine-tuning.
ft_bn_model.summary(expand_nested=True)

# Display model architecture with layer shapes and trainable parameters
tfk.utils.plot_model(ft_bn_model, expand_nested=True, show_trainable=True, show_shapes=True, dpi=70)

In [None]:
def crop_square(image):
    """Return the largest centred square region of ``image``.

    The input is cast to float32 first; its first two dimensions are treated
    as height and width. The square's side equals the shorter of the two, and
    the crop is centred along the longer one.
    """
    image = tf.cast(image, tf.float32)

    dims = tf.shape(image)
    height, width = dims[0], dims[1]
    side = tf.minimum(height, width)

    # Offsets that centre the square inside the original image
    return tf.image.crop_to_bounding_box(
        image,
        offset_height=(height - side) // 2,
        offset_width=(width - side) // 2,
        target_height=side,
        target_width=side,
    )

def preprocess_for_model(image):
    """Resize ``image`` to 128x128, the spatial size used for the model input."""
    target_size = [128, 128]
    return tf.image.resize(image, target_size)

# Dictionary of image URLs and their actual classes
images = {
    'pug': 'https://blog.expodog.com/wp-content/uploads/2020/09/f896a647cfc5346c5b042a6a1e916065.jpg',
    'persian': 'https://images.squarespace-cdn.com/content/v1/5b1cc0f95b409bd4bfc3b316/1687322332437-DOVUPJBJMKQWZ2TRSQA2/sergey-semin-I9cHfDYLT3E-unsplash%281%29.jpg',
    'sphynx': 'https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEhcpzLYLRORZQaVRNizLejwVBFp8eIUPS_ZN7Zhw_J54RgeELoupne4wHwxj8o8vr6XXlFtJwTj8QGP7VIpbj_UwTU6bp4Z6lIIM80zkyhKBtN8YHeFQQScopDh0nw8loMvhmxvaurUuP8/s1600/Sphynx_associazione_mammagatta-7.jpg',
    'abyssinian': 'https://www.thesprucepets.com/thmb/MigiLYeVSJcj0zkNVkJ--rALMZI=/2121x0/filters:no_upscale():strip_icc()/GettyImages-165827729-efc11c02690f457a81ef6ccbfa8eb34d.jpg',
    'samoyed': 'https://images.ctfassets.net/440y9b545yd9/49v1AZmZdiPYkJ4A3vrayj/d7d7db21fed2ef30f5b8e3899633d292/Samoyed850.jpg'
}

# Create a plot for visualising model predictions
plt.figure(figsize=(25, 5))

for i, (true_class, url) in enumerate(images.items(), 1):
    # Load the image from the URL. The timeout stops a stalled server from
    # hanging the cell, and `raise_for_status` surfaces HTTP errors directly
    # instead of letting PIL fail on an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Force three RGB channels (downloads may be RGBA, palette or grayscale,
    # which would not match the model's 3-channel input), then scale to [0, 1]
    img_original = np.array(Image.open(BytesIO(response.content)).convert('RGB')) / 255

    # Crop the image to a square for consistent processing
    img_square = crop_square(img_original)

    # Preprocess image copy for model prediction
    img_for_model = preprocess_for_model(img_square)

    # Predict class using the preprocessed image, rescaled to [0, 255] as in training
    prediction = ft_bn_model.predict(np.expand_dims(img_for_model * 255, axis=0), verbose=0)
    pred_class = class_names[np.argmax(prediction)]
    confidence = round(100 * np.max(prediction), 2)

    # Plot the cropped square image; one column per entry in `images`
    plt.subplot(1, len(images), i)
    plt.imshow(img_square)
    plt.axis('off')

    # Add title with actual and predicted class info
    plt.title(f'Real: {true_class}\nPred: {pred_class}\nConf: {confidence}%', fontsize=14, pad=10)

# Adjust layout to prevent overlap and display
plt.tight_layout()
plt.show()

#  
<img src="https://airlab.deib.polimi.it/wp-content/uploads/2019/07/airlab-logo-new_cropped.png" width="350">

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/9/95/Instagram_logo_2022.svg/800px-Instagram_logo_2022.svg.png" width="15"> **Instagram:** https://www.instagram.com/airlab_polimi/

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/8/81/LinkedIn_icon.svg/2048px-LinkedIn_icon.svg.png" width="15"> **LinkedIn:** https://www.linkedin.com/company/airlab-polimi/
___
Credits: Eugenio Lomurno 📧 eugenio.lomurno@polimi.it





```
   Copyright 2024 Eugenio Lomurno

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
```

