[![Fixel Algorithms](https://i.imgur.com/AqKHVZ0.png)](https://fixelalgorithms.gitlab.io/)

# AI Program

## Machine Learning - Deep Learning - Transfer Learning

> Notebook by:
> - Royi Avital RoyiAvital@fixelalgorithms.com

## Revision History

| Version | Date       | User        |Content / Changes                                                   |
|---------|------------|-------------|--------------------------------------------------------------------|
| 1.0.000 | 29/05/2024 | Royi Avital | First version                                                      |

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/FixelAlgorithmsTeam/FixelCourses/blob/master/AIProgram/2024_02/0093DeepLearningTransferLearning.ipynb)

In [None]:
# Import Packages

# General Tools
import numpy as np
import scipy as sp
import pandas as pd

# Machine Learning
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import ParameterGrid

# Deep Learning
import torch
import torch.nn            as nn
import torch.nn.functional as F
from torch.optim.optimizer import Optimizer
from torch.optim.lr_scheduler import LRScheduler
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchinfo
from torchmetrics.classification import MulticlassAccuracy
import torchvision
from torchvision.transforms import v2 as TorchVisionTrns

# Miscellaneous
import copy
from enum import auto, Enum, unique
import math
import os
from platform import python_version
import random
import shutil
import time

# Typing
from typing import Callable, Dict, Generator, List, Optional, Self, Set, Tuple, Union

# Visualization
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# Jupyter
from IPython import get_ipython
from IPython.display import HTML, Image
from IPython.display import display
from ipywidgets import Dropdown, FloatSlider, interact, IntSlider, Layout, SelectionSlider
from ipywidgets import interact

## Notations

* <font color='red'>(**?**)</font> Question to answer interactively.
* <font color='blue'>(**!**)</font> Simple task to add code for the notebook.
* <font color='green'>(**@**)</font> Optional / Extra self practice.
* <font color='brown'>(**#**)</font> Note / Useful resource / Food for thought.

Code Notations:

```python
someVar    = 2; #<! Notation for a variable
vVector    = np.random.rand(4) #<! Notation for 1D array
mMatrix    = np.random.rand(4, 3) #<! Notation for 2D array
tTensor    = np.random.rand(4, 3, 2, 3) #<! Notation for nD array (Tensor)
tuTuple    = (1, 2, 3) #<! Notation for a tuple
lList      = [1, 2, 3] #<! Notation for a list
dDict      = {1: 3, 2: 2, 3: 1} #<! Notation for a dictionary
oObj       = MyClass() #<! Notation for an object
dfData     = pd.DataFrame() #<! Notation for a data frame
dsData     = pd.Series() #<! Notation for a series
hObj       = plt.Axes() #<! Notation for an object / handler / function handler
```

### Code Exercise

 - Single line fill

 ```python
 valToFill = ???
 ```

 - Multi Line to Fill (At least one)

 ```python
 # You need to start writing
 ????
 ```

 - Section to Fill

```python
#===========================Fill This===========================#
# 1. Explanation about what to do.
# !! Remarks to follow / take under consideration.
mX = ???

???
#===============================================================#
```

In [None]:
# Configuration
# Global setup of the notebook: RNG seeds, color palette, environment detection and cuDNN settings.
# %matplotlib inline

seedNum = 512
np.random.seed(seedNum) #<! Seeds NumPy's global RNG
random.seed(seedNum) #<! Seeds Python's `random` module
# PyTorch's RNG is not seeded here (See the commented `Reproducibility` lines below)

# Matplotlib default color palette
lMatPltLibclr = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
# sns.set_theme() #<! Apply SeaBorn theme

# `True` when the string representation of the IPython shell holds `google.colab`
runInGoogleColab = 'google.colab' in str(get_ipython())

# Improve performance by benchmarking
torch.backends.cudnn.benchmark = True

# Reproducibility (Per PyTorch Version on the same device)
# Enabling the lines below also overrides the benchmark flag set above.
# torch.manual_seed(seedNum)
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark     = False #<! Makes things slower


In [None]:
# Constants

# Plotting defaults
FIG_SIZE_DEF    = (8, 8)
ELM_SIZE_DEF    = 50
MARKER_SIZE_DEF = 10
LINE_WIDTH_DEF  = 2
CLASS_COLOR     = ('b', 'r')
EDGE_COLOR      = 'k'

# Folders and files
DATA_FOLDER_PATH        = 'Data'
DATA_SET_FOLDER_NAME    = 'IntelImgCls'
DATA_SET_FILE_NAME      = 'archive.zip'
TENSOR_BOARD_BASE       = 'TB'

# Data set classes: The list is the single source, the dictionary maps the index to the name
L_CLASSES  = ['Buildings', 'Forest', 'Glacier', 'Mountain', 'Sea', 'Street']
D_CLASSES  = dict(enumerate(L_CLASSES))
T_IMG_SIZE = (150, 150, 3)


In [None]:
# Download Auxiliary Modules for Google Colab
# The course modules are fetched into the working folder so the imports of the next cell can find them.
# The `!` lines are IPython shell commands, hence this cell is valid only within a notebook.
if runInGoogleColab:
    !wget https://raw.githubusercontent.com/FixelAlgorithmsTeam/FixelCourses/master/AIProgram/2024_02/DataManipulation.py
    !wget https://raw.githubusercontent.com/FixelAlgorithmsTeam/FixelCourses/master/AIProgram/2024_02/DataVisualization.py
    !wget https://raw.githubusercontent.com/FixelAlgorithmsTeam/FixelCourses/master/AIProgram/2024_02/DeepLearningPyTorch.py

In [None]:
# Courses Packages

from DataVisualization import PlotLabelsHistogram
from DeepLearningPyTorch import TBLogger, TestDataSet
from DeepLearningPyTorch import TrainModel, TrainModelSch


* <font color='blue'>(**!**)</font> Go through `TestDataSet`'s code.

In [None]:
# General Auxiliary Functions

def GenResNetModel( trainedModel: bool, numCls: int, resNetDepth: int = 18 ) -> nn.Module:
    """
    Generates a ResNet model with `numCls` outputs.
    Input:
      - trainedModel : If `True`, the ImageNet weights (`IMAGENET1K_V1`) are loaded and
                       the `fc` layer is replaced by a new head: Linear -> ReLU -> Linear.
                       If `False`, the model is created without weights and with
                       `num_classes = numCls`.
      - numCls       : The number of classes (Outputs) of the model.
      - resNetDepth  : The depth of the model, either 18 or 34.
    Output:
      - The generated model.
    Remarks:
      - Raises `ValueError` for any other value of `resNetDepth`.
      - On the API of the weights see: How to Train State of the Art Models Using TorchVision's Latest Primitives
        https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives
    """

    # Validate the depth before touching the models
    if resNetDepth not in (18, 34):
        raise ValueError(f'The `resNetDepth`: {resNetDepth} is invalid!')

    if resNetDepth == 18:
        hModelGen = torchvision.models.resnet18
        oWeights  = torchvision.models.ResNet18_Weights.IMAGENET1K_V1
    else:
        hModelGen = torchvision.models.resnet34
        oWeights  = torchvision.models.ResNet34_Weights.IMAGENET1K_V1

    if not trainedModel:
        # Pre defined model: Architecture only, the output size is set by the constructor
        return hModelGen(weights = None, num_classes = numCls)

    # Pre trained model: Keep the body, replace the head
    oModel    = hModelGen(weights = oWeights)
    numFeatIn = oModel.fc.in_features #<! Input size of the original head
    # Assuming numCls << 100
    lHeadLayers = [nn.Linear(numFeatIn, 128), nn.ReLU(), nn.Linear(128, numCls)]
    oModel.fc   = nn.Sequential(*lHeadLayers)

    return oModel



## Transfer Learning

The ResNet model is considered to be one of the most successful architectures.  
Its main novelty is the _Skip Connection_ which improved the performance greatly.

By _hand waving_, the contribution of the skip connection can be explained as:

 * Ensemble of models.
 * Skipping the vanishing gradient (The gradient can flow through the skip path).


This notebook presents the basics of _Transfer Learning_ in the context of image classification:
 - Loading a pretrained model on a classification task.
 - Adjusting its structure to the new classification task.
 - Finetuning the model.
 - Evaluating the model.

</br>

* <font color='brown'>(**#**)</font> A great recap on _Model Fine Tuning_ is given in the book [Dive into Deep Learning](https://d2l.ai): [Computer Vision - Fine Tuning](https://d2l.ai/chapter_computer-vision/fine-tuning.html).

In [None]:
# Parameters
# User tunable parameters of the notebook.

# Data

# Model
# NOTE(review): `dropP` is not referenced by the cells visible in this notebook - confirm it is needed.
dropP = 0.5 #<! Dropout Layer

# Training
batchSize   = 128 #<! Train batch size (The validation loader uses twice this size)
numWorkers  = 4 #<! Number of workers
numEpochs   = 10 #<! Used for both the training and the total steps of the scheduler

# Visualization
# NOTE(review): `numImg` is not referenced by the cells visible in this notebook - confirm it is needed.
numImg = 3


## Generate / Load Data

This notebook uses the [Intel Image Classification Data Set](https://www.kaggle.com/datasets/puneet6060/intel-image-classification).  
The data set is composed of 6 classes: `Buildings`, `Forest`, `Glacier`, `Mountain`, `Sea`, `Street`.

1. Download the Zip file `archive.zip` from [Intel Image Classification Data Set](https://www.kaggle.com/datasets/puneet6060/intel-image-classification).
2. Copy / Move the file into `AIProgram/<YYYY_MM>/Data` folder.

The following code will arrange the data in a manner compatible with PyTorch's [`ImageFolder`](https://pytorch.org/vision/main/generated/torchvision.datasets.ImageFolder.html).

* <font color='brown'>(**#**)</font> The data set originally appeared on [Analytics Vidhya - Practice Problem: Intel Scene Classification Challenge](https://datahack.analyticsvidhya.com/contest/practice-problem-intel-scene-classification-challe).
* <font color='brown'>(**#**)</font> Some of the images are not `150x150x3` hence they should be handled.
* <font color='brown'>(**#**)</font> Some of the images are not labeled correctly (See discussions on Kaggle).

In [None]:
# Arrange Data for Image Folder
# Assumes `archive.zip` in `./Data`
# The archive is unpacked into a temporary folder and its files are moved into:
#  - `Train/<Class>`      : Files of folders with `train` in their path.
#  - `Validation/<Class>` : Files of folders with `test` in their path.
#  - `Test`               : Files of any other folder.
# The class is set by the (Lower case) class name appearing in the folder's path.
# An empty `.processed` file marks the work as done, so the cell runs only once.

dataSetPath = os.path.join(DATA_FOLDER_PATH, DATA_SET_FOLDER_NAME)
os.makedirs(dataSetPath, exist_ok = True) #<! Creates missing parent folders as well
lFiles = os.listdir(dataSetPath)

if '.processed' not in lFiles: #<! Run only once
    archivePath = os.path.join(DATA_FOLDER_PATH, DATA_SET_FILE_NAME)
    tmpPath     = os.path.join(dataSetPath, 'TMP')

    if not os.path.isfile(archivePath):
        # Fail early with a clear message instead of an obscure unpacking error
        raise FileNotFoundError(f'The data set archive `{archivePath}` was not found')

    os.makedirs(tmpPath, exist_ok = True)
    os.makedirs(os.path.join(dataSetPath, 'Test'), exist_ok = True)
    for clsName in L_CLASSES:
        os.makedirs(os.path.join(dataSetPath, 'Train', clsName), exist_ok = True)
        os.makedirs(os.path.join(dataSetPath, 'Validation', clsName), exist_ok = True)

    shutil.unpack_archive(archivePath, tmpPath)

    for dirPath, _, lF in os.walk(tmpPath):
        if not lF:
            continue #<! Folder with no files

        # Match on the path within the archive only, so the names of the
        # parent folders (`DATA_FOLDER_PATH`, etc.) can not trigger a false match
        relPath = os.path.relpath(dirPath, tmpPath)

        if 'test' in relPath:
            splitName = 'Validation' #<! The labeled test set is used for validation
        elif 'train' in relPath:
            splitName = 'Train'
        else:
            splitName = 'Test'
        dstPath = os.path.join(dataSetPath, splitName)

        # The first class (By the order of `L_CLASSES`) which appears in the path, if any
        clsName = next((clsName for clsName in L_CLASSES if clsName.lower() in relPath), None)
        if clsName is not None:
            dstPath = os.path.join(dstPath, clsName)

        # Guarantees the destination is a folder (Otherwise `shutil.move()` renames the file)
        os.makedirs(dstPath, exist_ok = True)
        for fileName in lF:
            shutil.move(os.path.join(dirPath, fileName), dstPath)

    shutil.rmtree(tmpPath)

    # Create the (Empty) marker file, the context manager guarantees it is closed
    with open(os.path.join(dataSetPath, '.processed'), 'w'):
        pass


In [None]:
# Load Data
# The `Train` / `Validation` folders are loaded as labeled image folders.
# The `Test` folder is loaded by the course's `TestDataSet`.

dataSetRoot = os.path.join(DATA_FOLDER_PATH, DATA_SET_FOLDER_NAME) #<! Root folder of the arranged data

dsTrain = torchvision.datasets.ImageFolder(os.path.join(dataSetRoot, 'Train'), transform = torchvision.transforms.ToTensor())
dsVal   = torchvision.datasets.ImageFolder(os.path.join(dataSetRoot, 'Validation'), transform = torchvision.transforms.ToTensor())
dsTest  = TestDataSet(os.path.join(dataSetRoot, 'Test'), transform = torchvision.transforms.ToTensor()) #<! Does not return a label

numSamples  = len(dsTrain) #<! Number of train samples
lClass      = dsTrain.classes #<! Class names as found by `ImageFolder`

print(f'The data set number of samples (Train): {numSamples}')
print(f'The data set number of samples (Validation): {len(dsVal)}')
print(f'The data set number of samples (Test): {len(dsTest)}')
print(f'The unique values of the labels: {np.unique(lClass)}')

* <font color='brown'>(**#**)</font> The dataset is indexable (Subscriptable). It returns a tuple of the features and the label.
* <font color='brown'>(**#**)</font> While data is arranged as `H x W x C` the transformer, when accessing the data, will convert it into `C x H x W`. 

In [None]:
# Element of the Data Set
# Indexing the data set returns the pair (Features, Label) of a single sample.

mX, valY = dsTrain[0] #<! The first sample of the train set

print(f'The features shape: {mX.shape}')
print(f'The label value: {valY}')

### Plot the Data

In [None]:
# Plot Data
# Shows a 3 x 3 grid of randomly chosen train samples with their index and label.

vIdx = np.random.choice(numSamples, size = 9) #<! With replacement (NumPy's default)
hF, vHa = plt.subplots(nrows = 3, ncols = 3, figsize = (10, 10))
vHa = vHa.flat

for sampleIdx, hA in zip(vIdx, vHa):
    # Access the sample once: Each access reads the image and applies the transform
    tImg, lblIdx = dsTrain[sampleIdx]
    hA.imshow(tImg.permute((1, 2, 0)).numpy()) #<! Channels last for Matplotlib
    hA.tick_params(axis = 'both', left = False, top = False, right = False, bottom = False, 
                   labelleft = False, labeltop = False, labelright = False, labelbottom = False)
    hA.grid(False)
    hA.set_title(f'Index = {sampleIdx}, Label = {L_CLASSES[lblIdx]}')

plt.show()

* <font color='red'>(**?**)</font> If data is converted into _grayscale_, how would it affect the performance of the classifier? Explain.  
  You may assume the conversion is done using the mean value of the RGB pixel.

## Pre Process Data

This section:

 * Normalizes the data in a predefined manner.
 * Takes a sub set of the data.

Since the model is "borrowed" by _Transfer Learning_ one must:

1. Use the statistics from the original training set.
1. Adapt the input dimensions to match the original training set.

* <font color='brown'>(**#**)</font> The values in training are specified in documentation.  
  As an example, see [`ResNet50` Weights](https://pytorch.org/vision/stable/models/generated/torchvision.models.resnet50.html#torchvision.models.ResNet50_Weights).

In [None]:
# The Standardization Parameters
# ImageNet statistics
# One value per color channel, used by the `Normalize` transform of the data sets.
vMean = np.array([0.485, 0.456, 0.406])
vStd  = np.array([0.229, 0.224, 0.225])

print('µ =', vMean)
print('σ =', vStd)

In [None]:
# Check Image Dimensions (Run Only Once)
# Verifies all images have the same size: 3 x 150 x 150.

# for ii in range(len(dsTrain)):
#     xx, yy = dsTrain[ii]
#     imgH = xx.shape[1]
#     imgW = xx.shape[2]
#     if ((imgH != 150) or (imgW != 150)):
#         print(f'The image {dsTrain.imgs[ii][0]} has incorrect size')

In [None]:
# Update Transforms
# Using v2 Transforms.
# Taking care of the different dimensions of some images.
# Matching the input size of ImageNet.
# Train: Includes augmentation (Random horizontal flip).
oDataTrnsTrain = TorchVisionTrns.Compose([
    TorchVisionTrns.ToImage(),
    TorchVisionTrns.ToDtype(torch.float32, scale = True),
    TorchVisionTrns.Resize(224),
    TorchVisionTrns.CenterCrop(224), #<! Ensures size is 224 x 224 (Crops the center after the resize)
    TorchVisionTrns.RandomHorizontalFlip(p = 0.5),
    TorchVisionTrns.Normalize(mean = vMean, std = vStd),
])
# Validation: Same as train without the augmentation.
oDataTrnsVal = TorchVisionTrns.Compose([
    TorchVisionTrns.ToImage(),
    TorchVisionTrns.ToDtype(torch.float32, scale = True),
    TorchVisionTrns.Resize(224),
    TorchVisionTrns.CenterCrop(224), #<! Ensures size is 224 x 224 (Crops the center after the resize)
    TorchVisionTrns.Normalize(mean = vMean, std = vStd),
])

# Using V1
# oDataTrnsTrain = torchvision.transforms.Compose([
#     torchvision.transforms.Resize(224),
#     torchvision.transforms.CenterCrop(224),
#     torchvision.transforms.RandomHorizontalFlip(0.5),
#     torchvision.transforms.ToTensor(),
#     torchvision.transforms.Normalize(mean = vMean, std = vStd),
# ])

# oDataTrnsVal = torchvision.transforms.Compose([
#     torchvision.transforms.Resize(224),
#     torchvision.transforms.CenterCrop(224),
#     torchvision.transforms.ToTensor(),
#     torchvision.transforms.Normalize(mean = vMean, std = vStd),
# ])

# Update the DS transformer
# NOTE(review): The transform of `dsTest` is not updated here, it keeps the one set at loading.
dsTrain.transform   = oDataTrnsTrain
dsVal.transform     = oDataTrnsVal

* <font color='red'>(**?**)</font> What does `RandomHorizontalFlip` do? Why can it be used?

In [None]:
# "Normalized" Image
# Displays a train sample as the model sees it, namely after the updated transform.

mX, valY = dsTrain[5]

hF, hA = plt.subplots()
hImg = hA.imshow(np.transpose(mX, (1, 2, 0))) #<! Channels last for Matplotlib
hF.colorbar(hImg)
plt.show()

* <font color='red'>(**?**)</font> How can one get the original image from `mX`?

### Data Loaders

This section defines the data loaders.



In [None]:
# Data Loader
# Train: Shuffled, the last (Partial) batch is dropped.
# Validation: Not shuffled, twice the batch size of the train loader.
# Both loaders keep their workers alive between epochs (`persistent_workers`).

dlTrain = torch.utils.data.DataLoader(dsTrain, shuffle = True, batch_size = 1 * batchSize, num_workers = numWorkers, drop_last = True, persistent_workers = True)
dlVal   = torch.utils.data.DataLoader(dsVal, shuffle = False, batch_size = 2 * batchSize, num_workers = numWorkers, persistent_workers = True)


* <font color='blue'>(**!**)</font> Plot the histogram of labels of the data. Is it balanced?

In [None]:
# Iterate on the Loader
# The first batch.
# A new iterator is created over the train loader and a single batch is pulled from it.
tX, vY = next(iter(dlTrain)) #<! PyTorch Tensors

print(f'The batch features dimensions: {tX.shape}')
print(f'The batch labels dimensions: {vY.shape}')

In [None]:
# Looping
# The `zip()` with `range(1)` limits the loop to a single batch of the validation loader.
# Pay attention: `tX` and `vY` now hold a validation batch (Twice the train batch size).
for ii, (tX, vY) in zip(range(1), dlVal): #<! https://stackoverflow.com/questions/36106712
    print(f'The batch features dimensions: {tX.shape}')
    print(f'The batch labels dimensions: {vY.shape}')

## Load the Model

This section loads the model.  
The number of outputs is adjusted to match the number of classes in the data.

In [None]:
# Loading a Pre Defined Model
# Architecture only (No pre trained weights), the number of outputs matches the number of classes.
oModelPreDef = GenResNetModel(trainedModel = False, numCls = len(L_CLASSES))


* <font color='blue'>(**!**)</font> Go through `GenResNetModel()`'s code.

In [None]:
# Model Information - Pre Defined
# Pay attention to the layers name.
# The input size is taken from the last batch assigned to `tX`.
torchinfo.summary(oModelPreDef, tX.shape, col_names = ['kernel_size', 'output_size', 'num_params'], device = 'cpu', row_settings = ['depth', 'var_names'])

* <font color='red'>(**?**)</font> Which layer should be adapted?
* <font color='red'>(**?**)</font> Does the last (_Head_) dense layer include a bias? Explain.

In [None]:
# Loading a Pre Trained Model
# The pre trained weights are loaded and the head (`fc`) is replaced to match the number of classes.
oModelPreTrn = GenResNetModel(trainedModel = True, numCls = len(L_CLASSES))

In [None]:
# Model Information - Pre Trained
# Pay attention to the variable name
# The input size is taken from the last batch assigned to `tX`.
torchinfo.summary(oModelPreTrn, tX.shape, col_names = ['kernel_size', 'output_size', 'num_params'], device = 'cpu', row_settings = ['depth', 'var_names'])

## Train the Model

This section trains the model.  
It compares pre trained model with pre defined model using the same number of epochs.


### Transfer Learning Fine Tuning

The training of the model on the new data is often called _fine tuning_ (See [Fine Tuning vs. Transfer Learning vs. Learning from Scratch](https://stats.stackexchange.com/questions/343763) for a discussion on the semantic).  
The concept is training the new layers of the model with the new data while keeping most of the "knowledge" of the model from its original training.  
The balance is done by the adaptation of the learning per layer with the extreme of zero learning rate for some layers (Freezing).  
The most used combinations are:

 - Freeze Layers  
   Freeze (Zero learning rate) the pre trained layers by disabling the gradient (`requires_grad`).
 - Smaller Learning Rate  
   Set a smaller learning rate to the pre trained layers.
 - Small Learning Rate  
   Use a small learning rate for the whole model.

In some cases, the policy used is a combination of 2 (Freeze at the beginning / end, then release, etc...).

* <font color='brown'>(**#**)</font> Freezing is also a regularization as it assists in preventing _over fitting_.
* <font color='brown'>(**#**)</font> [PyTorch Optimizer - Per Parameter Learning Rate](https://pytorch.org/docs/stable/optim.html#per-parameter-options).
* <font color='brown'>(**#**)</font> See [Dive into Deep Learning - Computer Vision - Fine Tuning](http://d2l.ai/chapter_computer-vision/fine-tuning.html).
* <font color='brown'>(**#**)</font> Guide to Fine Tuning in PyTorch: [Part I](https://scribe.rip/8990194b71e), [Part II](https://scribe.rip/b0f8f447546b).
* <font color='brown'>(**#**)</font> [How to Freeze Model Weights in PyTorch for Transfer Learning: Step by Step Tutorial](https://scribe.rip/a533a58051ef).

In [None]:
# Freeze Layers
# Iterating over the net, see https://stackoverflow.com/questions/54203451
# Any parameter whose name does not hold `fc` (The new head) is excluded from the gradient calculation.

for paramName, oPrm in oModelPreTrn.named_parameters():
    if 'fc' in paramName:
        continue #<! Part of the head, left trainable
    oPrm.requires_grad = False

In [None]:
# Run Device
# Falls back to the CPU when CUDA is not available.

runDevice = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') #<! The 1st CUDA device


In [None]:
# Models
# List of (Name, Model) pairs, the name is the key of the training history.

lModel = [('Pre Defined Model', oModelPreDef), ('Pre Trained Model', oModelPreTrn)]


In [None]:
# Loss and Score Function
# Loss: Cross entropy. Score: Accuracy with `micro` averaging.

hL = nn.CrossEntropyLoss()
hS = MulticlassAccuracy(num_classes = len(lClass), average = 'micro')
hL = hL.to(runDevice) #<! Not required!
hS = hS.to(runDevice) #<! The score object is moved to the run device

* <font color='brown'>(**#**)</font> The averaging mode `macro` averages the samples per class and then averages the results of the classes.
* <font color='brown'>(**#**)</font> The averaging mode `micro` averages all samples.
* <font color='red'>(**?**)</font> Check results with `average = 'macro'`. Explain how `shuffle = False` in the validation data loader affects the results.

In [None]:
# Training Loop
# Trains each model of `lModel` with the same optimizer / scheduler configuration.
# The history of each model is stored in `dModelHist` under the model's name.

dModelHist = {} #<! Model name -> (Train loss, Train score, Validation loss, Validation score, Learning rate)

for ii, (modelName, oModel) in enumerate(lModel):
    print(f'Training with the {modelName} model')
    oModel = oModel.to(runDevice) #<! Transfer model to device
    # All the parameters are given to the optimizer, including those with `requires_grad = False`
    oOpt = torch.optim.AdamW(oModel.parameters(), lr = 1e-4, betas = (0.9, 0.99), weight_decay = 2e-4) #<! Define optimizer
    # The total steps equal epochs x batches, presumably the scheduler is stepped per batch - TODO confirm in `TrainModelSch()`
    # NOTE(review): Per the `OneCycleLR` documentation the scheduler sets the learning rate, so the `lr` above is expected to be overridden.
    oSch = torch.optim.lr_scheduler.OneCycleLR(oOpt, max_lr = 2e-2, total_steps = numEpochs * len(dlTrain))
    # The first output of `TrainModelSch()` is discarded, the rest is the training history
    _, lTrainLoss, lTrainScore, lValLoss, lValScore, lLearnRate = TrainModelSch(oModel, dlTrain, dlVal, oOpt, oSch, numEpochs, hL, hS)
    dModelHist[modelName] = lTrainLoss, lTrainScore, lValLoss, lValScore, lLearnRate

* <font color='green'>(**@**)</font> Add _TensorBoard_ based monitoring. You should use the `TBLogger` class.
* <font color='red'>(**?**)</font> Compare memory consumption during the training of the models. How can it be utilized?

In [None]:
# Plot Training Phase
# Left: Loss, Middle: Score, Right: Learning rate. One set of curves per trained model.

hF, vHa = plt.subplots(nrows = 1, ncols = 3, figsize = (18, 5))
vHa = np.ravel(vHa)

for modelKey, tuHist in dModelHist.items():
    # The order matches the tuple stored per model by the training loop
    lTrnLoss, lTrnScore, lVldLoss, lVldScore, lLrHist = tuHist

    vHa[0].plot(lTrnLoss, lw = 2, label = f'Train {modelKey}')
    vHa[0].plot(lVldLoss, lw = 2, label = f'Validation {modelKey}')

    vHa[1].plot(lTrnScore, lw = 2, label = f'Train {modelKey}')
    vHa[1].plot(lVldScore, lw = 2, label = f'Validation {modelKey}')

    # The history of this model, not the leftover variable of the last trained model
    vHa[2].plot(lLrHist, lw = 2, label = f'{modelKey}')

# The decorations do not depend on the model, hence set once
lAxisText = [
    ('Cross Entropy Loss', 'Epoch', 'Loss'),
    ('Accuracy Score', 'Epoch', 'Score'),
    ('Learn Rate Scheduler', 'Iteration', 'Learn Rate'),
]
for hA, (titleStr, xLabelStr, yLabelStr) in zip(vHa, lAxisText):
    hA.set_title(titleStr)
    hA.set_xlabel(xLabelStr)
    hA.set_ylabel(yLabelStr)
    hA.legend()

plt.show()

* <font color='green'>(**@**)</font> Build the `Test` data loader (You may use `dsTest`) and examine the models on a few samples.
* <font color='green'>(**@**)</font> Redo the training with a different model.