In [None]:
# Choose what to run
# These module-level flags gate whole cells further down (`if Colab:`, `if BoW:`, `if CNN:` ...).

Colab = True    # set to True if running this notebook in Colab
BigRam = True   # set to True if you have plenty of RAM and want to keep all models in memory
                # WARNING: some of these models are very big

BoW = False     # set to True if you want to run Bag of Words
BoWFT = False    # set to True if you want to Fine-Tune the Bag of Words

CNN = True      # set to True if you want to run Convolutional Neural Network
CNNFT = False    # set to True if you want to Fine-Tune the Convolutional Neural Network

In [None]:
# Specify the local directory where saved models and image data are found.
# Only used when NOT running in Colab.
# NOTE(review): hard-coded, user-specific absolute path - edit it before running locally.
# The trailing '/' matters: later cells build paths as f'{LocalDir}saved/...'.

if not Colab:
    LocalDir = 'C:/Users/Filippo/Documents/Docs/Uni/AI/First Year/Second Semester/Supervised Learning/Exam/'

# 😡 Python stuff

In [None]:
# Colab imports
if Colab:
    !pip install -q optuna
    from google.colab import drive, files

import numpy as np                  # numpy because numpy is life
from PIL import Image               # Image to read images
import os                           # os to handle directories and files
import matplotlib.pyplot as plt     # matplotlib for cool graphs
import matplotlib.patheffects as pe
import pandas as pd                 # pandas for dataset management
import cv2                          # cv2 for image transformations
from tqdm import tqdm               # tqdm for cool progress bars
import random                       # random to generate random values
import pickle                       # pickle for saving various data objects
import time                         # time to keep track of computational times

# torch for neural network stuff
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchsummary import summary

# sklearn for clustering stuff
from sklearn.feature_extraction import image as skimage
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier, VotingClassifier
from scipy.special import softmax

# optuna for hyperparameter tuning
import optuna

In [None]:
# Detect CUDA once; the result is exposed as `train_on_gpu` (bool) and `device` (torch.device)

train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    print('CUDA:\tavailable\nGPU:\tactivated\n\t(•_•)\n\t( •_•)>⌐■-■\n\t(⌐■_■)')
    device = torch.device('cuda:0')
else:
    print('CUDA:\tNOT available\nGPU:\tdeactivated\n\t(⌐■_■)\n\t( •_•)>⌐■-■\n\t(•_•)')
    device = torch.device('cpu')

In [None]:
# google Colab stuff

if Colab:
    # mount drive
    drive.mount('/content/gdrive')

    # set you Google Drive path
    gpath = '/content/gdrive/MyDrive/AI4S&T/Supervised Learning/SUP - Exam/'

    # download TinyImageNet.zip
    !gdown '1BoE0v0e9gdWFXDLl4Y7en7J4qtt9vC-h'
    # unzip dataset
    !unzip -n -q '/content/TinyImageNet.zip'
    # set Directory
    LocalDir = '/content/'

    # create useful folders
    !mkdir saved
    !mkdir saved/classifiers
    !mkdir saved/neuralnets
    !mkdir saved/studies
else:
    # create useful folders
    if not os.path.isdir(f'{LocalDir}saved'):
        os.makedirs(f'{LocalDir}saved')
        os.makedirs(f'{LocalDir}saved/classifiers')
        os.makedirs(f'{LocalDir}saved/neuralnets')
        os.makedirs(f'{LocalDir}saved/studies')

# 🗃️ Dataset

In [None]:
# function to display images in a grid

def show_images(images, labels = True):
    """Display images in a roughly square grid (at least 2x2).

    images: when `labels` is truthy, an iterable of (image, label) pairs;
            otherwise a sequence of images accepted by `imshow`.
    labels: whether each item carries a label to draw as a title on its image.
    """
    if labels:
        pairs = [(item[0], item[1]) for item in images]
        imgs = [pair[0] for pair in pairs]
        labels = [pair[1] for pair in pairs]
    else:
        imgs = images

    n_imgs = len(imgs)
    n_rows = max(int(np.sqrt(n_imgs)), 2)
    n_cols = max(int(np.ceil(n_imgs/n_rows)), 2)

    fig, axs = plt.subplots(ncols = n_cols, nrows = n_rows, figsize = (10, 10), gridspec_kw = {'wspace':0, 'hspace':0})
    for idx in range(n_rows * n_cols):
        ax = axs[idx // n_cols, idx % n_cols]

        # grid cells beyond the last image are removed from the figure
        if idx >= n_imgs:
            fig.delaxes(ax)
            continue

        ax.imshow(imgs[idx])
        if labels:
            # white text with a black outline stays readable on any image
            ax.set_title(str(labels[idx]), loc = 'center', y = 0.05, fontsize = 15, color = 'white',
                path_effects = [pe.withStroke(linewidth = 3, foreground = "black")])
        ax.axis('off')
    plt.show()

In [None]:
#########################################################################################
# Custom Dataset
#########################################################################################

class DataSet(Dataset):
    """Image dataset backed by a table of (image_name, label) rows.

    data_dir:   directory that image names are resolved against
    dataframe:  pandas DataFrame whose rows unpack to (image_name, label)
    transforms: optional callable applied to each loaded PIL image
    """
    def __init__(self, data_dir, dataframe, transforms = None):
        self.data_dir = data_dir            # data directory where all images are stored
        self.transforms = transforms        # transforms to apply to each image

        self.data = dataframe.to_numpy()    # save pandas dataframe as numpy array

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        """Return (image, label) for row `index`; the image is RGB, transformed if requested."""
        image_name, label = self.data[index]
        image_path = os.path.join(self.data_dir, image_name).replace('\\','/')

        # load image and convert it to RGB (if not already);
        # the context manager closes the underlying file as soon as the pixels are read
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transforms is not None:
            image = self.transforms(image)

        return image, label

In [None]:
#########################################################################################
# Load Data
#########################################################################################

# Directory containing all images
DataDir = LocalDir + 'TinyImageNet/'

# Classes to predict
NumClasses = 100
Classes = np.arange(NumClasses)

# DEBUG ONLY: fraction (0-1] of the images to use for faster testing
PercentageDataToUse = 1

# Augment class 56
Augment = False

# Seed for the train/validation shuffle.
# The split must be identical on every run: the Bag-of-Words cells cache per-image
# descriptors on disk indexed by row position in `data_train`, so a different shuffle
# would pair cached descriptors with the wrong labels.
RandomSeed = 0


# Split Train data into Train + Validation
DF_train_val = pd.read_csv(DataDir + 'train.txt', sep = ' ', header = None, names = ['image_name', 'label'])

n_images = DF_train_val.shape[0]
train_size = int(0.8 * n_images)
val_size = n_images - train_size

# Shuffle data around (reproducibly)
DF_train_val = DF_train_val.sample(frac = 1, random_state = RandomSeed)

# Get data for training and for validation
data_train = DF_train_val[ : train_size]
data_val = DF_train_val[-val_size : ]

# Get data for testing (the dataset's 'val.txt' is used as the test set)
data_test = pd.read_csv(DataDir + 'val.txt', sep = ' ', header = None, names = ['image_name', 'label'])

# Print number of Train/Val/Test images
print(f'Train:\t{len(data_train)} images')
print(f'Val:\t{len(data_val)} images')
print(f'Test:\t{len(data_test)} images')

In [None]:
# Visualize some Images from the training set
# NOTE: `n_images` is reused here with a different meaning than in the data-loading cell
# (there: total rows of train.txt; here: how many images to preview).
n_images = 16
# `random` is not seeded in this cell, so a different sample is drawn on each run
random_indices = random.sample(range(len(data_train)), n_images)
# no transform is passed: images are shown at their native size
image_to_display = DataSet(DataDir, data_train.iloc[random_indices])
show_images(image_to_display, labels = True)

## 🧬 Data Augmentation

In [None]:
#########################################################################################
# Data Augmentation
#########################################################################################

# after doing some tests, we found out that class 56
# was performing much much worse compared to the others (<0.5 accuracy)
# so we can try to augment the images of this class and see what happens


# BEFORE:   0.5 ACC
if Augment:
    # select elements that have label 56
    toAug = data_train.loc[data_train['label'] == 56]
    NumOld = len(toAug)
    print(f'before:\t{NumOld} images with label 56')

    # multiplication factor for number of label 56 images
    AugMultiplier = 2
    TotalNewImages = int((AugMultiplier+1)*len(toAug))
    ToCreate = TotalNewImages - len(toAug)

    # check if modified data hasn't already been added to DataFrame
    # NOTE(review): 80001 presumably is "un-augmented train size + 1" - confirm it
    # still matches the dataset if the split ratio or PercentageDataToUse changes.
    if len(data_train)<80001:
        # rows are collected and appended once: concatenating inside the loop is quadratic
        new_rows = []
        for i in tqdm(range(ToCreate)):
            image_name, label = toAug.iloc[i%len(toAug)]

            image_path = os.path.join(DataDir, image_name).replace('\\','/')

            # i//NumOld is the augmentation round, so every source image gets _MOD0, _MOD1, ...
            new_rows.append({'image_name': f'{os.path.splitext(image_name)[0]}_MOD{i//NumOld}.jpg',
                             'label': 56})

            # load image
            image = np.array(Image.open(image_path).convert('RGB'))

            # flip image randomly (-1: both axes, 0: vertical, 1: horizontal)
            image = cv2.flip(image, random.choice([-1, 0, 1]))

            # brighten or darken image (cv2.add / cv2.subtract saturate instead of wrapping)
            brightmask = np.ones(image.shape, dtype = 'uint8') * 30
            if random.random() > 0.5:
                image = cv2.add(image, brightmask)
            else:
                image = cv2.subtract(image, brightmask)

            # rotate image by 45 degrees around its centre
            h, w = image.shape[:2]
            rot_mat = cv2.getRotationMatrix2D((w/2,h/2),45,1)
            image = cv2.warpAffine(image, rot_mat, (w,h))

            # save modified image next to the original
            image = Image.fromarray(image)
            image.save(f'{os.path.splitext(image_path)[0]}_MOD{i//NumOld}.jpg')

        # add new images to DataFrame
        if new_rows:
            data_train = pd.concat([data_train, pd.DataFrame(new_rows)], axis = 0, ignore_index = True)

    toAug = data_train.loc[data_train["label"] == 56]
    print()
    print(f'after:\t{len(toAug)} images with label 56\n')

    print(f'Train:\t{len(data_train)} images')
    print(f'Val:\t{len(data_val)} images')
    print(f'Test:\t{len(data_test)} images')
    # AFTER:

# 👜 Bag of Words

## 🗂️ Data

In [None]:
#########################################################################################
# Data Preparation for Bag of Words
#########################################################################################

# Images will be resized to this value
ImageSize = 64

In [None]:
# Transform to apply to data
# (resize only: the images stay PIL images, which the SIFT cells convert with np.array)
transform = transforms.Compose([
    transforms.Resize((ImageSize, ImageSize)),
])

# Datasets initialization
# `PercentageDataToUse` keeps only the first fraction of the rows of each table
data_to_use = int(PercentageDataToUse * len(data_train))
TrainDataSet = DataSet(DataDir, data_train[:data_to_use], transform)

data_to_use = int(PercentageDataToUse * len(data_test))
TestDataSet = DataSet(DataDir, data_test[:data_to_use], transform)

# Print Train/Test sizes (no validation split is built in this cell)
NumTraining = len(TrainDataSet)
print(f'Train:\t{NumTraining} images')

NumTesting = len(TestDataSet)
print(f'Test:\t{NumTesting} images')

## 🎯 Keypoints Extraction

In [None]:
# SIFT initialization

if BoW:
    # `Extractor` is the detector used by both the training and the testing cells
    sift = cv2.SIFT_create()
    orb = cv2.ORB_create()  # fast but terrible, don't use it :)
    Extractor = sift

In [None]:
# Extract keypoints and descriptors from each training image

if BoW:
    descriptors_path = f'{LocalDir}saved/descriptors'
    descriptors = None

    # if descriptors are already saved, load them
    if os.path.isfile(descriptors_path):
        print('Descriptors detected, loading ...')
        # NOTE: unpickling runs arbitrary code - only load cache files created by this notebook
        with open(descriptors_path, 'rb') as f:
            descriptors = pickle.load(f)

        # the cache is indexed by image position, so it is only usable for the same number of images
        if len(descriptors) != NumTraining:
            print(f'Cached descriptors cover {len(descriptors)} images but {NumTraining} are in use, recomputing ...')
            descriptors = None

    if descriptors is None:
        # save descriptors for each image (an empty list marks "no keypoints found")
        descriptors = [[] for _ in range(NumTraining)]
        keypoints = [[] for _ in range(NumTraining)]

        print(f'Extracting keypoints ...')
        # index loop: each image is loaded exactly once
        for i in tqdm(range(NumTraining)):
            # load image for sift input
            # NOTE(review): DataSet yields RGB images, so COLOR_BGR2GRAY swaps the red/blue
            # weights; kept as-is because the testing cell uses the same conversion.
            img = cv2.cvtColor(np.array(TrainDataSet[i][0]), cv2.COLOR_BGR2GRAY)

            # extract keypoints and descriptors
            kp, des = Extractor.detectAndCompute(img, mask = None)

            # update descriptors lists
            if des is not None:
                descriptors[i] = des.astype(np.double)
                keypoints[i] = kp

        # save descriptors to local directory
        with open(descriptors_path, 'wb') as f:
            pickle.dump(descriptors, f)

    # flatten descriptors (used for training k-means)
    descriptors_flat = [descriptor for image_descriptors in descriptors for descriptor in image_descriptors]
    print(f'\nKeypoints:\t{len(descriptors_flat)} total\n\t\t{len(descriptors_flat)/NumTraining:.2f} average')

In [None]:
# # Visualize keypoints from random images

if BoW:
    n_images = min(4, NumTraining)
    random_indices = random.sample(range(NumTraining), n_images)

    img_key = []
    for i in random_indices:
        gray = cv2.cvtColor(np.array(TrainDataSet[i][0]), cv2.COLOR_BGR2GRAY)
        # keypoints are detected again here: the `keypoints` list only exists when the
        # descriptors were computed in this session (it is not part of the on-disk cache)
        kp, _ = Extractor.detectAndCompute(gray, mask = None)
        img_key.append(cv2.drawKeypoints(gray, kp, None))
    show_images(img_key, labels = False)

## 📊 Words Clustering

In [None]:
# Preprocess the descriptors by applying a StandardScaler

if BoW:
    # NOTE(review): cell-local import; consider moving it to the imports cell
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()

    # Fit the scaler on all training descriptors and scale them in one go.
    # `scaler` is reused later to scale per-image descriptors before clustering.
    descriptors_flat = scaler.fit_transform(np.array(descriptors_flat))

In [None]:
# MiniBatchKMeans initialization

if BoW:
    # Set dictionary size (== number of words == number of clusters)
    # NWords is also part of every saved model's file name in the following cells
    NWords = 3000

    # Mini Batches k-Means is just k-Means but much much much much much faster
    # (and a tidbit, a smidgen less accurate)
    MBkMeans = MiniBatchKMeans(n_clusters = NWords, batch_size = 1024, n_init = 'auto', random_state = 0)

In [None]:
# Cluster the descriptors using k-means (or reuse a previously fitted model)

if BoW:
    # if model is already saved, load it
    # NOTE: unpickling runs arbitrary code - only load files created by this notebook
    if os.path.isfile(f'{LocalDir}saved/classifiers/MBkMeans{NWords}.pkl'):
        print('Model detected, loading ...')
        with open(f'{LocalDir}saved/classifiers/MBkMeans{NWords}.pkl', 'rb') as f:
            MBkMeans = pickle.load(f)

    # otherwise train model and save it
    else:
        MBkMeans.fit(descriptors_flat)
        with open(f'{LocalDir}saved/classifiers/MBkMeans{NWords}.pkl', 'wb') as f:
            pickle.dump(MBkMeans, f)

    # free some RAM (the flat descriptors are kept only when fine-tuning the Bag of Words)
    if not BoWFT: del descriptors_flat

##  🎰 Classifier
Train each classifier on the training images and their labels, feeding it the bag-of-words histogram obtained by assigning every image descriptor to its nearest MBkMeans cluster.

In [None]:
#########################################################################################
# Words clustering
#########################################################################################

if BoW:
    # save histogram and labels for each image
    train_histograms = [[] for _ in range(NumTraining)]
    train_labels = [-1 for _ in range(NumTraining)]

    # compute BoW histogram for each image
    print(f'Clustering descriptors ...')
    for i in tqdm(range(NumTraining)):
        # load descriptors for current image (an empty list means no keypoints were found)
        if not isinstance(descriptors[i], list):
            des = scaler.transform(descriptors[i])
        else:
            des = []

        # save label for current image
        # read straight from the dataset's table: indexing TrainDataSet[i] would load and
        # resize the image from disk only to throw it away
        train_labels[i] = TrainDataSet.data[i][1]

        # compute histogram of words by clustering descriptors
        hist = np.zeros(NWords)
        if len(des) != 0:
            indices = MBkMeans.predict(des)
            hist += np.bincount(indices, minlength = NWords)

        # normalize histogram (word frequencies instead of raw counts) and save it
        train_histograms[i] = hist/max(1, len(des))

    print(f'Done clustering.')

In [None]:
# list of Classifiers
# NOTE(review): the training/testing/evaluation cells identify each model by the first
# three characters of its repr (`str(c)[:3]`). Both SVC entries below map to 'SVC', so
# they share one file on disk: the second is reported as already trained and both are
# tested with the same pickle. Give them distinct keys if both are needed.
Classifiers = [
    KNeighborsClassifier(n_neighbors = 10),
    SGDClassifier(loss = 'log_loss', penalty = 'l1'),
    SVC(kernel = 'poly', degree = 2, cache_size = 512),
    tree.DecisionTreeClassifier(),
    RandomForestClassifier(n_estimators = 300),
    AdaBoostClassifier(n_estimators=500),
    BaggingClassifier(),
    SVC()
    ]

In [None]:
#########################################################################################
# Classifier Training
#########################################################################################

if BoW:
    # train Classifiers to predict labels of images by their BoW histogram
    print(f'\nTraining classifiers ...')
    for c in Classifiers:
        ti = time.time()
        print()

        # models are identified on disk by the first 3 characters of their repr
        tag = str(c)[:3]
        model_path = f'{LocalDir}saved/classifiers/{tag}{NWords}.pkl'

        # if model is already saved, skip
        if os.path.isfile(model_path):
            print(f'{tag} alredy trained ...')

        # otherwise train model and save it
        else:
            print(f'{tag}: training ...')
            c.fit(train_histograms, train_labels)
            with open(model_path, 'wb') as f:
                pickle.dump(c, f)

        # NOTE: the fitted model stays referenced by `Classifiers`, so deleting the loop
        # variable here would not release any memory; the misleading `del c` was removed.

        elapsed = time.time() - ti
        print(f'\t... done ({int(elapsed)}s)')

In [None]:
#########################################################################################
# Classifier Testing
#########################################################################################
if BoW:
    # save predicted labels by each classifier
    pred_labels_c = [[] for _ in range(len(Classifiers))]

    # save histogram and label for each image
    test_histograms = [[] for _ in range(NumTesting)]
    test_labels = [-1 for _ in range(NumTesting)]

    # compute BoW histogram for each image
    print(f'Clustering descriptors ...')
    for i in tqdm(range(NumTesting)):
        # load the sample once and use it for both the label and the SIFT input
        image, test_labels[i] = TestDataSet[i]
        img = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2GRAY)

        # extract keypoints and descriptors
        _, des = Extractor.detectAndCompute(img, mask = None)

        # compute histogram of words by clustering descriptors
        hist = np.zeros(NWords)
        n_des = 0
        if des is not None and len(des) > 0:
            indices = MBkMeans.predict(scaler.transform(des.astype(np.double)))
            hist += np.bincount(indices, minlength = NWords)
            n_des = len(des)

        # normalize exactly like the training histograms: the classifiers were fitted on
        # word frequencies, so feeding them raw counts would put the features on another scale
        test_histograms[i] = hist/max(1, n_des)

    # predict labels of images by their BoW histogram
    # DYNAMIC: load model from file, use it, and delete it afterwards.
    print(f'\nTesting classifiers ...')
    for i, c in enumerate(Classifiers):
        ti = time.time()
        print(f'\n{str(c)[:3]}: testing ...')

        # load classifier
        # NOTE: unpickling runs arbitrary code - only load files created by this notebook
        with open(f'{LocalDir}saved/classifiers/{str(c)[:3]}{NWords}.pkl', 'rb') as f:
            c = pickle.load(f)

        # predict image label from its BoW histogram
        pred_labels_c[i] = c.predict(test_histograms)

        # delete model to reduce RAM usage
        if not BigRam: del c

        elapsed = time.time() - ti
        print(f'\t... done ({int(elapsed)}s)')

In [None]:
if BoW:
    # quick sanity check: accuracy of the first classifier in `Classifiers`
    from sklearn.metrics import accuracy_score
    print(accuracy_score(test_labels, pred_labels_c[0]))
    # the Evaluation section further down already reports this for every classifier

# 🧠 Neural Network

In [None]:
if Colab and CNN:
    # download a saved network by its Google Drive file id
    !gdown '1er2hFe6mhaAbK8tCoA--kUZNOxB7IznM'
    # if downloaded from here, MOVE IT TO THE CORRECT FOLDER (please)
    # saved/neuralnets/
    # (models are loaded from f'{LocalDir}saved/neuralnets/<model name>.pth')

In [None]:
# choose whether to fine-tune the network from scratch
# or to load a pre-trained one (need to specify path)

# Models:| efficientnet_b0 | mobilenet_v3_large | efficientnet_v2_s |
#        |    resnet50     |     vgg16_bn        ## don't do these (too big) ##

ConvNet_Name = 'efficientnet_b0'

# 'Train' runs the training loop; any other value loads f'{ModelsDir}{ConvNet_Name}.pth'
LoadOrTrain = 'Load'
# folder where network weights are saved to / loaded from
ModelsDir = f'{LocalDir}saved/neuralnets/'

## 🆘 Helper functions

In [None]:
# compute True Positives, False Positives, and False Negatives

def ConfusionMatrix(ground_truth, predictions, num_classes):
    """Score ONE prediction against its ground truth.

    ground_truth: integer class index of the sample
    predictions:  raw network scores for that single sample
    num_classes:  length of the returned count vectors

    Returns (TP, FP, FN): three arrays of length `num_classes`, all zeros except for
    a single 1 in TP (correct) or one 1 each in FP and FN (wrong).
    """
    # turn raw scores into a probability distribution and take the most likely class
    scores = softmax(predictions)
    winner = np.argmax(scores)

    TP = np.zeros(num_classes)
    FP = np.zeros(num_classes)
    FN = np.zeros(num_classes)

    if winner == ground_truth:
        TP[winner] += 1
        return TP, FP, FN

    # wrong: a false positive for the predicted class, a false negative for the true one
    FP[winner] += 1
    FN[ground_truth] += 1
    return TP, FP, FN

In [None]:
# attach new Configurable Classifier

def ConfigurableClassifier(ConvNet, params):
    """Replace `ConvNet.classifier` (in place) with a 3-layer fully connected head.

    params: dict with
        'in'   - number of input features of the head
        'l1'   - width of the first hidden layer
        'l2'   - width of the second hidden layer
        'dout' - dropout probability applied after each hidden layer
        'out'  - (optional) number of output classes, defaults to 100

    Returns None; the network is modified in place.
    """
    in_features = params['in']
    l1 = params['l1']
    l2 = params['l2']
    dropout = params['dout']
    out_features = params.get('out', 100)

    # load new Classifier
    ConvNet.classifier = nn.Sequential(
                nn.Linear(in_features, l1),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(l1, l2),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(l2, out_features))

In [None]:
# load pre-trained model

# since torch doesn't provide a clear way to do this, we need to do it manually
# it is stupid, we know.
def LoadPretrained(ModelName):
    """Build an ImageNet-pretrained torchvision model together with its data settings.

    ModelName: one of 'efficientnet_b0', 'mobilenet_v3_large', 'efficientnet_v2_s',
               'resnet50', 'vgg16_bn'

    Returns (model, image_size, crop_size, batch_size).
    Raises ValueError for an unsupported name (instead of silently returning zeros,
    which only failed later with an unrelated error).
    """
    # (Image Size, Crop Size, Batch Size) for every supported architecture
    presets = {
        'efficientnet_b0':    (256, 224, 64),
        'mobilenet_v3_large': (232, 224, 64),
        'efficientnet_v2_s':  (384, 384, 24),
        'resnet50':           (256, 224, 32),
        'vgg16_bn':           (256, 224, 32),
    }
    if ModelName not in presets:
        raise ValueError(f'Unknown model {ModelName!r}, expected one of {sorted(presets)}')

    image_size, crop_size, batch_size = presets[ModelName]

    # the torchvision builder function has the same name as the model
    model = getattr(torchvision.models, ModelName)(weights = 'IMAGENET1K_V1')
    return model, image_size, crop_size, batch_size

In [None]:
# since different models have different architectures, we need to manually change
# the classifier
# very stupid, we know.

def LoadClassifier(ConvNet, ModelName, verbose = False):
    """Swap the pretrained head of `ConvNet` for the configurable classifier (in place).

    ConvNet:   torchvision model whose head is stored in `.classifier` or, failing that, `.fc`
    ModelName: selects the hidden sizes / dropout of the new head
    verbose:   print the head before and after the swap

    Raises ValueError for an unsupported ModelName.
    """
    # hidden layer sizes and dropout of the new head, per architecture
    head_params = {
        'efficientnet_b0':    {'l1': 512,  'l2': 256, 'dout': 0.1},
        'mobilenet_v3_large': {'l1': 512,  'l2': 256, 'dout': 0.1},
        'resnet50':           {'l1': 1024, 'l2': 512, 'dout': 0.3},
        'vgg16_bn':           {'l1': 2048, 'l2': 512, 'dout': 0.3},
        'efficientnet_v2_s':  {'l1': 512,  'l2': 256, 'dout': 0.3},
    }
    if ModelName not in head_params:
        raise ValueError(f'Unknown model {ModelName!r}, expected one of {sorted(head_params)}')

    # models without a `.classifier` attribute are expected to keep their head in `.fc`
    head_attr = 'classifier' if hasattr(ConvNet, 'classifier') else 'fc'
    head = getattr(ConvNet, head_attr)
    if verbose: print(f'before:\n{head}\n\n***********************\n')

    # input width of the head = in_features of its first linear layer
    # (looked up by type, so it does not depend on where that layer sits in the head)
    if isinstance(head, nn.Linear):
        in_f = head.in_features
    else:
        in_f = next(layer.in_features for layer in head if isinstance(layer, nn.Linear))

    params = dict(head_params[ModelName])
    params['in'] = in_f
    ConfigurableClassifier(ConvNet, params)

    # ConfigurableClassifier always writes to `.classifier`; for `.fc` models move the
    # new head to the attribute the forward pass actually uses
    if head_attr != 'classifier':
        setattr(ConvNet, head_attr, ConvNet.classifier)
        del ConvNet.classifier

    if verbose: print(f'after:\n{getattr(ConvNet, head_attr)}')

In [None]:
# update network to "LOCK" parameters for non-classifier layers

def LockParameters(ConvNet):
    """Freeze every top-level child of `ConvNet` except the classification head.

    Children whose name contains 'classifier' or 'fc' stay trainable
    (requires_grad = True); the parameters of all other children are frozen.
    The network is modified in place; nothing is returned.
    """
    for name, child in ConvNet.named_children():
        # both substring tests must be spelled out: `'classifier' or 'fc' in name`
        # is always truthy and would leave the whole network trainable
        is_head = 'classifier' in name or 'fc' in name
        for param in child.parameters():
            param.requires_grad = is_head

## 🤓 Definition

In [None]:
# Load the Entire Model

def LoadModel(ModelName, Load = False, verbose = False):
    """Build a ready-to-use network: pretrained backbone + new head, backbone locked.

    ModelName: architecture name understood by LoadPretrained / LoadClassifier
    Load:      if True, also load the weights saved at f'{ModelsDir}{ModelName}.pth'
    verbose:   print the head replacement and the data settings

    Returns (ConvNet, ImageSize, CropSize, BatchSize) with ConvNet already on `device`.
    """
    # Load Model
    ConvNet, ImageSize, CropSize, BatchSize = LoadPretrained(ModelName)

    # Change Classifier
    LoadClassifier(ConvNet, ModelName, verbose = verbose)

    # Lock Parameters
    LockParameters(ConvNet)

    if Load:
        print(f'Using {ModelsDir}{ModelName}.pth')
        # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine
        ConvNet.load_state_dict(torch.load(f'{ModelsDir}{ModelName}.pth', map_location = device))

    # Send to GPU (or keep on CPU when CUDA is unavailable)
    ConvNet = ConvNet.to(device)

    if verbose: print(f'\nUsing {ModelName}:\n\tImageSize: {CropSize}x{CropSize}\n\tBatchSize: {BatchSize}')

    return ConvNet, ImageSize, CropSize, BatchSize

In [None]:
# Model Initialization
# NOTE: this overwrites the module-level `ImageSize` that the Bag-of-Words section set earlier.

if CNN:
    # Load = False: pretrained backbone with a freshly initialised head (no saved weights)
    ConvNet, ImageSize, CropSize, BatchSize = LoadModel(ConvNet_Name, Load = False, verbose = True)

## 🗂️ Data

In [None]:
# Prepare dataset for network training

def PrepareData(DataDir, ImageSize, CropSize, BatchSize):
    """Build the train / validation / test DataLoaders for network training.

    DataDir:   directory holding the images
    ImageSize: images are resized to ImageSize x ImageSize ...
    CropSize:  ... then centre-cropped to CropSize x CropSize
    BatchSize: training batch size (validation uses a multiple of it, test uses 1)

    Reads the module-level tables `data_train`, `data_val`, `data_test` and the
    settings `PercentageDataToUse` and `BigRam`.
    Returns (TrainLoader, ValLoader, TestLoader).
    """
    # validation runs without gradients, so larger batches fit when RAM is plentiful
    ValMultiplier = 4 if BigRam else 1

    # resize -> centre crop -> tensor -> per-channel normalisation
    transform = transforms.Compose([
        transforms.Resize((ImageSize, ImageSize)),
        transforms.CenterCrop((CropSize, CropSize)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean = [0.485, 0.456, 0.406],
            std =  [0.229, 0.224, 0.225]),
    ])

    def _subset(table):
        # keep only the leading fraction of rows requested by PercentageDataToUse
        return DataSet(DataDir, table[:int(PercentageDataToUse * len(table))], transform)

    TrainDataSet = _subset(data_train)
    ValDataSet = _subset(data_val)
    TestDataSet = _subset(data_test)

    # Data Loaders for Neural Network batch input
    TrainLoader = DataLoader(TrainDataSet, batch_size = BatchSize, shuffle = True, num_workers = 0)
    ValLoader = DataLoader(ValDataSet, batch_size = BatchSize * ValMultiplier, shuffle = True, num_workers = 0)
    TestLoader = DataLoader(TestDataSet, batch_size = 1, shuffle = False, num_workers = 0)

    return TrainLoader, ValLoader, TestLoader

In [None]:
#########################################################################################
# Data Preparation for Neural Network
#########################################################################################

if CNN:
    # ImageSize, CropSize and BatchSize come from the model-initialisation cell

    # Validation batch size multiplier
    ValMultiplier = 4 if BigRam else 1

    # Get Data Loaders
    TrainLoader, ValLoader, TestLoader = PrepareData(DataDir, ImageSize, CropSize, BatchSize)

    # Print Train/Val/Test sizes
    # (NumTraining / NumTesting replace the values set in the Bag-of-Words section)
    NumTraining, NumValidation, NumTesting = (
        len(loader.dataset) for loader in (TrainLoader, ValLoader, TestLoader))

    print(f'Train:\t{NumTraining} images\n\t{len(TrainLoader)} batches')
    print(f'Val:\t{NumValidation} images\n\t{len(ValLoader)} batches')
    print(f'Test:\t{NumTesting} images\n\t{len(TestLoader)} batches')

## 🏋🏼 Training and Validation

In [None]:
#########################################################################################
# Network Training
#########################################################################################
def TrainAndValidateModel(ConvNet, ModelName, TrainLoader, params, verbose = True, val_loader = None):
    """Train `ConvNet` with early stopping, validating after every epoch.

    ConvNet:     network to train (moved to `device`)
    ModelName:   used for messages and for the checkpoint file name
    TrainLoader: DataLoader yielding (inputs, labels) training batches
    params:      dict with
        'NEpochs'      - maximum number of epochs
        'patience'     - epochs without validation improvement before stopping
        'criterion'    - loss function
        'learningrate' - learning rate written into the optimizer's first param group
        'optimizer'    - optimizer bound to the network parameters
        'scheduler'    - learning-rate scheduler, stepped once per training batch
    verbose:     print progress and show progress bars
    val_loader:  validation DataLoader; defaults to the module-level `ValLoader`

    Side effect: whenever the validation loss improves, the state dict is saved to
    f'{ModelsDir}{ModelName}.pth'.

    Returns (train_losses, val_losses, best_loss): per-epoch mean losses and the
    lowest validation loss seen.
    """
    if val_loader is None:
        val_loader = ValLoader

    ConvNet = ConvNet.to(device)
    NEpochs = params['NEpochs']
    criterion = params['criterion']
    optimizer = params['optimizer']
    optimizer.param_groups[0]['lr'] = params['learningrate']
    scheduler = params['scheduler']

    # if loss doesn't get better after this many epochs, stop training
    max_patience = params['patience']
    patience = max_patience

    # save loss after each epoch
    train_losses = []
    val_losses = []

    # save the model if model has lowest loss so far
    best_loss = np.inf

    if verbose: print(f'Training {ModelName} ...\n')
    for epoch in range(NEpochs):
        if patience == 0:
            if verbose: print('lost patience, killing the network.')
            break

        ##################################
        # TRAINING                       #
        ##################################
        ConvNet.train()
        if verbose: print(f'EPOCH {epoch+1}------------------------------------------')
        if verbose: print('Training:')
        epoch_loss = 0.0
        for inputs, labels in tqdm(TrainLoader, disable = not verbose):
            # move the batch to wherever the network lives (works on CPU-only machines too)
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            logits = ConvNet(inputs)
            loss = criterion(logits, labels)
            loss.backward()

            optimizer.step()
            # the scheduler advances per batch, not per epoch
            scheduler.step()

            epoch_loss += loss.item()

        # mean over the number of batches (max() guards an empty loader)
        trainLoss = epoch_loss / max(1, len(TrainLoader))
        if verbose: print(f'LOSS: {trainLoss:.5f}')
        train_losses.append(trainLoss)

        ##################################
        # VALIDATION                     #
        ##################################
        ConvNet.eval()
        if verbose: print(f'Validation:')
        running_loss = 0.0
        for inputs, labels in tqdm(val_loader, disable = not verbose):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # predict labels and compute validation loss without tracking gradients
            with torch.no_grad():
                logits = ConvNet(inputs)
                loss = criterion(logits, labels)

            running_loss += loss.item()

        valLoss = running_loss / max(1, len(val_loader))
        if verbose: print(f'LOSS: {valLoss:.5f}', end = '')
        val_losses.append(valLoss)

        # if latest model has best loss, save it and reset the patience counter
        if valLoss < best_loss:
            torch.save(ConvNet.state_dict(), f'{ModelsDir}{ModelName}.pth')
            best_loss = valLoss
            patience = max_patience
            if verbose: print(f'\t*SAVED*')
        else:
            if verbose: print()
            patience -= 1

        if verbose: print('\n')

    if verbose: print('Finished Training.')
    return train_losses, val_losses, best_loss

In [None]:
if CNN:
    # Train model
    if LoadOrTrain == 'Train':
        # single source of truth: also used for the scheduler's T_max below
        NEpochs = 10
        optimizer = optim.Adam(ConvNet.parameters())
        params = {
                'NEpochs': NEpochs,                             # Training Epochs
                'patience': 3,                                  # Patience (max number of epochs with no improvement)
                'criterion': nn.CrossEntropyLoss(),             # loss criterion
                'learningrate': 0.001,                          # learning rate
                'optimizer': optimizer,                         # backpropagation optimizer
                'scheduler': CosineAnnealingLR(optimizer,       # learning rate scheduler
                                  # T_max is counted in batches (half of the planned training)
                                  T_max = len(TrainLoader)*(NEpochs//2),
                                  eta_min = 1e-5)
                }
        torch.cuda.empty_cache()
        train_losses, val_losses, best_loss = TrainAndValidateModel(ConvNet, ConvNet_Name, TrainLoader, params, verbose = True)
        print(f'Done training after {len(val_losses)} epochs.\n Best val LOSS: {best_loss}')

    # Load model
    else:
        # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine
        ConvNet.load_state_dict(torch.load(f'{ModelsDir}{ConvNet_Name}.pth', map_location = device))

In [None]:
# plot Training and Validation losses
if CNN and LoadOrTrain == 'Train':
    # one point per epoch; the loss lists only exist after a training run
    fig, ax = plt.subplots()
    ax.plot(train_losses, label = 'Train')
    ax.plot(val_losses, label = 'Val')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title(f'{ConvNet_Name}')
    ax.legend()
    plt.show()

## 🔬 Testing

In [None]:
if CNN:
    # Models:| efficientnet_b0 | mobilenet_v3_large |
    #        |    resnet50     |     vgg16_bn       | efficientnet_v2_s  |   # don't do these (too big)

    ConvNet_Name = 'efficientnet_b0'    # MAKE SURE TO HAVE MODEL IN FOLDER

    # rebuild the network and load the saved weights from f'{ModelsDir}{ConvNet_Name}.pth';
    # the returned image/crop/batch sizes are discarded, so the existing loaders are reused
    ConvNet, _, _, _ = LoadModel(ConvNet_Name, Load = True, verbose = True)

In [None]:
#########################################################################################
# Network Testing
#########################################################################################

def TestModel(ConvNet, ModelName, TestLoader):
    """
    Run a network over the test set and accumulate per-class counts.

    Parameters
    ----------
    ConvNet : network to evaluate; it is moved to `device` and switched to eval mode
    ModelName : name shown in the progress message
    TestLoader : iterable yielding (input, label) pairs. `label.item()` is called
                 on every pair, so each batch must contain exactly one sample

    Returns
    -------
    TP, FP, FN : arrays of length NumClasses, the element-wise sum of what
                 ConfusionMatrix returns for every sample (True Positives,
                 False Positives, False Negatives)
    test_gts : list with the ground-truth label of every sample (Python numbers)
    test_preds : list with the predicted label of every sample (0-dim CPU tensors)
    """
    ConvNet = ConvNet.to(device)
    ConvNet.eval()

    # labels of every image
    test_gts = []
    test_preds = []

    # keep track of True Positives, False Positives, and False Negatives
    TP, FP, FN = np.zeros(NumClasses), np.zeros(NumClasses), np.zeros(NumClasses)

    print(f'Testing {ModelName} ...\n')
    for image, label in tqdm(TestLoader):
        gt = label.item()
        test_gts.append(gt)

        # predict labels (no gradients are needed at test time)
        with torch.no_grad():
            outputs = ConvNet(image.to(device))

        # Store the prediction on the CPU: keeping one device tensor per sample
        # pins GPU memory and makes the list impossible to convert to NumPy.
        test_preds.append(torch.argmax(outputs).cpu())

        # calculate ConfusionMatrix to update True Positives, False Positives, and False Negatives
        iTP, iFP, iFN = ConfusionMatrix(gt, outputs.cpu().numpy(), NumClasses)
        TP += iTP
        FP += iFP
        FN += iFN

    return TP, FP, FN, test_gts, test_preds

In [None]:
# Evaluate the selected network on the test set.
# TestModel hands back, in this order: True Positives, False Positives,
# False Negatives, the ground-truth labels and the predicted labels.

if CNN:
    TP, FP, FN, test_gts, test_preds = TestModel(ConvNet = ConvNet,
                                                 ModelName = ConvNet_Name,
                                                 TestLoader = TestLoader)

# 🔍 Evaluation

## 📐 Evaluation Metrics

In [None]:
# mean Average Accuracy
# Compute the accuracy separately for each class, then average over classes

def EvaluatemAA(TP, num_true):
    """
    Mean of the per-class accuracies TP[c] / num_true[c].

    Parameters
    ----------
    TP : sequence with the number of correctly classified samples of each class
    num_true : sequence with the number of ground-truth samples of each class
               (a scalar is broadcast to every class)

    Returns
    -------
    The average over all classes, class 0 included, of the per-class accuracy.
    Classes without any ground-truth sample are left out of the average, since
    their accuracy is undefined; NaN is returned when no class has samples.
    """
    TP = np.asarray(TP, dtype = float)
    num_true = np.broadcast_to(np.asarray(num_true, dtype = float), TP.shape)

    # only classes that actually occur have a defined accuracy
    seen = num_true > 0
    if not seen.any():
        return np.nan

    return np.mean(TP[seen] / num_true[seen])

In [None]:
# Accuracy
# Accuracy = Correct / Total

def EvaluateAccuracy(TP, total):
    """
    Ratio between the summed correct predictions and the number of samples.

    TP : number(s) of correct predictions; everything is summed with np.sum
    total : number of samples the sum is divided by
    """
    correct = np.sum(TP)
    return correct / total

## 👜 Bag of Words

In [None]:
# compute accuracy of each Classifier

if BoW:
    # accuracies[i] holds the accuracy of the i-th entry of Classifiers
    accuracies = []
    for i, c in enumerate(Classifiers):
        # number of predictions of classifier i that equal the corresponding test label
        nailed = sum(pr == gt for pr, gt in zip(pred_labels_c[i], test_labels))
        # NOTE(review): assumes NumTesting is the number of test samples — confirm where it is defined
        accuracies.append(nailed / NumTesting)
        # the first three characters of the classifier's string form are used as its tag
        print(f'Accuracy {str(c)[:3]}:\t{accuracies[i]:.5f}')

## 🧠 Neural Network

In [None]:
# compute Accuracy and mean Average Accuracy of the Neural Net,
# then plot the accuracy of every class

if CNN:
    # count occurrences of each label in the test ground truths
    num_true = np.zeros(NumClasses)
    for c in Classes:
        num_true[c] = np.count_nonzero(np.array(test_gts) == c)

    # Accuracy
    accuracy = np.sum(TP) / NumTesting

    # mean Average Accuracy
    mAA = EvaluatemAA(TP, num_true)

    # accuracy per class
    accuracies = []
    for c in Classes:
        accuracy_c = EvaluateAccuracy(TP[c], num_true[c])
        accuracies.append(accuracy_c)

    # plot accuracy
    plt.figure(figsize = (12,6))

    # accuracy per class
    plt.bar(np.arange(NumClasses), accuracies, color = 'tab:blue')

    # mean average accuracy
    plt.axhline(y = mAA, color = 'black',
                linestyle = '-', linewidth = 3)
    plt.annotate(f'mAA {mAA:.3f}', (0-4.2, mAA+0.04), size = 10,
                bbox = dict(facecolor='white', edgecolor='black'))

    # lowest accuracy class
    # (selected on the accuracies, not on the raw TP counts: the two only agree
    #  when every class has the same number of test samples; NaN accuracies of
    #  classes without test samples are ignored)
    xmin = np.nanargmin(accuracies)
    ymin = accuracies[xmin]
    plt.bar(xmin, ymin, color = 'tab:olive')
    plt.annotate(f'Class {xmin}\nAccuracy {ymin:.3f}',
                 (xmin+1.5, ymin-0.078), size = 10,
                 bbox = dict(facecolor='white', edgecolor='tab:olive'))

    # highest accuracy class
    xmax = np.nanargmax(accuracies)
    ymax = accuracies[xmax]
    plt.bar(xmax, ymax, color = 'tab:orange')
    plt.annotate(f'Class {xmax}\nAccuracy {ymax:.3f}',
                 (xmax+1.5, ymax-0.078), size = 10,
                 bbox = dict(facecolor='white', edgecolor='tab:orange'))

    plt.ylim([0,1.1])
    plt.xlabel('Class')
    plt.ylabel('Accuracy')
    plt.title(f'{ConvNet_Name}')
    # one tick every 10 classes, following the actual number of classes
    plt.xticks(np.arange(0, NumClasses, 10))
    plt.show()

# 📈 Hyperparameters Tuning

## 👜 Bag of Words

In [None]:
if BoWFT:
    # vocabulary sizes to try: 400, 600, ..., 4800
    NWordsList = list(range(400, 5000, 200))

    # fitted clusterers and their inertia, in the same order as NWordsList
    MBKMeansList = []
    inertiaList = []

    for n in NWordsList:
        ti = time.time()

        # the batch size grows with the number of clusters (4 samples per cluster)
        temp_kmeans = MiniBatchKMeans(
            n_clusters = n,
            batch_size = 4 * n,
            n_init = 'auto',
            random_state = 0,
        ).fit(descriptors_flat)

        MBKMeansList.append(temp_kmeans)
        inertiaList.append(temp_kmeans.inertia_)

        # elapsed seconds for this vocabulary size
        print(f'Words: {n}\t{time.time()-ti}')

In [None]:
# Inertia of the fitted clusterings against the vocabulary size.
# NWordsList and inertiaList only exist when the Bag of Words fine-tuning ran,
# so the plot is guarded by the same flag (otherwise Run All raises a NameError).
if BoWFT:
    plt.figure()
    plt.scatter(NWordsList,inertiaList)
    plt.plot(NWordsList, inertiaList)
    plt.xticks(NWordsList, rotation = 90)
    plt.title('Inertia vs number of clusters')
    plt.xlabel('Number of clusters')
    plt.ylabel('Inertia [a.u.]')
    plt.show()

## 🧠 Neural Network
- https://towardsdatascience.com/hyperparameter-tuning-of-neural-networks-with-optuna-and-pytorch-22e179efc837

In [None]:
# name of the network architecture that objective() loads for every tuning trial
ConvNet_Name = 'efficientnet_b0'

In [None]:
# define an objective function:
#       - load a model with certain parameters,
#               train and validate it
#       - try to minimize validation loss

def objective(trial):
    """
    Optuna objective: build, train and validate one candidate network.

    The trial samples the classifier head ('l1', 'l2', 'dout'), the optimizer,
    the learning rate 'lr' and the number of epochs 'NEpochs'. After training,
    the study is checkpointed to disk so an interrupted search can be resumed.

    Parameters
    ----------
    trial : Optuna trial used to sample the hyper-parameters

    Returns
    -------
    best_loss : best validation loss returned by TrainAndValidateModel,
                i.e. the value the study minimizes
    """
    # Load Pre-Trained model
    model, _, _, _ = LoadModel(ConvNet_Name, Load = False, verbose = False)

    # Modify Classifier
    in_features = model.classifier[0].in_features
    classifier_params = {
        'in':   in_features,
        'l1':   trial.suggest_int('l1', low = 512, high = 896, step = 128),
        'l2':   trial.suggest_int('l2', low = 128, high = 512, step = 128),
        'dout': trial.suggest_float('dout', low = 0.0, high = 1.0, step = 0.2)
        }

    ConfigurableClassifier(model, classifier_params)

    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD'])
    # log-uniform sampling; suggest_loguniform is deprecated in favour of log = True
    lr = trial.suggest_float('lr', 1e-4, 1e-1, log = True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr = lr)

    # Train and Validate Network
    n_epochs = trial.suggest_int('NEpochs', 5, 10)
    train_params = {
        'NEpochs': n_epochs,
        'patience': 2,
        'criterion': nn.CrossEntropyLoss(),
        'learningrate': lr,
        'optimizer': optimizer,
        # same schedule length rule as the regular training cell:
        # half of the epochs this trial actually runs
        'scheduler': CosineAnnealingLR(optimizer,
                        T_max = len(TrainLoader)*(n_epochs//2),
                        eta_min = 1e-5)
        }
    _, _, best_loss = TrainAndValidateModel(model, ConvNet_Name, TrainLoader, train_params, verbose = False)

    # Checkpoint the study where the tuning cell looks for it when resuming.
    # The pickle is written to a per-trial temporary file and then moved into
    # place, so parallel trials (n_jobs = -1) never leave a half-written file.
    studies_dir = f'{LocalDir}/saved/studies'
    os.makedirs(studies_dir, exist_ok = True)
    checkpoint = f'{studies_dir}/study.pkl'
    temporary = f'{checkpoint}.{trial.number}.tmp'
    with open(temporary, 'wb') as f:
        pickle.dump(trial.study, f)
    os.replace(temporary, checkpoint)

    # return Validation loss to minimize
    return best_loss

In [None]:
# Optuna hyper-parameter tuning

if CNNFT:
    # study initialization: resume a saved study when there is one,
    # otherwise start a new one (a loaded study must not be overwritten)
    if os.path.isfile(f'{LocalDir}/saved/studies/study.pkl'):
        print('Study detected, loading ...')
        # NOTE: unpickling runs arbitrary code; only load study files you created yourself
        with open(f'{LocalDir}/saved/studies/study.pkl', 'rb') as f:
            study = pickle.load(f)
    else:
        study = optuna.create_study(study_name = 'ConvNetSTUDY', direction = "minimize", sampler = optuna.samplers.TPESampler())

    # data loaders for the architecture being tuned (batch size fixed to 8)
    _, ImageSize, CropSize, BatchSize = LoadModel(ConvNet_Name, Load = False, verbose = False)
    TrainLoader, ValLoader, TestLoader = PrepareData(DataDir, ImageSize, CropSize, 8)

    # study execution 🐌.......🐌.................🐌.......................................🐌
    study.optimize(objective, n_trials = 30,
                    n_jobs = -1, show_progress_bar = True)

    # save study (pickle.dump needs the open file object, not its path)
    with open(f'{LocalDir}/saved/studies/study{study.best_trial.value}.pkl', 'wb') as f:
        pickle.dump(study, f)

In [None]:
# report the hyper-parameters of the best trial, one "name: value" per line

if CNNFT:
    best_trial = study.best_trial

    for key, value in best_trial.params.items():
        print(f"{key}: {value}")

In [None]:
if CNNFT:
    # Figure with the intermediate values of the study's trials.
    # NOTE(review): objective() is not seen reporting intermediate values to the trial,
    # so this figure may turn out empty — confirm
    fig = optuna.visualization.plot_intermediate_values(study)
    fig.show()

In [None]:
if CNNFT:
    # Figure with the optimization history of the study
    # (the objective value is the validation loss returned by objective())
    fig = optuna.visualization.plot_optimization_history(study)
    fig.show()

In [None]:
if CNNFT:
    # Figure with the importance of each tuned hyper-parameter, as estimated by Optuna
    fig = optuna.visualization.plot_param_importances(study)
    fig.show()