<a href="https://colab.research.google.com/github/ShengyuanWang/Brain-Tumor-Segementation-and-Classification/blob/main/transfer_learning_brain_tumor_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import os
import glob
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import layers, Sequential
from tensorflow.keras.utils import plot_model

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_recall_fscore_support
from sklearn.metrics import accuracy_score, top_k_accuracy_score, f1_score, matthews_corrcoef
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from scikitplot.metrics import plot_roc

In [None]:
class CFG:
    # Central configuration shared by the cells of this notebook.
    EPOCHS = 50       # number of epochs requested from model.fit
    BATCH_SIZE = 32   # batch size handed to the tf.data pipelines
    SEED = 42         # seed for DataFrame shuffling, splits and initializers
    TF_SEED = 768     # seed handed to the random augmentation layers
    HEIGHT = 224      # image height after resizing
    WIDTH = 224       # image width after resizing
    CHANNELS = 3      # channels per decoded image
    # Derived from the three values above so the model input shape can never
    # drift away from the size the loader resizes images to.
    IMAGE_SIZE = (HEIGHT, WIDTH, CHANNELS)

<a id='1'></a>
# 1 | Dataset Exploration
<div style="padding: 4px;color:white;margin:10;font-size:200%;text-align:center;display:fill;border-radius:10px;overflow:hidden;background-image: url(https://i.postimg.cc/T1D2yGny/167.jpg); background-size: 100% auto;"></div>

In [None]:
# Define paths
# NOTE(review): Kaggle-specific absolute path; adjust it when running elsewhere.
# The trailing slash matters: the glob pattern below is built by plain string
# concatenation with this value.
DATASET_PATH = "/kaggle/input/brain-tumor-mri-dataset/Training/"

<a id='1.1'></a>
### Get image paths with glob

In [None]:
%%time
# Collect the .jpg files that sit one directory below DATASET_PATH.
# glob is called without recursive=True, so "**" behaves like "*" here and
# matches exactly one directory level.
dataset_images = glob.glob(f"{DATASET_PATH}**/*.jpg")

<a id='1.2'></a>
### View the number of images present in the dataset

In [None]:
# Get dataset size (number of image paths returned by glob)
total = len(dataset_images)

# View samples counts
print(f'TOTAL: {total}')

<a id='1.3'></a>
### Create Pandas DataFrames for paths and labels

In [None]:
def generate_labels(image_paths):
    '''
    Derives one label per path from the '/'-separated path components.

    @params
        - image_paths: (iterable of str) -> paths such as '<root>/<label>/<file>.jpg'

    @returns
        - labels: (list of str) -> for each path, the second-to-last component;
          a string without any '/' is returned unchanged
    '''
    labels = []
    for path in image_paths:
        # Keep at most the last two components and take the first of them
        trailing_parts = path.split('/')[-2:]
        labels.append(trailing_parts[0])
    return labels


def build_df(image_paths, labels):
    '''
    Builds a shuffled DataFrame of image paths and their labels.

    @params
        - image_paths: (list of str) -> paths of the image files
        - labels: (list of str) -> label of each image, aligned with image_paths

    @returns
        - df: (pd.DataFrame) -> columns 'image_path' and 'label', rows shuffled
          with CFG.SEED and re-indexed from 0
    '''
    # `labels` already holds the final labels, so it is stored as-is. The
    # previous code passed it through generate_labels a second time, which was
    # only harmless because a string without '/' maps to itself.
    df = pd.DataFrame({
        'image_path': image_paths,
        'label': list(labels)
    })

    # Shuffle and return df
    return df.sample(frac=1, random_state=CFG.SEED).reset_index(drop=True)

In [None]:
# Build the Dataset DataFrame
# Labels are derived from each image path with generate_labels.
dataset_df = build_df(dataset_images, generate_labels(dataset_images))

In [None]:
# View first 5 samples in the dataset (rows were shuffled by build_df)
dataset_df.head(5)

### Label Encode Image Labels



In [None]:
# Generate Label Encoder
label_encoder = LabelEncoder()

# Label Encode the Image Labels
# Adds an integer 'label_encoded' column alongside the string 'label' column.
dataset_df['label_encoded'] = label_encoder.fit_transform(dataset_df.label)

# View first 10 samples
dataset_df.head(10)

In [None]:
# Get class names and number of classes from label_encoder
# (classes_ holds the distinct labels seen by fit_transform)
num_classes = len(label_encoder.classes_)
class_names = label_encoder.classes_

print(f'Number of classes: {num_classes}')
print(f'Classes: {class_names}')

<a id='1.4'></a>
### Load & View Random Sample Image

In [None]:
def _load(image_path):
    '''
    Reads a JPEG file and returns it as a resized float32 image tensor.

    @params
        - image_path: (str) -> path of the JPEG file to read

    @returns
        - image: (tf.Tensor) -> float32 image of size CFG.HEIGHT x CFG.WIDTH
          with 3 channels, pixel values divided by 255
    '''
    target_size = [CFG.HEIGHT, CFG.WIDTH]

    # Raw file bytes -> decoded image forced to 3 channels
    encoded = tf.io.read_file(image_path)
    decoded = tf.io.decode_jpeg(encoded, channels=3)

    # Lanczos-3 resampling to the configured size
    resized = tf.image.resize(decoded, target_size,
                              method=tf.image.ResizeMethod.LANCZOS3)

    # Cast to float32 and scale by 1/255
    return tf.cast(resized, tf.float32)/255.

def view_sample(image, label, color_map='gray', fig_size=(8, 10)):
    '''
    Shows a single image in a new figure, titled with its label.

    @params
        - image: (tensor) -> RGB image to display
        - label: (str) -> label shown in the title
        - color_map: (str) -> matplotlib colormap applied to the grayscale image (default='gray')
        - fig_size: (tuple) -> figure size in inches (default=(8, 10))
    '''
    # The image is converted to grayscale first so the colormap applies
    grayscale = tf.image.rgb_to_grayscale(image)

    plt.figure(figsize=fig_size)
    plt.imshow(grayscale, cmap=color_map)
    plt.title(f'Label: {label}', fontsize=16)

In [None]:
# Select random sample from dataset_df
# NOTE(review): `random` is not seeded in the visible cells, so the chosen
# sample presumably differs between runs.
idx = random.sample(dataset_df.index.to_list(), 1)[0]

# Load the random sample and label
sample_image, sample_label = _load(dataset_df.image_path[idx]), dataset_df.label[idx]

# View the random sample
view_sample(sample_image, sample_label, color_map='inferno')

<a id='1.5'></a>
### View Multiple Randomly Selected Samples

In [None]:
def view_mulitiple_samples(df, sample_loader, count=10, color_map='gray', fig_size=(14, 10)):
    '''
    Shows `count` randomly chosen images from `df` in a grid with 5 columns.

    @params
        - df: (pd.DataFrame) -> DataFrame with 'image_path' and 'label' columns
        - sample_loader: (function) -> loads an image given its path
        - count: (int) -> number of samples to draw (default=10)
        - color_map: (str) -> matplotlib colormap for the grayscale images (default='gray')
        - fig_size: (tuple) -> figure size in inches (default=(14, 10))
    '''
    grid_columns = 5
    # Ceiling division: enough rows to hold `count` images
    grid_rows = -(-count // grid_columns)

    chosen_rows = random.sample(df.index.to_list(), count)
    plt.figure(figsize=fig_size)

    for position, row_id in enumerate(chosen_rows, start=1):
        plt.subplot(grid_rows, grid_columns, position)
        plt.title(f'Label: {df.label[row_id]}')
        sample = sample_loader(df.image_path[row_id])
        plt.imshow(tf.image.rgb_to_grayscale(sample), cmap=color_map)

# Preview 30 random samples (6 rows of 5) using the 'jet' colormap
view_mulitiple_samples(dataset_df, _load,
                       count=30, color_map='jet',
                       fig_size=(20, 24))

<a id='1.6'></a>
### View Train Labels Distribution

In [None]:
# Generate Figure
fig = plt.figure(figsize=(14, 10))

# Plot Labels Distribution
plt.title('Dataset Labels Distribution', fontsize=20)
# Per-label sample counts in ascending order; `distribution` is reused by the
# insufficient-class filter further down.
distribution = dataset_df['label'].value_counts().sort_values()
sns.barplot(x=distribution.values,
            y=list(distribution.keys()),
            orient="h");

<a id='1.7'></a>
### Discard Insufficient Sample Classes

In [None]:
# Labels that have fewer than 10 samples in `distribution`
insufficient_labels = list(distribution[distribution.values < 10].keys())

# Only touch the DataFrame when at least one such label exists
if insufficient_labels:
    # Index values of every row that carries an insufficient label
    insufficient_indices = dataset_df.index[dataset_df.label.isin(insufficient_labels)]

    # Remove those rows and renumber the remaining ones from 0
    dataset_df = dataset_df.drop(insufficient_indices).reset_index(drop=True)


In [None]:
# Per-label sample counts after filtering, ascending
dataset_df['label'].value_counts().sort_values()

### Re-label the encoded classes

In [None]:
# Generate Label Encoder
# A fresh encoder is fitted because rows may have been removed above.
label_encoder = LabelEncoder()

# Label Encode the Image Labels
dataset_df['label_encoded'] = label_encoder.fit_transform(dataset_df.label)

# View first 10 samples
dataset_df.head(10)

In [None]:
# Get class names and number of classes from label_encoder
# (recomputed from the re-fitted encoder)
num_classes = len(label_encoder.classes_)
class_names = label_encoder.classes_

print(f'Number of classes: {num_classes}')
print(f'Classes: {class_names}')

<a id='2.1'></a>
### Create Train & Test Splits

In [None]:
# Stratified split of the full dataset: 60% train, 40% held out for the
# validation/test split below. Only the index arrays are kept; the label
# outputs of train_test_split are discarded.
train_split_idx, val_test_split_idx, _, _ = train_test_split(dataset_df.index,
                                                        dataset_df.label_encoded,
                                                        test_size=0.4,
                                                        stratify=dataset_df.label_encoded,
                                                        random_state=CFG.SEED)

In [None]:
# Get training data and the held-out (validation + test) data
# Both frames are re-indexed from 0.
train_df = dataset_df.iloc[train_split_idx].reset_index(drop=True)
val_test_df = dataset_df.iloc[val_test_split_idx].reset_index(drop=True)

# View shapes
train_df.shape, val_test_df.shape

### Create Validation & Test Splits

In [None]:
# Stratified split of the held-out data: 40% validation, 60% test.
# The resulting indices refer to rows of val_test_df (they are drawn from
# val_test_df.index), not to rows of dataset_df.
val_split_idx, test_split_idx, _, _ = train_test_split(val_test_df.index,
                                                       val_test_df.label_encoded,
                                                       test_size=0.6,
                                                       stratify=val_test_df.label_encoded,
                                                       random_state=CFG.SEED)

In [None]:
# Get validation and test data
# val_split_idx / test_split_idx were drawn from val_test_df.index, so they are
# row positions within val_test_df. Applying them to dataset_df (as before)
# selected unrelated rows, including rows that belong to the training split.
val_df = val_test_df.iloc[val_split_idx].reset_index(drop=True)
test_df = val_test_df.iloc[test_split_idx].reset_index(drop=True)

# View shapes
val_df.shape, test_df.shape

In [None]:
# Sample counts per split; `total` is overwritten with their sum
train_size = len(train_df)
val_size = len(val_df)
test_size = len(test_df)
total = train_size + val_size + test_size

# View the counts
print(f'train samples count:\t\t{train_size}')
print(f'validation samples count:\t{val_size}')
print(f'test samples count:\t\t{test_size}')
print('=======================================')
print(f'TOTAL:\t\t\t\t{total}')

<a id='2.2'></a>
### View New Train, Validation & Test Labels Distribution

In [None]:
fig, (ax1, ax2, ax3) = plt.subplots(3, figsize=(16, 24))

# Set the spacing between subplots
fig.tight_layout(pad=6.0)

# Per-split label counts, smallest class first
train_distribution = train_df['label'].value_counts().sort_values()
val_distribution = val_df['label'].value_counts().sort_values()
test_distribution = test_df['label'].value_counts().sort_values()

# One horizontal bar chart per split, top to bottom: train, validation, test
split_panels = (
    (ax1, 'Train Labels Distribution', train_distribution),
    (ax2, 'Validation Labels Distribution', val_distribution),
    (ax3, 'Test Labels Distribution', test_distribution),
)
for panel_ax, panel_title, label_counts in split_panels:
    panel_ax.set_title(panel_title, fontsize=20)
    sns.barplot(x=label_counts.values,
                y=list(label_counts.keys()),
                orient="h",
                ax=panel_ax)

<a id='2.3'></a>
### Create an Image Data Augmentation Layer

In [None]:
# Build augmentation layer
# Random horizontal/vertical flips followed by a random zoom with factors in
# (-0.1, 0.1) on both axes; both layers are seeded with CFG.TF_SEED.
augmentation_layer = Sequential([
    layers.RandomFlip(mode='horizontal_and_vertical', seed=CFG.TF_SEED),
    layers.RandomZoom(height_factor=(-0.1, 0.1), width_factor=(-0.1, 0.1), seed=CFG.TF_SEED),
], name='augmentation_layer')

In [None]:
# Side-by-side preview of the earlier random sample before and after augmentation
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 10))

# Set the spacing between subplots
fig.tight_layout(pad=6.0)

# View Original Image
ax1.set_title('Original Image', fontsize=20)
ax1.imshow(tf.image.rgb_to_grayscale(sample_image), cmap='inferno');

# View Augmented Image
ax2.set_title('Augmented Image', fontsize=20)
ax2.imshow(tf.image.rgb_to_grayscale(augmentation_layer(sample_image)), cmap='inferno');

<a id='2.4'></a>
### Create Input Data Pipeline w. tf.data API

In [None]:
def encode_labels(labels, encode_depth=24):
    '''
    One-hot encodes integer class ids.

    @params
        - labels: (array-like of int) -> class ids to encode
        - encode_depth: (int) -> length of each one-hot vector (default=24)

    @returns
        - encoded: (np.ndarray) -> one row per label, `encode_depth` columns
    '''
    one_hot = tf.one_hot(labels, depth=encode_depth)
    return one_hot.numpy()

def create_pipeline(df, load_function, augment=False, batch_size=32, shuffle=False,
                    cache=None, prefetch=False, encode_depth=24):
    '''
    Generates an input pipeline using the tf.data API given a Pandas DataFrame and image loading function.

    Stage order: load -> cache -> shuffle -> augment -> batch -> prefetch.
    Caching sits directly after loading so that only the decoded images are
    stored; shuffling, random augmentation and batching are then re-applied on
    every pass over the dataset instead of being frozen inside the cache.

    @params
        - df: (pd.DataFrame) -> DataFrame containing paths and labels
          (columns 'image_path' and 'label_encoded')
        - load_function: (function) -> function used to load images given their paths
        - augment: (bool) -> condition for applying augmentation
        - batch_size: (int) -> size for batched (default=32)
        - shuffle: (bool) -> condition for data shuffling, data is shuffled when True (default=False)
        - cache: (str) -> cache path for caching data, data is not cached when None (default=None)
        - prefetch: (bool) -> condition for prefeching data, data is prefetched when True (default=False)
        - encode_depth: (int) -> length of the one-hot label vectors (default=24);
          must equal the number of units in the model's output layer

    @returns
        - dataset: (tf.data.Dataset) -> dataset input pipeline used to train a TensorFlow model
    '''
    # Get image paths and labels from DataFrame
    image_paths = df.image_path
    image_labels = encode_labels(df.label_encoded, encode_depth)
    AUTOTUNE = tf.data.AUTOTUNE

    # Create dataset with raw data from DataFrame
    ds = tf.data.Dataset.from_tensor_slices((image_paths, image_labels))

    # Load the images
    ds = ds.map(lambda x, y: (load_function(x), y), num_parallel_calls=AUTOTUNE)

    # Apply caching based on condition
    # Note: Use cache in memory (cache='') if the data is small enough to fit in memory!!!
    if cache is not None:
        ds = ds.cache(cache)

    # Apply shuffling based on condition
    if shuffle:
        ds = ds.shuffle(buffer_size=1000)

    # Apply the augmentation layer to the loaded images if augment is True
    if augment:
        ds = ds.map(lambda x, y: (augmentation_layer(x), y), num_parallel_calls=AUTOTUNE)

    # Apply batching
    ds = ds.batch(batch_size)

    # Apply prefetching based on condition
    # Note: This will result in memory trade-offs
    if prefetch:
        ds = ds.prefetch(buffer_size=AUTOTUNE)

    # Return the dataset
    return ds

In [None]:
# Generate Train Input Pipeline
# NOTE(review): shuffle=False means the training order is only the one-off
# shuffle done when the DataFrame was built; consider shuffle=True so batches
# differ between epochs.
train_ds = create_pipeline(train_df, _load, augment=True,
                           batch_size=CFG.BATCH_SIZE,
                           shuffle=False, prefetch=True)

# Generate Validation Input Pipeline
val_ds = create_pipeline(val_df, _load,
                         batch_size=CFG.BATCH_SIZE,
                         shuffle=False, prefetch=False)

# Generate Test Input Pipeline
# Must stay unshuffled: predictions are later compared row-by-row with
# test_df.label_encoded.
test_ds = create_pipeline(test_df, _load,
                          batch_size=CFG.BATCH_SIZE,
                          shuffle=False, prefetch=False)

In [None]:
# View string representation of datasets (element specs of the three pipelines)
print('========================================')
print('Train Input Data Pipeline:\n\n', train_ds)
print('========================================')
print('Validation Input Data Pipeline:\n\n', val_ds)
print('========================================')
print('Test Input Data Pipeline:\n\n', test_ds)
print('========================================')

In [None]:
# Here's a function to get any model/preprocessor from tensorflow hub
def get_tfhub_model(model_link, model_name, model_trainable=False):
    '''
    Wraps a TensorFlow Hub model as a Keras layer.

    @params
        - model_link: (str) -> TF-Hub handle/URL of the model
        - model_name: (str) -> name given to the resulting layer
        - model_trainable: (bool) -> whether the layer is trainable (default=False)

    @returns
        - layer: (hub.KerasLayer) -> the wrapped hub model
    '''
    layer_options = {'trainable': model_trainable, 'name': model_name}
    return hub.KerasLayer(model_link, **layer_options)

<a id="3.1"></a>
### Get EfficientNet From TensorFlow Hub

In [None]:
# Get EfficientNet V2 B0 here (feature-vector variant from TF-Hub)
efficientnet_v2_url = 'https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b0/feature_vector/2'
model_name = 'efficientnet_v2_b0'

# Keep the hub layer non-trainable; only the layers stacked on top are trained
set_trainable=False

efficientnet_v2_b0 = get_tfhub_model(efficientnet_v2_url,
                                     model_name,
                                     model_trainable=set_trainable)

<a id="3.2"></a>
### Define EfficientNet Model

In [None]:
def efficientnet_v2_model(output_units=24):
    '''
    Builds the EfficientNet V2 B0 classifier: the module-level
    `efficientnet_v2_b0` hub layer followed by dropout and a dense head.

    @params
        - output_units: (int) -> number of softmax outputs (default=24, the
          previously hard-coded value); must equal the one-hot label depth
          produced by the input pipeline

    @returns
        - model: (tf.keras.Sequential) -> uncompiled model taking inputs of
          shape CFG.IMAGE_SIZE
    '''
    # Seeded initializer shared by every dense layer of the head
    initializer = tf.keras.initializers.GlorotNormal(seed=CFG.SEED)

    efficientnet_v2_sequential = Sequential([
        layers.Input(shape=CFG.IMAGE_SIZE, dtype=tf.float32, name='input_image'),
        efficientnet_v2_b0,
        layers.Dropout(0.2),
        layers.Dense(512, activation='relu', kernel_initializer=initializer),
        layers.Dense(256, activation='relu', kernel_initializer=initializer),
        layers.Dense(output_units, dtype=tf.float32, activation='softmax', kernel_initializer=initializer)
    ], name='efficientnet_v2_sequential_model')

    return efficientnet_v2_sequential

In [None]:
# Generate Model (uncompiled; compiled in the training cell)
model_efficientnet_v2 = efficientnet_v2_model()

# Generate Summary of the Model
model_efficientnet_v2.summary()

In [None]:
# Explore model visually (layer graph with tensor shapes)
plot_model(
    model_efficientnet_v2, dpi=60,
    show_shapes=True
)

In [None]:
def train_model(model, num_epochs, callbacks_list, tf_train_data,
                tf_valid_data=None, shuffling=False):
    '''
        Trains a TensorFlow model and returns whatever model.fit returns.

        @params
        - model: (tf.keras.model) -> model to be trained
        - num_epochs: (int) -> number of epochs to train the model
        - callbacks_list: (list) -> list containing callback fuctions for model
        - tf_train_data: (tf.data.Dataset) -> dataset for model to be train on
        - tf_valid_data: (tf.data.Dataset) -> dataset for model to be validated on (default=None);
          must support len() when given
        - shuffling: (bool) -> forwarded to model.fit as `shuffle` (default=False)

        @returns
        - model_history: the object returned by model.fit (for Keras models a History
          object whose `.history` dict holds the tracked loss and metric values)
    '''
    # Arguments common to both the validated and the non-validated run
    fit_options = {
        'epochs': num_epochs,
        'callbacks': callbacks_list,
        'shuffle': shuffling,
    }

    # `is not None` instead of `!= None`: an identity check never triggers
    # element-wise or overloaded comparison on the dataset object
    if tf_valid_data is not None:
        fit_options['validation_data'] = tf_valid_data
        # Validate on every batch of the validation dataset
        fit_options['validation_steps'] = int(len(tf_valid_data))

    return model.fit(tf_train_data, **fit_options)

In [None]:
# Define Early Stopping Callback
# Stops after 3 epochs without val_loss improvement and restores the best weights.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True)

# Define Reduce Learning Rate Callback
# Multiplies the learning rate by 0.1 after 2 epochs without val_loss improvement.
reduce_lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    patience=2,
    factor=0.1,
    verbose=1)

# Define Callbacks and Metrics lists
# Both callbacks monitor val_loss, so training must be given validation data.
CALLBACKS = [early_stopping_callback, reduce_lr_callback]
METRICS = ['accuracy']

In [None]:
# NOTE(review): seeds TensorFlow with CFG.SEED although CFG also defines
# TF_SEED — confirm which one is intended here.
tf.random.set_seed(CFG.SEED)

# Compile the model
# CategoricalCrossentropy expects the one-hot labels produced by the pipeline.
model_efficientnet_v2.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=METRICS
)

# Train the model
print(f'Training {model_efficientnet_v2.name}.')
print(f'Train on {len(train_df)} samples, validate on {len(val_df)} samples.')
print('----------------------------------')

efficientnet_v2_history = train_model(
    model_efficientnet_v2, CFG.EPOCHS, CALLBACKS,
    train_ds, val_ds,
    shuffling=False
)

In [None]:
# Evaluate the model on the test pipeline (loss plus the compiled metrics)
efficientnet_v2_evaluation = model_efficientnet_v2.evaluate(test_ds)

In [None]:
# Generate model probabilities and associated predictions
# Predicted class id = column with the highest probability in each row.
efficientnet_v2_test_probabilities = model_efficientnet_v2.predict(test_ds, verbose=1)
efficientnet_v2_test_predictions = tf.argmax(efficientnet_v2_test_probabilities, axis=1)

In [None]:
# NOTE(review): no version is pinned, so the installed release may change between runs
!pip install -q vit-keras

In [None]:
# NOTE(review): unpinned install; presumably a dependency of vit-keras — confirm
!pip install tensorflow-addons

<a id='4.1'></a>
### Get Vision Transformer Model

In [None]:
from vit_keras import vit

# Download the model
# NOTE(review): with include_top=False the `activation` and `classes`
# arguments presumably have no effect — confirm against the vit-keras docs.
vit_model = vit.vit_b16(
        image_size=224,
        activation='softmax',
        pretrained=True,
        include_top=False,
        pretrained_top=False,
        classes=2)

# Freeze every layer of the backbone so only the layers added on top are trained
for layer in vit_model.layers:
    layer.trainable = False

<a id='4.2'></a>
### Define Vision Transformer Model

In [None]:
def vit_b16_model(output_units=24):
    '''
    Builds the ViT-B/16 classifier: the module-level `vit_model` backbone
    followed by dropout and a dense head.

    @params
        - output_units: (int) -> number of softmax outputs (default=24, the
          previously hard-coded value); must equal the one-hot label depth
          produced by the input pipeline

    @returns
        - model: (tf.keras.Sequential) -> uncompiled model taking inputs of
          shape CFG.IMAGE_SIZE
    '''
    # Seeded initializer shared by every dense layer of the head
    initializer = tf.keras.initializers.GlorotNormal(seed=CFG.SEED)

    vit_b16_sequential = Sequential([
        layers.Input(shape=CFG.IMAGE_SIZE, dtype=tf.float32, name='input_image'),
        vit_model,
        layers.Dropout(0.2),
        layers.Dense(512, activation='relu', kernel_initializer=initializer),
        layers.Dense(256, activation='relu', kernel_initializer=initializer),
        layers.Dense(output_units, dtype=tf.float32, activation='softmax', kernel_initializer=initializer)
    ], name='vit_b16_sequential_model')

    return vit_b16_sequential

In [None]:
# Generate Model (uncompiled; compiled in the training cell)
model_vit_b16 = vit_b16_model()

# Generate Summary of the Model
model_vit_b16.summary()

In [None]:
# Explore model visually (layer graph with tensor shapes)
plot_model(
    model_vit_b16, dpi=60,
    show_shapes=True
)

<a id='4.3'></a>
### Train Vision Transformer Model

In [None]:
# Re-seed TensorFlow so this run starts from the same seed as the EfficientNet run
tf.random.set_seed(CFG.SEED)

# Compile the model (same loss, optimizer settings and metrics as EfficientNet)
model_vit_b16.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=METRICS
)

# Train the model
print(f'Training {model_vit_b16.name}.')
print(f'Train on {len(train_df)} samples, validate on {len(val_df)} samples.')
print('----------------------------------')

vit_b16_history = train_model(
    model_vit_b16, CFG.EPOCHS, CALLBACKS,
    train_ds, val_ds,
    shuffling=False
)

In [None]:
# Evaluate the model on the test pipeline (loss plus the compiled metrics)
vit_b16_evaluation = model_vit_b16.evaluate(test_ds)

In [None]:
# Generate model probabilities and associated predictions
# Predicted class id = column with the highest probability in each row.
vit_b16_test_probabilities = model_vit_b16.predict(test_ds, verbose=1)
vit_b16_test_predictions = tf.argmax(vit_b16_test_probabilities, axis=1)

In [None]:
# Compute average probabilities
# Element-wise mean of the two models' probability arrays (axis 0 = model axis).
avg_probabilities = np.mean([
    efficientnet_v2_test_probabilities,
    vit_b16_test_probabilities], axis=0)

# Compute average ensemble predictions
avg_predictions = tf.argmax(avg_probabilities, axis=1)

In [None]:
# Define weights and list model probabilities
# weights[i] applies to model_probabilities[i]: 0.6 EfficientNet, 0.4 ViT.
weights = [0.6, 0.4]
model_probabilities = [efficientnet_v2_test_probabilities, vit_b16_test_probabilities]

# Compute weighted average probabilities
weighted_avg_probabilities = sum([w * p for w, p in zip(weights, model_probabilities)])

# Compute weighted average ensemble predictions
weighted_avg_predictions = tf.argmax(weighted_avg_probabilities, axis=1)

In [None]:
# List model probabilities
model_probabilities = [efficientnet_v2_test_probabilities, vit_b16_test_probabilities]

# Compute geometric mean probabilities: the n-th root of the element-wise
# product over all n listed models. np.prod over the model axis keeps the
# product and the exponent in step if another model is added to the list.
geometric_mean_probabilities = np.power(np.prod(model_probabilities, axis=0),
                                        1/len(model_probabilities))

# Compute geometric mean ensemble predictions
geometric_mean_predictions = tf.argmax(geometric_mean_probabilities, axis=1)

<a id='6.1'></a>
### Plot Model Histories

In [None]:
def plot_training_curves(history):
    '''
    Plots training vs. validation loss and accuracy side by side.

    @params
        - history: object with a `.history` dict containing the keys
          'loss', 'val_loss', 'accuracy' and 'val_accuracy'
    '''
    records = history.history

    # Read all four series up front, in the same order as they are plotted
    loss = np.array(records['loss'])
    val_loss = np.array(records['val_loss'])
    accuracy = np.array(records['accuracy'])
    val_accuracy = np.array(records['val_accuracy'])

    epochs = range(len(records['loss']))

    # (training series, validation series, training label, validation label, title)
    panels = [
        (loss, val_loss, 'training_loss', 'val_loss', 'Loss (Lower Means Better)'),
        (accuracy, val_accuracy, 'training_accuracy', 'val_accuracy', 'Accuracy (Higher Means Better)'),
    ]

    fig, axes = plt.subplots(1, 2, figsize=(20, 10))

    for ax, (train_values, val_values, train_label, val_label, title) in zip(axes, panels):
        ax.plot(epochs, train_values, label=train_label, marker='o')
        ax.plot(epochs, val_values, label=val_label, marker='o')

        # Shade the gap between the curves: C0 where training is above
        # validation, C1 where it is below
        ax.fill_between(epochs, train_values, val_values, where=(train_values > val_values),
                        color='C0', alpha=0.3, interpolate=True)
        ax.fill_between(epochs, train_values, val_values, where=(train_values < val_values),
                        color='C1', alpha=0.3, interpolate=True)

        ax.set_title(title, fontsize=16)
        ax.set_xlabel('Epochs', fontsize=12)
        ax.legend()

In [None]:
# Plot EfficientNet V2 B0 model training history
plot_training_curves(efficientnet_v2_history)

In [None]:
# Plot VIT B16 model training history
plot_training_curves(vit_b16_history)

In [None]:
def plot_confusion_matrix(y_true, y_pred, classes='auto', figsize=(10, 10), text_size=12):
    '''
    Draws the confusion matrix of `y_pred` against `y_true` as an annotated heatmap.

    @params
        - y_true: (array-like) -> true class ids
        - y_pred: (array-like) -> predicted class ids
        - classes: tick labels used on both axes (default='auto')
        - figsize: (tuple) -> figure size in inches (default=(10, 10))
        - text_size: (int) -> font size of the cell annotations (default=12)
    '''
    matrix = confusion_matrix(y_true, y_pred)

    plt.figure(figsize=figsize)

    # Heatmap appearance, kept in one place
    heatmap_options = dict(
        annot=True, cmap='Greens',
        annot_kws={"size": text_size}, fmt='g',
        linewidths=0.5, linecolor='black', clip_on=False,
        xticklabels=classes, yticklabels=classes)
    heatmap_ax = sns.heatmap(matrix, **heatmap_options)

    # Title and axis labels
    heatmap_ax.set_title('Confusion Matrix', fontsize=24)
    heatmap_ax.set_xlabel('Predicted Label', fontsize=20)
    heatmap_ax.set_ylabel('True Label', fontsize=20)
    plt.yticks(rotation=0)

    plt.show()

### EfficientNet V2 B0 Confusion Matrix

In [None]:
# True test labels vs. EfficientNet V2 B0 predictions
plot_confusion_matrix(
    test_df.label_encoded,
    efficientnet_v2_test_predictions,
    figsize=(14, 10),
    classes=class_names)

### ViT-b16 Confusion Matrix

In [None]:
# True test labels vs. ViT-b16 predictions
plot_confusion_matrix(
    test_df.label_encoded,
    vit_b16_test_predictions,
    figsize=(14, 10),
    classes=class_names)

### Average Ensemble Confusion Matrix

In [None]:
# True test labels vs. average-ensemble predictions
plot_confusion_matrix(
    test_df.label_encoded,
    avg_predictions,
    figsize=(14, 10),
    classes=class_names)

### Weighted Average Ensemble Confusion Matrix

In [None]:
# True test labels vs. weighted-average-ensemble predictions
plot_confusion_matrix(
    test_df.label_encoded,
    weighted_avg_predictions,
    figsize=(14, 10),
    classes=class_names)

### Geometric Mean Ensemble Confusion Matrix

In [None]:
# True test labels vs. geometric-mean-ensemble predictions
plot_confusion_matrix(
    test_df.label_encoded,
    geometric_mean_predictions,
    figsize=(14, 10),
    classes=class_names)

<a id='6.3'></a>
### View Classification Reports

In [None]:
# EfficientNet V2 Report (per-class precision / recall / F1 on the test split)
print(classification_report(test_df.label_encoded,
                            efficientnet_v2_test_predictions,
                            target_names=class_names))

In [None]:
# ViT-b16 Report (per-class precision / recall / F1 on the test split)
print(classification_report(test_df.label_encoded,
                            vit_b16_test_predictions,
                            target_names=class_names))

In [None]:
# Average Ensemble Report (per-class precision / recall / F1 on the test split)
print(classification_report(test_df.label_encoded,
                            avg_predictions,
                            target_names=class_names))

In [None]:
# Weighted Average Ensemble Report (per-class precision / recall / F1 on the test split)
print(classification_report(test_df.label_encoded,
                            weighted_avg_predictions,
                            target_names=class_names))

In [None]:
# Geometric Mean Ensemble Report (per-class precision / recall / F1 on the test split)
print(classification_report(test_df.label_encoded,
                            geometric_mean_predictions,
                            target_names=class_names))

<a id='6.4'></a>
### Record Classification Metrics

In [None]:
# Keeps only the first 4 probability columns of the EfficientNet output.
# NOTE(review): this variable is not read by any later visible cell — it looks
# like a leftover experiment; confirm and remove.
y_probabilities = efficientnet_v2_test_probabilities[:, :4]

In [None]:
from sklearn.metrics import (accuracy_score, precision_recall_fscore_support,
                             matthews_corrcoef, top_k_accuracy_score)

def generate_preformance_scores(y_true, y_pred, y_probabilities):
    '''
    Computes, prints and returns a set of classification metrics.

    @params
        - y_true: (array-like of int) -> true class ids
        - y_pred: (array-like of int) -> predicted class ids
        - y_probabilities: (2-D array-like) -> predicted scores, one column per
          model output; column i is treated as the score of class id i

    @returns
        - preformance_scores: (dict) -> keys 'accuracy_score', 'top_3_accuracy',
          'precision_score', 'recall_score', 'f1_score' and 'matthews_corrcoef'
    '''
    # Tell scikit-learn explicitly which class id each score column stands for.
    # This replaces the former "keep only the first 4 columns" workaround: the
    # top-3 ranking now considers every model output, and y_true is allowed to
    # contain fewer distinct classes than there are score columns.
    y_probabilities = np.asarray(y_probabilities)
    score_labels = np.arange(y_probabilities.shape[1])

    model_accuracy = accuracy_score(y_true, y_pred)
    top_3_accuracy = top_k_accuracy_score(y_true, y_probabilities, k=3, labels=score_labels)
    # Precision / recall / F1 are averaged over classes, weighted by class support
    model_precision, model_recall, model_f1, _ = precision_recall_fscore_support(y_true,
                                                                                 y_pred,
                                                                                 average="weighted")
    model_matthews_corrcoef = matthews_corrcoef(y_true, y_pred)


    print('=============================================')
    print(f'\nPerformance Metrics:\n')
    print('=============================================')
    print(f'accuracy_score:\t\t{model_accuracy:.4f}\n')
    print('_____________________________________________')
    print(f'top_3_accuracy_score:\t{top_3_accuracy:.4f}\n')
    print('_____________________________________________')
    print(f'precision_score:\t{model_precision:.4f}\n')
    print('_____________________________________________')
    print(f'recall_score:\t\t{model_recall:.4f}\n')
    print('_____________________________________________')
    print(f'f1_score:\t\t{model_f1:.4f}\n')
    print('_____________________________________________')
    print(f'matthews_corrcoef:\t{model_matthews_corrcoef:.4f}\n')
    print('=============================================')

    preformance_scores = {
        'accuracy_score': model_accuracy,
        'top_3_accuracy': top_3_accuracy,
        'precision_score': model_precision,
        'recall_score': model_recall,
        'f1_score': model_f1,
        'matthews_corrcoef': model_matthews_corrcoef
    }
    return preformance_scores


In [None]:
# Generate EfficientNet model performance scores
efficientnet_v2_performance = generate_preformance_scores(test_df.label_encoded,
                                                          efficientnet_v2_test_predictions,
                                                          efficientnet_v2_test_probabilities)

In [None]:
# Generate ViT model performance scores (test split)
vit_b16_performance = generate_preformance_scores(test_df.label_encoded,
                                                  vit_b16_test_predictions,
                                                  vit_b16_test_probabilities)

In [None]:
# Generate Average Ensemble performance scores (test split)
avg_ensemble_performance = generate_preformance_scores(test_df.label_encoded,
                                                       avg_predictions,
                                                       avg_probabilities)

In [None]:
# Generate Weighted Average Ensemble performance scores (test split)
# NOTE(review): the variable name is misspelled ("weigthed"); later cells use
# the same spelling, so any rename must be done everywhere at once.
weigthed_avg_ensemble_performance = generate_preformance_scores(test_df.label_encoded,
                                                                weighted_avg_predictions,
                                                                weighted_avg_probabilities)

In [None]:
# Generate Geometric Mean Ensemble performance scores (test split)
geometric_mean_ensemble_performance = generate_preformance_scores(test_df.label_encoded,
                                                                geometric_mean_predictions,
                                                                geometric_mean_probabilities)

In [None]:
# Record metrics with DataFrame
# Transposed so that each row is a model/ensemble and each column a metric.
performance_df = pd.DataFrame({
    'model_efficientnet_v2': efficientnet_v2_performance,
    'model_vit_b16': vit_b16_performance,
    'average_ensemble': avg_ensemble_performance,
    'weigthed_average_ensemble': weigthed_avg_ensemble_performance,
    'geometric_mean_ensemble': geometric_mean_ensemble_performance
}).T

# View Performance DataFrame
performance_df

In [None]:
# Grouped bar chart of all metrics per model, with the legend anchored at the
# top-right corner of the axes
performance_df.plot(kind="bar", figsize=(10, 8)).legend(bbox_to_anchor=(1.0, 1.0))
plt.title('Performance Metrics', fontsize=20);

In [None]:
def compute_inference_time(model, ds, sample_count, inference_runs=5):
    '''
    Times `model.predict(ds)` over several runs and reports the averages.

    @params
        - model: object exposing `predict(ds, verbose=0)` and `name`
        - ds: dataset handed to `model.predict`
        - sample_count: (int) -> number of samples in `ds`, used for the per-sample rate
        - inference_runs: (int) -> number of timed runs (default=5)

    @returns
        - (avg_inference_time, avg_inference_rate): (tuple of float) -> mean
          seconds per full run and mean seconds per sample
    '''
    def _mean(values):
        return sum(values) / len(values)

    def _half_range(values):
        # Reported uncertainty: half the spread between slowest and fastest run
        return (max(values) - min(values)) / 2

    run_times = []
    run_rates = []

    for _ in range(inference_runs):
        started = time.perf_counter()
        model.predict(ds, verbose=0)
        finished = time.perf_counter()

        elapsed = finished - started
        run_times.append(elapsed)
        run_rates.append(elapsed / sample_count)

    avg_inference_time = _mean(run_times)
    time_uncertainty = _half_range(run_times)

    avg_inference_rate = _mean(run_rates)
    rate_uncertainty = _half_range(run_rates)

    separator = '===================================================='
    print(separator)
    print(f'Model:\t\t{model.name}\n')
    print(f'Inference Time:\t{round(avg_inference_time, 6)}s \xB1 {round(time_uncertainty, 6)}s')
    print(f'Inference Rate:\t{round(avg_inference_rate, 6)}s/sample \xB1 {round(rate_uncertainty, 6)}s/sample')
    print(separator)

    return avg_inference_time, avg_inference_rate

In [None]:
# (average seconds per run, average seconds per sample) for EfficientNet on the test set
efficientnet_v2_inference = compute_inference_time(model_efficientnet_v2, test_ds, len(test_df))

In [None]:
# (average seconds per run, average seconds per sample) for ViT-b16 on the test set
vit_b16_inference = compute_inference_time(model_vit_b16, test_ds, len(test_df))

In [None]:
# We assume all ensembles to have the total inference of all models
# (element 0 = summed time per run, element 1 = summed time per sample)
ensemble_inference = (
    efficientnet_v2_inference[0] + vit_b16_inference[0],
    efficientnet_v2_inference[1] + vit_b16_inference[1]
)

print('====================================================')
print(f'Model:\t\tAverage/Weighted Average Ensemble\n')
print(f'Inference Time:\t{ensemble_inference[0]:.6f}s')
print(f'Inference Rate:\t{ensemble_inference[1]:.6f}s/sample')
print('====================================================')

In [None]:
# Get MCC for each model
efficientnet_mcc = efficientnet_v2_performance["matthews_corrcoef"]
vit_mcc = vit_b16_performance["matthews_corrcoef"]
avg_mcc = avg_ensemble_performance['matthews_corrcoef']
weighted_mcc = weigthed_avg_ensemble_performance['matthews_corrcoef']
geometric_mcc = geometric_mean_ensemble_performance['matthews_corrcoef']

# Coordinates of the hypothetical ideal model
ideal_inference_rate = 0.0001 # Desired inference time (Exaggerated)
ideal_mcc = 1 # Max MCC

# (inference rate, MCC, legend label) for every plotted model
plotted_models = [
    (efficientnet_v2_inference[1], efficientnet_mcc, model_efficientnet_v2.name),
    (vit_b16_inference[1], vit_mcc, model_vit_b16.name),
    (ensemble_inference[1], avg_mcc, 'Average Ensemble'),
    (ensemble_inference[1], weighted_mcc, 'Weighted Average Ensemble'),
    (ensemble_inference[1], geometric_mcc, 'Geometric Mean Ensemble'),
]

# Scatter plot inference rate against MCC
plt.figure(figsize=(12, 7))
for model_rate, model_mcc, model_label in plotted_models:
    plt.scatter(model_rate, model_mcc, label=model_label)

# Ideal model marker, then dotted lines from it to each model
plt.scatter(ideal_inference_rate, ideal_mcc, label="Ideal Hypothetical Model", marker='s')
for model_rate, model_mcc, model_label in plotted_models:
    plt.plot([ideal_inference_rate, model_rate], [ideal_mcc, model_mcc], ':')

plt.legend()
plt.title("Trade-Offs: Inference Rate vs. Matthews Correlation Coefficient", fontsize=20)
plt.xlabel("Inference Rate (s/sample)", fontsize=16)
plt.ylabel("Matthews Correlation Coefficient", fontsize=16);

In [None]:
def dist(x1, x2, y1, y2):
    '''
    Euclidean distance between the points (x1, y1) and (x2, y2).

    Note the argument order: both x values come first, then both y values.
    '''
    delta_x = x2 - x1
    delta_y = y2 - y1
    return np.sqrt(np.square(delta_x) + np.square(delta_y))

# Names, MCC scores and inference rates; the three lists are index-aligned
model_names = [
    model_efficientnet_v2.name,
    model_vit_b16.name,
    'Average Ensemble',
    'Weighted Average Ensemble',
    'Geometric Mean Ensemble'
]
model_scores = [efficientnet_mcc, vit_mcc, avg_mcc, weighted_mcc, geometric_mcc]
model_rates = [
    efficientnet_v2_inference[1],
    vit_b16_inference[1],
    ensemble_inference[1],
    ensemble_inference[1],
    ensemble_inference[1]
]

# Trade-off = distance of each model from the ideal (rate, MCC) point
trade_offs = []
for inference_rate, score in zip(model_rates, model_scores):
    trade_offs.append(dist(ideal_inference_rate, inference_rate, ideal_mcc, score))

# View model trade-off scores
print('Trade-Off Score: Inference Rate vs. MCC')
for name, inference_rate, score, trade in zip(model_names, model_rates, model_scores, trade_offs):
    print('---------------------------------------------------------')
    print(f'Model: {name}\n\nInference Rate: {inference_rate:.5f} | MCC: {score:.4f} | Trade-Off: {trade:.4f}')

# The smallest distance marks the best trade-off
best_model_trade = min(trade_offs)
best_model_name = model_names[np.argmin(trade_offs)]

print('=========================================================')
print(f'\nBest Optimal Model:\t{best_model_name}\nTrade-Off:\t\t{best_model_trade:.4f}\n')
print('=========================================================')