## Experiment CNN models
Use this to test different CNN models, augmentations, and hyperparameters.

In [None]:
import tensorflow as tf
import keras
import keras_cv

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

import os

In [None]:
# Seed the NumPy and TensorFlow random generators with the same fixed value
SEED = 0
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# Load the train / validation / test image folders as batched datasets

TRAIN_DATADIR = "../Dataset/train_directory"
VAL_DATADIR = "../Dataset/val_directory"
TEST_DATADIR = "../Dataset/test_directory"
BATCH_SIZE = 128

# Options shared by all three splits: labels are inferred from the
# sub-directory names and one-hot encoded, images are RGB resized to 224x224.
_COMMON_LOADER_ARGS = dict(
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    color_mode="rgb",
    image_size=(224, 224),
)

# Only the training split is shuffled (with a fixed seed)
train_ds = keras.utils.image_dataset_from_directory(
    TRAIN_DATADIR,
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=0,
    **_COMMON_LOADER_ARGS,
)

val_ds = keras.utils.image_dataset_from_directory(
    VAL_DATADIR,
    batch_size=BATCH_SIZE,
    shuffle=False,
    **_COMMON_LOADER_ARGS,
)

# The test split is read one image per batch
test_ds = keras.utils.image_dataset_from_directory(
    TEST_DATADIR,
    batch_size=1,
    shuffle=False,
    **_COMMON_LOADER_ARGS,
)

In [None]:
# Fetch every training file path and its class label into a DataFrame

data = []

# Traverse each class directory in sorted order so that the order of first
# appearance of each label matches the alphanumeric class order that
# image_dataset_from_directory uses when class_names is None.
for class_dir in sorted(os.listdir(TRAIN_DATADIR)):
    class_path = os.path.join(TRAIN_DATADIR, class_dir)
    # Skip stray files and hidden entries (e.g. .DS_Store, .ipynb_checkpoints):
    # calling os.listdir() on a plain file raises NotADirectoryError.
    if class_dir.startswith(".") or not os.path.isdir(class_path):
        continue
    for img in sorted(os.listdir(class_path)):
        data.append((os.path.join(class_path, img), class_dir))

df = pd.DataFrame(data, columns=['filepath', 'label'])

df.head()

In [None]:
# Look at the class weights
# compute_class_weight returns one weight per entry of `classes`, in that
# order. Sort the labels so that position i corresponds to class index i of
# the datasets (image_dataset_from_directory orders classes alphanumerically);
# `weights` is later passed to the loss as per-class weights, so a different
# order would silently assign weights to the wrong classes.
class_labels = np.sort(df['label'].unique())
label_to_index = {label: idx for idx, label in enumerate(class_labels)}
weights = compute_class_weight(class_weight='balanced', classes=class_labels, y=df['label'])

# Mapping class index -> weight
class_weights = dict(zip(label_to_index.values(), weights))
class_weights

In [None]:
# Preview a 3x3 grid of images from the first training batch
# (pixel values are displayed on a 0-255 scale)
image_batch = next(iter(train_ds.take(1)))[0]

keras_cv.visualization.plot_image_gallery(
    image_batch, rows=3, cols=3, value_range=(0, 255), show=True
)

### Augmentations
- RandomFlip (horizontal and vertical)
- RandomCropAndResize
- Normalize pixel values to [0, 1]

The other augmentations are more experimental and can be skipped or added to test their effect.

In [None]:
# Randomly flip the image horizontally and vertically
import keras_cv.layers.preprocessing


random_flip = keras_cv.layers.RandomFlip(mode="horizontal_and_vertical")

# Randomly crop 80-100% of the image area (aspect ratio between 0.9 and 1.1)
# and resize the crop back to 224x224
crop_and_resize = keras_cv.layers.RandomCropAndResize(
    target_size=(224, 224),
    crop_area_factor=(0.8, 1.0),
    aspect_ratio_factor=(0.9, 1.1)
)

# Experimental: apply 3 random augmentations to every image (rate=1.0).
# value_range=(0, 1) means the layer expects images already scaled to [0, 1].
rand_augment = keras_cv.layers.RandAugment(
    augmentations_per_image=3,
    value_range=(0, 1),
    magnitude=0.5,
    magnitude_stddev=0.2,
    rate=1.0
)

# Experimental: cut parts of the image and paste them on other images
cut_mix = keras_cv.layers.preprocessing.CutMix()

# Experimental: mix two images together
mix_up = keras_cv.layers.preprocessing.MixUp()

# Randomly choose between CutMix and MixUp, one choice per batch
cut_mix_or_mix_up = keras_cv.layers.RandomChoice([cut_mix, mix_up], batchwise=True)

# Define the augmentation function
def augmenter_train(images, labels):
    """Scale a training batch to [0, 1] and apply the random augmentations.

    Args:
        images: Batch of images; divided by 255 after casting to float32.
        labels: Labels of the batch, passed through untouched.

    Returns:
        The tuple ``(augmented_images, labels)``.
    """
    scaled = tf.cast(images, tf.float32) / 255.0
    flipped = random_flip(scaled, training=True)
    cropped = crop_and_resize(flipped, training=True)
    # The experimental layers `rand_augment` and `cut_mix_or_mix_up` are
    # intentionally not applied here; add them after the crop to test them.
    return cropped, labels

def augmenter_val(images, labels):
    """Scale an evaluation batch to [0, 1] without any random augmentation.

    Args:
        images: Batch of images; divided by 255 after casting to float32.
        labels: Labels of the batch, passed through untouched.

    Returns:
        The tuple ``(scaled_images, labels)``.
    """
    return tf.cast(images, tf.float32) / 255.0, labels

In [None]:
# Attach the preprocessing functions to each split. The train and validation
# pipelines are also prefetched; the test pipeline is only mapped.
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.map(augmenter_train, num_parallel_calls=AUTOTUNE)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

val_ds = val_ds.map(augmenter_val, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

test_ds = test_ds.map(augmenter_val, num_parallel_calls=AUTOTUNE)



In [None]:
# Preview a 3x3 grid from the first batch of the mapped training pipeline
# (pixel values are displayed on a 0-1 scale)
first_batch = next(iter(train_ds.take(1)))
image_batch = first_batch[0]

keras_cv.visualization.plot_image_gallery(
    image_batch, rows=3, cols=3, value_range=(0, 1), show=True
)

Super-Convergence: Very Fast Training of Neural Networks Using Large Learning Rates: https://arxiv.org/abs/1708.07120 <br>
CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features: https://arxiv.org/abs/1905.04899 <br>
mixup: Beyond Empirical Risk Minimization: https://arxiv.org/abs/1710.09412 <br>
https://github.com/ageron/handson-ml3/blob/main/11_training_deep_neural_networks.ipynb 


In [None]:
import math

class ExponentialLearningRate(tf.keras.callbacks.Callback):
    """Callback that multiplies the learning rate by a constant after every batch.

    While training it records, for every batch, the learning rate that was
    used (``rates``) and the loss of that batch (``losses``), so the two can
    be plotted against each other.

    Args:
        factor: Multiplier applied to the optimizer's learning rate at the
            end of each batch.
    """

    def __init__(self, factor):
        # Let the base callback initialise its own state
        super().__init__()
        self.factor = factor
        self.rates = []
        self.losses = []
        # Defined here too so the attribute exists even if a batch hook
        # runs before on_epoch_begin
        self.sum_of_epoch_losses = 0

    def on_epoch_begin(self, epoch, logs=None):
        # The running sum is tracked per epoch, so restart it
        self.sum_of_epoch_losses = 0

    def on_batch_end(self, batch, logs=None):
        # logs["loss"] is treated as the mean loss over the batches seen so
        # far in this epoch; multiplying by the batch count gives the running
        # sum, and the difference with the previous sum is this batch's loss.
        mean_epoch_loss = logs["loss"]
        new_sum_of_epoch_losses = mean_epoch_loss * (batch + 1)
        batch_loss = new_sum_of_epoch_losses - self.sum_of_epoch_losses
        self.sum_of_epoch_losses = new_sum_of_epoch_losses
        self.rates.append(self.model.optimizer.learning_rate.numpy())
        self.losses.append(batch_loss)
        # Grow the learning rate geometrically for the next batch
        new_lr = self.model.optimizer.learning_rate * self.factor
        self.model.optimizer.learning_rate.assign(new_lr)
        
def find_learning_rate(model, dataset, epochs=1, min_rate=1e-4, max_rate=1):
    """Run a learning-rate range test and return the recorded curve.

    The model is trained for ``epochs`` epochs while the learning rate grows
    exponentially from ``min_rate`` to ``max_rate``, one step per batch.
    The model's weights and the optimizer's learning rate are restored
    afterwards, even if training is interrupted.

    Args:
        model: Compiled Keras model.
        dataset: Batched ``tf.data.Dataset`` with a known, finite, non-zero
            number of batches.
        epochs: Number of passes over ``dataset``.
        min_rate: Learning rate used for the first batch.
        max_rate: Learning rate reached after the last batch.

    Returns:
        Tuple ``(rates, losses)``: the learning rate and loss of each batch.

    Raises:
        ValueError: If the number of batches in ``dataset`` is unknown,
            infinite or zero.
    """
    num_batches = int(tf.data.experimental.cardinality(dataset).numpy())
    # cardinality() reports negative sentinels for infinite/unknown datasets
    if num_batches <= 0:
        raise ValueError(
            "find_learning_rate needs a dataset with a known, finite, "
            f"non-zero number of batches (cardinality={num_batches})"
        )
    # The callback updates the rate once per batch, in every epoch
    iterations = num_batches * epochs
    factor = (max_rate / min_rate) ** (1 / iterations)

    init_weights = model.get_weights()
    init_lr = model.optimizer.learning_rate.numpy()
    model.optimizer.learning_rate.assign(min_rate)
    exp_lr = ExponentialLearningRate(factor)
    try:
        model.fit(dataset, epochs=epochs, callbacks=[exp_lr])
    finally:
        # Leave the model as it was found, whatever happened during the sweep
        model.optimizer.learning_rate.assign(init_lr)
        model.set_weights(init_weights)
    return exp_lr.rates, exp_lr.losses

def plot_lr_vs_loss(rates, losses):
    """Plot loss against learning rate on a logarithmic x-axis.

    A horizontal line marks the lowest loss, and the y-axis runs from 0 to
    the first loss plus the lowest loss.

    Args:
        rates: Learning rates, one per recorded batch.
        losses: Losses, one per recorded batch.
    """
    plt.plot(rates, losses, "b")
    plt.gca().set_xscale('log')

    first_loss = losses[0]
    lowest_loss = min(losses)
    lowest_rate, highest_rate = min(rates), max(rates)

    plt.hlines(lowest_loss, lowest_rate, highest_rate, color="k")
    plt.axis([lowest_rate, highest_rate, 0, first_loss + lowest_loss])
    plt.xlabel("Learning rate")
    plt.ylabel("Loss")
    plt.grid()

In [None]:
class OneCycleScheduler(tf.keras.callbacks.Callback):
    """Callback implementing a piecewise-linear one-cycle learning-rate schedule.

    The rate rises linearly from ``start_lr`` to ``max_lr`` over the first
    half of the cycle, falls back to ``start_lr`` over the second half, and
    then drops linearly to ``last_lr`` over the final ``last_iterations``
    batches. Once ``iterations`` batches have been seen the rate is held at
    ``last_lr``.

    Args:
        iterations: Total number of training batches the schedule spans.
        max_lr: Peak learning rate.
        start_lr: Initial learning rate; defaults to ``max_lr / 10``.
        last_iterations: Length of the final phase in batches; defaults to
            ``iterations // 10 + 1``.
        last_lr: Final learning rate; defaults to ``start_lr / 1000``.
    """

    def __init__(self, iterations, max_lr=1e-3, start_lr=None,
                 last_iterations=None, last_lr=None):
        # Let the base callback initialise its own state
        super().__init__()
        self.iterations = iterations
        self.max_lr = max_lr
        self.start_lr = start_lr or max_lr / 10
        self.last_iterations = last_iterations or iterations // 10 + 1
        # Length of each of the two symmetric ramp phases
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_lr = last_lr or self.start_lr / 1000
        # Number of batches seen so far, counted across epochs
        self.iteration = 0

    def _interpolate(self, iter1, iter2, lr1, lr2):
        """Return the rate at the current iteration on the line (iter1, lr1)-(iter2, lr2)."""
        return (lr2 - lr1) * (self.iteration - iter1) / (iter2 - iter1) + lr1

    def on_batch_begin(self, batch, logs=None):
        if self.iteration >= self.iterations:
            # Training ran past the planned schedule: hold the final rate
            # instead of extrapolating the last segment below it (which
            # would eventually produce a negative learning rate).
            lr = self.last_lr
        elif self.iteration < self.half_iteration:
            lr = self._interpolate(0, self.half_iteration, self.start_lr,
                                   self.max_lr)
        elif self.iteration < 2 * self.half_iteration:
            lr = self._interpolate(self.half_iteration, 2 * self.half_iteration,
                                   self.max_lr, self.start_lr)
        else:
            lr = self._interpolate(2 * self.half_iteration, self.iterations,
                                   self.start_lr, self.last_lr)
        self.iteration += 1
        self.model.optimizer.learning_rate.assign(lr)

## ResNet-18 CNN


In [None]:
from functools import partial
from keras.saving import register_keras_serializable

# Conv2D factory with the defaults used by this network: 3x3 kernel, stride 1,
# "same" padding, He-normal initialisation and no bias term. Call sites
# override kernel_size / strides through keyword arguments where needed.
DefaultConv2D = partial(tf.keras.layers.Conv2D, kernel_size=3, strides=1,
                        padding="same", kernel_initializer="he_normal",
                        use_bias=False)


@register_keras_serializable(package='Custom', name='ResidualUnit')
class ResidualUnit(tf.keras.layers.Layer):
    """Residual block: two 3x3 conv + batch-norm stages plus a skip connection.

    The main path is conv -> batch norm -> activation -> conv -> batch norm.
    Its output is added to the skip path and passed through the activation.
    When ``strides > 1`` the skip path is a strided 1x1 convolution followed
    by batch normalization; otherwise the input is added unchanged.

    Args:
        filters: Number of filters of every convolution in the unit.
        strides: Stride of the first main-path convolution and of the
            skip-path convolution.
        activation: Activation identifier resolved through
            ``tf.keras.activations.get``.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """
    def __init__(self, filters, strides=1, activation="relu", **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can reproduce them
        self.strides = strides
        self.filters = filters
        self.activation = tf.keras.activations.get(activation)
        # Main path; only the first convolution applies the stride
        self.main_layers = [
            DefaultConv2D(filters, strides=strides),
            tf.keras.layers.BatchNormalization(),
            self.activation,
            DefaultConv2D(filters),
            tf.keras.layers.BatchNormalization()
        ]
        # Skip path: empty (identity) unless the unit downsamples
        # NOTE(review): with strides == 1 the input is added as-is, so it
        # presumably must already have `filters` channels - confirm at call sites.
        self.skip_layers = []
        if strides > 1:
            self.skip_layers = [
                DefaultConv2D(filters, kernel_size=1, strides=strides),
                tf.keras.layers.BatchNormalization()
            ]
    def build(self, input_shape):
        """Delegate to the base class; no extra state is created here."""
        super().build(input_shape)

    def call(self, inputs):
        """Return activation(main_path(inputs) + skip_path(inputs))."""
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)
        return self.activation(Z + skip_Z)

    def get_config(self):
        """Return the base config extended with this unit's constructor arguments."""
        config = super().get_config()
        config.update({
                "filters": self.filters,
                "strides": self.strides,
                "activation": tf.keras.activations.serialize(self.activation)
            })
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the unit by passing the config entries to the constructor."""
        return cls(**config)

In [None]:
# Stem: 7x7 stride-2 convolution, batch norm, ReLU, then a 3x3 stride-2 max-pool
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(224, 224, 3)),
    DefaultConv2D(64, kernel_size=7, strides=2),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation("relu"),
    tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same"),
])

# Four stages of two residual units each
stage_filters = [64, 64, 128, 128, 256, 256, 512, 512]
prev_filters = 64
for filters in stage_filters:
    # Downsample with stride 2 whenever the number of filters changes
    strides = 2 if filters != prev_filters else 1
    model.add(ResidualUnit(filters, strides=strides))
    prev_filters = filters

# Classification head: global average pooling and an 11-way softmax
model.add(tf.keras.layers.GlobalAvgPool2D())
model.add(tf.keras.layers.Dense(11, activation="softmax"))

# The per-class weights are passed as the loss's first argument (`alpha`)
loss = keras.losses.CategoricalFocalCrossentropy(weights)

optimizer = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, weight_decay=5e-4)
model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])

model.summary()

In [None]:
# Run the learning-rate range test for one epoch on the training data and
# plot the per-batch loss against the learning rate
rates, losses = find_learning_rate(model, train_ds, epochs=1)
plot_lr_vs_loss(rates, losses)

In [None]:
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
import datetime

EPOCHS = 75

# Compile again so training starts with a newly created optimizer and loss
model.compile(
    loss=keras.losses.CategoricalFocalCrossentropy(weights),
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, weight_decay=5e-4),
    metrics=["accuracy"],
)

# One TensorBoard log directory per run, named after the start time
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = os.path.join("logs", run_stamp)

# The one-cycle schedule spans every batch of every epoch
onecycle = OneCycleScheduler(len(train_ds) * EPOCHS, max_lr=1e-3)

# Callbacks: early stopping on validation loss, checkpoint of the best
# validation accuracy, TensorBoard logging, and the learning-rate schedule
callbacks = [
    EarlyStopping(monitor='val_loss', patience=20, verbose=1),
    ModelCheckpoint('../models/best_model_18.keras', monitor='val_accuracy', save_best_only=True, verbose=1),
    TensorBoard(log_dir=log_dir, histogram_freq=1),
    onecycle,
]

history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS, callbacks=callbacks)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

def plot_learning_curves(history, start_epoch=1):
    """
    Plot training and validation loss and accuracy curves side by side.

    Parameters:
    - history: Training history (output from the model's fit method).
    - start_epoch: First epoch (1-based) to include in the plot. Default is 1
      (i.e., plot every epoch).
    """
    # One row per epoch; drop the rows before the requested starting epoch
    curves = pd.DataFrame(history.history).iloc[start_epoch-1:]

    # Seaborn styling and a wide figure holding both panels
    sns.set(rc={'axes.facecolor': '#f0f0fc'}, style='darkgrid')
    plt.figure(figsize=(15,6))

    # (train column, validation column, train label, validation label, title)
    panels = (
        ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Loss Evolution'),
        ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Accuracy Evolution'),
    )
    for position, (train_col, val_col, train_label, val_label, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        sns.lineplot(x=curves.index, y=curves[train_col], color='royalblue', label=train_label)
        sns.lineplot(x=curves.index, y=curves[val_col], color='orangered', linestyle='--', label=val_label)
        plt.title(title)

    plt.show()

In [None]:
# Plot the loss and accuracy curves starting from the first epoch
plot_learning_curves(history, start_epoch=1)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
from matplotlib.colors import LinearSegmentedColormap
def evaluate_model_performance(model, test_ds, class_labels):
    """
    Evaluate the model on a dataset: print the classification report and
    plot the confusion matrix.

    Parameters:
    - model: The trained model.
    - test_ds: Batched dataset yielding (images, one-hot labels). It is
      iterated twice (once for the labels, once for the predictions), so it
      must yield its elements in the same order on every pass.
    - class_labels: List of class names, ordered by class index.

    Returns:
    - None. The predicted indices and the report are printed and the
      confusion matrix is shown as a heatmap.
    """

    # Collect the true class indices. Taking the argmax along axis 1 handles
    # batches of any size, not only batches of a single image.
    true_labels = []
    for _, label_batch in test_ds:
        true_labels.extend(np.argmax(label_batch.numpy(), axis=1))

    # Predict over the whole dataset and take the most probable class
    predictions = model.predict(test_ds)
    predicted_labels = np.argmax(predictions, axis=1)
    print(predicted_labels)

    # Passing the full index range keeps the report and the matrix aligned
    # with class_labels even when a class never occurs in the data.
    class_indices = list(range(len(class_labels)))

    # Classification report
    report = classification_report(
        true_labels, predicted_labels, labels=class_indices, target_names=class_labels
    )
    print(report)
    print('\n')

    # Define a custom colormap
    colors = ["white", "royalblue"]
    cmap_cm = LinearSegmentedColormap.from_list("cmap_cm", colors)

    # Confusion Matrix
    cm = confusion_matrix(true_labels, predicted_labels, labels=class_indices)

    # Plotting confusion matrix using seaborn
    plt.figure(figsize=(8,6))
    sns.heatmap(cm, annot=True, cmap=cmap_cm, fmt='d', xticklabels=class_labels, yticklabels=class_labels)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    plt.show()


In [None]:
# Load the checkpoint written by ModelCheckpoint during training
model = keras.models.load_model('../models/best_model_18.keras')

In [None]:
# Class names in alphabetical order
# NOTE(review): presumably these match the class sub-directory names, so that
# position i is the class index used by the datasets - confirm against the data.
labels = ['battery', 'biological', 'brown-glass', 'cardboard', 'green-glass', 'metal', 'paper', 'plastic', 'textile','vegetation', 'white-glass']

In [None]:
# Classification report and confusion matrix on the test split
evaluate_model_performance(model, test_ds, labels)

In [None]:
# Test model

def test_model(model, test_ds, labels):
    """Evaluate the model on a dataset and report per-class results.

    Prints the accuracy returned by ``model.evaluate``, shows the confusion
    matrix and prints the classification report.

    Args:
        model: Trained model compiled with a single accuracy metric.
        test_ds: Batched dataset yielding (images, one-hot labels).
        labels: Class names, ordered by class index.
    """
    _, test_accuracy = model.evaluate(test_ds)
    print(f"Test accuracy: {test_accuracy * 100:.2f}%")

    # Get the true labels and predicted labels as class indices
    y_true = []
    y_pred = []

    for image_batch, label_batch in test_ds:
        # The labels are one-hot vectors; convert them to class indices so
        # they have the same form as the predictions (mixing the two makes
        # the scikit-learn metrics raise).
        y_true.extend(label_batch.numpy().argmax(axis=1))
        # verbose=0 avoids printing one progress bar per batch
        y_pred.extend(model.predict(image_batch, verbose=0).argmax(axis=1))

    # Passing the full index range keeps the matrix and the report aligned
    # with `labels` even when a class never occurs in the data.
    class_indices = list(range(len(labels)))

    # Compute the confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=class_indices)

    # Display the confusion matrix using ConfusionMatrixDisplay
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
    disp.plot(cmap='Blues', xticks_rotation='vertical')
    plt.show()

    # Display the classification report
    print(classification_report(y_true, y_pred, labels=class_indices, target_names=labels))


In [None]:
# Pass the class names ordered by class index. df['label'].unique() follows
# the directory-listing order, which is not guaranteed to match the class
# indices and would mislabel the confusion matrix and the report.
test_model(model, test_ds, labels)
