## Import the necessary libraries


In [1]:
import os
import random

import keras_cv
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras import layers as tfkl
from PIL import ImageFont
from sklearn.utils import compute_class_weight
from tensorflow import keras as tfk


# Dataset layout on disk: <FOLDER_PATH>/<TRAIN_PATH> and <FOLDER_PATH>/<TEST_PATH>.
FOLDER_PATH, TRAIN_PATH, TEST_PATH = "dataset", "train", "test"

# Report the TensorFlow version, whether it was built with CUDA, and the GPUs
# it can see -- one value per line.
print(
    tf.__version__,
    tf.test.is_built_with_cuda(),
    tf.config.list_physical_devices("GPU"),
    sep="\n",
)

2023-12-06 11:13:30.539370: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-06 11:13:30.561175: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-06 11:13:30.648705: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-06 11:13:30.648725: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-06 11:13:30.649383: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

Using TensorFlow backend
2.14.0
True
[]


2023-12-06 11:13:34.663455: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-06 11:13:34.718073: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2211] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


### Set seed for reproducibility


In [None]:
seed = 42

# Seed every random number generator used by the notebook with the same value.
for seed_fn in (
    random.seed,
    np.random.seed,
    tf.random.set_seed,
    tf.compat.v1.set_random_seed,
):
    seed_fn(seed)

# NOTE(review): Python presumably reads PYTHONHASHSEED at interpreter start-up,
# so setting it here likely only affects processes spawned later -- confirm.
os.environ["PYTHONHASHSEED"] = str(seed)

## Load data


In [None]:
VALIDATION_SPLIT = 0.2
batch_size = 64
image_size = (96, 96)
input_shape = (*image_size, 3)  # image height, width and 3 colour channels

# Options shared by every dataset built from the image folders: labels are
# inferred from the directory structure and returned as categorical vectors.
_loader_options = dict(
    labels="inferred",
    label_mode="categorical",
    image_size=image_size,
    batch_size=batch_size,
    seed=seed,
)

# subset="both" yields the (training, validation) pair in a single call.
training_set, validation_set = tf.keras.preprocessing.image_dataset_from_directory(
    f"{FOLDER_PATH}/{TRAIN_PATH}",
    color_mode="rgb",
    validation_split=VALIDATION_SPLIT,
    subset="both",
    shuffle=True,
    **_loader_options,
)
test_set = tf.keras.preprocessing.image_dataset_from_directory(
    f"{FOLDER_PATH}/{TEST_PATH}",
    **_loader_options,
)

### CutMix + MixUp augmentation


In [None]:
# We use these two complex types of data augmentation to reduce overfitting on the provided data
# https://keras.io/guides/keras_cv/cut_mix_mix_up_and_rand_augment/ for more information


def to_dict(img, label):
    """Pack an (image, label) pair into the dict passed on to `cutmix_or_mixup`.

    Args:
        img: image tensor (a batch of images when mapped over a batched dataset).
        label: the matching label tensor, passed through untouched.

    Returns:
        A dict with the float32 images under "images" and the labels under
        "labels".
    """
    # convert_image_dtype only rescales when converting from an integer dtype.
    # Float input is merely cast, so the float32 batches produced by
    # image_dataset_from_directory keep their original pixel range (this is why
    # the sample preview later divides by 255 before plotting).
    img = tf.image.convert_image_dtype(img, tf.float32)
    return {"images": img, "labels": label}


def prep_for_model(inputs):
    """Turn an {"images", "labels"} dict back into an (images, labels) tuple.

    The images are cast to float32; the labels are returned as they are.
    """
    return tf.cast(inputs["images"], tf.float32), inputs["labels"]


# N.B. keras_cv expects the targets to be one-hot encoded
def cutmix_or_mixup(samples):
    """Augment `samples` with either CutMix or MixUp, picked at random.

    A single uniform draw is made per call: above 0.5 the whole `samples`
    argument goes through CutMix, otherwise through MixUp. Because the function
    is mapped over a dataset that is already batched, the choice is made once
    per batch rather than once per image.
    """
    use_cutmix = tf.random.uniform(()) > 0.5
    if use_cutmix:
        samples = keras_cv.layers.CutMix()(samples)
    else:
        samples = keras_cv.layers.MixUp()(samples)
    return samples


# Augmentation pipeline: (image, label) tuple -> dict -> CutMix/MixUp -> tuple.
training_set = (
    training_set
    .map(to_dict)
    .map(cutmix_or_mixup)
    .map(prep_for_model)
)

### Display sample augmented images

In [None]:
# Pull one augmented batch and look at a single sample from it.
image_batch, label_batch = next(iter(training_set))
first_image = image_batch[2]

# After CutMix/MixUp the label is expected to be a blend rather than one-hot.
print("Expected result is a mix:", label_batch[2], sep="\n")

# Pixel values are divided by 255 before being handed to imshow.
fig = plt.figure(figsize=(15, 15))
ax = fig.add_subplot(3, 3, 1)
ax.imshow(first_image / 255)
plt.show()

### Calculate class weights


In [None]:
# Since the classes have different cardinalities, we calculate class weights to improve training


def calculate_weight(y_train: np.array) -> dict:
    """Compute "balanced" class weights for the labels in `y_train`.

    Args:
        y_train: array of class labels, one entry per sample.

    Returns:
        A dict whose keys are positions (0, 1, ...) in the array of unique
        labels returned by `np.unique` and whose values are the matching
        weights. The keys equal the labels themselves only when the labels
        are exactly 0..k-1.
    """
    labels = np.unique(y_train, return_counts=False)
    # "balanced" makes each weight inversely proportional to the class frequency.
    balanced = compute_class_weight(class_weight="balanced", classes=labels, y=y_train)
    return {index: weight for index, weight in enumerate(balanced.flatten())}


# A bare ImageDataGenerator is used only to index the training folder: the
# iterator's `classes` attribute supplies the labels given to calculate_weight.
# No validation split is requested here, so the whole training folder is used.
datagen = tf.keras.preprocessing.image.ImageDataGenerator()
data = datagen.flow_from_directory(f"{FOLDER_PATH}/{TRAIN_PATH}")
class_weights = calculate_weight(y_train=data.classes)
print(class_weights)

## Transfer Learning


### Define some Hyperparameters

In [None]:
TL_EPOCHS = 200  # Upper bound on the number of training epochs
TL_ES_PATIENCE = 30  # Epochs without improvement tolerated by early stopping
TL_LEARNING_RATE = 0.001  # Optimizer learning rate for this phase

### Define the basic data augmentation layers


In [None]:
# Three random augmentation layers, grouped in one sub-model that is placed
# ahead of the pretrained Keras model.
data_augmentation = tf.keras.Sequential(name="data_augmentation")
for augmentation_layer in (
    tfkl.RandomFlip("horizontal_and_vertical"),
    tfkl.RandomRotation(factor=0.4, fill_mode="reflect"),
    tfkl.RandomZoom(height_factor=-0.2),
):
    data_augmentation.add(augmentation_layer)

### Load base model from keras


In [None]:
MODEL_NAME = "ConvNeXtLarge"

# Preprocessing function published alongside the ConvNeXt models; it is wrapped
# in a layer when the full model is assembled.
preprocess_input = tfk.applications.convnext.preprocess_input

# ImageNet-pretrained backbone without its classification head.
base_model = tfk.applications.convnext.ConvNeXtLarge(
    weights="imagenet",
    include_top=False,
    input_shape=input_shape,
)
# Freeze the backbone: during transfer learning only the added layers train.
base_model.trainable = False

### Create complete model for transfer learning


In [None]:
# The pretrained Keras model acts as a feature extractor; the layers stacked
# after it are our own classification head.
model = tfk.Sequential(
    [
        tfk.Input(shape=input_shape),
        data_augmentation,
        # Use Lambda as preprocess must be a layer to be added to Sequential model
        tfkl.Lambda(preprocess_input, name="base_model_preprocessing"),
        base_model,
        tfkl.Flatten(),
        tfkl.Dropout(0.4),
        # Two identical regularised hidden layers.
        # NOTE(review): L1L2(1e-3) passes a single positional argument, which
        # presumably sets only the L1 factor -- confirm whether an L2 term was
        # intended as well.
        tfkl.Dense(
            1024,
            activation="relu",
            kernel_regularizer=tf.keras.regularizers.L1L2(1e-3),
            kernel_initializer=tfk.initializers.HeUniform(seed),
        ),
        tfkl.Dense(
            1024,
            activation="relu",
            kernel_regularizer=tf.keras.regularizers.L1L2(1e-3),
            kernel_initializer=tfk.initializers.HeUniform(seed),
        ),
        tfkl.Dropout(0.3),
        # Two-unit softmax output, paired with categorical cross-entropy below.
        tfkl.Dense(
            2,
            activation="softmax",
            kernel_initializer=tf.keras.initializers.GlorotUniform(seed),
        ),
    ]
)

# Compile the model and display its summary.
# `metrics` is given as a list, matching the later compile calls on the
# fine-tuning model and the form documented by Keras.
model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(TL_LEARNING_RATE),
    metrics=["accuracy"],
)
model.summary()

### Train model


In [None]:
# We noticed that reducing the learning rate in this phase did not provide any
# benefit, so no learning-rate schedule is used here.
# We also noticed that using validation accuracy (instead of validation loss) as
# the early stopping metric gave us better results in the challenge.
es_callback = tfk.callbacks.EarlyStopping(
    monitor="val_accuracy",
    mode="max",
    patience=TL_ES_PATIENCE,
    restore_best_weights=True,
)
# `training_set` is a tf.data.Dataset that is already batched (and shuffled) when
# it is built, so `batch_size` and `shuffle` are deliberately not passed: Keras
# documents that `batch_size` must not be specified for dataset inputs and that
# `shuffle` is ignored for them.
model_history = model.fit(
    training_set,
    validation_data=validation_set,
    epochs=TL_EPOCHS,
    callbacks=[es_callback],
    class_weight=class_weights,  # training takes the class weight distribution into account
)

# Save the trained model
model.save(f"Models/best/{MODEL_NAME}/fullModel")

### Plot training results


In [None]:
# Per-epoch curves recorded during the transfer-learning fit.
acc = model_history.history["accuracy"]
val_acc = model_history.history["val_accuracy"]
loss = model_history.history["loss"]
val_loss = model_history.history["val_loss"]

fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(20, 8))
# x coordinates of the vertical marker drawn at the early-stopping best epoch.
best_epoch_x = [es_callback.best_epoch, es_callback.best_epoch]

# Accuracy panel: the y axis is capped at 1 before the marker is drawn so the
# marker spans the final axis range.
acc_ax.plot(acc, label="Training Accuracy")
acc_ax.plot(val_acc, label="Validation Accuracy")
acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
acc_ax.plot(best_epoch_x, acc_ax.get_ylim(), label="Best Epoch")
acc_ax.legend(loc="lower right")
acc_ax.set_title("Accuracy")

# Loss panel: the y axis starts at 0.
loss_ax.plot(loss, label="Training Loss")
loss_ax.plot(val_loss, label="Validation Loss")
loss_ax.set_ylim([0, max(loss_ax.get_ylim())])
loss_ax.plot(best_epoch_x, loss_ax.get_ylim(), label="Best Epoch")
loss_ax.legend(loc="upper right")
loss_ax.set_title("Loss")

plt.show()

## Fine Tuning


### Define some Hyperparameters

In [None]:
# Optimizer and early-stopping settings for the fine-tuning phase.
FT_LEARNING_RATE = 5e-5
FT_ES_PATIENCE = 30

# Learning-rate reduction on plateau.
FT_LR_FACTOR = 0.5  # Reduction factor when plateau is reached
FT_LR_PATIENCE = 10  # Patience before reducing learning rate

# Epoch budget, counted from the best epoch found by the previous early stopping.
FT_TOTAL_EPOCHS = es_callback.best_epoch + 200

# Upper bound on the number of backbone layers to unfreeze.
N_UNFREEZE_LAYERS = 999  # Can be set lower to unfreeze only part of the model

### Reuse the previously trained model for fine tuning


In [None]:
# `ft_model` is a second name bound to the same object as `model`: no copy is
# made, so it already holds the weights from the previous training phase (there
# is nothing to transfer with set_weights) and every change made through
# `ft_model` is visible through `model` as well.
ft_model = model

# Compile with the fine-tuning learning rate.
ft_model.compile(
    optimizer=tfk.optimizers.Adam(FT_LEARNING_RATE),
    loss=tfk.losses.CategoricalCrossentropy(),
    metrics=["accuracy"],
)
ft_model.summary()

### Unfreeze the base model layers to fine tune them in combination with the added ones


In [None]:
# Look the backbone up once and mark it trainable as a whole; the loop below
# then decides layer by layer.
backbone = ft_model.get_layer(base_model.name)
backbone.trainable = True

# Walk the backbone from its last layer to its first, unfreezing at most
# N_UNFREEZE_LAYERS layers. `layer_index` counts the layers unfrozen so far.
layer_index = 0
for i, layer in reversed(list(enumerate(backbone.layers))):
    # Documentation on fine tuning point out that batch normalization should be frozen
    is_batch_norm = isinstance(layer, tfkl.BatchNormalization)
    if is_batch_norm or layer_index >= N_UNFREEZE_LAYERS:
        layer.trainable = False
    else:
        layer.trainable = True
        layer_index += 1

    print(i, layer.name, layer.trainable)

### Compile the new model


In [None]:
# Compile again now that the trainable flags have been changed.
# NOTE(review): Keras presumably needs this recompile for the new `trainable`
# values to take effect -- confirm against the Keras fine-tuning guide.
ft_optimizer = tfk.optimizers.Adam(FT_LEARNING_RATE)
ft_loss = tfk.losses.CategoricalCrossentropy()
ft_model.compile(optimizer=ft_optimizer, loss=ft_loss, metrics=["accuracy"])
ft_model.summary()

### Train the model once more


In [None]:
# Stop when validation accuracy has not improved for FT_ES_PATIENCE epochs and
# roll back to the best weights seen.
ft_es_callback = tfk.callbacks.EarlyStopping(
    monitor="val_accuracy",
    mode="max",
    patience=FT_ES_PATIENCE,
    restore_best_weights=True,
)
# Multiply the learning rate by FT_LR_FACTOR after FT_LR_PATIENCE epochs without
# improvement in validation accuracy, never going below 1e-6.
ft_reduce_lr_callback = tfk.callbacks.ReduceLROnPlateau(
    monitor="val_accuracy",
    patience=FT_LR_PATIENCE,
    factor=FT_LR_FACTOR,
    mode="max",
    min_lr=1e-6,
)
# `training_set` is a tf.data.Dataset that is already batched (and shuffled) when
# it is built, so `batch_size` and `shuffle` are deliberately not passed: Keras
# documents that `batch_size` must not be specified for dataset inputs and that
# `shuffle` is ignored for them.
# Epoch numbering continues from the best epoch of the previous phase.
ft_history = ft_model.fit(
    training_set,
    epochs=FT_TOTAL_EPOCHS,
    initial_epoch=es_callback.best_epoch,
    validation_data=validation_set,
    callbacks=[ft_es_callback, ft_reduce_lr_callback],
    class_weight=class_weights,
)

# Save the fine tuned model
ft_model.save(f"Models/best/{MODEL_NAME}/ft-fullModel")

## Results

### Plot loss and accuracy curves


In [None]:
# Keep the first `best_epoch` entries of the transfer-learning history and
# append the fine-tuning history, so list index == epoch number used by fit()
# (fine tuning was started with initial_epoch=es_callback.best_epoch).
tl_epochs = es_callback.best_epoch
acc = model_history.history["accuracy"][:tl_epochs] + ft_history.history["accuracy"]
loss = model_history.history["loss"][:tl_epochs] + ft_history.history["loss"]
val_acc = (
    model_history.history["val_accuracy"][:tl_epochs]
    + ft_history.history["val_accuracy"]
)
val_loss = (
    model_history.history["val_loss"][:tl_epochs] + ft_history.history["val_loss"]
)

# x coordinates of the vertical marker: the first fine-tuning sample sits at
# index `tl_epochs` in the concatenated lists, so the marker is drawn there.
ft_start = [tl_epochs, tl_epochs]

# Plot accuracy
plt.figure(figsize=(20, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label="Training Accuracy")
plt.plot(val_acc, label="Validation Accuracy")
# Fix the y range (capped at 1) before drawing the marker so it spans the axis
plt.ylim([min(plt.ylim()), 1])
plt.plot(ft_start, plt.ylim(), label="Start Fine Tuning")
plt.legend(loc="lower right")
plt.title("Accuracy")

# Plot loss (logarithmic y axis)
plt.subplot(2, 1, 2)
plt.semilogy(loss, label="Training Loss")
plt.semilogy(val_loss, label="Validation Loss")
# A log-scaled axis cannot start at 0 (matplotlib ignores such a limit with a
# warning), so the autoscaled limits are simply pinned before drawing the marker.
plt.ylim(plt.ylim())
plt.plot(ft_start, plt.ylim(), label="Start Fine Tuning")

plt.legend(loc="upper right")
plt.title("Loss")
plt.show()