# AN2DL - Colab Notebook

In [None]:
# Mount Google Drive at /gdrive (only works inside Google Colab).
from google.colab import drive
drive.mount('/gdrive')
# Switch to the project folder: the relative paths used by later cells
# (dataset archive, saved models) resolve against this directory.
%cd /gdrive/My Drive/Deep

Mounted at /gdrive
/gdrive/My Drive/Deep


## ⚙️ Import Libraries

### Dependencies for training

In [None]:
!pip install keras_cv

Collecting keras_cv
  Downloading keras_cv-0.9.0-py3-none-any.whl.metadata (12 kB)
Collecting keras-core (from keras_cv)
  Downloading keras_core-0.1.7-py3-none-any.whl.metadata (4.3 kB)
Downloading keras_cv-0.9.0-py3-none-any.whl (650 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m650.7/650.7 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading keras_core-0.1.7-py3-none-any.whl (950 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m950.8/950.8 kB[0m [31m35.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keras-core, keras_cv
Successfully installed keras-core-0.1.7 keras_cv-0.9.0


In [None]:
import numpy as np

import tensorflow as tf

import keras as tfk

from keras.layers import Dense, Flatten, Dropout, BatchNormalization, GlobalAveragePooling2D
from keras.models import Model
from keras.optimizers import AdamW
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

from sklearn.model_selection import train_test_split

import keras.layers as tfkl

import keras_cv as kcv
from keras_cv.layers import RandomApply

import matplotlib.pyplot as plt

from keras.applications import ConvNeXtXLarge

from keras.initializers import HeNormal

# Seed Python's `random`, NumPy and the TensorFlow backend in one call.
# Seeding NumPy and TensorFlow alone leaves Python's `random` module unseeded,
# which Keras 3 uses to pick default seeds for layers/initializers created
# without an explicit seed, so runs would still differ.
tfk.utils.set_random_seed(42)

## Data loading and preprocessing

### Load data

In [None]:
# Read the image and label arrays from the archive in the working directory.
# The context manager closes the underlying .npz file handle once both arrays
# have been read into memory (the original left it open).
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code if the file is untrusted. Switch to allow_pickle=False
# if the stored arrays are plain numeric — TODO confirm the dtypes in the archive.
with np.load('data_unique.npz', allow_pickle=True) as data:
    X = data['images']
    y = data['labels']

In [None]:
# Hold out 20% of the samples for validation. The split is stratified on the
# labels and uses a fixed random_state so it is repeatable across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

### Augmentation (and preprocessing)

In [None]:
# Augmentation pipeline. Every transform is wrapped in RandomApply, whose `rate`
# is the chance of the wrapped layer being applied; the layers run in list order.
# Pixel values are declared to be in the 0-255 range where a layer asks for it.
random_layers = [
    # First RandAugment pass, applied most of the time.
    RandomApply(layer=kcv.layers.RandAugment(value_range=(0, 255)), rate=0.9),
    # Jittered resize that always outputs 96x96 images.
    RandomApply(
        layer=kcv.layers.JitteredResize(target_size=(96, 96), scale_factor=(0.9, 1)),
        rate=0.7,
    ),
    # Geometric transforms.
    RandomApply(
        layer=kcv.layers.RandomTranslation(height_factor=0.4, width_factor=0.4),
        rate=0.6,
    ),
    RandomApply(layer=kcv.layers.RandomRotation(factor=1), rate=0.6),
    # Colour transforms.
    RandomApply(layer=kcv.layers.RandomSaturation(factor=0.7), rate=0.3),
    RandomApply(
        layer=kcv.layers.RandomContrast(value_range=(0, 255), factor=0.7),
        rate=0.3,
    ),
    # Cutout, followed by an occasional second RandAugment pass.
    RandomApply(
        layer=kcv.layers.RandomCutout(height_factor=0.6, width_factor=0.6),
        rate=0.8,
    ),
    RandomApply(layer=kcv.layers.RandAugment(value_range=(0, 255)), rate=0.3),
]

# Single callable that applies the whole chain to a batch of images.
augmenter = tfk.Sequential(random_layers)

In [None]:
# Build the training dataset for transfer learning (batches of 128).
#  - shuffle: Keras does not shuffle a tf.data.Dataset itself, so without this
#    step every epoch would see the same batches in the same order;
#  - map: the augmentation runs on whole batches;
#  - prefetch: placed last so that batch preparation (augmentation included)
#    overlaps with the training step.
train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(len(X_train), seed=42, reshuffle_each_iteration=True)
    .batch(128)
    .map(lambda x, y: (augmenter(x), y), num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

# Build the validation dataset for transfer learning (batches of 128).
# Validation images are fed unchanged: no shuffling and no augmentation
# (the former identity `map` was a no-op and has been dropped).
val_ds = (
    tf.data.Dataset.from_tensor_slices((X_val, y_val))
    .batch(128)
    .prefetch(tf.data.AUTOTUNE)
)

### Visualize augmented images

In [None]:
def display_augmented_images(dataset, n_images=10, augment=True):
    """Plot up to `n_images` images from the first batch of `dataset` in one row.

    Args:
        dataset: batched tf.data.Dataset yielding (images, labels) pairs.
        n_images: maximum number of images to draw. Fewer are drawn when the
            first batch holds fewer images (the original raised IndexError).
        augment: when True (the default, matching the original behaviour) the
            images are passed through the global `augmenter` before plotting.
            Pass False for a dataset whose pipeline already applies the
            augmentation, otherwise it would be applied twice.
    """
    plt.figure(figsize=(30, 10))
    for images, _ in dataset.take(1):  # only the first batch is needed
        shown = images[:n_images]
        if augment:
            shown = augmenter(shown)
        n_shown = int(shown.shape[0])  # can be smaller than n_images
        for j in range(n_shown):
            plt.subplot(1, n_shown, j + 1)
            plt.imshow(tf.cast(shown[j], tf.uint8))
            plt.axis("off")


# train_ds already runs `augmenter` in its map step, so its images are shown
# as-is: this is exactly what the model receives during training.
display_augmented_images(train_ds, augment=False)
plt.show()

## 🛠️ Train and Save the Model

### Transfer Learning

In [None]:
# He-normal initializer (fixed seed) shared by the dense layers of the head
heNormal = HeNormal(seed=42)

def create_model():
    """Build and compile the transfer-learning classifier.

    An ImageNet-pretrained ConvNeXtXLarge without its top (96x96x3 input) is
    used as a frozen feature extractor. On top of it sits a small head:
    Flatten -> Dense(1024, relu) -> Dropout(0.3) -> Dense(8, softmax).

    Returns:
        The compiled Keras Model (AdamW optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    # Pretrained backbone
    backbone = ConvNeXtXLarge(
        include_top=False,
        weights='imagenet',
        input_shape=(96, 96, 3),
    )

    # Freeze every backbone layer: only the new head is trained at this stage
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Classification head
    features = Flatten()(backbone.output)
    hidden = Dense(1024, activation='relu', kernel_initializer=heNormal)(features)
    hidden = Dropout(0.3)(hidden)
    probabilities = Dense(8, activation='softmax', kernel_initializer=heNormal)(hidden)  # 8 classes

    classifier = Model(inputs=backbone.input, outputs=probabilities)

    optimizer = AdamW(
        learning_rate=1e-5,
        weight_decay=1e-6,
        epsilon=1e-8,
        beta_1=0.9,
        beta_2=0.999,
    )
    classifier.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

In [None]:
# Build the compiled classifier with the frozen backbone. The recorded output
# shows the pretrained ConvNeXt-XLarge weights being downloaded by this call.
model = create_model()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/convnext/convnext_xlarge_notop.h5
[1m1393257616/1393257616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 0us/step


In [None]:
# Callbacks for the transfer-learning run:
#  - early stopping on val_accuracy (patience 5) that restores the best weights;
#  - learning-rate reduction by a factor of 0.2 (floor 1e-7) driven by the
#    training loss with a patience of 1 epoch.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=1,
    min_lr=1e-7,
)

# Training (at most 30 epochs)
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,
    callbacks=[early_stopping, reduce_lr],
)

Epoch 1/30
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 876ms/step - accuracy: 0.2564 - loss: 2.2573 - val_accuracy: 0.7432 - val_loss: 0.8337 - learning_rate: 1.0000e-05
Epoch 2/30
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 392ms/step - accuracy: 0.4391 - loss: 1.6093 - val_accuracy: 0.8348 - val_loss: 0.5775 - learning_rate: 1.0000e-05
Epoch 3/30
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 389ms/step - accuracy: 0.5069 - loss: 1.4119 - val_accuracy: 0.8745 - val_loss: 0.4452 - learning_rate: 1.0000e-05
Epoch 4/30
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 390ms/step - accuracy: 0.5515 - loss: 1.2698 - val_accuracy: 0.8883 - val_loss: 0.3879 - learning_rate: 1.0000e-05
Epoch 5/30
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 389ms/step - accuracy: 0.5751 - loss: 1.2174 - val_accuracy: 0.8954 - val_loss: 0.3421 - learning_rate: 1.0000e-05
Epoch 6/30
[1m75/75[0m [32m━━━━━━━━━━━━━━

In [None]:
# Save the transfer-learning model to 'tf.keras' in the working directory;
# the first fine-tuning cell reloads it from this file.
model.save('tf.keras')

## Fine Tuning

### Half network trainable

In [None]:
# Reload the transfer-learning checkpoint so fine-tuning can start from the
# saved file rather than from whatever state `model` currently holds.
model = tf.keras.models.load_model('tf.keras')

In [None]:
# Build the training dataset for the first round of fine-tuning (batches of 128).
#  - shuffle: Keras does not shuffle a tf.data.Dataset itself, so without this
#    step every epoch would see the same batches in the same order;
#  - map: the augmentation runs on whole batches;
#  - prefetch: placed last so that batch preparation (augmentation included)
#    overlaps with the training step.
train_ds_ft = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(len(X_train), seed=42, reshuffle_each_iteration=True)
    .batch(128)
    .map(lambda x, y: (augmenter(x), y), num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

# Build the validation dataset for the first round of fine-tuning (batches of 128).
# Validation images are fed unchanged: no shuffling and no augmentation
# (the former identity `map` was a no-op and has been dropped).
val_ds_ft = (
    tf.data.Dataset.from_tensor_slices((X_val, y_val))
    .batch(128)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Activate half network: mark the last 131 layers of the model as trainable
# NOTE(review): 131 is taken to be half of the layer count — confirm against len(model.layers)
for unfrozen_layer in model.layers[-131:]:
    unfrozen_layer.trainable = True

# Recompile with a fresh AdamW so training uses the updated trainable flags
ft_optimizer = AdamW(
    learning_rate=1e-5,
    weight_decay=1e-5,
    epsilon=1e-8,
    beta_1=0.9,
    beta_2=0.999,
)
model.compile(
    optimizer=ft_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Callbacks for the first fine-tuning round:
#  - early stopping on val_accuracy (patience 10) that restores the best weights;
#  - learning-rate reduction by a factor of 0.2 (floor 1e-8) driven by the
#    training loss with a patience of 2 epochs.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=2,
    min_lr=1e-8,
)

# Fine-tuning (at most 40 epochs)
history = model.fit(
    train_ds_ft,
    validation_data=val_ds_ft,
    epochs=40,
    callbacks=[early_stopping, reduce_lr],
)

Epoch 1/40
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 778ms/step - accuracy: 0.8398 - loss: 0.4584 - val_accuracy: 0.9770 - val_loss: 0.0657 - learning_rate: 1.0000e-05
Epoch 2/40
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 439ms/step - accuracy: 0.8383 - loss: 0.4582 - val_accuracy: 0.9795 - val_loss: 0.0623 - learning_rate: 1.0000e-05
Epoch 3/40
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 444ms/step - accuracy: 0.8447 - loss: 0.4387 - val_accuracy: 0.9799 - val_loss: 0.0629 - learning_rate: 1.0000e-05
Epoch 4/40
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 431ms/step - accuracy: 0.8367 - loss: 0.4671 - val_accuracy: 0.9787 - val_loss: 0.0615 - learning_rate: 1.0000e-05
Epoch 5/40
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 432ms/step - accuracy: 0.8508 - loss: 0.4344 - val_accuracy: 0.9757 - val_loss: 0.0668 - learning_rate: 1.0000e-05
Epoch 6/40
[1m75/75[0m [32m━━━━━━━━━━━━━━

In [None]:
# Recompile without an optimizer before saving, presumably so that no optimizer
# state ends up in the checkpoint.
# NOTE(review): confirm that `include_optimizer` is honoured for the .keras
# format by the installed Keras version; the compile(optimizer=None) call
# suggests it was not relied upon.
model.compile(optimizer=None, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.save('ft1.keras', include_optimizer=False)

### Entire network trainable

In [None]:
# Reload the first fine-tuning checkpoint. compile=False skips restoring the
# compile state; the model is compiled again in the next cell.
model = tf.keras.models.load_model('ft1.keras', compile=False)

In [None]:
# Make every layer of the network trainable
for unfrozen_layer in model.layers:
    unfrozen_layer.trainable = True

# Add gradient accumulation (to compensate for the lower batches):
# the optimizer accumulates gradients over 2 steps before updating the weights
full_ft_optimizer = AdamW(
    learning_rate=1e-4,
    weight_decay=1e-5,
    epsilon=1e-8,
    beta_1=0.9,
    beta_2=0.999,
    gradient_accumulation_steps=2,
)
model.compile(
    optimizer=full_ft_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Reduce batches to 64 (not to fill up the RAM).
# Training dataset:
#  - shuffle: Keras does not shuffle a tf.data.Dataset itself, so without this
#    step every epoch would see the same batches in the same order;
#  - map: the augmentation runs on whole batches;
#  - prefetch: placed last so that batch preparation (augmentation included)
#    overlaps with the training step.
train_ds_ft = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(len(X_train), seed=42, reshuffle_each_iteration=True)
    .batch(64)
    .map(lambda x, y: (augmenter(x), y), num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation dataset: images are fed unchanged, no shuffling and no
# augmentation (the former identity `map` was a no-op and has been dropped).
val_ds_ft = (
    tf.data.Dataset.from_tensor_slices((X_val, y_val))
    .batch(64)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Callbacks for the full-network fine-tuning round:
#  - early stopping on val_accuracy (patience 20) that restores the best weights;
#  - learning-rate reduction by a factor of 0.2 (floor 1e-8) driven by the
#    training loss with a patience of 2 epochs.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=20,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=2,
    min_lr=1e-8,
)

# Fine-tuning of the whole network (at most 40 epochs)
history = model.fit(
    train_ds_ft,
    validation_data=val_ds_ft,
    epochs=40,
    callbacks=[early_stopping, reduce_lr],
)

Epoch 1/40
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 564ms/step - accuracy: 0.8084 - loss: 0.5926 - val_accuracy: 0.9849 - val_loss: 0.0522 - learning_rate: 1.0000e-04
Epoch 2/40
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 278ms/step - accuracy: 0.8648 - loss: 0.3942 - val_accuracy: 0.9762 - val_loss: 0.0856 - learning_rate: 1.0000e-04
Epoch 3/40
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 283ms/step - accuracy: 0.8681 - loss: 0.3934 - val_accuracy: 0.9858 - val_loss: 0.0479 - learning_rate: 1.0000e-04
Epoch 4/40
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 278ms/step - accuracy: 0.8791 - loss: 0.3431 - val_accuracy: 0.9820 - val_loss: 0.0640 - learning_rate: 1.0000e-04
Epoch 5/40
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 282ms/step - accuracy: 0.8903 - loss: 0.3159 - val_accuracy: 0.9891 - val_loss: 0.0393 - learning_rate: 1.0000e-04
Epoch 6/40
[1m150/150[0m [32m━━

In [None]:
# Save the fully fine-tuned model to 'weights.keras' in the working directory.
model.save('weights.keras')

### Continuing full-network fine-tuning (lower learning rate)

In [None]:
# Reload the fully fine-tuned checkpoint. compile=False skips restoring the
# compile state; the model is compiled again in the next cell.
model = tf.keras.models.load_model('weights.keras', compile=False)

In [None]:
# Compile for the continuation run: AdamW with learning rate 1e-5,
# weight decay 1e-3, and gradients accumulated over 4 steps per weight update
continue_optimizer = AdamW(
    learning_rate=1e-5,
    weight_decay=1e-3,
    epsilon=1e-8,
    beta_1=0.9,
    beta_2=0.999,
    gradient_accumulation_steps=4,
)
model.compile(
    optimizer=continue_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Training dataset for the continuation run (batches of 64).
#  - shuffle: Keras does not shuffle a tf.data.Dataset itself, so without this
#    step every epoch would see the same batches in the same order;
#  - map: the augmentation runs on whole batches;
#  - prefetch: placed last so that batch preparation (augmentation included)
#    overlaps with the training step.
train_ds_ft = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(len(X_train), seed=42, reshuffle_each_iteration=True)
    .batch(64)
    .map(lambda x, y: (augmenter(x), y), num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation dataset (batches of 64): images are fed unchanged, no shuffling
# and no augmentation (the former identity `map` was a no-op and has been dropped).
val_ds_ft = (
    tf.data.Dataset.from_tensor_slices((X_val, y_val))
    .batch(64)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Callbacks for the continuation run:
#  - early stopping on val_accuracy (patience 5) that restores the best weights;
#  - learning-rate reduction by a factor of 0.2 (floor 1e-7) driven by the
#    validation loss with a patience of 2 epochs.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=2,
    min_lr=1e-7,
)

# Short continuation run (6 epochs)
history = model.fit(
    train_ds_ft,
    validation_data=val_ds_ft,
    epochs=6,
    callbacks=[early_stopping, reduce_lr],
)

Epoch 1/6
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m380s[0m 3s/step - accuracy: 0.9549 - loss: 0.1365 - val_accuracy: 0.9929 - val_loss: 0.0250 - learning_rate: 2.0000e-06
Epoch 2/6
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m373s[0m 2s/step - accuracy: 0.9526 - loss: 0.1372 - val_accuracy: 0.9925 - val_loss: 0.0257 - learning_rate: 2.0000e-06
Epoch 3/6
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m373s[0m 2s/step - accuracy: 0.9467 - loss: 0.1544 - val_accuracy: 0.9921 - val_loss: 0.0261 - learning_rate: 2.0000e-06
Epoch 4/6
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m382s[0m 2s/step - accuracy: 0.9567 - loss: 0.1330 - val_accuracy: 0.9921 - val_loss: 0.0262 - learning_rate: 2.0000e-06
Epoch 5/6
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m370s[0m 2s/step - accuracy: 0.9559 - loss: 0.1378 - val_accuracy: 0.9921 - val_loss: 0.0259 - learning_rate: 4.0000e-07
Epoch 6/6
[1m150/150[0m [32m━━━━━━━━━━━━━━━━━━━

In [None]:
# Free up RAM: drop both fine-tuning dataset references, then force a
# garbage-collection pass (the cell displays the value gc.collect() returns)
import gc

del train_ds_ft, val_ds_ft
gc.collect()

9

In [None]:
# Recompile without an optimizer and overwrite 'weights.keras' with the final
# model, presumably so the file carries no optimizer state.
# NOTE(review): confirm that `include_optimizer` is honoured for the .keras
# format by the installed Keras version.
model.compile(optimizer=None, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.save('weights.keras', include_optimizer=False)