In [1]:
import os
import datetime
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import cv2

2024-12-03 14:48:44.391526: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-03 14:48:44.435788: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733230124.462805   37047 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733230124.472388   37047 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-03 14:48:44.517227: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
NUM_CLASSES = 7
IMG_SHAPE = (224, 224, 3)
BATCH_SIZE = 16

TRAIN_EPOCH = 100
TRAIN_LR = 1e-3
TRAIN_ES_PATIENCE = 5
TRAIN_LR_PATIENCE = 3
TRAIN_MIN_LR = 1e-6
TRAIN_DROPOUT = 0.1

FT_EPOCH = 500
FT_LR = 1e-5
FT_LR_DECAY_STEP = 80.0
FT_LR_DECAY_RATE = 1
FT_ES_PATIENCE = 20
FT_DROPOUT = 0.2

ES_LR_MIN_DELTA = 0.003

In [None]:
# Data loading

In [3]:
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, file_paths, labels, batch_size, img_size):
        self.file_paths = file_paths
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size

    def __len__(self):
        return int(np.ceil(len(self.file_paths) / self.batch_size))

    def __getitem__(self, index):
        batch_paths = self.file_paths[index * self.batch_size:(index + 1) * self.batch_size]
        batch_labels = self.labels[index * self.batch_size:(index + 1) * self.batch_size]
        
        images = []
        for path in batch_paths:
            image = cv2.imread(path, cv2.IMREAD_COLOR)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (self.img_size, self.img_size))
            images.append(image / 255.0)

        return np.array(images, dtype=np.float32), np.array(batch_labels, dtype=np.int32)

def get_file_paths_and_labels(path_prefix, split):
    emotion_labels = ['angry', 'disgusted', 'fear', 'happy', 'neutral', 'sad', 'surprised']
    label_to_index = {label: idx for idx, label in enumerate(emotion_labels)}

    file_paths, labels = [], []
    split_path = os.path.join(path_prefix, split)
    for file_name in os.listdir(split_path):
        parts = file_name.split('_')
        if len(parts) < 3:
            continue
        emotion = parts[-1].split('.')[0]
        if emotion not in label_to_index:
            continue
        
        file_paths.append(os.path.join(split_path, file_name))
        labels.append(label_to_index[emotion])
    
    return file_paths, labels

In [4]:
batch_size = 16
img_size = 224
path_prefix = 'fer_plus_balanced'
# Prepare file paths and labels
train_paths, train_labels = get_file_paths_and_labels(path_prefix, 'train')
val_paths, val_labels = get_file_paths_and_labels(path_prefix, 'val')
test_paths, test_labels = get_file_paths_and_labels(path_prefix, 'test')

# Create generators
train_generator = DataGenerator(train_paths, train_labels, batch_size, img_size)
val_generator = DataGenerator(val_paths, val_labels, batch_size, img_size)
test_generator = DataGenerator(test_paths, test_labels, batch_size, img_size)

In [5]:
# Model Building
input_layer = tf.keras.Input(shape=IMG_SHAPE, name='universal_input')
sample_resizing = tf.keras.layers.Resizing(224, 224, name="resize")
data_augmentation = tf.keras.Sequential([tf.keras.layers.RandomFlip(mode='horizontal'), 
                                        tf.keras.layers.RandomContrast(factor=0.3)], name="augmentation")
preprocess_input = tf.keras.applications.mobilenet.preprocess_input

backbone = tf.keras.applications.mobilenet.MobileNet(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
backbone.trainable = False
base_model = tf.keras.Model(backbone.input, backbone.layers[-29].output, name='base_model')

self_attention = tf.keras.layers.Attention(use_scale=True, name='attention')
patch_extraction = tf.keras.Sequential([
    tf.keras.layers.SeparableConv2D(256, kernel_size=4, strides=4, padding='same', activation='relu'), 
    tf.keras.layers.SeparableConv2D(256, kernel_size=2, strides=2, padding='valid', activation='relu'), 
    tf.keras.layers.Conv2D(256, kernel_size=1, strides=1, padding='valid', activation='relu')
], name='patch_extraction')
global_average_layer = tf.keras.layers.GlobalAveragePooling2D(name='gap')
pre_classification = tf.keras.Sequential([tf.keras.layers.Dense(32, activation='relu'), 
                                          tf.keras.layers.BatchNormalization()], name='pre_classification')
prediction_layer = tf.keras.layers.Dense(NUM_CLASSES, activation="softmax", name='classification_head')

inputs = input_layer
x = sample_resizing(inputs)
x = data_augmentation(x)
x = preprocess_input(x)
x = base_model(x, training=False)
x = patch_extraction(x)
x = global_average_layer(x)
x = tf.keras.layers.Dropout(TRAIN_DROPOUT)(x)
x = pre_classification(x)
x = tf.keras.layers.Reshape((1, -1))(x)
x = self_attention([x, x])
x = tf.keras.layers.Flatten()(x)
outputs = prediction_layer(x)
model = tf.keras.Model(inputs, outputs, name='train-head')
model.compile(optimizer=keras.optimizers.Adam(learning_rate=TRAIN_LR, global_clipnorm=3.0), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Training Procedure
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=TRAIN_ES_PATIENCE, min_delta=ES_LR_MIN_DELTA, restore_best_weights=True)
learning_rate_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', patience=TRAIN_LR_PATIENCE, verbose=0, min_delta=ES_LR_MIN_DELTA, min_lr=TRAIN_MIN_LR)
history = model.fit(train_generator, epochs=TRAIN_EPOCH, batch_size=BATCH_SIZE, validation_data=val_generator, verbose=1, callbacks=[early_stopping_callback, learning_rate_callback])
test_loss, test_acc = model.evaluate(test_generator)
model.save('basic_model.keras')

I0000 00:00:1733230138.487318   37047 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13512 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


Epoch 1/100


  self._warn_if_super_not_called()
I0000 00:00:1733230141.243562   37211 service.cc:148] XLA service 0x7fc25000a370 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1733230141.243594   37211 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4090 Laptop GPU, Compute Capability 8.9
2024-12-03 14:49:01.297804: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1733230141.561891   37211 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  15/1313[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m16s[0m 13ms/step - accuracy: 0.1354 - loss: 1.9524

I0000 00:00:1733230146.744525   37211 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m 525/1313[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m10s[0m 13ms/step - accuracy: 0.1432 - loss: 1.9465

2024-12-03 14:49:19.358182: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng4{} for conv (f32[8,256,4,4]{3,2,1,0}, u8[0]{0}) custom-call(f32[8,256,5,5]{3,2,1,0}, f32[256,1,2,2]{3,2,1,0}), window={size=2x2}, dim_labels=bf01_oi01->bf01, feature_group_count=256, custom_call_target="__cudnn$convForward", backend_config={"cudnn_conv_backend_config":{"activation_mode":"kNone","conv_result_scale":1,"leakyrelu_alpha":0,"side_input_scale":0},"force_earliest_schedule":false,"operation_queue_id":"0","wait_on_operation_queues":[]} is taking a while...
2024-12-03 14:49:19.437667: E external/local_xla/xla/service/slow_operation_alarm.cc:133] The operation took 3.870959691s
Trying algorithm eng4{} for conv (f32[8,256,4,4]{3,2,1,0}, u8[0]{0}) custom-call(f32[8,256,5,5]{3,2,1,0}, f32[256,1,2,2]{3,2,1,0}), window={size=2x2}, dim_labels=bf01_oi01->bf01, feature_group_count=256, custom_call_target="__cudnn$convForward", backend_config={"cudnn_conv_backend_config":{"activat

[1m1313/1313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 23ms/step - accuracy: 0.1700 - loss: 1.9034 - val_accuracy: 0.1834 - val_loss: 1.9321 - learning_rate: 0.0010
Epoch 2/100
[1m1313/1313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 11ms/step - accuracy: 0.2558 - loss: 1.7307 - val_accuracy: 0.1434 - val_loss: 3.2731 - learning_rate: 0.0010
Epoch 3/100
[1m 644/1313[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m9s[0m 15ms/step - accuracy: 0.2871 - loss: 1.6818 

KeyboardInterrupt: 

In [12]:
def lr_schedule(epoch, lr):
    decay_rate = FT_LR_DECAY_RATE
    decay_steps = FT_LR_DECAY_STEP
    new_lr = FT_LR / (1 + decay_rate * (epoch // decay_steps))
    return float(new_lr)
# Model Finetuning
print("\nFinetuning ...")
unfreeze = 59
base_model.trainable = True
fine_tune_from = len(base_model.layers) - unfreeze
for layer in base_model.layers[:fine_tune_from]:
    layer.trainable = False
for layer in base_model.layers[fine_tune_from:]:
    if isinstance(layer, tf.keras.layers.BatchNormalization):
        layer.trainable = False

inputs = input_layer
x = sample_resizing(inputs)
x = data_augmentation(x)
x = preprocess_input(x)
x = base_model(x, training=False)
x = patch_extraction(x)
x = tf.keras.layers.SpatialDropout2D(FT_DROPOUT)(x)
x = global_average_layer(x)
x = tf.keras.layers.Dropout(FT_DROPOUT)(x)
x = pre_classification(x)
x = tf.keras.layers.Reshape((1, -1))(x)
x = self_attention([x, x])
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dropout(FT_DROPOUT)(x)
outputs = prediction_layer(x)
model = tf.keras.Model(inputs, outputs, name='finetune-backbone')
model.compile(optimizer=keras.optimizers.Adam(learning_rate=FT_LR, global_clipnorm=3.0), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Training Procedure
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', min_delta=ES_LR_MIN_DELTA, patience=FT_ES_PATIENCE, restore_best_weights=True)
scheduler = keras.optimizers.schedules.InverseTimeDecay(initial_learning_rate=FT_LR, decay_steps=FT_LR_DECAY_STEP, decay_rate=FT_LR_DECAY_RATE)
scheduler_callback = tf.keras.callbacks.LearningRateScheduler(schedule=lr_schedule)

history_finetune = model.fit(train_generator, epochs=FT_EPOCH, batch_size=BATCH_SIZE, validation_data=val_generator, verbose=1, 
                             initial_epoch=history.epoch[-TRAIN_ES_PATIENCE], callbacks=[early_stopping_callback, scheduler_callback, tensorboard_callback])
test_loss, test_acc = model.evaluate(test_generator)
model.save('model_finetuned.keras')


Finetuning ...
Epoch 16/500
[1m1313/1313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 15ms/step - accuracy: 0.4607 - loss: 1.4095 - val_accuracy: 0.4151 - val_loss: 1.5240 - learning_rate: 1.0000e-05
Epoch 17/500
[1m1313/1313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 14ms/step - accuracy: 0.4722 - loss: 1.3967 - val_accuracy: 0.4170 - val_loss: 1.5216 - learning_rate: 1.0000e-05
Epoch 18/500
[1m1313/1313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 11ms/step - accuracy: 0.4667 - loss: 1.3921 - val_accuracy: 0.4165 - val_loss: 1.5180 - learning_rate: 1.0000e-05
Epoch 19/500
[1m1313/1313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 14ms/step - accuracy: 0.4755 - loss: 1.3969 - val_accuracy: 0.4123 - val_loss: 1.5221 - learning_rate: 1.0000e-05
Epoch 20/500
[1m1313/1313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 12ms/step - accuracy: 0.4721 - loss: 1.3932 - val_accuracy: 0.4154 - val_loss: 1.5205 - learning_rate: 1.0000e-05
Epoc

KeyboardInterrupt: 