# Method 4: Optimized SE-CBAM CNN - FER2013 Training

## Các kỹ thuật cải tiến so với Method 2:
1. **CBAM (Convolutional Block Attention Module)** — Kết hợp Channel Attention (SE) VÀ Spatial Attention
2. **Enhanced Data Augmentation** — Kỹ thuật augmentation mạnh hơn (từ Method 1 + bổ sung)
3. **Focal Loss** — Xử lý mất cân bằng lớp (đặc biệt Fear, Disgust)
4. **Cosine Annealing LR** — Learning rate giảm mượt theo hình cosine
5. **Tăng Epochs lên 80** với EarlyStopping patience=15
6. **Label Smoothing = 0.15** — Cải thiện generalization
7. **Test Time Augmentation (TTA)** — Tăng accuracy khi đánh giá

In [ ]:
# ================================================
# CELL 1: SETUP & GPU CHECK
# ================================================
!nvidia-smi

import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
print(f"TensorFlow: {tf.__version__}")
print(f"GPUs: {gpus}")

if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    print("‚úÖ GPU ENABLED!")
else:
    print("‚ùå NO GPU! Go to Runtime ‚Üí Change runtime type ‚Üí GPU")


In [ ]:
# ================================================
# CELL 2: IMPORT LIBRARIES
# ================================================
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import warnings
import pickle
import json
import math
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, regularizers, backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback, LearningRateScheduler
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

print("‚úÖ Libraries imported!")


In [ ]:
# ================================================
# CELL 3: MOUNT DRIVE
# ================================================
# Colab-only: mount Google Drive at /content/drive. Later cells read the
# dataset ZIP from, and write checkpoints to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')
print("‚úÖ Drive mounted!")


In [ ]:
# ================================================
# CELL 4: EXTRACT DATASET
# ================================================
ZIP_PATH = '/content/drive/MyDrive/CaptoneProject/camera.zip'
LOCAL_PATH = '/content/dataset'

if not os.path.exists(LOCAL_PATH):
    if os.path.exists(ZIP_PATH):
        print("üì¶ Unzipping...")
        !unzip -q -o "{ZIP_PATH}" -d /content/
        if os.path.exists('/content/camera'):
            !mv /content/camera "{LOCAL_PATH}"
        elif os.path.exists('/content/train') and os.path.exists('/content/test'):
            os.makedirs(LOCAL_PATH, exist_ok=True)
            !mv /content/train "{LOCAL_PATH}/train"
            !mv /content/test "{LOCAL_PATH}/test"
        print("‚úÖ Dataset ready at /content/dataset")
    else:
        print("‚ùå ZIP file not found in Drive!")
else:
    print("‚úÖ Dataset already exists locally!")

TRAIN_DIR = os.path.join(LOCAL_PATH, 'train')
TEST_DIR = os.path.join(LOCAL_PATH, 'test')


In [ ]:
# ================================================
# CELL 5: CONFIG
# ================================================
# --- Reproducibility: seed NumPy and TensorFlow ---
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Data ---
IMG_SIZE = 48
BATCH_SIZE = 64
NUM_CLASSES = 7
# Class names; presumably in the same (alphabetical) order as the class
# sub-directories of the dataset -- verify against generator.class_indices.
EMOTIONS = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

# --- Optimisation ---
EPOCHS = 80
INITIAL_LR = 0.001
LABEL_SMOOTHING = 0.15

# --- Outputs (stored on the mounted Drive) ---
CHECKPOINT_DIR = '/content/drive/MyDrive/CaptoneProject/checkpoints/method4_optimized'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
BEST_MODEL_PATH = CHECKPOINT_DIR + '/best_model.keras'
HISTORY_PATH = CHECKPOINT_DIR + '/history.pkl'

print("‚úÖ Config set!")
print(f"   EPOCHS={EPOCHS}, LR={INITIAL_LR}, LABEL_SMOOTHING={LABEL_SMOOTHING}, BATCH={BATCH_SIZE}")


In [ ]:
# ================================================
# CELL 6: ENHANCED DATA AUGMENTATION
# ================================================
# Augmenting generator: used ONLY for the training subset.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=25,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.7, 1.3],
    fill_mode='nearest',
    validation_split=0.2
)
# Validation images must not be augmented: val_accuracy drives
# ModelCheckpoint and EarlyStopping, so it has to be measured on clean,
# repeatable inputs. The same validation_split fraction is kept so this
# generator selects the complement of the training subset.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR, target_size=(IMG_SIZE, IMG_SIZE), color_mode='grayscale',
    batch_size=BATCH_SIZE, class_mode='categorical', subset='training', shuffle=True, seed=SEED)

validation_generator = val_datagen.flow_from_directory(
    TRAIN_DIR, target_size=(IMG_SIZE, IMG_SIZE), color_mode='grayscale',
    batch_size=BATCH_SIZE, class_mode='categorical', subset='validation', shuffle=False, seed=SEED)

# shuffle=False keeps predictions aligned with test_generator.classes.
test_generator = test_datagen.flow_from_directory(
    TEST_DIR, target_size=(IMG_SIZE, IMG_SIZE), color_mode='grayscale',
    batch_size=BATCH_SIZE, class_mode='categorical', shuffle=False)

print("‚úÖ Data Generators ready!")


In [ ]:
# ================================================
# CELL 7: CLASS WEIGHTS
# ================================================
# Integer class index of every training sample, as exposed by the generator.
train_labels = train_generator.classes
# 'balanced' weights computed over the classes present in the training labels.
class_weights_array = compute_class_weight(
    'balanced',
    classes=np.unique(train_labels),
    y=train_labels,
)
# Keras expects a {class_index: weight} mapping.
class_weights = {idx: weight for idx, weight in enumerate(class_weights_array)}

print("‚öñÔ∏è Class Weights:")
for idx, name in enumerate(EMOTIONS):
    print(f"   {name}: {class_weights[idx]:.4f}")


In [ ]:
# ================================================
# CELL 8: FOCAL LOSS + CBAM ATTENTION + BUILD MODEL
# (All custom components are combined into one cell)
# ================================================

# -------------------- FOCAL LOSS --------------------
class FocalLoss(keras.losses.Loss):
    """Focal loss with built-in label smoothing.

    Focuses training on hard-to-classify samples:
    FL(p) = -alpha * (1 - p)^gamma * log(p), applied to every class term
    of the (smoothed) cross-entropy and summed over the last axis.

    Args:
        gamma: Exponent of the (1 - p) modulating factor.
        alpha: Scalar multiplier applied uniformly to every term.
        label_smoothing: Amount of mass moved from the targets to a
            uniform distribution over the classes.
        **kwargs: Forwarded to ``keras.losses.Loss``.
    """

    def __init__(self, gamma=2.0, alpha=0.25, label_smoothing=0.15, **kwargs):
        super().__init__(**kwargs)
        self.gamma = gamma
        self.alpha = alpha
        self.label_smoothing = label_smoothing

    def call(self, y_true, y_pred):
        """Return the loss reduced over the last (class) axis.

        NOTE(review): y_pred is treated as probabilities (it is clipped and
        passed to log directly); y_true is presumably one-hot -- confirm.
        """
        # Number of classes is read from the last dimension of the targets.
        n_classes = tf.cast(tf.shape(y_true)[-1], dtype=tf.float32)
        smoothed = y_true * (1.0 - self.label_smoothing) + (self.label_smoothing / n_classes)

        # Clip so log() never sees 0 or 1 exactly.
        eps = K.epsilon()
        probs = tf.clip_by_value(y_pred, eps, 1.0 - eps)

        ce_terms = -smoothed * tf.math.log(probs)
        modulator = self.alpha * tf.pow(1.0 - probs, self.gamma)
        return tf.reduce_sum(modulator * ce_terms, axis=-1)

    def get_config(self):
        """Return the base config plus the loss hyper-parameters."""
        return {
            **super().get_config(),
            'gamma': self.gamma,
            'alpha': self.alpha,
            'label_smoothing': self.label_smoothing,
        }


# -------------------- CBAM ATTENTION --------------------
class ChannelAttention(layers.Layer):
    """Channel attention (an SE-block variant) using both avg- and max-pooling.

    The globally average-pooled and max-pooled descriptors go through the
    same two Dense layers; their outputs are added, squashed with a sigmoid
    and used to rescale the input channels.

    Args:
        ratio: Reduction ratio of the bottleneck Dense layer.
        **kwargs: Forwarded to ``layers.Layer``.
    """
    def __init__(self, ratio=8, **kwargs):
        super().__init__(**kwargs)
        self.ratio = ratio

    def build(self, input_shape):
        ch = input_shape[-1]
        # Keep at least one bottleneck unit: `ch // ratio` is 0 whenever the
        # input has fewer channels than `ratio`, which is not a valid Dense width.
        bottleneck = max(ch // self.ratio, 1)
        self.dense1 = layers.Dense(bottleneck, activation='relu', kernel_initializer='he_normal')
        self.dense2 = layers.Dense(ch, kernel_initializer='he_normal')
        self.gap = layers.GlobalAveragePooling2D()
        self.gmp = layers.GlobalMaxPooling2D()
        super().build(input_shape)

    def call(self, x):
        ch = x.shape[-1]
        # Both pooled descriptors share dense1/dense2.
        avg = self.dense2(self.dense1(self.gap(x)))
        mx  = self.dense2(self.dense1(self.gmp(x)))
        att = tf.sigmoid(avg + mx)
        # Reshape to (-1, 1, 1, ch) so the weights broadcast over the input.
        return x * tf.reshape(att, (-1, 1, 1, ch))

    def get_config(self):
        config = super().get_config()
        config.update({'ratio': self.ratio})
        return config


class SpatialAttention(layers.Layer):
    """Spatial attention: emphasises important regions (eyes, mouth, ...).

    The input is reduced over its last axis with mean and max, the two maps
    are concatenated and passed through a single-filter sigmoid convolution,
    and the resulting map multiplies the input.

    Args:
        kernel_size: Kernel size of the attention convolution.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, kernel_size=7, **kwargs):
        super().__init__(**kwargs)
        self.kernel_size = kernel_size

    def build(self, input_shape):
        # One output filter -> one attention value per spatial position.
        self.conv = layers.Conv2D(
            1,
            self.kernel_size,
            padding='same',
            activation='sigmoid',
            kernel_initializer='he_normal',
        )
        super().build(input_shape)

    def call(self, x):
        pooled = tf.concat(
            [
                tf.reduce_mean(x, axis=-1, keepdims=True),
                tf.reduce_max(x, axis=-1, keepdims=True),
            ],
            axis=-1,
        )
        return x * self.conv(pooled)

    def get_config(self):
        return {**super().get_config(), 'kernel_size': self.kernel_size}


class CBAMBlock(layers.Layer):
    """CBAM: channel attention followed by spatial attention, in sequence.

    Args:
        ratio: Reduction ratio passed to ``ChannelAttention``.
        kernel_size: Kernel size passed to ``SpatialAttention``.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, ratio=8, kernel_size=7, **kwargs):
        super().__init__(**kwargs)
        self.ratio, self.kernel_size = ratio, kernel_size
        self.ca = ChannelAttention(ratio=ratio)
        self.sa = SpatialAttention(kernel_size=kernel_size)

    def call(self, x):
        channel_refined = self.ca(x)
        return self.sa(channel_refined)

    def get_config(self):
        return {
            **super().get_config(),
            'ratio': self.ratio,
            'kernel_size': self.kernel_size,
        }


# -------------------- BUILD MODEL --------------------
def build_optimized_cbam_cnn(input_shape=(48, 48, 1), num_classes=7):
    """Build the four-stage CNN with a CBAM block in every stage.

    Each stage is Conv-BN-ReLU x2 -> CBAM -> 2x2 max-pool -> dropout; the
    classifier is global average pooling, two L2-regularised
    Dense-BN-ReLU-Dropout(0.5) blocks and a softmax output layer.

    Args:
        input_shape: Shape of one input image.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled ``models.Model``.
    """
    def conv_bn_relu(tensor, filters):
        # 3x3 same-padded convolution followed by batch norm and ReLU.
        tensor = layers.Conv2D(filters, 3, padding='same', kernel_initializer='he_normal')(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.Activation('relu')(tensor)

    def dense_bn_relu_dropout(tensor, units):
        # L2-regularised Dense layer, batch norm, ReLU, then 50% dropout.
        tensor = layers.Dense(units, kernel_regularizer=regularizers.l2(0.0005))(tensor)
        tensor = layers.BatchNormalization()(tensor)
        tensor = layers.Activation('relu')(tensor)
        return layers.Dropout(0.5)(tensor)

    # (filters, CBAM reduction ratio, dropout rate) for each stage. With the
    # default 48x48 input the spatial size goes 48 -> 24 -> 12 -> 6 -> 3.
    stages = (
        (64, 8, 0.25),
        (128, 8, 0.25),
        (256, 16, 0.3),
        (512, 16, 0.3),
    )

    inputs = layers.Input(shape=input_shape)
    x = inputs
    for filters, ratio, drop_rate in stages:
        x = conv_bn_relu(x, filters)
        x = conv_bn_relu(x, filters)
        x = CBAMBlock(ratio=ratio)(x)
        x = layers.MaxPooling2D(2)(x)
        x = layers.Dropout(drop_rate)(x)

    # Classifier head
    x = layers.GlobalAveragePooling2D()(x)
    for units in (512, 256):
        x = dense_bn_relu_dropout(x, units)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    return models.Model(inputs=inputs, outputs=outputs)


# Instantiate the network with its default input shape and class count,
# then print the layer summary and the total parameter count.
model = build_optimized_cbam_cnn()
model.summary()
print(f"\n‚úÖ Model built! Total params: {model.count_params():,}")


In [ ]:
# ================================================
# CELL 9: COSINE ANNEALING LR + COMPILE
# ================================================

def cosine_annealing_schedule(epoch, initial_lr=INITIAL_LR, total_epochs=EPOCHS, min_lr=1e-6):
    """Return the cosine-annealed learning rate for ``epoch``.

    lr = min_lr + 0.5 * (initial_lr - min_lr) * (1 + cos(pi * epoch / total_epochs))

    Args:
        epoch: Zero-based epoch index.
        initial_lr: Learning rate at epoch 0.
        total_epochs: Epoch count at which the cosine reaches ``min_lr``.
        min_lr: Lower bound of the schedule.

    NOTE: Keras' ``LearningRateScheduler`` calls ``schedule(epoch, lr)``.
    Passing this function to it directly binds the optimizer's current
    learning rate to ``initial_lr``; wrap it so only ``epoch`` is forwarded.
    """
    half_span = (initial_lr - min_lr) * 0.5
    angle = math.pi * epoch / total_epochs
    return min_lr + half_span * (1 + math.cos(angle))

# Visualize the schedule over the planned number of epochs
lrs = list(map(cosine_annealing_schedule, range(EPOCHS)))
plt.figure(figsize=(10, 4))
plt.plot(lrs, 'b-', linewidth=2)
plt.title('Cosine Annealing Learning Rate Schedule', fontsize=14)
plt.xlabel('Epoch')
plt.ylabel('Learning Rate')
plt.grid(True, alpha=0.3)
plt.show()

# Compile with Adam and the focal loss defined above
optimizer = Adam(learning_rate=INITIAL_LR)
loss_fn = FocalLoss(gamma=2.0, alpha=0.25, label_smoothing=LABEL_SMOOTHING)
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])
print(f"‚úÖ Compiled! Focal Loss (Œ≥=2.0, Œ±=0.25), Label Smoothing={LABEL_SMOOTHING}")


In [ ]:
# ================================================
# CELL 10: CALLBACKS
# ================================================

class SaveHistoryCallback(Callback):
    """Record per-epoch metrics and the learning rate, pickling them each epoch.

    The whole history dict is rewritten to ``path`` after every epoch, so a
    partial history is on disk even if training is interrupted.

    Args:
        path: Destination file for the pickled history dict.
    """
    def __init__(self, path):
        super().__init__()
        self.path = path
        self.data = {'accuracy':[], 'val_accuracy':[], 'loss':[], 'val_loss':[], 'lr':[]}

    def on_epoch_end(self, epoch, logs=None):
        # `logs` defaults to None; treat that as "no metrics" instead of
        # failing on `None.get`.
        logs = logs or {}
        for k in ['accuracy','val_accuracy','loss','val_loss']:
            # A metric missing from `logs` is stored as None.
            self.data[k].append(logs.get(k))
        self.data['lr'].append(float(self.model.optimizer.learning_rate.numpy()))
        with open(self.path, 'wb') as f:
            pickle.dump(self.data, f)

callbacks = [
    # Keep the weights of the epoch with the best validation accuracy.
    ModelCheckpoint(BEST_MODEL_PATH, monitor='val_accuracy', save_best_only=True, mode='max', verbose=1),
    EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True, verbose=1),
    # LearningRateScheduler calls schedule(epoch, current_lr). Forward only
    # `epoch`: otherwise current_lr is bound to the schedule's `initial_lr`
    # parameter and the decay compounds from epoch to epoch, collapsing the
    # learning rate far faster than the plotted cosine curve.
    LearningRateScheduler(lambda epoch, lr=None: cosine_annealing_schedule(epoch), verbose=1),
    SaveHistoryCallback(HISTORY_PATH)
]
print("‚úÖ Callbacks: ModelCheckpoint, EarlyStopping(patience=15), CosineAnnealing, SaveHistory")


In [ ]:
# ================================================
# CELL 11: TRAINING
# ================================================

print("üöÄ Starting Training (Method 4 - Optimized)...")
print(f"   Epochs={EPOCHS}, Batch={BATCH_SIZE}, Train={train_generator.samples}, Val={validation_generator.samples}")
print("=" * 60)

# Everything except the training data is gathered as keyword options.
fit_options = dict(
    epochs=EPOCHS,
    validation_data=validation_generator,
    callbacks=callbacks,
    class_weight=class_weights,
    verbose=1,
)
history = model.fit(train_generator, **fit_options)
print("\n‚úÖ Training Completed!")


In [ ]:
# ================================================
# CELL 12: TRAINING VISUALIZATION
# ================================================

fig, axes = plt.subplots(1, 3, figsize=(20, 5))

# Panels 0 and 1: train vs. validation curves for accuracy and loss.
for ax, metric, title in zip(axes[:2], ('accuracy', 'loss'), ('Accuracy', 'Loss')):
    ax.plot(history.history[metric], label='Train', linewidth=2)
    ax.plot(history.history[f'val_{metric}'], label='Val', linewidth=2)
    ax.set_title(title, fontsize=14)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

# Panel 2: learning rate recorded by SaveHistoryCallback. Loading is
# best-effort, but a failure is reported rather than silently ignored.
try:
    with open(HISTORY_PATH, 'rb') as f:
        lr_vals = pickle.load(f).get('lr', [])
except (OSError, EOFError, pickle.UnpicklingError, AttributeError) as err:
    print(f"WARNING: could not load learning-rate history from {HISTORY_PATH}: {err}")
    lr_vals = []

if lr_vals:
    axes[2].plot(lr_vals, 'g-', linewidth=2)
    axes[2].set_title('Learning Rate (Cosine)', fontsize=14)
    axes[2].set_xlabel('Epoch')
    axes[2].set_ylabel('LR')
    axes[2].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(f'{CHECKPOINT_DIR}/training_curves.png', dpi=150, bbox_inches='tight')
plt.show()

val_acc = history.history['val_accuracy']
best_epoch = int(np.argmax(val_acc))
print(f"üìä Best Val Accuracy: {val_acc[best_epoch]*100:.2f}% (Epoch {best_epoch+1})")


In [ ]:
# ================================================
# CELL 13: STANDARD EVALUATION
# ================================================

print("üìä Standard Evaluation...")

# The saved model references these custom classes, so they are supplied to
# the loader by name.
custom_objects = {
    'FocalLoss': FocalLoss,
    'CBAMBlock': CBAMBlock,
    'ChannelAttention': ChannelAttention,
    'SpatialAttention': SpatialAttention,
}
best_model = keras.models.load_model(BEST_MODEL_PATH, custom_objects=custom_objects)

# Loss and accuracy on the test set.
test_generator.reset()
test_loss, test_acc = best_model.evaluate(test_generator)
print(f"\nüéØ TEST ACCURACY (Standard): {test_acc*100:.2f}%")
print(f"   TEST LOSS: {test_loss:.4f}")

# Per-sample predictions for the classification report.
test_generator.reset()
predictions = best_model.predict(test_generator, verbose=1)
y_pred = predictions.argmax(axis=1)
y_true = test_generator.classes

separator = "=" * 60
print("\n" + separator)
print("üìã Classification Report (Standard):")
print(separator)
print(classification_report(y_true, y_pred, target_names=EMOTIONS, digits=4))


In [ ]:
# ================================================
# CELL 14: TEST TIME AUGMENTATION (TTA)
# ================================================

def predict_with_tta(model, test_dir, img_size=48, batch_size=64, seed=42):
    """Average model predictions over several test-time augmented views.

    Five passes are made over ``test_dir``: the plain images, a horizontally
    flipped copy, and three randomly perturbed copies (rotation, zoom,
    shift). Every pass uses ``shuffle=False`` so rows line up across passes.

    Args:
        model: Model exposing ``predict``.
        test_dir: Directory with one sub-folder per class.
        img_size: Images are resized to ``(img_size, img_size)``.
        batch_size: Batch size of each generator.
        seed: Seed passed to each generator so the random perturbations, and
            therefore the TTA result, are repeatable across runs.

    Returns:
        Array of class probabilities averaged over all passes.
    """
    def _hflip(img):
        # Deterministic left-right flip of one (H, W, C) image.
        # `horizontal_flip=True` would flip each image only at random.
        return np.ascontiguousarray(img[:, ::-1, :])

    tta_datagens = [
        ImageDataGenerator(rescale=1./255),
        ImageDataGenerator(rescale=1./255, preprocessing_function=_hflip),
        ImageDataGenerator(rescale=1./255, rotation_range=10),
        ImageDataGenerator(rescale=1./255, zoom_range=0.1),
        ImageDataGenerator(rescale=1./255, width_shift_range=0.1, height_shift_range=0.1),
    ]
    all_preds = []
    for i, dg in enumerate(tta_datagens):
        print(f"   TTA {i+1}/{len(tta_datagens)}...")
        gen = dg.flow_from_directory(test_dir, target_size=(img_size, img_size),
                                     color_mode='grayscale', batch_size=batch_size,
                                     class_mode='categorical', shuffle=False, seed=seed)
        all_preds.append(model.predict(gen, verbose=0))
    return np.mean(all_preds, axis=0)

print("üîÑ Running TTA...")
tta_preds = predict_with_tta(best_model, TEST_DIR)
y_pred_tta = tta_preds.argmax(axis=1)
# Ground-truth labels come from the unshuffled test generator.
y_true_tta = test_generator.classes

# Fraction of samples whose averaged prediction matches the label.
tta_accuracy = (y_pred_tta == y_true_tta).mean()
print(f"\nüèÜ TEST ACCURACY (TTA): {tta_accuracy*100:.2f}%")

tta_separator = "=" * 60
print("\n" + tta_separator)
print("üìã Classification Report (TTA):")
print(tta_separator)
print(classification_report(y_true_tta, y_pred_tta, target_names=EMOTIONS, digits=4))


In [ ]:
# ================================================
# CELL 15: CONFUSION MATRIX
# ================================================

# Pin the label set so the matrix always has one row/column per entry of
# EMOTIONS, even if a class is absent from both the labels and predictions.
label_ids = np.arange(len(EMOTIONS))
cm = confusion_matrix(y_true_tta, y_pred_tta, labels=label_ids)
fig, axes = plt.subplots(1, 2, figsize=(18, 7))

# Left panel: raw counts.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=EMOTIONS, yticklabels=EMOTIONS, ax=axes[0])
axes[0].set_title('Confusion Matrix (Counts)', fontsize=14)
axes[0].set_ylabel('True'); axes[0].set_xlabel('Predicted')

# Right panel: each row divided by its total. An empty row is divided by 1
# instead of 0, so it shows zeros rather than NaN.
row_totals = cm.sum(axis=1)[:, np.newaxis]
cm_norm = cm.astype('float') / np.maximum(row_totals, 1)
sns.heatmap(cm_norm, annot=True, fmt='.2%', cmap='Oranges',
            xticklabels=EMOTIONS, yticklabels=EMOTIONS, ax=axes[1])
axes[1].set_title('Confusion Matrix (Normalized)', fontsize=14)
axes[1].set_ylabel('True'); axes[1].set_xlabel('Predicted')

plt.tight_layout()
plt.savefig(f'{CHECKPOINT_DIR}/confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.show()


In [ ]:
# ================================================
# CELL 16: SO SÁNH & SAVE MODEL
# ================================================

print("="*65)
print("üìä SO S√ÅNH K·∫æT QU·∫¢ C√ÅC PH∆Ø∆†NG PH√ÅP")
print("="*65)

# Accuracies (%) of Methods 1-3 are hard-coded figures; the Method 4 entries
# come from the evaluation cells above.
results = {
    'Method 1 - Enhanced Augmentation':        62.93,
    'Method 2 - SE Attention CNN':             64.22,
    'Method 3 - MobileNetV2':                  36.28,
    'Method 4 - CBAM (Standard)':  test_acc * 100,
    'Method 4 - CBAM (TTA)':       tta_accuracy * 100,
}

for method, acc in results.items():
    # One bar character per two percentage points.
    bar = '‚ñà' * int(acc / 2)
    print(f"   {method:<42s} | {acc:6.2f}% | {bar}")

print("="*65)
# Read the baseline from `results` so it cannot drift from the table, and let
# the format spec emit the sign so a regression prints "-x.xx", not "+-x.xx".
improvement = tta_accuracy * 100 - results['Method 2 - SE Attention CNN']
print(f"\nüèÜ Improvement over Method 2: {improvement:+.2f}%")

# Save final model
FINAL_MODEL_PATH = '/content/drive/MyDrive/CaptoneProject/best_model_method4.keras'
best_model.save(FINAL_MODEL_PATH)
print(f"\nüíæ Model saved to: {FINAL_MODEL_PATH}")

print("\n" + "="*60)
print("üìù TRAINING SUMMARY")
print("="*60)
print(f"   Architecture: CNN 4-Block + CBAM Attention")
print(f"   Loss: Focal Loss (gamma=2.0, alpha=0.25)")
print(f"   Label Smoothing: {LABEL_SMOOTHING}")
print(f"   LR: Cosine Annealing ({INITIAL_LR} ‚Üí 1e-6)")
print(f"   Augmentation: Enhanced (rot=25¬∞, shift=0.2, brightness)")
print(f"   Params: {model.count_params():,}")
print(f"   Best Val Acc: {max(history.history['val_accuracy'])*100:.2f}%")
print(f"   Test Acc (Standard): {test_acc*100:.2f}%")
print(f"   Test Acc (TTA): {tta_accuracy*100:.2f}%")
print("="*60)
