# Method 4 FIXED: Optimized CBAM CNN - FER2013

## 🔍 Phân tích vấn đề từ lần chạy trước:
- **Val Accuracy chỉ đạt 46.3%** — Thấp hơn Method 2 (64.22%)
- **Nguyên nhân 1**: Focal Loss (gamma=2.0) + Label Smoothing (0.15) quá aggressive → gradient rất nhỏ → model học chậm/kém
- **Nguyên nhân 2**: Cosine Annealing LR với min_lr=1e-6 → LR giảm quá nhanh → model bị underfitting sớm
- **Bằng chứng**: Epoch 1 val_accuracy=7.4% (bình thường phải ~14%), val_accuracy dao động rất mạnh

## ✅ Fix áp dụng trong version này:
| Thành phần | Trước (lỗi) | Sau (fix) | Lý do |
|---|---|---|---|
| **Loss** | FocalLoss(gamma=2.0, α=0.25) | CrossEntropy + label_smoothing=0.1 | FL quá aggressive, gradient bị triệt tiêu |
| **LR Schedule** | Cosine Annealing (min=1e-6) | ReduceLROnPlateau (factor=0.5, patience=5) | Thích nghi với thực tế training tốt hơn |
| **Initial LR** | 0.001 | 0.001 | Giữ nguyên |
| **Label Smoothing** | 0.15 | 0.1 | Giảm để gradient đủ lớn |
| **Architecture** | CBAM 4-Block | CBAM 4-Block | **GIỮ NGUYÊN** — không phải nguyên nhân |
| **Dropout** | 0.25/0.3/0.5 | 0.25/0.3/0.5 | **GIỮ NGUYÊN** |
| **Class Weights** | Có | Có | **GIỮ NGUYÊN** |
| **EarlyStopping** | patience=15 | patience=15 | **GIỮ NGUYÊN** |

In [ ]:
# ================================================
# CELL 1: SETUP & GPU CHECK
# ================================================
# IPython/Colab shell escape: print the NVIDIA driver/GPU status of this runtime.
!nvidia-smi

import tensorflow as tf

# Physical GPU devices visible to TensorFlow (an empty list when there is none).
gpus = tf.config.list_physical_devices('GPU')
print(f"TensorFlow: {tf.__version__}")
print(f"GPUs: {gpus}")

if gpus:
    for gpu in gpus:
        # Turn on memory growth for every GPU found.
        tf.config.experimental.set_memory_growth(gpu, True)
    print("‚úÖ GPU ENABLED!")
else:
    # No GPU: only a hint is printed; the notebook keeps running.
    print("‚ùå NO GPU! Go to Runtime ‚Üí Change runtime type ‚Üí GPU")


In [ ]:
# ================================================
# CELL 2: IMPORT LIBRARIES
# ================================================
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import pickle
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, regularizers, backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import (
    EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, Callback
)
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

print("‚úÖ Libraries imported!")


In [ ]:
# ================================================
# CELL 3: MOUNT DRIVE
# ================================================
# Colab-only: mount Google Drive at /content/drive. Later cells read the
# dataset ZIP from, and write checkpoints to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')
print("‚úÖ Drive mounted!")


In [ ]:
# ================================================
# CELL 4: EXTRACT DATASET
# ================================================
ZIP_PATH = '/content/drive/MyDrive/CaptoneProject/camera.zip'
LOCAL_PATH = '/content/dataset'

if not os.path.exists(LOCAL_PATH):
    if os.path.exists(ZIP_PATH):
        print("üì¶ Unzipping...")
        !unzip -q -o "{ZIP_PATH}" -d /content/
        if os.path.exists('/content/camera'):
            !mv /content/camera "{LOCAL_PATH}"
        elif os.path.exists('/content/train') and os.path.exists('/content/test'):
            os.makedirs(LOCAL_PATH, exist_ok=True)
            !mv /content/train "{LOCAL_PATH}/train"
            !mv /content/test "{LOCAL_PATH}/test"
        print("‚úÖ Dataset ready at /content/dataset")
    else:
        print("‚ùå ZIP file not found in Drive!")
else:
    print("‚úÖ Dataset already exists locally!")

TRAIN_DIR = os.path.join(LOCAL_PATH, 'train')
TEST_DIR  = os.path.join(LOCAL_PATH, 'test')
print(f"Train: {TRAIN_DIR}")
print(f"Test:  {TEST_DIR}")


In [ ]:
# ================================================
# CELL 5: CONFIG
# ================================================
# Global hyper-parameters and output paths shared by the cells below.
IMG_SIZE        = 48
BATCH_SIZE      = 64
EPOCHS          = 100      # Upper bound; EarlyStopping is expected to end training earlier
INITIAL_LR      = 0.001
NUM_CLASSES     = 7
EMOTIONS        = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
SEED            = 42
LABEL_SMOOTHING = 0.1      # FIX: lowered from 0.15 to 0.1

# Seed the NumPy and TensorFlow random generators.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Checkpoints and the pickled training history are written to Google Drive.
CHECKPOINT_DIR  = '/content/drive/MyDrive/CaptoneProject/checkpoints/method4_fixed'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
BEST_MODEL_PATH = f'{CHECKPOINT_DIR}/best_model.keras'
HISTORY_PATH    = f'{CHECKPOINT_DIR}/history.pkl'

print("‚úÖ Config set!")
print(f"   EPOCHS={EPOCHS}, LR={INITIAL_LR}, LABEL_SMOOTHING={LABEL_SMOOTHING}, BATCH={BATCH_SIZE}")
print(f"   Loss: CategoricalCrossentropy (kh√¥ng d√πng Focal Loss)")
print(f"   LR Schedule: ReduceLROnPlateau (thay Cosine Annealing)")


In [ ]:
# ================================================
# CELL 6: DATA AUGMENTATION
# ================================================
# Fraction of TRAIN_DIR held out for validation. The training and validation
# generators must use the same value so their subsets refer to the same split.
VALIDATION_SPLIT = 0.2

# Random augmentation, applied to the training subset only.
train_datagen = ImageDataGenerator(
    rescale           = 1./255,
    rotation_range    = 25,
    width_shift_range = 0.2,
    height_shift_range= 0.2,
    shear_range       = 0.2,
    zoom_range        = 0.2,
    horizontal_flip   = True,
    brightness_range  = [0.7, 1.3],
    fill_mode         = 'nearest',
    validation_split  = VALIDATION_SPLIT
)
# FIX: the validation subset used to be drawn from train_datagen, so every
# validation image was randomly rotated/shifted/zoomed as well. That makes
# val_loss / val_accuracy noisy, and those are exactly the quantities watched
# by ModelCheckpoint, EarlyStopping and ReduceLROnPlateau. Validation now
# only rescales.
# NOTE(review): this relies on flow_from_directory choosing the subset from
# the file listing and validation_split alone — confirm against the Keras docs.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=VALIDATION_SPLIT)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR, target_size=(IMG_SIZE, IMG_SIZE), color_mode='grayscale',
    batch_size=BATCH_SIZE, class_mode='categorical',
    subset='training', shuffle=True, seed=SEED
)
validation_generator = val_datagen.flow_from_directory(
    TRAIN_DIR, target_size=(IMG_SIZE, IMG_SIZE), color_mode='grayscale',
    batch_size=BATCH_SIZE, class_mode='categorical',
    subset='validation', shuffle=False, seed=SEED
)
# shuffle=False keeps prediction order aligned with test_generator.classes.
test_generator = test_datagen.flow_from_directory(
    TEST_DIR, target_size=(IMG_SIZE, IMG_SIZE), color_mode='grayscale',
    batch_size=BATCH_SIZE, class_mode='categorical', shuffle=False
)

print("‚úÖ Data Generators ready!")
print(f"   Train: {train_generator.samples} images")
print(f"   Val:   {validation_generator.samples} images")
print(f"   Test:  {test_generator.samples} images")


In [ ]:
# ================================================
# CELL 7: CLASS WEIGHTS
# ================================================
# 'balanced' weights computed from the labels of the training subset, used to
# compensate for class imbalance during model.fit.
train_labels = train_generator.classes
present_classes = np.unique(train_labels)
class_weights_array = compute_class_weight(
    'balanced', classes=present_classes, y=train_labels
)
# FIX: key each weight by the class id it was computed for. dict(enumerate(...))
# silently shifts the weights onto the wrong classes whenever a class id is
# missing from the training subset. Plain int/float are used as keys/values.
class_weights = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_classes, class_weights_array)
}

print("‚öñÔ∏è Class Weights (b√π m·∫•t c√¢n b·∫±ng d·ªØ li·ªáu):")
for class_id, weight in class_weights.items():
    # Assumes EMOTIONS is listed in the same order as the generator's class ids.
    print(f"   {EMOTIONS[class_id]:10s}: {weight:.4f}")


In [ ]:
# ================================================
# CELL 8: CBAM ATTENTION + BUILD MODEL
# (Architecture unchanged; only the loss and the LR schedule were fixed)
# ================================================

# -------------------- CBAM ATTENTION --------------------
class ChannelAttention(layers.Layer):
    """Channel attention gate using both average- and max-pooled descriptors.

    The two pooled vectors are passed through the same two-layer MLP, summed,
    squashed with a sigmoid and used to rescale the channels of the input
    (an extension of the SE block, which uses average pooling only).

    Args:
        ratio: Reduction ratio of the MLP bottleneck (hidden units =
            channels // ratio, at least 1).
    """
    def __init__(self, ratio=8, **kwargs):
        super().__init__(**kwargs)
        self.ratio = ratio

    def build(self, input_shape):
        ch = input_shape[-1]
        if ch is None:
            raise ValueError(
                'ChannelAttention needs a statically known channel dimension, '
                f'got input_shape={input_shape}'
            )
        ch = int(ch)
        # Guard against a zero-width bottleneck when ch < ratio.
        hidden = max(ch // self.ratio, 1)
        self.dense1 = layers.Dense(hidden, activation='relu',
                                   kernel_initializer='he_normal')
        self.dense2 = layers.Dense(ch, kernel_initializer='he_normal')
        self.gap = layers.GlobalAveragePooling2D()
        self.gmp = layers.GlobalMaxPooling2D()
        # FIX: create the children's weights here instead of lazily on the first
        # call. Keras expects build() to create all state of a layer, including
        # that of its sub-layers, so weights can be restored after build().
        self.dense1.build((None, ch))
        self.dense2.build((None, hidden))
        super().build(input_shape)

    def call(self, x):
        ch = x.shape[-1]
        # The same MLP (dense1 -> dense2) is applied to both pooled descriptors.
        avg = self.dense2(self.dense1(self.gap(x)))
        mx  = self.dense2(self.dense1(self.gmp(x)))
        att = tf.sigmoid(avg + mx)
        # Reshape to (batch, 1, 1, ch) so the gate broadcasts over height/width.
        return x * tf.reshape(att, (-1, 1, 1, ch))

    def get_config(self):
        config = super().get_config()
        config.update({'ratio': self.ratio})
        return config


class SpatialAttention(layers.Layer):
    """Spatial attention gate that highlights important regions (eyes, mouth, ...).

    The channel-wise mean and max maps are concatenated and passed through a
    single sigmoid convolution; the resulting one-channel map rescales every
    spatial position of the input.

    Args:
        kernel_size: Size of the convolution kernel producing the attention map.
    """
    def __init__(self, kernel_size=7, **kwargs):
        super().__init__(**kwargs)
        self.kernel_size = kernel_size

    def build(self, input_shape):
        self.conv = layers.Conv2D(1, self.kernel_size, padding='same',
                                  activation='sigmoid',
                                  kernel_initializer='he_normal')
        # FIX: build the child conv here rather than lazily on the first call,
        # so that all state of this layer exists once build() has run. The
        # conv always sees two channels: the mean map and the max map.
        self.conv.build(tuple(input_shape)[:-1] + (2,))
        super().build(input_shape)

    def call(self, x):
        avg = tf.reduce_mean(x, axis=-1, keepdims=True)
        mx  = tf.reduce_max (x, axis=-1, keepdims=True)
        att = self.conv(tf.concat([avg, mx], axis=-1))
        # att has a single channel and broadcasts across all channels of x.
        return x * att

    def get_config(self):
        config = super().get_config()
        config.update({'kernel_size': self.kernel_size})
        return config


class CBAMBlock(layers.Layer):
    """CBAM block: channel attention followed by spatial attention.

    Args:
        ratio: Reduction ratio forwarded to ChannelAttention.
        kernel_size: Kernel size forwarded to SpatialAttention.
    """
    def __init__(self, ratio=8, kernel_size=7, **kwargs):
        super().__init__(**kwargs)
        self.ratio = ratio
        self.kernel_size = kernel_size
        self.ca = ChannelAttention(ratio=ratio)
        self.sa = SpatialAttention(kernel_size=kernel_size)

    def build(self, input_shape):
        # FIX: the block had no build(), so its children were only built lazily
        # on the first call. Keras expects build() to create the state of all
        # sub-layers. Both stages return a tensor of the same shape as their
        # input (x * gate), so each is built against input_shape.
        if not self.ca.built:
            self.ca.build(input_shape)
        if not self.sa.built:
            self.sa.build(input_shape)
        super().build(input_shape)

    def call(self, x):
        return self.sa(self.ca(x))

    def get_config(self):
        config = super().get_config()
        config.update({'ratio': self.ratio, 'kernel_size': self.kernel_size})
        return config


# -------------------- BUILD MODEL --------------------
def build_cbam_cnn(input_shape=(48, 48, 1), num_classes=7):
    """Assemble the 4-stage CNN with a CBAM block at the end of every stage.

    Each stage is two Conv-BN-ReLU units, a CBAM block, 2x2 max pooling and
    dropout. With the default 48x48 input the feature map shrinks
    48 -> 24 -> 12 -> 6 -> 3. A two-layer dense head with L2 regularisation and
    dropout ends in a softmax classifier.

    Args:
        input_shape: Shape of one input image (height, width, channels).
        num_classes: Number of output classes.

    Returns:
        An uncompiled Keras functional model.
    """
    def conv_bn_relu(tensor, filters):
        # 3x3 'same' convolution -> batch norm -> ReLU
        tensor = layers.Conv2D(filters, 3, padding='same',
                               kernel_initializer='he_normal')(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.Activation('relu')(tensor)

    # (conv filters, CBAM reduction ratio, dropout rate) for stages 1-4
    stage_specs = (
        (64,  8,  0.25),
        (128, 8,  0.25),
        (256, 16, 0.3),
        (512, 16, 0.3),
    )

    inputs = layers.Input(shape=input_shape)
    feat = inputs
    for filters, cbam_ratio, drop_rate in stage_specs:
        feat = conv_bn_relu(feat, filters)
        feat = conv_bn_relu(feat, filters)
        feat = CBAMBlock(ratio=cbam_ratio)(feat)
        feat = layers.MaxPooling2D(2)(feat)
        feat = layers.Dropout(drop_rate)(feat)

    # Classifier head
    feat = layers.GlobalAveragePooling2D()(feat)
    for units in (512, 256):
        feat = layers.Dense(units, kernel_regularizer=regularizers.l2(0.0005))(feat)
        feat = layers.BatchNormalization()(feat)
        feat = layers.Activation('relu')(feat)
        feat = layers.Dropout(0.5)(feat)
    outputs = layers.Dense(num_classes, activation='softmax')(feat)

    return models.Model(inputs=inputs, outputs=outputs)


# Build the network with the default 48x48x1 input and 7 classes, then print
# the layer table and the total parameter count.
model = build_cbam_cnn()
model.summary()
print(f"\n‚úÖ CBAM Model built! Params: {model.count_params():,}")


In [ ]:
# ================================================
# CELL 9: COMPILE
# FIX: use CategoricalCrossentropy instead of FocalLoss
# ================================================

# FIX 1: CategoricalCrossentropy with label_smoothing=0.1
# (instead of FocalLoss gamma=2.0 alpha=0.25, which was too aggressive)
loss_fn = keras.losses.CategoricalCrossentropy(
    label_smoothing=LABEL_SMOOTHING  # 0.1: enough to regularise without being too strong
)

# Adam starts at INITIAL_LR; ReduceLROnPlateau lowers it later during training.
model.compile(
    optimizer=Adam(learning_rate=INITIAL_LR),
    loss=loss_fn,
    metrics=['accuracy']
)

print("‚úÖ Model compiled!")
print(f"   Loss: CategoricalCrossentropy (label_smoothing={LABEL_SMOOTHING})")
print(f"   Optimizer: Adam (lr={INITIAL_LR})")
print(f"   ‚ö†Ô∏è  KH√îNG d√πng FocalLoss ‚Äî nguy√™n nh√¢n g√¢y accuracy th·∫•p tr∆∞·ªõc ƒë√≥")


In [ ]:
# ================================================
# CELL 10: CALLBACKS
# FIX: ReduceLROnPlateau thay Cosine Annealing
# ================================================

class SaveHistoryCallback(Callback):
    """Pickle the training history after every epoch.

    The file is rewritten at the end of each epoch so the curves survive a
    runtime crash.

    Args:
        path: Destination of the pickle file.
    """
    def __init__(self, path):
        super().__init__()
        self.path = path
        self.data = {'accuracy': [], 'val_accuracy': [], 'loss': [], 'val_loss': [], 'lr': []}

    def _current_lr(self):
        """Return the optimizer's current learning rate as a float."""
        optimizer = self.model.optimizer
        lr = optimizer.learning_rate
        try:
            return float(lr)
        except (TypeError, ValueError):
            # Not directly convertible: either a schedule object that has to be
            # evaluated at the current step, or a variable exposing .numpy().
            if callable(lr):
                lr = lr(optimizer.iterations)
            return float(lr.numpy() if hasattr(lr, 'numpy') else lr)

    def on_epoch_end(self, epoch, logs=None):
        # FIX: logs defaults to None; the original crashed on logs.get in that case.
        logs = logs or {}
        for k in ['accuracy', 'val_accuracy', 'loss', 'val_loss']:
            self.data[k].append(logs.get(k))
        # FIX: the bare `except:` is replaced by a narrow handler in _current_lr,
        # so unrelated errors (including KeyboardInterrupt) are no longer swallowed.
        self.data['lr'].append(self._current_lr())
        with open(self.path, 'wb') as f:
            pickle.dump(self.data, f)


# Keep the weights of the epoch with the highest validation accuracy.
checkpoint_cb = ModelCheckpoint(
    BEST_MODEL_PATH,
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1,
)

# Stop when validation accuracy has not improved for 15 epochs.
early_stop_cb = EarlyStopping(
    monitor='val_accuracy',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)

# FIX 2: ReduceLROnPlateau instead of cosine annealing. The learning rate is
# halved after 5 epochs without val_loss improvement, never going below 1e-6.
reduce_lr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
    verbose=1,
)

# Persist the history (including the learning rate) after each epoch.
history_cb = SaveHistoryCallback(HISTORY_PATH)

callbacks = [checkpoint_cb, early_stop_cb, reduce_lr_cb, history_cb]

for summary_line in (
    "‚úÖ Callbacks configured:",
    "   - ModelCheckpoint (monitor=val_accuracy)",
    "   - EarlyStopping (patience=15)",
    "   - ReduceLROnPlateau (factor=0.5, patience=5) ‚Üê FIX t·ª´ Cosine Annealing",
    "   - SaveHistory",
):
    print(summary_line)


In [ ]:
# ================================================
# CELL 11: TRAINING
# ================================================

# Echo the run configuration before the (long) fit call.
print("üöÄ Starting Training (Method 4 FIXED)...")
print(f"   Epochs={EPOCHS}, Batch={BATCH_SIZE}")
print(f"   Train={train_generator.samples}, Val={validation_generator.samples}")
print(f"   Loss: CategoricalCrossentropy (label_smoothing={LABEL_SMOOTHING})")
print(f"   LR: ReduceLROnPlateau (init={INITIAL_LR})")
print("=" * 60)

fit_options = {
    'epochs': EPOCHS,
    'validation_data': validation_generator,
    'callbacks': callbacks,
    'class_weight': class_weights,
    'verbose': 1,
}
history = model.fit(train_generator, **fit_options)

print("\n‚úÖ Training Completed!")
# Highest validation accuracy and the 1-based epoch where it first occurred.
val_acc_curve = history.history['val_accuracy']
best_val_acc = max(val_acc_curve)
best_epoch   = val_acc_curve.index(best_val_acc) + 1
print(f"   Best Val Accuracy: {best_val_acc*100:.2f}% (Epoch {best_epoch})")


In [ ]:
# ================================================
# CELL 12: TRAINING VISUALIZATION
# ================================================

fig, axes = plt.subplots(1, 3, figsize=(20, 5))
acc_ax, loss_ax, lr_ax = axes
curves = history.history

# Panel 1: train/validation accuracy with the best validation value marked.
acc_ax.plot(curves['accuracy'],     label='Train', linewidth=2)
acc_ax.plot(curves['val_accuracy'], label='Val',   linewidth=2)
best_val_acc = max(curves['val_accuracy'])
acc_ax.axhline(best_val_acc, color='red', linestyle='--', alpha=0.5,
               label=f'Best Val={best_val_acc*100:.2f}%')
acc_ax.set_title('Accuracy', fontsize=14)
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
acc_ax.grid(True, alpha=0.3)

# Panel 2: train/validation loss.
loss_ax.plot(curves['loss'],     label='Train', linewidth=2)
loss_ax.plot(curves['val_loss'], label='Val',   linewidth=2)
loss_ax.set_title('Loss', fontsize=14)
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.grid(True, alpha=0.3)

# Panel 3: learning rate per epoch, read back from the pickled history file.
# Any failure here is reported and the panel is left empty.
try:
    with open(HISTORY_PATH, 'rb') as history_file:
        saved_history = pickle.load(history_file)
    lr_vals = saved_history.get('lr', [])
    if lr_vals:
        lr_ax.plot(lr_vals, 'g-', linewidth=2)
        lr_ax.set_title('Learning Rate (ReduceLROnPlateau)', fontsize=14)
        lr_ax.set_xlabel('Epoch')
        lr_ax.set_ylabel('LR')
        lr_ax.set_yscale('log')  # log scale keeps the halving steps readable
        lr_ax.grid(True, alpha=0.3)
except Exception as err:
    print(f"LR plot error: {err}")

plt.tight_layout()
plt.savefig(f'{CHECKPOINT_DIR}/training_curves.png', dpi=150, bbox_inches='tight')
plt.show()

# Best validation accuracy, its 1-based epoch and the number of epochs run.
val_acc_curve = curves['val_accuracy']
best_val_acc = max(val_acc_curve)
best_epoch   = val_acc_curve.index(best_val_acc) + 1
epochs_run   = len(val_acc_curve)
print(f"üìä Best Val Accuracy: {best_val_acc*100:.2f}% (Epoch {best_epoch}/{epochs_run})")


In [ ]:
# ================================================
# CELL 13: STANDARD EVALUATION
# ================================================

print("üìä Standard Evaluation...")
# The custom attention layers must be supplied so Keras can rebuild the model.
attention_layers = (CBAMBlock, ChannelAttention, SpatialAttention)
best_model = keras.models.load_model(
    BEST_MODEL_PATH,
    custom_objects={layer_cls.__name__: layer_cls for layer_cls in attention_layers},
)

# Loss and accuracy on the held-out test set.
test_generator.reset()
eval_scores = best_model.evaluate(test_generator, verbose=1)
test_loss, test_acc = eval_scores
print(f"\nüéØ TEST ACCURACY (Standard): {test_acc*100:.2f}%")
print(f"   TEST LOSS: {test_loss:.4f}")

# Per-sample predictions; the generator was created with shuffle=False, so the
# row order matches test_generator.classes.
test_generator.reset()
predictions = best_model.predict(test_generator, verbose=1)
y_pred = predictions.argmax(axis=1)
y_true = test_generator.classes

separator = "=" * 60
print("\n" + separator)
print("üìã Classification Report (Standard):")
print(separator)
print(classification_report(y_true, y_pred, target_names=EMOTIONS, digits=4))


In [ ]:
# ================================================
# CELL 14: TEST TIME AUGMENTATION (TTA)
# ================================================

def predict_with_tta(model, test_dir, img_size=48, batch_size=64, n_aug=5):
    """Test-time augmentation: average predictions over several augmented passes.

    Up to five passes over `test_dir` are run, each with a different
    ImageDataGenerator configuration (plain rescale, horizontal flip, rotation,
    zoom, shift), and the predicted probabilities are averaged.

    NOTE(review): the augmentations are the generator's random transforms and
    no seed is passed, so results presumably vary slightly between runs.

    Args:
        model: Model exposing `predict(generator, verbose=...)`.
        test_dir: Directory with one sub-folder per class.
        img_size: Side length the images are resized to.
        batch_size: Batch size of each pass.
        n_aug: Number of passes to average; values above the number of
            available configurations (5) are clamped.

    Returns:
        Array of averaged class probabilities, one row per test image, in
        directory order (shuffle is disabled).

    Raises:
        ValueError: If `n_aug` is smaller than 1.
    """
    # Keyword arguments added on top of rescale=1./255 for every pass.
    tta_configs = [
        {},
        {'horizontal_flip': True},
        {'rotation_range': 10},
        {'zoom_range': 0.1},
        {'width_shift_range': 0.1, 'height_shift_range': 0.1},
    ]
    # FIX: n_aug <= 0 used to fall through to np.mean([]) and return NaN.
    if n_aug < 1:
        raise ValueError(f"n_aug must be at least 1, got {n_aug}")
    # FIX: n_aug above the number of configs printed a wrong total ("1/7").
    n_used = min(n_aug, len(tta_configs))

    all_preds = []
    for i, extra_kwargs in enumerate(tta_configs[:n_used]):
        print(f"   TTA {i+1}/{n_used}...", end=' ')
        dg = ImageDataGenerator(rescale=1./255, **extra_kwargs)
        gen = dg.flow_from_directory(
            test_dir, target_size=(img_size, img_size),
            color_mode='grayscale', batch_size=batch_size,
            class_mode='categorical', shuffle=False
        )
        preds = model.predict(gen, verbose=0)
        all_preds.append(preds)
        print(f"done")
    return np.mean(all_preds, axis=0)


# Evaluate the best checkpoint with test-time augmentation and report accuracy.
print("üîÑ Running TTA (5 augmentations)...")
tta_preds     = predict_with_tta(best_model, TEST_DIR)
y_pred_tta    = tta_preds.argmax(axis=1)
y_true_tta    = test_generator.classes
# Fraction of test images whose averaged prediction matches the true label.
tta_accuracy  = (y_pred_tta == y_true_tta).mean()

print(f"\nüèÜ TEST ACCURACY (TTA): {tta_accuracy*100:.2f}%")
tta_separator = "=" * 60
print("\n" + tta_separator)
print("üìã Classification Report (TTA):")
print(tta_separator)
print(classification_report(y_true_tta, y_pred_tta, target_names=EMOTIONS, digits=4))


In [ ]:
# ================================================
# CELL 15: CONFUSION MATRIX
# ================================================

# FIX: pin the label set. Without `labels`, scikit-learn only includes classes
# that occur in y_true or y_pred, so the matrix could be smaller than the
# EMOTIONS tick labels drawn on it.
class_ids = list(range(len(EMOTIONS)))
cm = confusion_matrix(y_true_tta, y_pred_tta, labels=class_ids)
fig, axes = plt.subplots(1, 2, figsize=(18, 7))

# Left: raw counts.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=EMOTIONS, yticklabels=EMOTIONS, ax=axes[0])
axes[0].set_title('Confusion Matrix (Counts)', fontsize=14)
axes[0].set_ylabel('True'); axes[0].set_xlabel('Predicted')

# Right: each row divided by its number of true samples.
# FIX: rows without any true sample are left at 0 instead of dividing by zero.
row_totals = cm.sum(axis=1, keepdims=True)
cm_norm = np.divide(cm.astype('float'), row_totals,
                    out=np.zeros(cm.shape, dtype=float),
                    where=row_totals != 0)
sns.heatmap(cm_norm, annot=True, fmt='.2%', cmap='Oranges',
            xticklabels=EMOTIONS, yticklabels=EMOTIONS, ax=axes[1])
axes[1].set_title('Confusion Matrix (Normalized %)', fontsize=14)
axes[1].set_ylabel('True'); axes[1].set_xlabel('Predicted')

plt.tight_layout()
plt.savefig(f'{CHECKPOINT_DIR}/confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.show()


In [ ]:
# ================================================
# CELL 16: COMPARE RESULTS & SAVE MODEL
# ================================================

print("=" * 70)
print("üìä SO S√ÅNH K·∫æT QU·∫¢ C√ÅC PH∆Ø∆†NG PH√ÅP")
print("=" * 70)

# Reference accuracies (%) of earlier runs, named once so the table and the
# comparisons below cannot drift apart.
METHOD2_ACC      = 64.22
METHOD4_OLD_ACC  = 46.30
tta_acc_pct      = tta_accuracy * 100

results = {
    'Method 1 - Enhanced Augmentation':    62.93,
    'Method 2 - SE Attention CNN':         METHOD2_ACC,
    'Method 3 - MobileNetV2':              36.28,
    'Method 4 (l·ªói) - Focal+Cosine':      METHOD4_OLD_ACC,  # previous (buggy) result
    'Method 4 (fixed) - Standard (TTA)':  tta_acc_pct,
}

print(f"{'Method':<48s} | {'Acc':>7s} | Bar")
print("-" * 70)
for method, acc in results.items():
    # One bar character per 2 percentage points.
    bar   = '‚ñà' * int(acc / 2)
    arrow = ' ‚Üê C·∫¢I THI·ªÜN!' if 'fixed' in method else ''
    print(f"  {method:<46s} | {acc:6.2f}% | {bar}{arrow}")

print("=" * 70)
improvement = tta_acc_pct - METHOD2_ACC
# FIX: the second comparison hard-coded a leading '+', printing "+-x.xx%" when
# the fixed run is worse than the old one. The '+' format flag signs both lines.
print(f"\n   So v·ªõi Method 2 (best tr∆∞·ªõc ƒë√¢y): {improvement:+.2f}%")
print(f"   So v·ªõi Method 4 l·ªói (46.3%):      {tta_acc_pct - METHOD4_OLD_ACC:+.2f}%")

# Save a copy of the best checkpoint next to the project files on Drive.
FINAL_MODEL_PATH = '/content/drive/MyDrive/CaptoneProject/best_model_method4_fixed.keras'
best_model.save(FINAL_MODEL_PATH)
print(f"\nüíæ Model saved: {FINAL_MODEL_PATH}")

# Summary
print("\n" + "=" * 60)
print("üìù TRAINING SUMMARY")
print("=" * 60)
best_val_acc = max(history.history['val_accuracy'])
print(f"   Architecture:   CNN 4-Block + CBAM Attention")
print(f"   Loss:           CategoricalCrossentropy + label_smoothing={LABEL_SMOOTHING}")
print(f"   LR Schedule:    ReduceLROnPlateau (factor=0.5, patience=5)")
print(f"   Augmentation:   Enhanced (rot=25¬∞, shift=0.2, brightness)")
print(f"   Total Params:   {model.count_params():,}")
print(f"   Best Val Acc:   {best_val_acc*100:.2f}%")
print(f"   Test Acc (Std): {test_acc*100:.2f}%")
print(f"   Test Acc (TTA): {tta_accuracy*100:.2f}%")
print("=" * 60)
