In [None]:
# Attach Google Drive under /content/drive so later cells can read files stored there.
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
# Chemin vers votre fichier .zip sur Google Drive
zip_path = "/content/drive/MyDrive/AI_Datasets/combined_dataset_96.zip"

# Dossier de destination dans l'environnement Colab
extract_path = "/content/datasets/"

# Commande de décompression
!unzip -q "{zip_path}" -d "{extract_path}"

print("Dataset décompressé avec succès !")

Dataset décompressé avec succès !


In [None]:
# ==============================================================================
# UPGRADED SCRIPT: AIMING FOR 70%+ ACCURACY ON KAGGLE (COMPLETE & CORRECTED)
# ==============================================================================
#
# STRATEGY:
# 1. Using a more powerful EfficientNetV2B2 base model.
# 2. Using larger 192x192 images for more detail.
# 3. Using the robust AdamW optimizer to ensure stability.
# 4. Deeper fine-tuning for better adaptation to the dataset.
#
# ==============================================================================

# --- 1. SETUP AND IMPORTS ---
import os
import gc
import tensorflow as tf
from tensorflow.keras import layers, models, applications, regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import AdamW
import matplotlib.pyplot as plt

print(f"TensorFlow Version: {tf.__version__}")

# --- 2. DATASET DOWNLOAD FROM GOOGLE DRIVE ---
!pip install -q gdown
gdrive_link = "https://drive.google.com/file/d/1oaJcF-Oe-81OD9wp16VExOJgLuc8vU94/view?usp=drive_link"
output_zip_path = "/kaggle/working/dataset.zip"
extract_path = "/kaggle/working/datasets/"
print("📂 Téléchargement du dataset depuis Google Drive...")
!gdown --fuzzy "{gdrive_link}" -O "{output_zip_path}"
print("📦 Décompression du dataset...")
!unzip -q -o "{output_zip_path}" -d "{extract_path}"
print(f"✅ Dataset prêt dans {extract_path}")


# --- 3. CONFIGURATION AND OPTIMIZATIONS ---

# Larger input images than the earlier runs, for more detail.
IMG_SIZE = 192
BATCH_SIZE = 32  # a batch size of 32 works well on Kaggle GPUs
CHANNELS = 3
EPOCHS_PHASE_1 = 15  # epochs for training the classification head only
EPOCHS_PHASE_2 = 30  # epochs for fine-tuning the partially unfrozen backbone
AUTOTUNE = tf.data.AUTOTUNE

# Seed Python, NumPy and TensorFlow so that shuffling, augmentation and
# weight initialisation are repeatable from one run to the next.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# --- Paths configured for Kaggle ---
TRAIN_DIR = os.path.join(extract_path, "train")
TEST_DIR = os.path.join(extract_path, "test")
CHECKPOINT_DIR = "/kaggle/working/AI_Checkpoints"
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
CHECKPOINT_PATH = os.path.join(CHECKPOINT_DIR, "emotion_efficientnetv2B2.weights.h5")

# --- Performance Optimization ---
# Layers created after this call compute in float16 while keeping float32 variables.
tf.keras.mixed_precision.set_global_policy('mixed_float16')
print("\n✅ Mixed Precision Training Enabled.")
print(f"📐 Image Resolution: {IMG_SIZE}x{IMG_SIZE}")
print(f"📂 Chemin d'entraînement : {TRAIN_DIR}")
print(f"💾 Chemin de sauvegarde : {CHECKPOINT_DIR}")


# --- 4. DATA PREPARATION AND AUGMENTATION ---

# Random augmentation steps, applied in this order; RandomTranslation is
# included for extra robustness.
_augmentation_steps = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
    layers.RandomContrast(0.1),
]
data_augmentation = tf.keras.Sequential(layers=_augmentation_steps, name="data_augmentation")

def create_dataset(directory, augment=False):
    """Build a batched, prefetched image dataset from a directory of images.

    Args:
        directory: Folder handed to `image_dataset_from_directory`.
        augment: When truthy, the files are shuffled and every batch is passed
            through `data_augmentation` in training mode. When falsy, the
            data is neither shuffled nor augmented.

    Returns:
        A `(dataset, class_names)` tuple. Labels are one-hot
        (`label_mode='categorical'`) and images are resized to
        `IMG_SIZE` x `IMG_SIZE`.
    """
    def _augment_batch(images, labels):
        # Labels pass through untouched; only the images are transformed.
        return data_augmentation(images, training=True), labels

    loaded = tf.keras.utils.image_dataset_from_directory(
        directory,
        label_mode='categorical',
        image_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        shuffle=bool(augment),
    )
    # Class names are read from the freshly loaded dataset, before any map().
    labels_found = loaded.class_names

    if augment:
        pipeline = loaded.map(_augment_batch, num_parallel_calls=AUTOTUNE)
    else:
        pipeline = loaded

    return pipeline.prefetch(buffer_size=AUTOTUNE), labels_found

print("\nLoading and preparing datasets...")
train_dataset, class_names = create_dataset(TRAIN_DIR, augment=True)
test_dataset, test_class_names = create_dataset(TEST_DIR, augment=False)

# One-hot label indices follow each directory's own class list, so a train/test
# mismatch would silently misalign labels during validation and evaluation.
if test_class_names != class_names:
    raise ValueError(
        f"Train/test class mismatch: train={class_names}, test={test_class_names}"
    )

NUM_CLASSES = len(class_names)
print(f"✅ Found {NUM_CLASSES} classes: {class_names}")


# --- 5. MODEL DEFINITION (TRANSFER LEARNING) ---

def build_model(input_shape, num_classes):
    """Assemble a frozen EfficientNetV2B2 backbone with a small classifier head.

    Args:
        input_shape: Shape of one input image, used for both the backbone and
            the model's input layer.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras model mapping images to class probabilities.
    """
    # ImageNet-pretrained backbone without its original classification top.
    backbone = applications.EfficientNetV2B2(
        weights="imagenet",
        include_top=False,
        input_shape=input_shape,
    )
    backbone.trainable = False

    image_input = layers.Input(shape=input_shape)
    # training=False: the backbone is always called in inference mode.
    features = backbone(image_input, training=False)
    pooled = layers.GlobalAveragePooling2D()(features)

    # A simple head: Dense(gelu, L2-regularised) -> BatchNorm -> Dropout.
    hidden = layers.Dense(
        512,
        activation="gelu",
        kernel_regularizer=regularizers.l2(1e-4),
    )(pooled)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.Dropout(0.5)(hidden)

    # The output layer is pinned to float32 regardless of the global dtype policy.
    probabilities = layers.Dense(num_classes, activation="softmax", dtype=tf.float32)(hidden)
    return models.Model(image_input, probabilities)

# Instantiate the network for square images with CHANNELS colour channels.
print("\nBuilding model with EfficientNetV2B2 base...")
INPUT_SHAPE = (IMG_SIZE, IMG_SIZE, CHANNELS)
model = build_model(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES)
model.summary()


# --- 6. TRAINING STRATEGY (TWO-PHASE FINE-TUNING) ---

# Stop when validation accuracy has not improved for 7 epochs and roll back
# to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=7,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 3 epochs without validation-loss improvement.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    factor=0.5,
    min_lr=1e-7,
    verbose=1,
)

# Persist weights only, and only when validation accuracy reaches a new maximum.
checkpoint_saver = ModelCheckpoint(
    filepath=CHECKPOINT_PATH,
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=True,
    mode='max',
    verbose=1,
)

# The same callback instances are shared by both training phases.
callbacks = [early_stopping, lr_scheduler, checkpoint_saver]

# --- PHASE 1: Feature Extraction ---
# Only the classification head is trained here; the backbone was frozen in build_model.
print("\n" + "=" * 50)
print("🚀 PHASE 1: Training the Classification Head")
print("=" * 50)

# AdamW with a conservative learning rate and gradient-norm clipping for stability.
phase1_optimizer = AdamW(learning_rate=5e-4, weight_decay=1e-4, clipnorm=1.0)
model.compile(
    optimizer=phase1_optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# NOTE(review): the test set doubles as validation data, so early stopping and
# checkpoint selection are tuned on the same data that is reported as "test"
# accuracy later — consider a separate validation split.
history_phase1 = model.fit(
    train_dataset,
    epochs=EPOCHS_PHASE_1,
    validation_data=test_dataset,
    callbacks=callbacks,
)

# --- PHASE 2: Fine-Tuning ---
print("\n" + "="*50)
print("🔧 PHASE 2: Deeper Fine-Tuning")
print("="*50)

# build_model places the backbone right after the Input layer, hence index 1.
base_model = model.layers[1]
base_model.trainable = True

# Keep the first 60% of the backbone layers frozen and fine-tune the last 40%.
fine_tune_at = int(len(base_model.layers) * 0.60)
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

print(f"🔓 Unfrozen {len(base_model.layers) - fine_tune_at} layers out of {len(base_model.layers)} total.")

# Recompile so the new trainable flags take effect, with a much lower
# learning rate for fine-tuning.
model.compile(
    optimizer=AdamW(learning_rate=2e-5, weight_decay=1e-5, clipnorm=1.0),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# `epochs` in fit() is the index of the final epoch, not a count of epochs to
# run. Continue numbering right after the epochs phase 1 actually completed
# (early stopping may have cut it short) and run a full EPOCHS_PHASE_2 on top.
phase1_epochs_run = len(history_phase1.epoch)

history_phase2 = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=phase1_epochs_run + EPOCHS_PHASE_2,
    initial_epoch=phase1_epochs_run,
    callbacks=callbacks
)

# --- 7. FINAL EVALUATION AND SAVING ---
print("\n" + "=" * 50)
print("📊 FINAL EVALUATION")
print("=" * 50)

# Evaluate the checkpointed weights when a checkpoint file exists; otherwise
# the model is evaluated with whatever weights it currently holds.
checkpoint_available = os.path.exists(CHECKPOINT_PATH)
if checkpoint_available:
    model.load_weights(CHECKPOINT_PATH)
    print("✅ Best weights loaded from checkpoint for final evaluation.")

test_loss, test_acc = model.evaluate(test_dataset, verbose=1)
accuracy_pct = test_acc * 100
print(f"\n🎯 Final Test Accuracy: {accuracy_pct:.2f}%")
print(f"📉 Final Test Loss: {test_loss:.4f}")

# The accuracy is embedded in the file name of the saved model.
final_model_name = f"emotion_model_final_acc_{accuracy_pct:.2f}.keras"
final_model_path = os.path.join(CHECKPOINT_DIR, final_model_name)
model.save(final_model_path)
print(f"💾 Model saved to: {final_model_path}")

gc.collect()

🔧 Configuration optimisée pour la mémoire activée.
📐 Taille d'image : 128x128
📦 Batch size : 16
📂 Chargement des datasets...
Found 41882 files belonging to 7 classes.
Found 10246 files belonging to 7 classes.
✅ Classes trouvées : ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
🏗️ Création du modèle optimisé...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/efficientnet_v2/efficientnetv2-b0_notop.h5
[1m24274472/24274472[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step



🚀 PHASE 1 : Entraînement de la tête
Epoch 1/15
[1m2618/2618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.4162 - loss: 1.7586
Epoch 1: val_accuracy improved from -inf to 0.51737, saving model to /content/drive/MyDrive/AI_Checkpoints/emotion_model_optimized.weights.h5
[1m2618/2618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 24ms/step - accuracy: 0.4162 - loss: 1.7585 - val_accuracy: 0.5174 - val_loss: 1.2749 - learning_rate: 0.0010
Epoch 2/15
[1m2618/2618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.4955 - loss: 1.3533
Epoch 2: val_accuracy improved from 0.51737 to 0.53094, saving model to /content/drive/MyDrive/AI_Checkpoints/emotion_model_optimized.weights.h5
[1m2618/2618[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 17ms/step - accuracy: 0.4955 - loss: 1.3533 - val_accuracy: 0.5309 - val_loss: 1.2528 - learning_rate: 0.0010
Epoch 3/15
[1m2614/2618[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m

2314