In [4]:
# Import necessary libraries
import os
import zipfile
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard, ModelCheckpoint
import matplotlib.pyplot as plt

# Step 1: Unzip the dataset
zip_path = "/content/FER-2013.zip"  # Point this at your own archive if it differs
extract_path = "dataset/"  # Destination folder for the archive contents

# ZipFile opens read-only by default; the context manager closes the archive.
with zipfile.ZipFile(zip_path) as zip_ref:
    zip_ref.extractall(path=extract_path)

print("Dataset successfully extracted!")

# Step 2: Report what ended up in the dataset folder
dataset_path = "dataset/"
if not os.path.exists(dataset_path):
    print("Dataset folder NOT found. Check if extraction was successful.")
else:
    print("Dataset folder found! Listing files:")
    print(os.listdir(dataset_path))

# Step 3: Define dataset paths (one sub-folder per split)
test_dir = "dataset/test/"
train_dir = "dataset/train/"

# Step 4: Advanced Data Augmentation (CutOut + MixUp)
def cutout(image, mask_size=16):
    """Zero out one randomly placed square patch of an image (CutOut).

    The patch centre is drawn uniformly over the image; the patch extends
    ``mask_size // 2`` pixels on each side of the centre and is clipped at
    the image borders, so patches near an edge are smaller.

    Args:
        image: Array whose first two axes are height and width. Both 2-D
            ``(H, W)`` and channel-last ``(H, W, C)`` layouts are accepted.
        mask_size: Nominal side length of the patch in pixels.

    Returns:
        A new array with the same shape as ``image`` in which the patch is
        zeroed across every trailing axis. The input is not modified.
    """
    h, w = image.shape[0], image.shape[1]
    half = mask_size // 2

    # Keep the draw order (row first, then column) so seeded runs reproduce.
    y = np.random.randint(h)
    x = np.random.randint(w)

    y1 = np.clip(y - half, 0, h)
    y2 = np.clip(y + half, 0, h)
    x1 = np.clip(x - half, 0, w)
    x2 = np.clip(x + half, 0, w)

    mask = np.ones((h, w))
    mask[y1:y2, x1:x2] = 0

    # Give the mask one singleton axis per trailing image axis. A fixed
    # (h, w, 1) mask would broadcast a 2-D image into a wrong-shaped result.
    trailing_axes = len(image.shape) - 2
    mask = mask.reshape((h, w) + (1,) * trailing_axes)
    return image * mask

def mixup(images, labels, alpha=0.2):
    """Blend each sample of a batch with a randomly chosen partner (MixUp).

    A single mixing weight is drawn from ``Beta(alpha, alpha)`` for the whole
    batch, and one random permutation of the batch decides each sample's
    partner. Images and labels are combined with the same weight and the same
    pairing.

    Args:
        images: Batch of images, indexed by sample along the first axis.
        labels: Batch of labels aligned with ``images`` along the first axis.
        alpha: Concentration parameter of the Beta distribution.

    Returns:
        Tuple ``(mixed_images, mixed_labels)``.
    """
    partner = np.random.permutation(images.shape[0])
    weight = np.random.beta(alpha, alpha)
    complement = 1 - weight

    def blend(batch):
        # Convex combination of each sample with its permuted partner.
        return weight * batch + complement * batch[partner]

    return blend(images), blend(labels)

# Training pipeline: random rotation/shift/flip plus CutOut, pixels scaled by 1/255
train_datagen = ImageDataGenerator(
    preprocessing_function=cutout,  # Apply CutOut
    horizontal_flip=True,
    height_shift_range=0.1,
    width_shift_range=0.1,
    rotation_range=15,
    rescale=1.0 / 255,
)

# Evaluation pipeline: pixel rescaling only, no augmentation
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

def mixup_generator(generator, alpha=0.2):
    """Yield MixUp-blended ``(images, labels)`` batches from ``generator`` forever.

    Every batch produced by ``generator`` is passed through ``mixup`` with the
    given ``alpha``. Whenever a pass over ``generator`` finishes, its
    ``on_epoch_end()`` hook is called and iteration starts over.

    Args:
        generator: Iterable of ``(images, labels)`` batches that also exposes
            an ``on_epoch_end()`` method.
        alpha: Beta-distribution parameter forwarded to ``mixup``.

    Yields:
        Tuple ``(mixed_images, mixed_labels)`` for each source batch.
    """
    while True:  # Never stop on our own; the consumer decides when to quit
        yield from (
            mixup(batch_images, batch_labels, alpha)
            for batch_images, batch_labels in generator
        )
        generator.on_epoch_end()  # Reset the source before the next pass

# Training batches: 48x48 grayscale images with one-hot labels, 32 per batch
train_generator = train_datagen.flow_from_directory(
    train_dir,
    color_mode="grayscale",
    target_size=(48, 48),
    class_mode="categorical",
    batch_size=32,
)

# Test batches: same image format and batch size, read from the test folder
test_generator = test_datagen.flow_from_directory(
    test_dir,
    color_mode="grayscale",
    target_size=(48, 48),
    class_mode="categorical",
    batch_size=32,
)

# Wrap the training iterator so every batch it produces is MixUp-blended
train_generator_mixup = mixup_generator(train_generator)

# Step 5: Define the Custom CNN Model
def create_custom_cnn(input_shape=(48, 48, 1), num_classes=7):
    """Build the (uncompiled) CNN classifier used in this notebook.

    The network stacks three Conv2D -> BatchNormalization -> MaxPooling2D ->
    Dropout blocks with 64, 128 and 256 filters, followed by a 256-unit dense
    layer with L2 weight regularisation and a softmax output layer.

    Args:
        input_shape: Shape of one input image, without the batch axis.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``tf.keras.Sequential`` model; the caller is responsible for
        compiling it.
    """
    model = tf.keras.Sequential([
        # Declare the input with an explicit Input layer. Passing
        # `input_shape=` to the first Conv2D makes Keras emit a warning
        # asking for an Input object instead.
        layers.Input(shape=input_shape),

        # First Convolutional Block
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.3),

        # Second Convolutional Block
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.4),

        # Third Convolutional Block
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.5),

        # Fully Connected Layers
        layers.Flatten(),
        layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
        layers.BatchNormalization(),
        layers.Dropout(0.5),

        # Output Layer
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

# Step 6: Build the network and attach optimizer, loss and metric
model = create_custom_cnn(num_classes=7, input_shape=(48, 48, 1))
model.compile(
    metrics=['accuracy'],
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.AdamW(weight_decay=1e-4, learning_rate=0.001),
)

# Show the layer-by-layer summary
model.summary()

# Step 7: Callbacks (kept in this order; all validation metrics come from
# the `validation_data` passed to fit below)
callbacks = [
    # Stop after 5 epochs without val_loss improvement and roll back to the best weights
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    # Multiply the learning rate by 0.2 after 3 stagnant epochs, never below 1e-6
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6),
    # Write logs for TensorBoard
    TensorBoard(log_dir='./logs'),
    # Keep only the checkpoint with the highest validation accuracy
    ModelCheckpoint('best_model.keras', monitor='val_accuracy', save_best_only=True, mode='max'),
]

# Step 8: Train on the MixUp stream.
# The MixUp wrapper is a plain generator with no length, so the number of
# batches per epoch is taken from the underlying directory iterator.
# NOTE(review): `test_generator` is used both as validation data here (driving
# early stopping, LR scheduling and checkpoint selection) and for the final
# evaluation in Step 10, so that accuracy is not an independent estimate.
history = model.fit(
    train_generator_mixup,
    validation_data=test_generator,
    epochs=50,  # Upper bound; EarlyStopping may end training sooner
    steps_per_epoch=len(train_generator),
    callbacks=callbacks,
)

# Step 9: Persist the trained model
model.save("custom_cnn_fer2013.keras")
print("✅ Model training complete!")

# Step 10: Measure loss and accuracy on the test iterator
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Dataset successfully extracted!
Dataset folder found! Listing files:
['test', 'train']
Found 28709 images belonging to 7 classes.
Found 7178 images belonging to 7 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.2036 - loss: 2.9464

  self._warn_if_super_not_called()


[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 40ms/step - accuracy: 0.2036 - loss: 2.9461 - val_accuracy: 0.2722 - val_loss: 2.2522 - learning_rate: 0.0010
Epoch 2/50
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 32ms/step - accuracy: 0.2664 - loss: 2.1574 - val_accuracy: 0.3277 - val_loss: 1.9261 - learning_rate: 0.0010
Epoch 3/50
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 32ms/step - accuracy: 0.3083 - loss: 2.0297 - val_accuracy: 0.2335 - val_loss: 2.2021 - learning_rate: 0.0010
Epoch 4/50
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 34ms/step - accuracy: 0.3370 - loss: 2.0309 - val_accuracy: 0.3785 - val_loss: 2.0368 - learning_rate: 0.0010
Epoch 5/50
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 39ms/step - accuracy: 0.3670 - loss: 2.0259 - val_accuracy: 0.4228 - val_loss: 1.8228 - learning_rate: 0.