In [9]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score

# Define categories for facial emotions
categories = ['angry', 'fear', 'happy', 'neutral', 'sad', 'surprise', 'disgust']

# Set up image parameters
img_size = 48  # Size of input images
batch_size = 64  # Number of images per batch
train_path = 'train/'  # Path to training data
test_path = 'test/'  # Path to test data

def create_data_generators():
    """
    Create and return data generators for training and validation.
    
    This function sets up data augmentation for training data to increase
    model robustness and preprocessing for validation data.
    
    Returns:
    - train_generator: ImageDataGenerator for training data
    - validation_generator: ImageDataGenerator for validation data
    """
    # Data augmentation for training data
    datagen_train = keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,  # Normalize pixel values
        rotation_range=20,  # Randomly rotate images
        width_shift_range=0.2,  # Randomly shift image horizontally
        height_shift_range=0.2,  # Randomly shift image vertically
        zoom_range=0.2,  # Randomly zoom image
        horizontal_flip=True,  # Randomly flip images horizontally
        brightness_range=[0.8, 1.2],  # Randomly adjust brightness
        shear_range=0.1,  # Apply shearing transformations
        fill_mode='nearest'  # Fill in newly created pixels
    )

    # Minimal augmentation for validation data
    datagen_validation = keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,  # Normalize pixel values
        rotation_range=10,  # Slight rotation for validation
        width_shift_range=0.1,  # Slight horizontal shift
        height_shift_range=0.1,  # Slight vertical shift
        zoom_range=0.1  # Slight zoom
    )

    # Create generator for training data
    train_generator = datagen_train.flow_from_directory(
        train_path,
        target_size=(img_size, img_size),
        color_mode="grayscale",
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True
    )

    # Create generator for validation data
    validation_generator = datagen_validation.flow_from_directory(
        test_path,
        target_size=(img_size, img_size),
        color_mode="grayscale",
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False
    )

    return train_generator, validation_generator

# Model Architectures

def create_enhanced_model():
    """
    Create and return an enhanced model using EfficientNetB0 as the base.
    
    This model uses transfer learning from EfficientNetB0 pre-trained on ImageNet.
    
    Returns:
    - model: Compiled Keras model
    """
    # Load pre-trained EfficientNetB0 model
    base_model = keras.applications.EfficientNetB0(
        weights='imagenet', 
        include_top=False, 
        input_shape=(img_size, img_size, 3)
    )
    base_model.trainable = False  # Freeze the base model

    # Build the model
    inputs = keras.Input(shape=(img_size, img_size, 1))
    x = keras.layers.Conv2D(3, (1, 1))(inputs)  # Convert grayscale to RGB
    x = base_model(x)
    x = keras.layers.GlobalAveragePooling2D()(x)
    x = keras.layers.Dense(128, activation='relu')(x)
    x = keras.layers.Dropout(0.5)(x)
    outputs = keras.layers.Dense(len(categories), activation='softmax')(x)

    model = keras.Model(inputs, outputs, name='EnhancedModel')
    return model

def create_cnn_model():
    """
    Create and return a simple CNN model.
    
    Returns:
    - model: Compiled Keras sequential model
    """
    model = keras.Sequential([
        keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_size, img_size, 1)),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Conv2D(64, (3, 3), activation='relu'),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Conv2D(64, (3, 3), activation='relu'),
        keras.layers.Flatten(),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(len(categories), activation='softmax')
    ], name='CNNModel')
    return model

def residual_block(x, filters, kernel_size=3, stride=1):
    """
    Create a residual block for the ResNet model.
    
    Args:
    - x: Input tensor
    - filters: Number of filters in the convolutional layer
    - kernel_size: Size of the convolutional kernel
    - stride: Stride for the convolutional layer
    
    Returns:
    - x: Output tensor after applying the residual block
    """
    shortcut = x
    x = keras.layers.Conv2D(filters, kernel_size, strides=stride, padding='same')(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Activation('relu')(x)
    x = keras.layers.Conv2D(filters, kernel_size, padding='same')(x)
    x = keras.layers.BatchNormalization()(x)
    if stride != 1 or shortcut.shape[-1] != filters:
        shortcut = keras.layers.Conv2D(filters, 1, strides=stride, padding='same')(shortcut)
        shortcut = keras.layers.BatchNormalization()(shortcut)
    x = keras.layers.Add()([x, shortcut])
    x = keras.layers.Activation('relu')(x)
    return x

def create_resnet_model():
    """
    Create and return a ResNet-like model.
    
    Returns:
    - model: Compiled Keras model with ResNet architecture
    """
    inputs = keras.Input(shape=(img_size, img_size, 1))
    x = keras.layers.Conv2D(64, 7, strides=2, padding='same')(inputs)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Activation('relu')(x)
    x = keras.layers.MaxPooling2D(3, strides=2, padding='same')(x)
    x = residual_block(x, 64)
    x = residual_block(x, 128, stride=2)
    x = residual_block(x, 256, stride=2)
    x = keras.layers.GlobalAveragePooling2D()(x)
    outputs = keras.layers.Dense(len(categories), activation='softmax')(x)
    model = keras.Model(inputs, outputs, name='ResNetModel')
    return model

def inception_module(x, filters):
    """
    Create an Inception module for the Inception model.
    
    Args:
    - x: Input tensor
    - filters: Number of filters for each convolution branch
    
    Returns:
    - x: Output tensor after applying the Inception module
    """
    branch1x1 = keras.layers.Conv2D(filters, 1, activation='relu')(x)
    
    branch3x3 = keras.layers.Conv2D(filters, 1, activation='relu')(x)
    branch3x3 = keras.layers.Conv2D(filters, 3, padding='same', activation='relu')(branch3x3)
    
    branch5x5 = keras.layers.Conv2D(filters, 1, activation='relu')(x)
    branch5x5 = keras.layers.Conv2D(filters, 5, padding='same', activation='relu')(branch5x5)
    
    branch_pool = keras.layers.MaxPooling2D(3, strides=1, padding='same')(x)
    branch_pool = keras.layers.Conv2D(filters, 1, activation='relu')(branch_pool)
    
    return keras.layers.Concatenate()([branch1x1, branch3x3, branch5x5, branch_pool])

def create_inception_model():
    """
    Create and return an Inception-like model.
    
    Returns:
    - model: Compiled Keras model with Inception architecture
    """
    inputs = keras.Input(shape=(img_size, img_size, 1))
    x = keras.layers.Conv2D(32, 3, strides=2, padding='same', activation='relu')(inputs)
    x = keras.layers.MaxPooling2D(3, strides=2, padding='same')(x)
    x = inception_module(x, 64)
    x = inception_module(x, 120)
    x = keras.layers.GlobalAveragePooling2D()(x)
    outputs = keras.layers.Dense(len(categories), activation='softmax')(x)
    model = keras.Model(inputs, outputs, name='InceptionModel')
    return model

def attention_block(x, filters):
    """
    Create an attention block for the Attention model.
    
    Args:
    - x: Input tensor
    - filters: Number of filters in the convolutional layer
    
    Returns:
    - x: Output tensor after applying the attention mechanism
    """
    g = keras.layers.GlobalAveragePooling2D()(x)
    g = keras.layers.Reshape((1, 1, filters))(g)
    g = keras.layers.Conv2D(filters // 8, 1)(g)
    g = keras.layers.Conv2D(filters, 1, activation='sigmoid')(g)
    return keras.layers.Multiply()([x, g])

def create_attention_model():
    """
    Create and return an Attention-based model.
    
    Returns:
    - model: Compiled Keras model with Attention mechanism
    """
    inputs = keras.Input(shape=(img_size, img_size, 1))
    x = keras.layers.Conv2D(32, 3, activation='relu')(inputs)
    x = keras.layers.MaxPooling2D(2)(x)
    x = keras.layers.Conv2D(64, 3, activation='relu')(x)
    x = attention_block(x, 64)
    x = keras.layers.MaxPooling2D(2)(x)
    x = keras.layers.Conv2D(128, 3, activation='relu')(x)
    x = attention_block(x, 128)
    x = keras.layers.GlobalAveragePooling2D()(x)
    outputs = keras.layers.Dense(len(categories), activation='softmax')(x)
    model = keras.Model(inputs, outputs, name='AttentionModel')
    return model

# Training Function with Cross-Validation and Model Selection
def train_with_cross_validation(model_funcs, train_data, n_splits=5):
    """
    Train multiple models using K-fold cross-validation.
    
    Args:
    - model_funcs: List of functions to create different models
    - train_data: Training data generator
    - n_splits: Number of splits for cross-validation
    
    Returns:
    - best_model: The best performing model
    - best_scores: List of accuracy scores for the best model
    """
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    
    best_model = None
    best_scores = []
    best_avg_score = 0

    for model_func in model_funcs:
        print(f"Training {model_func.__name__}")
        all_scores = []
        for fold, (train_index, val_index) in enumerate(kf.split(train_data.filenames)):
            print(f"Training fold {fold + 1}")
            
            model = model_func()
            
            optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
            
            model.compile(
                optimizer=optimizer, 
                loss='categorical_crossentropy', 
                metrics=['accuracy']
            )
            
            # Train the model
            history = model.fit(
                train_data,
                epochs=2,  # Increased from 2 to 50
                validation_data=train_data,  # This should be a separate validation set
                callbacks=[
                    keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
                    keras.callbacks.ModelCheckpoint(f"best_{model_func.__name__}_fold_{fold}.keras", save_best_only=True)
                ]
            )
            
            # Evaluate the model
            scores = model.evaluate(train_data)  # This should be a separate validation set
            all_scores.append(scores[1])  # Append accuracy
        
        avg_score = np.mean(all_scores)
        print(f"Average accuracy for {model_func.__name__}: {avg_score}")
        
        if avg_score > best_avg_score:
            best_avg_score = avg_score
            best_model = model
            best_scores = all_scores
    
    print(f"Best model: {best_model.name} with average accuracy: {best_avg_score}")
    return best_model, best_scores

# Grad-CAM Visualization
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """
    Generate Grad-CAM heatmap for the given image and model.
    
    Args:
    - img_array: Input image as a numpy array
    - model: Trained Keras model
    - last_conv_layer_name: Name of the last convolutional layer in the model
    - pred_index: Index of the predicted class (if None, uses the highest scoring class)
    
    Returns:
    - heatmap: Grad-CAM heatmap as a numpy array
    """
    grad_model = keras.models.Model(
        [model.inputs], 
        [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    grads = tape.gradient(class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap)
    return heatmap.numpy()

def display_gradcam(img, heatmap, alpha=0.4):
    """
    Display the original image and its Grad-CAM heatmap overlay.
    
    Args:
    - img: Original input image
    - heatmap: Grad-CAM heatmap
    - alpha: Transparency of the heatmap overlay
    """
    heatmap = np.uint8(255 * heatmap)
    jet = plt.cm.get_cmap("jet")
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]
    jet_heatmap = keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = keras.preprocessing.image.img_to_array(jet_heatmap)
    superimposed_img = jet_heatmap * alpha + img
    superimposed_img = keras.preprocessing.image.array_to_img(superimposed_img)
    
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(img)
    plt.title("Original Image")
    plt.axis('off')
    plt.subplot(1, 2, 2)
    plt.imshow(superimposed_img)
    plt.title("Grad-CAM")
    plt.axis('off')
    plt.show()

# Main execution
if __name__ == "__main__":
    # Create data generators
    train_generator, validation_generator = create_data_generators()
    
    # List of model creation functions
    model_funcs = [create_enhanced_model, create_cnn_model, create_resnet_model, create_inception_model, create_attention_model]
    
    # Train models with cross-validation
    best_model, scores = train_with_cross_validation(model_funcs, train_generator)
    
    # Evaluate on the validation set
    evaluation = best_model.evaluate(validation_generator)
    print(f"Test accuracy: {evaluation[1]*100:.2f}%")
    
    # Test if the model works with a sample input
    sample_image = next(iter(validation_generator))[0][0]
    sample_image_array = np.expand_dims(sample_image, axis=0)

    # Pass the sample image through the model (this builds the model if needed)
    best_model.predict(sample_image_array)
    
    # Find the last convolutional layer
    last_conv_layer = None
    for layer in reversed(best_model.layers):
        if isinstance(layer, keras.layers.Conv2D):
            last_conv_layer = layer.name
            break
    
    if last_conv_layer:
        heatmap = make_gradcam_heatmap(sample_image_array, best_model, last_conv_layer)
        display_gradcam(sample_image, heatmap)
    else:
        print("Could not find a convolutional layer for Grad-CAM visualization.")
    
    # Generate confusion matrix
    y_pred = best_model.predict(validation_generator)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = validation_generator.classes
    
    cm = confusion_matrix(y_true, y_pred_classes)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=categories, yticklabels=categories)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()
    
    # Print classification report
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred_classes, target_names=categories))
    
    # Calculate and print additional metrics
    f1 = f1_score(y_true, y_pred_classes, average='weighted')
    precision = precision_score(y_true, y_pred_classes, average='weighted')
    recall = recall_score(y_true, y_pred_classes, average='weighted')
    
    print(f"\nAdditional Metrics:")
    print(f"F1-score (weighted): {f1:.4f}")
    print(f"Precision (weighted): {precision:.4f}")
    print(f"Recall (weighted): {recall:.4f}")
    
    # Save the best model
    best_model.save("best_emotion_recognition_model.keras")
    print("\nBest model saved as 'best_emotion_recognition_model.keras'")

Found 28709 images belonging to 7 classes.
Found 7178 images belonging to 7 classes.
Training create_enhanced_model
Training fold 1
Epoch 1/2
[1m 16/449[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:35[0m 497ms/step - accuracy: 0.1761 - loss: 1.9654

KeyboardInterrupt: 