In [1]:
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import MobileNet
from keras.layers import GlobalAveragePooling2D, Dense, Dropout
from keras.models import Model
from keras.optimizers import Adam
import numpy as np
from imblearn.over_sampling import SMOTE
from collections import Counter

In [2]:
# Define image dimensions and batch size
IMG_HEIGHT = 48
IMG_WIDTH = 48
batch_size = 32


In [3]:
# Path to the training dataset directory
train_data_dir = 'train'
validation_data_dir = 'test'

In [4]:
# Create ImageDataGenerator for the entire training dataset (before oversampling)
pre_oversampling_datagen = ImageDataGenerator(
    rescale=1./255
)

In [5]:

# Generate the training dataset (before oversampling)
pre_oversampling_generator = pre_oversampling_datagen.flow_from_directory(
    train_data_dir,
    color_mode='grayscale',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

Found 28709 images belonging to 7 classes.


In [6]:
# Define the number of desired samples for "Disgust" class to balance it with the majority class
desired_samples = 4100  # Matching the number of "Happy" samples

In [7]:
# Create an ImageDataGenerator for augmenting "Disgust" class
disgust_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [8]:

# Generate augmented samples for "Disgust" class
disgust_generator = disgust_datagen.flow_from_directory(
    train_data_dir,
    color_mode='grayscale',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
    subset='training',  # Select only "Disgust" class
)

Found 28709 images belonging to 7 classes.


In [9]:
# Calculate the number of additional samples needed
num_additional_samples = desired_samples - len(disgust_generator.filenames)

In [10]:
# Create more augmented samples for "Disgust" class to match the desired count
for _ in range(num_additional_samples // batch_size):
    more_samples = disgust_generator.next()

In [11]:
# Create ImageDataGenerator for the entire training dataset (after oversampling)
post_oversampling_datagen = ImageDataGenerator(
    rescale=1./255
)

In [12]:
# Generate the training dataset (after oversampling)
post_oversampling_generator = post_oversampling_datagen.flow_from_directory(
    train_data_dir,
    color_mode='grayscale',
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

Found 28709 images belonging to 7 classes.


In [13]:
# Count the number of samples for each class in the training dataset (before oversampling)
pre_oversampling_class_counts = pre_oversampling_generator.classes
pre_oversampling_class_labels = list(pre_oversampling_generator.class_indices.keys())
pre_oversampling_sample_counts = [sum(pre_oversampling_class_counts == i) for i in range(len(pre_oversampling_class_labels))]

In [14]:
print("Training Data Samples per Class (Before Oversampling):")
for label, count in zip(pre_oversampling_class_labels, pre_oversampling_sample_counts):
    print(f"{label}: {count} samples")

Training Data Samples per Class (Before Oversampling):
angry: 3995 samples
disgust: 436 samples
fear: 4097 samples
happy: 7215 samples
neutral: 4965 samples
sad: 4830 samples
surprise: 3171 samples


In [15]:
# Count the number of samples for each class in the training dataset (after oversampling)
post_oversampling_class_counts = post_oversampling_generator.classes
post_oversampling_class_labels = list(post_oversampling_generator.class_indices.keys())
post_oversampling_sample_counts = [sum(post_oversampling_class_counts == i) for i in range(len(post_oversampling_class_labels))]

In [16]:
print("Training Data Samples per Class (After Oversampling):")
for label, count in zip(post_oversampling_class_labels, post_oversampling_sample_counts):
    print(f"{label}: {count} samples")

Training Data Samples per Class (After Oversampling):
angry: 3995 samples
disgust: 436 samples
fear: 4097 samples
happy: 7215 samples
neutral: 4965 samples
sad: 4830 samples
surprise: 3171 samples
