In [24]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os
import numpy as np
from keras.preprocessing import image
import random
import tensorflow as tf
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report

In [11]:
train_data_dir = r"C:\Users\kezin\Downloads\archive - Copy\train"
validation_data_dir = r"C:\Users\kezin\Downloads\archive - Copy\test"

Define image data generators with augmentation

In [16]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest')

validation_datagen = ImageDataGenerator(rescale=1./255)


def augment_data(directory, target_count=1500):
    for class_label in os.listdir(directory):
        class_path = os.path.join(directory, class_label)
        if os.path.isdir(class_path):
            images = os.listdir(class_path)
            num_files = len(images)
            
            # If the number of images is greater than 1000, randomly select 1000 images
            if num_files > target_count:
                print(f"Class {class_label} has {num_files} images. Randomly selecting {target_count} images.")
                images_to_keep = random.sample(images, target_count)
                for img_name in images:
                    if img_name not in images_to_keep:
                        os.remove(os.path.join(class_path, img_name))
            
            # If the number of images is less than 1000, augment the data
            elif num_files < target_count:
                print(f"Augmenting class {class_label} with {num_files} images to {target_count} images")
                datagen = ImageDataGenerator(
                    rescale=1./255,
                    rotation_range=30,
                    shear_range=0.3,
                    zoom_range=0.3,
                    horizontal_flip=True,
                    fill_mode='nearest')
                
                existing_images = []
                for img_name in os.listdir(class_path):
                    img_path = os.path.join(class_path, img_name)
                    img = image.load_img(img_path, target_size=(48, 48), color_mode='grayscale')
                    img = image.img_to_array(img)
                    img = np.expand_dims(img, axis=0)
                    existing_images.append(img)
                
                existing_images = np.vstack(existing_images)
                i = 0
                for batch in datagen.flow(existing_images, batch_size=1, save_to_dir=class_path, save_prefix='aug', save_format='jpg'):
                    i += 1
                    if i > target_count - num_files:
                        break

# Augment training data
augment_data(train_data_dir)


Augmenting class angry with 1000 images to 1500 images
Augmenting class disgust with 1000 images to 1500 images
Augmenting class fear with 1000 images to 1500 images
Augmenting class happy with 1000 images to 1500 images
Augmenting class neutral with 1000 images to 1500 images
Augmenting class sad with 1000 images to 1500 images
Augmenting class surprise with 1000 images to 1500 images


Create generators

In [17]:
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    color_mode='grayscale',
    target_size=(48, 48),
    batch_size=32,
    class_mode='categorical',
    shuffle=True)

validation_generator = validation_datagen.flow_from_directory(
    validation_data_dir,
    color_mode='grayscale',
    target_size=(48, 48),
    batch_size=32,
    class_mode='categorical',
    shuffle=True)

Found 10507 images belonging to 7 classes.
Found 7178 images belonging to 7 classes.


In [18]:
class_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']

img, label = train_generator.__next__()

Define the model

In [19]:
model = Sequential()

model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48, 48, 1)))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.1))

model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.1))

model.add(Conv2D(256, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.1))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(7, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
print(model.summary())

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


None


In [20]:
# Calculate the number of training and testing images
num_train_imgs = sum([len(files) for r, d, files in os.walk(train_data_dir)])
num_test_imgs = sum([len(files) for r, d, files in os.walk(validation_data_dir)])

print(num_train_imgs)
print(num_test_imgs)

10507
7178


In [21]:
epochs = 30
history = model.fit(
    train_generator,
    steps_per_epoch=num_train_imgs // 32,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=num_test_imgs // 32)

  self._warn_if_super_not_called()


Epoch 1/30
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 769ms/step - accuracy: 0.1477 - loss: 1.9449 - val_accuracy: 0.2270 - val_loss: 1.8448
Epoch 2/30
[1m  1/328[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:08[0m 208ms/step - accuracy: 0.2500 - loss: 1.9123



[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 41ms/step - accuracy: 0.2500 - loss: 1.9123 - val_accuracy: 0.2313 - val_loss: 1.8421
Epoch 3/30
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 189ms/step - accuracy: 0.2496 - loss: 1.8558 - val_accuracy: 0.2250 - val_loss: 1.8503
Epoch 4/30
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 30ms/step - accuracy: 0.4062 - loss: 1.7347 - val_accuracy: 0.2277 - val_loss: 1.8440
Epoch 5/30
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 189ms/step - accuracy: 0.2634 - loss: 1.8340 - val_accuracy: 0.2824 - val_loss: 1.7780
Epoch 6/30
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 32ms/step - accuracy: 0.2500 - loss: 1.8575 - val_accuracy: 0.2839 - val_loss: 1.7795
Epoch 7/30
[1m328/328[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 231ms/step - accuracy: 0.2782 - loss: 1.7981 - val_accuracy: 0.3107 - val_loss: 1.7531
Epoch 8/30
[1m328/328[

Save the model

In [22]:
model.save('model_file.h5')



reports

In [25]:
# Path to your test dataset
test_dir = r"C:\Users\kezin\Downloads\archive\test"  # Update this to your test image folder

# Image parameters (should match your model's input size)
IMG_SIZE = (48, 48)  # Update if different
BATCH_SIZE = 32  # Adjust batch size as needed

# Load test images using ImageDataGenerator
test_datagen = ImageDataGenerator(rescale=1./255)  # Normalize images
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',  # Ensures labels are one-hot encoded
    color_mode='grayscale',  # Force grayscale images (fix for input shape mismatch)
    shuffle=False  
)

# Get true labels
y_true = test_generator.classes
class_labels = list(test_generator.class_indices.keys())  # Get class names

# Predict on test data
y_pred_probs = model.predict(test_generator)
y_pred = np.argmax(y_pred_probs, axis=1)  # Convert probabilities to class labels

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average="weighted")
conf_matrix = confusion_matrix(y_true, y_pred)
report = classification_report(y_true, y_pred, target_names=class_labels)

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print("Confusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(report)


Found 7178 images belonging to 7 classes.
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 30ms/step
Accuracy: 0.4911
F1 Score: 0.4854
Confusion Matrix:
[[ 180  116  161   93  188  169   51]
 [   7   65   20    2    7   10    0]
 [  48   61  295   83  193  198  146]
 [  49   41   78 1268  163   91   84]
 [  38   34  125  112  709  166   49]
 [  69   86  178   89  340  433   52]
 [  15   17  104   49   52   19  575]]

Classification Report:
              precision    recall  f1-score   support

       angry       0.44      0.19      0.26       958
     disgust       0.15      0.59      0.24       111
        fear       0.31      0.29      0.30      1024
       happy       0.75      0.71      0.73      1774
     neutral       0.43      0.58      0.49      1233
         sad       0.40      0.35      0.37      1247
    surprise       0.60      0.69      0.64       831

    accuracy                           0.49      7178
   macro avg       0.44      0.48      0.43      71