Preprocessing 

In [7]:
import os
import cv2
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator

#image loading function
def load_images_from_folder(folder):
    """Load grayscale images and integer emotion labels from a folder tree.

    The tree under ``folder`` is walked recursively. The name of the directory
    that directly contains an image file (lower-cased and stripped) is looked
    up in the emotion label mapping to obtain that image's class index.

    Images and labels are always appended together, so the two returned arrays
    are index-aligned. Directories whose name is not a known emotion are
    reported once and skipped entirely; files that OpenCV cannot decode are
    skipped silently.

    Args:
        folder: Path of the root directory to scan.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a ``float32`` array
        built from the decoded grayscale images and ``labels`` is an ``int32``
        array with one class index per image.

    Raises:
        ValueError: If ``folder`` does not exist, or if no labelled, readable
            image was found beneath it.
    """
    if not os.path.exists(folder):
        raise ValueError(f"Folder does not exist: {folder}")

    # Define label mapping based on folder names
    label_dict = {
        "angry": 0,
        "disgust": 1,
        "fear": 2,
        "happy": 3,
        "sad": 4,
        "surprise": 5,
        "neutral": 6
    }
    image_extensions = (".jpg", ".png", ".jpeg")

    images = []
    labels = []

    # Loop through subfolders
    for root, dirs, files in os.walk(folder):
        # os.walk yields entries in filesystem order; sorting in place makes
        # the traversal (and therefore the sample order) reproducible.
        dirs.sort()
        image_files = sorted(
            name for name in files if name.lower().endswith(image_extensions)
        )
        if not image_files:
            continue

        # Resolve the label BEFORE loading anything: an image must never be
        # stored without its label, otherwise images and labels go out of sync.
        label_name = os.path.basename(root).lower().strip()
        label = label_dict.get(label_name)
        if label is None:
            print(f"Unknown label folder: {label_name}")
            continue

        for filename in image_files:
            img = cv2.imread(os.path.join(root, filename), cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread returns None for unreadable/corrupt files.
                continue
            images.append(img)
            labels.append(label)

    if not images:
        raise ValueError(f"No images or labels found in folder: {folder}")

    images = np.array(images, dtype='float32')
    labels = np.array(labels, dtype='int32')

    print(f"Loaded {len(images)} images from {folder}")
    return images, labels


# Dataset paths
dataset_path = "Balanced RAF 75x75"
train_folder = os.path.join(dataset_path, "train")
val_folder = os.path.join(dataset_path, "val")
test_folder = os.path.join(dataset_path, "test")

# Label mapping (class index -> display name). Defined up front so the number
# of classes comes from the fixed label set rather than from whichever labels
# happen to appear in the training split. The indices match the folder-name
# mapping used inside load_images_from_folder.
label_map = {
    0: "Angry",
    1: "Disgust",
    2: "Fear",
    3: "Happy",
    4: "Sad",
    5: "Surprise",
    6: "Neutral"
}

# Load datasets
print("Loading images...")
X_train, y_train = load_images_from_folder(train_folder)
X_val, y_val = load_images_from_folder(val_folder)
X_test, y_test = load_images_from_folder(test_folder)

# Normalize pixel values (0-1)
X_train = X_train / 255.0
X_val = X_val / 255.0
X_test = X_test / 255.0

# Add channel dimension
X_train = np.expand_dims(X_train, -1)
X_val = np.expand_dims(X_val, -1)
X_test = np.expand_dims(X_test, -1)

# One-hot encode labels
# Use the full label set for the one-hot width: counting the unique training
# labels would shrink the encoding if a class were missing from the training
# split and make val/test encodings fail or disagree with label_map.
num_classes = len(label_map)
missing_classes = sorted(set(label_map) - set(np.unique(y_train).tolist()))
if missing_classes:
    print("Warning: training split has no samples for classes:",
          [label_map[idx] for idx in missing_classes])
y_train = to_categorical(y_train, num_classes)
y_val = to_categorical(y_val, num_classes)
y_test = to_categorical(y_test, num_classes)

print("Labels encoded. Number of emotion classes:", num_classes)


#data augmentation
# datagen.fit(...) is intentionally not called: Keras only requires it when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled, and none of them is set here. Calling it would only copy the whole
# training array for nothing.
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    zoom_range=0.1
)


# Save all preprocessed data into one file
os.makedirs("preprocessed_data", exist_ok=True)
np.savez_compressed(
    "preprocessed_data/rafdb_preprocessed.npz",
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    X_test=X_test,
    y_test=y_test
)
# Save label mapping
# NOTE: a dict is stored by np.save as a pickled 0-d object array, so it must
# be read back with np.load(..., allow_pickle=True) and unwrapped with .item().
np.save("preprocessed_data/label_map.npy", label_map, allow_pickle=True)

print("\nPreprocessing Complete!")
print("Preprocessed data saved in 'preprocessed_data/' folder.")
print("Train shape:", X_train.shape, "Validation shape:", X_val.shape, "Test shape:", X_test.shape)
print("Classes:", num_classes)


Loading images...
Loaded 30023 images from Balanced RAF 75x75\train
Loaded 7504 images from Balanced RAF 75x75\val
Loaded 4165 images from Balanced RAF 75x75\test
Labels encoded. Number of emotion classes: 7

Preprocessing Complete!
Preprocessed data saved in 'preprocessed_data/' folder.
Train shape: (30023, 75, 75, 1) Validation shape: (7504, 75, 75, 1) Test shape: (4165, 75, 75, 1)
Classes: 7


In [5]:
# Sanity check: read the saved label mapping back from disk and show it.
# The mapping was written with np.save from a Python dict, so it comes back as
# a 0-d object array (call .item() on it to recover the dict) and requires
# allow_pickle=True. Only load pickled files that this notebook itself wrote.
data = np.load(
    file='preprocessed_data/label_map.npy',
    allow_pickle=True,
)
print(data)


{0: 'Angry', 1: 'Disgust', 2: 'Fear', 3: 'Happy', 4: 'Sad', 5: 'Surprise', 6: 'Neutral'}
