In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import cv2
import gc
import h5py
import os
import cv2
import numpy as np
from tqdm import tqdm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Conv2D, Dropout, MaxPooling2D, BatchNormalization
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

In [2]:
def moodNamePrintFromLabel(n):
  """Return the emotion name for an integer class label.

  The indices follow the ``label_map`` used by ``load_images_from_directory``:
  0 angry, 1 disgust, 2 fear, 3 happy, 4 sad, 5 surprise, 6 neutral.

  Args:
    n: Class label. It is compared with ``==``, so plain ints and NumPy
      integer scalars are both accepted.

  Returns:
    The lowercase emotion name, without surrounding whitespace.

  Raises:
    ValueError: If ``n`` does not equal any label from 0 to 6.
  """
  mood_names = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')
  for label, name in enumerate(mood_names):
    if n == label:
      return name
  # An unknown label used to fall through to an unbound local variable.
  raise ValueError(f"Unknown emotion label: {n!r}; expected an integer from 0 to 6")

In [3]:
def load_images_from_directory(directory, target_size=(48, 48)):
    """Load grayscale images from the per-emotion sub-folders of ``directory``.

    Each emotion folder (``angry``, ``disgust``, ``fear``, ``happy``, ``sad``,
    ``surprise``, ``neutral``) is scanned. Every file that ``cv2.imread`` can
    decode is resized and labelled with the folder's class index; files that
    cannot be decoded are skipped.

    Args:
        directory: Dataset root containing one sub-folder per emotion name.
        target_size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        A tuple ``(images, labels)``. ``images`` has shape
        ``(N, height, width, 1)`` and ``labels`` has shape ``(N,)``. When
        nothing could be loaded, ``N`` is 0 and both arrays keep those shapes.

    Warns:
        UserWarning: If class folders are missing, or no image was loaded.
    """
    import warnings  # local import: only used for the diagnostics below

    label_map = {'angry': 0, 'disgust': 1, 'fear': 2, 'happy': 3, 'sad': 4, 'surprise': 5, 'neutral': 6}
    width, height = target_size
    images = []
    labels = []
    missing_classes = []

    for label_name, label_index in label_map.items():
        class_dir = os.path.join(directory, label_name)
        if not os.path.isdir(class_dir):
            missing_classes.append(label_name)
            continue
        # Sorted so the sample order does not depend on the file system's
        # listing order.
        for img_name in tqdm(sorted(os.listdir(class_dir))):
            img_path = os.path.join(class_dir, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            img = cv2.resize(img, (width, height))
            # The resized array is (height, width); append the channel axis
            # rather than reshaping in target_size order, which would scramble
            # the pixels of non-square images.
            images.append(img[:, :, np.newaxis])
            labels.append(label_index)

    if missing_classes:
        warnings.warn(f"Class folders not found under {directory!r}: {missing_classes}")
    if not images:
        warnings.warn(f"No images were loaded from {directory!r}")
        # Keep a well-formed shape so downstream code can rely on it.
        return np.empty((0, height, width, 1), dtype=np.uint8), np.empty((0,), dtype=int)
    return np.array(images), np.array(labels)


# FER-2013 training images
X_fer2013, y_fer2013 = load_images_from_directory(directory='Train Image Data/FER-2013')

# AffectNet training images
# NOTE(review): the recorded run shows no progress bars for this folder and
# 0 AffectNet images downstream - confirm the folder and its class
# sub-folders exist.
X_affectnet, y_affectnet = load_images_from_directory(directory='Train Image Data/AffectNet')

# RAF-DB training images
X_RAF, y_RAF = load_images_from_directory(directory='Train Image Data/RAF-DB')

100%|██████████| 3995/3995 [00:01<00:00, 3214.19it/s]
100%|██████████| 436/436 [00:00<00:00, 3428.90it/s]
100%|██████████| 4097/4097 [00:01<00:00, 3354.77it/s]
100%|██████████| 7215/7215 [00:02<00:00, 3395.30it/s]
100%|██████████| 4830/4830 [00:01<00:00, 3364.68it/s]
100%|██████████| 3171/3171 [00:00<00:00, 3391.13it/s]
100%|██████████| 4965/4965 [00:01<00:00, 3126.06it/s]
100%|██████████| 705/705 [00:00<00:00, 2981.91it/s]
100%|██████████| 717/717 [00:00<00:00, 3100.65it/s]
100%|██████████| 281/281 [00:00<00:00, 2941.01it/s]
100%|██████████| 4772/4772 [00:01<00:00, 2827.77it/s]
100%|██████████| 1982/1982 [00:00<00:00, 2970.20it/s]
100%|██████████| 1290/1290 [00:00<00:00, 2949.56it/s]
100%|██████████| 2524/2524 [00:00<00:00, 2928.26it/s]


In [5]:

def preprocess_image(image, target_size=(224, 224)):
    """Turn one grayscale image into a resized RGB array scaled to [-1, 1].

    Steps: cast to float32, clip to the 0-255 range, convert to uint8, expand
    gray to three RGB channels, resize, then map 0..255 onto -1..1.

    Args:
        image: Grayscale image array.
        target_size: Size passed straight to ``cv2.resize``.

    Returns:
        The resized three-channel image as a floating-point array.
    """
    # Clip in float space first so out-of-range values saturate instead of
    # wrapping around when narrowed to uint8.
    pixels_u8 = np.clip(image.astype(np.float32), 0, 255).astype(np.uint8)
    rgb = cv2.cvtColor(pixels_u8, cv2.COLOR_GRAY2RGB)
    resized = cv2.resize(rgb, target_size)
    return (resized / 127.5) - 1

def preprocess_and_save_images_with_labels(images, labels, output_file, dataset_name_image, dataset_name_label, batch_size=50, target_size=(224, 224)):
    """Preprocess images batch by batch and store them with labels in HDF5.

    The image dataset is created at its final size up front and filled one
    batch at a time, so only ``batch_size`` preprocessed images are held in
    memory at once. An existing ``output_file`` is overwritten.

    Args:
        images: Sequence of grayscale images accepted by ``preprocess_image``.
        labels: Class labels, one per image; stored as uint8.
        output_file: Path of the HDF5 file to write. Its parent folder is
            created when missing.
        dataset_name_image: Name of the image dataset inside the file.
        dataset_name_label: Name of the label dataset inside the file.
        batch_size: Number of images preprocessed per write; must be >= 1.
        target_size: ``(width, height)`` forwarded to ``preprocess_image``
            (``cv2.resize`` order). Stored images have shape
            ``(height, width, 3)``.

    Raises:
        ValueError: If ``batch_size`` is below 1 or the image and label
            counts differ.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    num_images = len(images)
    if len(labels) != num_images:
        raise ValueError(f"Got {num_images} images but {len(labels)} labels")

    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    width, height = target_size
    with h5py.File(output_file, 'w') as hf:
        # Shape follows target_size so it cannot drift from the resize step.
        dataset_image = hf.create_dataset(dataset_name_image, shape=(num_images, height, width, 3), dtype=np.float32)
        hf.create_dataset(dataset_name_label, data=labels, dtype=np.uint8)

        for start_idx in range(0, num_images, batch_size):
            end_idx = min(start_idx + batch_size, num_images)
            processed_batch = np.array(
                [preprocess_image(image, target_size=target_size) for image in images[start_idx:end_idx]],
                dtype=np.float32,
            )
            dataset_image[start_idx:end_idx] = processed_batch
            # Release the batch before building the next one.
            del processed_batch
            gc.collect()




# Write one preprocessed HDF5 file per source dataset
preprocess_and_save_images_with_labels(
    X_fer2013, y_fer2013,
    output_file='output files/fer2013_processed.h5',
    dataset_name_image='fer2013_images',
    dataset_name_label='fer2013_labels',
)
preprocess_and_save_images_with_labels(
    X_affectnet, y_affectnet,
    output_file='output files/affectnet_processed.h5',
    dataset_name_image='affectnet_images',
    dataset_name_label='affectnet_labels',
)
preprocess_and_save_images_with_labels(
    X_RAF, y_RAF,
    output_file='output files/rafdb_processed.h5',
    dataset_name_image='rafdb_images',
    dataset_name_label='rafdb_labels',
)

# Re-open each file read-only and report the shape of its image dataset
with h5py.File('output files/fer2013_processed.h5', mode='r') as hf:
    fer2013_images = hf['fer2013_images']
    print("Shape of FER2013 images:", fer2013_images.shape)

with h5py.File('output files/affectnet_processed.h5', mode='r') as hf:
    affectnet_images = hf['affectnet_images']
    print("Shape of AffectNet images:", affectnet_images.shape)

with h5py.File('output files/rafdb_processed.h5', mode='r') as hf:
    rafdb_images = hf['rafdb_images']
    print("Shape of Rafdb images:", rafdb_images.shape)

Shape of FER2013 images: (28709, 224, 224, 3)
Shape of AffectNet images: (0, 224, 224, 3)
Shape of Rafdb images: (12271, 224, 224, 3)


In [8]:

def load_data_from_hdf5_in_chunks(file_path, dataset_name_image, dataset_name_label, chunk_size=200):
    """Load images and labels from an HDF5 file in chunks.

    The output arrays are allocated once and filled ``chunk_size`` rows at a
    time. This avoids holding a list of chunks plus a concatenated copy at
    the same time, and it also works for datasets with zero rows.

    Args:
        file_path: Path of the HDF5 file to read.
        dataset_name_image: Name of the image dataset inside the file.
        dataset_name_label: Name of the label dataset inside the file.
        chunk_size: Number of rows read per slice; must be >= 1.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays with the datasets'
        dtypes. At most as many images as there are labels are returned.

    Raises:
        ValueError: If ``chunk_size`` is below 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    def _read_rows(source, count):
        # Copy the first `count` rows of `source` into a preallocated array.
        out = np.empty((count,) + tuple(source.shape[1:]), dtype=source.dtype)
        for start_idx in range(0, count, chunk_size):
            end_idx = min(start_idx + chunk_size, count)
            out[start_idx:end_idx] = source[start_idx:end_idx]
        return out

    with h5py.File(file_path, 'r') as hf:
        images_dataset = hf[dataset_name_image]
        labels_dataset = hf[dataset_name_label]

        num_samples = len(labels_dataset)
        # The label count bounds how many image rows are read.
        num_images = min(len(images_dataset), num_samples)

        images = _read_rows(images_dataset, num_images)
        labels = _read_rows(labels_dataset, num_samples)

    return images, labels


def save_combined_data(output_file, images, labels, image_dataset_name='combined_images', label_dataset_name='combined_labels'):
    """Save combined images and labels to an HDF5 file.

    Images are stored as float32 and labels as uint8. An existing
    ``output_file`` is overwritten, and its parent folder is created when
    missing.

    Args:
        output_file: Path of the HDF5 file to write.
        images: Image array (or array-like) to store.
        labels: Label array (or array-like), one entry per image.
        image_dataset_name: Name of the image dataset inside the file.
        label_dataset_name: Name of the label dataset inside the file.

    Raises:
        ValueError: If the number of images and labels differ.
    """
    if len(images) != len(labels):
        raise ValueError(f"Got {len(images)} images but {len(labels)} labels")

    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with h5py.File(output_file, 'w') as hf:
        hf.create_dataset(image_dataset_name, data=images, dtype=np.float32)
        hf.create_dataset(label_dataset_name, data=labels, dtype=np.uint8)

    print(f"Combined dataset saved to {output_file}")

# Read the per-dataset HDF5 files back into memory
X_fer2013, y_fer2013 = load_data_from_hdf5_in_chunks(
    file_path='output files/fer2013_processed.h5',
    dataset_name_image='fer2013_images',
    dataset_name_label='fer2013_labels',
)
# AffectNet is left out of the merge (its processed file was reported with 0 images):
# X_affectnet, y_affectnet = load_data_from_hdf5_in_chunks('output files/affectnet_processed.h5', 'affectnet_images', 'affectnet_labels')
X_RAF, y_RAF = load_data_from_hdf5_in_chunks(
    file_path='output files/rafdb_processed.h5',
    dataset_name_image='rafdb_images',
    dataset_name_label='rafdb_labels',
)


# Stack FER2013 and RAF-DB along the sample axis
X_combined = np.concatenate([X_fer2013, X_RAF], axis=0)
y_combined = np.concatenate([y_fer2013, y_RAF], axis=0)

# Persist the merged arrays in a new HDF5 file
output_combined_file = 'output files/combined_images_labels.h5'
save_combined_data(output_file=output_combined_file, images=X_combined, labels=y_combined)

# Function to list datasets in an HDF5 file
def list_datasets(file_path):
    """Print the name and shape of every top-level entry of an HDF5 file.

    Entries that have no ``shape`` attribute (such as groups) are listed as
    ``(group)`` instead of raising ``AttributeError``.

    Args:
        file_path: Path of the HDF5 file to inspect.
    """
    with h5py.File(file_path, 'r') as hf:
        print(f"Datasets in {file_path}:")
        for name in hf:
            entry = hf[name]
            if hasattr(entry, 'shape'):
                print(f" - {name}: {entry.shape}")
            else:
                print(f" - {name}: (group)")

# Show what ended up in the merged HDF5 file
list_datasets(file_path='output files/combined_images_labels.h5')


Combined dataset saved to output files/combined_images_labels.h5
Datasets in output files/combined_images_labels.h5:
 - combined_images: (40980, 224, 224, 3)
 - combined_labels: (40980,)


In [9]:
import h5py

# Function to list datasets in an HDF5 file
def list_datasets(file_path):
    """Print the name and shape of every top-level entry of an HDF5 file.

    Entries that have no ``shape`` attribute (such as groups) are listed as
    ``(group)`` instead of raising ``AttributeError``.

    Args:
        file_path: Path of the HDF5 file to inspect.
    """
    # NOTE(review): this notebook defines list_datasets in an earlier cell as
    # well; keep the two definitions identical or drop one of them.
    with h5py.File(file_path, 'r') as hf:
        print(f"Datasets in {file_path}:")
        for name in hf:
            entry = hf[name]
            if hasattr(entry, 'shape'):
                print(f" - {name}: {entry.shape}")
            else:
                print(f" - {name}: (group)")

# Show the contents of each per-dataset HDF5 file
list_datasets(file_path='output files/fer2013_processed.h5')
list_datasets(file_path='output files/affectnet_processed.h5')
list_datasets(file_path='output files/rafdb_processed.h5')


Datasets in output files/fer2013_processed.h5:
 - fer2013_images: (28709, 224, 224, 3)
 - fer2013_labels: (28709,)
Datasets in output files/affectnet_processed.h5:
 - affectnet_images: (0, 224, 224, 3)
 - affectnet_labels: (0,)
Datasets in output files/rafdb_processed.h5:
 - rafdb_images: (12271, 224, 224, 3)
 - rafdb_labels: (12271,)
