In [4]:
import os
import shutil

# ======================= CONFIGURATION =======================
# Root folders of the datasets to be combined. Each one should be the
# cleaned/fixed copy of the dataset.
source_datasets = ["Sorted_data", "fer2013"]
# To include CK+ as well, add "CK_Plus_Ready" to the list above.

# Folder that will receive the combined dataset.
output_dir = "../Combined_Dataset"

# The standard seven emotion classes that are kept.
# 'contempt' is deliberately left out so the datasets stay compatible.
target_classes = [
    'anger',
    'disgust',
    'fear',
    'happiness',
    'neutral',
    'sadness',
    'surprise',
]
# =============================================================

def merge_datasets(sources=None, destination=None, classes=None,
                   subsets=('train', 'valid', 'test')):
    """Copy the images of several datasets into one combined folder tree.

    For every ``subset``/``class`` pair a folder
    ``<destination>/<subset>/<class>`` is created, and every ``.png``,
    ``.jpg`` or ``.jpeg`` file found in ``<source>/<subset>/<class>`` of each
    source dataset is copied into it. Copied files are renamed to
    ``<source folder name>_<original name>`` so that identically named files
    from different datasets do not overwrite each other.

    Args:
        sources: Iterable of dataset root folders. Defaults to the
            module-level ``source_datasets``.
        destination: Folder to create the merged dataset in. Defaults to the
            module-level ``output_dir``.
        classes: Class folder names to merge. Defaults to the module-level
            ``target_classes``.
        subsets: Split folder names to merge. All listed splits (including
            ``'test'`` by default) are merged.

    Returns:
        None. Progress and per-class image counts are printed. If
        ``destination`` already exists nothing is copied and an error
        message is printed instead.

    Note:
        Two sources whose root folders share the same final path component
        (e.g. ``a/data`` and ``b/data``) get the same filename prefix, so
        identically named files from them would still collide.
    """
    # Resolve defaults at call time so later changes to the configuration
    # variables are picked up.
    sources = source_datasets if sources is None else sources
    destination = output_dir if destination is None else destination
    classes = target_classes if classes is None else classes

    # Refuse to write into an existing folder so an earlier merge is never
    # silently mixed with a new one.
    if os.path.exists(destination):
        print(f"Error: '{destination}' already exists. Please delete it first.")
        return

    print(f"Creating merged dataset in '{destination}'...")

    image_suffixes = ('.png', '.jpg', '.jpeg')

    for subset in subsets:
        print(f"\nProcessing {subset} set...")

        for class_name in classes:
            # Destination folder, e.g. Combined_Dataset/train/anger.
            # It is created even if no source provides this class.
            dest_path = os.path.join(destination, subset, class_name)
            os.makedirs(dest_path, exist_ok=True)

            total_copied = 0

            for dataset in sources:
                # Source folder, e.g. fer2013/train/anger
                src_path = os.path.join(dataset, subset, class_name)

                # isdir (not exists): a stray file at this path must be
                # reported as missing rather than crash os.listdir().
                if not os.path.isdir(src_path):
                    print(f"  Warning: {dataset} missing {subset}/{class_name}")
                    continue

                # Use only the dataset's folder name as the prefix. A source
                # given as a path ("../data/fer2013") would otherwise inject
                # path separators into the new filename.
                prefix = os.path.basename(os.path.normpath(dataset))

                for filename in os.listdir(src_path):
                    if not filename.lower().endswith(image_suffixes):
                        continue

                    shutil.copy2(
                        os.path.join(src_path, filename),
                        os.path.join(dest_path, f"{prefix}_{filename}"),
                    )
                    # Count only files that were actually copied, so skipped
                    # non-image entries do not inflate the reported total.
                    total_copied += 1

            print(f"  Merged '{class_name}': {total_copied} images")

    print(f"\nSuccess! Use '{destination}' for your next training.")

# Run the merge as soon as this cell/script is executed.
merge_datasets()

Creating merged dataset in '../Combined_Dataset'...

Processing train set...
  Merged 'anger': 5535 images
  Merged 'disgust': 2591 images
  Merged 'fear': 5299 images
  Merged 'happiness': 7926 images
  Merged 'neutral': 5588 images
  Merged 'sadness': 5778 images
  Merged 'surprise': 5356 images

Processing valid set...
  Merged 'anger': 1511 images
  Merged 'disgust': 759 images
  Merged 'fear': 1441 images
  Merged 'happiness': 2234 images
  Merged 'neutral': 1507 images
  Merged 'sadness': 1569 images
  Merged 'surprise': 1508 images

Processing test set...
  Merged 'anger': 1341 images
  Merged 'disgust': 438 images
  Merged 'fear': 1342 images
  Merged 'happiness': 2173 images
  Merged 'neutral': 1483 images
  Merged 'sadness': 1525 images
  Merged 'surprise': 1299 images

Success! Use '../Combined_Dataset' for your next training.
