In [2]:
import os
import shutil
from math import floor
import random

# --- SETTINGS ---
base_path = "/Users/ricardosantiago/Downloads/ck_original"
train_ratio = 0.7
random.seed(42)
# Folders this notebook generates inside base_path. They are outputs and must
# never be mistaken for source classes when the cell is re-run.
generated_dirs = ("7_class", "3_class", "2_class")


# --- STEP 1: Split 7-class dataset ---
def split_dataset(base_path, train_ratio, target_name="7_class",
                  skip_dirs=generated_dirs, clean=False):
    """Copy each class folder under ``base_path`` into train/val/test splits.

    Every non-hidden sub-folder of ``base_path`` (except the generated output
    folders) is treated as one class. Its files are shuffled with the global
    ``random`` generator and copied (originals are preserved) to
    ``base_path/target_name/{train,val,test}/<class_name>``.

    Args:
        base_path: Directory holding one sub-folder per class.
        train_ratio: Fraction of each class assigned to train (0..1). The
            remainder is halved between val and test; test gets the odd file.
        target_name: Name of the output folder created inside ``base_path``.
        skip_dirs: Folder names under ``base_path`` that are not classes.
        clean: If True, delete an existing output folder first so files from
            an earlier, different split cannot leak between train/val/test.

    Returns:
        dict mapping class name to ``(train_size, val_size, test_size)``.

    Raises:
        ValueError: if ``train_ratio`` is outside ``[0, 1]``.
        FileNotFoundError: if ``base_path`` does not exist.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio}")

    target_root = os.path.join(base_path, target_name)
    if os.path.isdir(target_root) and os.listdir(target_root):
        if clean:
            shutil.rmtree(target_root)
        else:
            print(f"Warning: {target_root} already exists; files from a previous "
                  "split are kept and may overlap across splits (use clean=True).")

    excluded = set(skip_dirs) | {target_name}
    sizes = {}

    # os.listdir returns entries in arbitrary order. Sorting both the classes
    # and their files makes the seeded shuffle reproducible across machines.
    for class_name in sorted(os.listdir(base_path)):
        class_path = os.path.join(base_path, class_name)
        if (class_name in excluded or class_name.startswith(".")
                or not os.path.isdir(class_path)):
            continue

        # Hidden files such as .DS_Store are not images.
        images = sorted(
            f for f in os.listdir(class_path)
            if not f.startswith(".") and os.path.isfile(os.path.join(class_path, f))
        )
        random.shuffle(images)

        total = len(images)
        train_size = int(total * train_ratio)
        val_size = (total - train_size) // 2
        test_size = total - train_size - val_size

        partitions = {
            "train": images[:train_size],
            "val": images[train_size:train_size + val_size],
            "test": images[train_size + val_size:],
        }
        for split, members in partitions.items():
            split_folder = os.path.join(target_root, split, class_name)
            os.makedirs(split_folder, exist_ok=True)
            for img in members:
                # copy instead of move to preserve originals
                shutil.copy(os.path.join(class_path, img),
                            os.path.join(split_folder, img))

        sizes[class_name] = (train_size, val_size, test_size)
        print(f"{class_name}: train={train_size}, val={val_size}, test={test_size}")

    return sizes


# In a notebook kernel __name__ is "__main__", so this runs when the cell is
# executed; it is skipped only if this code is imported as a module.
if __name__ == "__main__":
    split_dataset(base_path, train_ratio)

# --- STEP 2 & 3: Create 2-class and 3-class datasets by merging 7-class folders ---
class_map_2 = {
    "Positive": ["happy", "surprise"],
    "Negative": ["anger", "contempt", "disgust", "fear", "sadness"]
}

class_map_3 = {
    "Anger": ["anger"],
    "Happy": ["happy"],
    "Sadness": ["sadness"]
}


def merge_class_folders(base_path, target_name, class_map, source_name="7_class",
                        splits=("train", "val", "test"), clean=False):
    """Build a coarser dataset by copying source class folders into merged ones.

    For every split and every ``new_class -> [old_class, ...]`` entry of
    ``class_map``, the files of
    ``base_path/source_name/<split>/<old_class>`` are copied into
    ``base_path/target_name/<split>/<new_class>``.

    Args:
        base_path: Directory containing the ``source_name`` dataset.
        target_name: Name of the merged dataset folder to create.
        class_map: dict mapping each new class name to its source class names.
        source_name: Name of the already-split dataset to read from.
        splits: Split folder names to process.
        clean: If True, delete an existing ``target_name`` folder first so
            files from an earlier split cannot linger in the merged dataset.

    Returns:
        Nested dict ``{split: {new_class: number_of_distinct_files_written}}``.
    """
    target_root = os.path.join(base_path, target_name)
    if os.path.isdir(target_root) and os.listdir(target_root):
        if clean:
            shutil.rmtree(target_root)
        else:
            print(f"Warning: {target_root} already exists; files from a previous "
                  "run are kept (use clean=True to rebuild).")

    written = {}
    for split in splits:
        written[split] = {}
        for new_class, old_classes in class_map.items():
            new_folder = os.path.join(target_root, split, new_class)
            os.makedirs(new_folder, exist_ok=True)
            used_names = set()
            for old_class in old_classes:
                old_folder = os.path.join(base_path, source_name, split, old_class)
                if not os.path.isdir(old_folder):
                    # Still best-effort, but no longer silent: a misspelt class
                    # name would otherwise yield an incomplete dataset unnoticed.
                    print(f"Warning: source folder not found, skipped: {old_folder}")
                    continue
                # Sorted so that collision renaming below is deterministic.
                for img in sorted(os.listdir(old_folder)):
                    src = os.path.join(old_folder, img)
                    if not os.path.isfile(src):
                        continue
                    # Two source classes may contain the same file name; prefix
                    # the later one instead of overwriting the earlier copy.
                    dst_name = img if img not in used_names else f"{old_class}_{img}"
                    used_names.add(dst_name)
                    shutil.copy(src, os.path.join(new_folder, dst_name))
            written[split][new_class] = len(used_names)
    return written


# In a notebook kernel __name__ is "__main__", so this runs when the cell is
# executed; it is skipped only if this code is imported as a module.
if __name__ == "__main__":
    merge_class_folders(base_path, "2_class", class_map_2)
    merge_class_folders(base_path, "3_class", class_map_3)
    print("All dataset versions created successfully.")


happy: train=144, val=31, test=32
contempt: train=37, val=8, test=9
fear: train=52, val=11, test=12
surprise: train=174, val=37, test=38
sadness: train=58, val=13, test=13
anger: train=94, val=20, test=21
disgust: train=123, val=27, test=27
All dataset versions created successfully.


In [3]:
import os

base_path = "/Users/ricardosantiago/Downloads/ck_original"

dataset_versions = ["7_class", "3_class", "2_class"]
splits = ["train", "val", "test"]


def report_image_counts(base_path, dataset_versions, splits):
    """Print and return the number of files per class for each dataset split.

    Looks at ``base_path/<version>/<split>/<class_name>`` for every requested
    version and split. Class folders are listed in sorted order so the report
    is stable between runs.

    Args:
        base_path: Directory that contains the dataset version folders.
        dataset_versions: Names of the version folders to inspect.
        splits: Names of the split folders expected inside each version.

    Returns:
        Nested dict ``{version: {split: {class_name: file_count}}}``. Splits
        whose folder does not exist are reported on stdout and omitted.
    """
    counts = {}
    for version in dataset_versions:
        print(f"\n--- {version.upper()} DATASET ---")
        version_path = os.path.join(base_path, version)
        counts[version] = {}
        for split in splits:
            split_path = os.path.join(version_path, split)
            print(f"\n{split.upper()} split:")
            if not os.path.isdir(split_path):
                print("  Split folder does not exist.")
                continue

            per_class = {}
            # os.listdir order is arbitrary; sort for a deterministic report.
            for class_name in sorted(os.listdir(split_path)):
                class_folder = os.path.join(split_path, class_name)
                if not os.path.isdir(class_folder):
                    continue
                # Only regular files count; nested folders are ignored.
                num_files = sum(
                    1 for f in os.listdir(class_folder)
                    if os.path.isfile(os.path.join(class_folder, f))
                )
                per_class[class_name] = num_files
                print(f"  {class_name}: {num_files} images")
            print(f"  Total images: {sum(per_class.values())}")
            counts[version][split] = per_class
    return counts


image_counts = report_image_counts(base_path, dataset_versions, splits)



--- 7_CLASS DATASET ---

TRAIN split:
  happy: 144 images
  contempt: 37 images
  fear: 52 images
  surprise: 174 images
  sadness: 58 images
  anger: 94 images
  disgust: 123 images
  Total images: 682

VAL split:
  happy: 31 images
  contempt: 8 images
  fear: 11 images
  surprise: 37 images
  sadness: 13 images
  anger: 20 images
  disgust: 27 images
  Total images: 147

TEST split:
  happy: 32 images
  contempt: 9 images
  fear: 12 images
  surprise: 38 images
  sadness: 13 images
  anger: 21 images
  disgust: 27 images
  Total images: 152

--- 3_CLASS DATASET ---

TRAIN split:
  Happy: 144 images
  Sadness: 58 images
  Anger: 94 images
  Total images: 296

VAL split:
  Happy: 31 images
  Sadness: 13 images
  Anger: 20 images
  Total images: 64

TEST split:
  Happy: 32 images
  Sadness: 13 images
  Anger: 21 images
  Total images: 66

--- 2_CLASS DATASET ---

TRAIN split:
  Positive: 318 images
  Negative: 364 images
  Total images: 682

VAL split:
  Positive: 68 images
  Negative

In [4]:
import os

# Location of the already-split dataset to audit.
base_path = "/Users/ricardosantiago/Documents/GitHub/CMANAS/datasets/ckplus_split"

dataset_versions = ["7_class", "3_class", "2_class"]
splits = ["train", "val", "test"]

# For each dataset version and split, print the file count of every class
# folder followed by the split total.
for version in dataset_versions:
    print(f"\n--- {version.upper()} DATASET ---")
    for split in splits:
        split_path = os.path.join(base_path, version, split)
        print(f"\n{split.upper()} split:")
        if os.path.exists(split_path):
            running_total = 0
            for entry in os.listdir(split_path):
                folder = os.path.join(split_path, entry)
                if os.path.isdir(folder):
                    # Count regular files only; anything else in the class folder is ignored.
                    file_count = sum(
                        1 for name in os.listdir(folder)
                        if os.path.isfile(os.path.join(folder, name))
                    )
                    running_total += file_count
                    print(f"  {entry}: {file_count} images")
            print(f"  Total images: {running_total}")
        else:
            print("  Split folder does not exist.")


--- 7_CLASS DATASET ---

TRAIN split:
  happy: 144 images
  contempt: 37 images
  fear: 52 images
  surprise: 174 images
  sadness: 58 images
  anger: 94 images
  disgust: 123 images
  Total images: 682

VAL split:
  happy: 31 images
  contempt: 8 images
  fear: 11 images
  surprise: 37 images
  sadness: 13 images
  anger: 20 images
  disgust: 27 images
  Total images: 147

TEST split:
  happy: 32 images
  contempt: 9 images
  fear: 12 images
  surprise: 38 images
  sadness: 13 images
  anger: 21 images
  disgust: 27 images
  Total images: 152

--- 3_CLASS DATASET ---

TRAIN split:
  Happy: 144 images
  Sadness: 58 images
  Anger: 94 images
  Total images: 296

VAL split:
  Happy: 31 images
  Sadness: 13 images
  Anger: 20 images
  Total images: 64

TEST split:
  Happy: 32 images
  Sadness: 13 images
  Anger: 21 images
  Total images: 66

--- 2_CLASS DATASET ---

TRAIN split:
  Positive: 318 images
  Negative: 364 images
  Total images: 682

VAL split:
  Positive: 68 images
  Negative