In [None]:
import os
import shutil
import random
from zipfile import ZipFile

# ---------------- Paths ----------------
# All locations are absolute paths inside the notebook runtime.
fer_zip = "/content/FER 2013 Dataset.zip"  # uploaded FER2013 archive
fer_path = "/content/fer"                  # where the archive gets unpacked
# NOTE(review): oahega_path is not referenced by any cell visible here.
oahega_path = "/kaggle/input/oahega-emotion/OAHEGA_EMOTION"  # OAHEGA path
final_dataset = "/content/final_dataset"   # root of the merged dataset

# ---------------- Emotions ----------------
# Class names; they double as the sub-folder names of the merged dataset.
emotions = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]

# ---------------- Step 0: Clean previous folders ----------------
# Remove leftovers from an earlier run so the cell starts from a clean slate.
for stale_dir in filter(os.path.exists, (fer_path, final_dataset)):
    shutil.rmtree(stale_dir)

# ---------------- Step 1: Extract FER2013 zip ----------------
with ZipFile(fer_zip) as archive:
    archive.extractall(path=fer_path)

# Check structure
extracted_entries = os.listdir(fer_path)
print("FER2013 extracted folders:", extracted_entries)

FER2013 extracted folders: ['test', 'train']


In [None]:
import kagglehub

# Fetch the OAHEGA dataset through kagglehub. The handle carries no version
# pin; the call's return value is kept in `path` and printed below.
oahega_handle = "alexa1979/oahega-emotion"
path = kagglehub.dataset_download(oahega_handle)

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/alexa1979/oahega-emotion?dataset_version_number=1...


100%|██████████| 1.98G/1.98G [00:22<00:00, 95.9MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/alexa1979/oahega-emotion/versions/1


In [None]:
# ---------------- Step 2: Create final dataset folders ----------------
# Build <final_dataset>/<split>/<emotion> for every split/emotion pair.
# exist_ok=True makes the cell safe to re-run.
for split in ("train", "test"):
    split_root = os.path.join(final_dataset, split)
    for emotion in emotions:
        os.makedirs(os.path.join(split_root, emotion), exist_ok=True)


In [None]:
# ---------------- Step 3: Copy OAHEGA images and split 80/20 ----------------
# Each OAHEGA class folder is shuffled and divided 80/20 into train/test, and
# the images are copied into the merged dataset with an "oahega_" prefix.
random.seed(42)
oahega_source = "/root/.cache/kagglehub/datasets/alexa1979/oahega-emotion/versions/1/OAHEGA_EMOTION"

# os.listdir returns entries in arbitrary order, so both the class folders and
# the files inside them are sorted first; otherwise the fixed seed would not
# yield the same split from one run (or machine) to the next.
for emotion in sorted(os.listdir(oahega_source)):
    # Normalise once and use the same label for the filter AND the destination,
    # so a folder name with stray whitespace cannot pass the check and then
    # point at a destination folder that was never created.
    label = emotion.strip().lower()
    if label not in emotions:
        continue  # not one of the target emotions (e.g. the 'Ahegao' class)
    src = os.path.join(oahega_source, emotion)
    if not os.path.isdir(src):
        continue  # a stray file whose name happens to match an emotion

    files = sorted(os.listdir(src))
    random.shuffle(files)
    split_idx = int(0.8 * len(files))
    train_files = files[:split_idx]
    test_files = files[split_idx:]

    for split, split_files in (("train", train_files), ("test", test_files)):
        dst = os.path.join(final_dataset, split, label)
        for f in split_files:
            shutil.copy(os.path.join(src, f), os.path.join(dst, f"oahega_{f}"))

# ---------------- Step 4: Verify counts ----------------
# Print how many files each emotion folder holds, per split.
for split in ("train", "test"):
    print(f"\n{split.upper()} DATASET (OAHEGA only):")
    split_root = os.path.join(final_dataset, split)
    for emotion in emotions:
        n_images = len(os.listdir(os.path.join(split_root, emotion)))
        print(f"{emotion}: {n_images}")

# ---------------- Step 5: Show folder structure ----------------
# Peek at up to five file names per emotion folder as a sanity check.
for split in ("train", "test"):
    print(f"\n{split.upper()} folder contents:")
    for emotion in emotions:
        folder = os.path.join(final_dataset, split, emotion)
        preview = os.listdir(folder)[:5]
        print(f"{emotion}:", preview, "...")


TRAIN DATASET (OAHEGA only):
angry: 1122
disgust: 0
fear: 0
happy: 3253
sad: 3065
surprise: 1006
neutral: 3254

TEST DATASET (OAHEGA only):
angry: 281
disgust: 0
fear: 0
happy: 814
sad: 767
surprise: 252
neutral: 814

TRAIN folder contents:
angry: ['oahega_bb9094a8ff413cc3f6665f7635eb0bcf9e40cbc758cfd3a74b92bbe7~angry.jpg', 'oahega_cropped_emotions.571194~angry.png', 'oahega_cropped_emotions.231637~angry.png', 'oahega_cropped_emotions.231473~angry.png', 'oahega_cropped_emotions.231718~angry.png'] ...
disgust: [] ...
fear: [] ...
happy: ['oahega_0b661c3ecc88e64168bd1c9bcc9f56369a10c47b5a10a36130a13ef3.jpg', 'oahega_0c8dfd2894e393ba1de49806e136e8eabb7cf4c4bb128a3c9e508565.jpg', 'oahega_0d2b4f1ff6d5fced42d5c61d29d7a34da640a6393070b394ce0b628f.jpg', 'oahega_0af4bcbfce37b9fc4613d80460764c819211717d5ff9e5157085c8b4.jpg', 'oahega_0adb7e96dd29bf3fff14a36dbf0a57c1865b5688782b1519ee36c90d.JPG'] ...
sad: ['oahega_1c4f4d75d2460ae0585434bd4c24753a2b0ca4111140ab63289cd652.jpg', 'oahega_8a4680937faf

In [None]:
# ---------------- Step 6: Copy FER2013 images ----------------
# FER2013 already ships with train/test folders, so each class folder is
# copied into the matching split of the merged dataset with a "fer_" prefix.
for split in ("train", "test"):
    split_path = os.path.join(fer_path, split)
    for class_dir in os.listdir(split_path):
        label = class_dir.strip().lower()
        if label in emotions:
            src = os.path.join(split_path, class_dir)
            dst = os.path.join(final_dataset, split, label)
            for image_name in os.listdir(src):
                shutil.copy(os.path.join(src, image_name),
                            os.path.join(dst, f"fer_{image_name}"))

# ---------------- Step 7: Verify counts ----------------
# Same per-folder count as before, now covering both sources.
for split in ("train", "test"):
    print(f"\n{split.upper()} DATASET (OAHEGA + FER2013):")
    split_root = os.path.join(final_dataset, split)
    for emotion in emotions:
        n_images = len(os.listdir(os.path.join(split_root, emotion)))
        print(f"{emotion}: {n_images}")


TRAIN DATASET (OAHEGA + FER2013):
angry: 5117
disgust: 436
fear: 4097
happy: 10468
sad: 7895
surprise: 4177
neutral: 8219

TEST DATASET (OAHEGA + FER2013):
angry: 1239
disgust: 111
fear: 1024
happy: 2588
sad: 2014
surprise: 1083
neutral: 2047


In [None]:
import shutil

# Merged dataset folder and the archive that should be produced from it
final_dataset = "/content/final_dataset"
zip_path = "/content/final_dataset.zip"

# make_archive expects the base name without the format extension,
# so ".zip" is removed from the target path before the call.
archive_base = zip_path.replace('.zip','')
shutil.make_archive(archive_base, 'zip', root_dir=final_dataset)

print(f"Final dataset zipped successfully: {zip_path}")


Final dataset zipped successfully: /content/final_dataset.zip


In [None]:
import shutil

from google.colab import drive
drive.mount('/content/drive')

# Copy the archive to Drive. shutil.copy is used instead of a `!cp` shell line
# because a failing shell command does not raise, so a failed upload would go
# unnoticed; shutil.copy raises if the source is missing or the write fails.
shutil.copy("/content/final_dataset.zip", "/content/drive/MyDrive/")


Mounted at /content/drive
