### Remember to run this cell only once!

The dataset can be found here:

https://rndml-team-cv.obs.ru-moscow-1.hc.sbercloud.ru/datasets/hagrid/hagrid_dataset_new_554800/hagrid_dataset_512.zip

In [None]:
import os  # restored: every path operation below needs it; re-runs are guarded in split_dataset()
import random
import shutil

# ✅ Adjusted to your folder structure
SOURCE_DIR = "../data/raw_hagrid_dataset_512"
DEST_BASE = "../data"
CLASSES = ['rock', 'palm', 'like', 'dislike', 'ok', 'fist']

# Set counts for each split
SPLIT_COUNTS = {
    'train': 7000,
    'val': 1500,
    'test': 1500
}

# File name suffixes (compared case-insensitively) that count as images
IMAGE_EXTENSIONS = ('.jpg', '.png')

# Fixed seed so that every run selects exactly the same images per split
SEED = 42


def _list_images(folder):
    """Return the image file names in *folder*, sorted for reproducibility."""
    # os.listdir() returns entries in arbitrary order, so sort before shuffling.
    return sorted(
        name for name in os.listdir(folder)
        if name.lower().endswith(IMAGE_EXTENSIONS)
    )


def _plan_class(cls, source_dir, dest_base, split_counts, seed):
    """Return ``{split: [file names]}`` for one class without copying anything."""
    src_folder = os.path.join(source_dir, cls)
    if not os.path.isdir(src_folder):
        raise FileNotFoundError(f"❌ Source folder not found: {src_folder}")

    all_images = _list_images(src_folder)
    total_needed = sum(split_counts.values())
    if len(all_images) < total_needed:
        raise ValueError(f"❌ Not enough images in '{cls}' (found {len(all_images)}, need {total_needed})")

    # A per-class RNG keeps one class's split independent of the other classes.
    rng = random.Random() if seed is None else random.Random(f"{seed}:{cls}")
    rng.shuffle(all_images)

    plan = {}
    start = 0
    for split, count in split_counts.items():
        chosen = all_images[start:start + count]
        start += count

        # Images already in the destination that are not part of this plan come
        # from a different split; adding to them would leak data between splits.
        dest_folder = os.path.join(dest_base, split, cls)
        if os.path.isdir(dest_folder):
            stray = set(_list_images(dest_folder)) - set(chosen)
            if stray:
                raise RuntimeError(
                    f"❌ {dest_folder} already holds {len(stray)} image(s) from a "
                    f"different split; remove the folder before splitting again"
                )
        plan[split] = chosen
    return plan


def split_dataset(source_dir=SOURCE_DIR, dest_base=DEST_BASE, classes=None,
                  split_counts=None, seed=SEED):
    """Copy a reproducible train/val/test selection of images for each class.

    Images are read from ``source_dir/<class>`` and copied to
    ``dest_base/<split>/<class>``. Every class is validated before the first
    file is written, so a failure never leaves a half-written split behind.
    With a fixed *seed*, running again selects the same files and simply
    overwrites them.

    Args:
        source_dir: Folder containing one sub-folder of images per class.
        dest_base: Folder under which the split folders are created.
        classes: Class names to process; defaults to ``CLASSES``.
        split_counts: Mapping of split name to image count per class;
            defaults to ``SPLIT_COUNTS``.
        seed: Seed for the shuffle; ``None`` gives a non-reproducible split.

    Returns:
        ``{class: {split: [file names]}}`` describing what was copied.

    Raises:
        FileNotFoundError: A class folder is missing from *source_dir*.
        ValueError: A class has fewer images than the splits require.
        RuntimeError: A destination folder already holds images that belong
            to a different split.
    """
    if classes is None:
        classes = CLASSES
    if split_counts is None:
        split_counts = SPLIT_COUNTS

    # Plan (and validate) everything first; nothing is written until this succeeds.
    plans = {
        cls: _plan_class(cls, source_dir, dest_base, split_counts, seed)
        for cls in classes
    }

    for cls, plan in plans.items():
        print(f"\n📂 Processing class: {cls}")
        src_folder = os.path.join(source_dir, cls)

        for split, names in plan.items():
            dest_folder = os.path.join(dest_base, split, cls)
            os.makedirs(dest_folder, exist_ok=True)
            for img in names:
                shutil.copy2(os.path.join(src_folder, img), os.path.join(dest_folder, img))
            print(f"✅ Copied {len(names)} to {split}/{cls}")

    return plans


# True both in a notebook cell and when executed as a script.
if __name__ == "__main__":
    split_dataset()
    print("\n🎉 Done! Images split into data/train/, data/val/, and data/test/")


📂 Processing class: rock
✅ Copied 7000 to train/rock
✅ Copied 1500 to val/rock
✅ Copied 1500 to test/rock

📂 Processing class: palm
✅ Copied 7000 to train/palm
✅ Copied 1500 to val/palm
✅ Copied 1500 to test/palm

📂 Processing class: like
✅ Copied 7000 to train/like
✅ Copied 1500 to val/like
✅ Copied 1500 to test/like

📂 Processing class: dislike
✅ Copied 7000 to train/dislike
✅ Copied 1500 to val/dislike
✅ Copied 1500 to test/dislike

📂 Processing class: ok
✅ Copied 7000 to train/ok
✅ Copied 1500 to val/ok
✅ Copied 1500 to test/ok

📂 Processing class: fist
✅ Copied 7000 to train/fist
✅ Copied 1500 to val/fist
✅ Copied 1500 to test/fist

🎉 Done! Images split into data/train/, data/val/, and data/test/
