In [4]:
import os
from sklearn.model_selection import train_test_split
import shutil

# Root folder of the raw captures; prepare_yolo_dataset() reads the per-skill
# sub-folders ("q_cd", "w_cd", "e_cd", "r_cd") from underneath this path.
OUTPUT_DIR = "yolo_dataset/raw/"

def _unique_dest_stems(samples):
    """Map each image path to an output file stem that no other sample uses.

    The train/val folders are flat, so naming outputs after the source
    basename alone lets identically named files from different skill folders
    overwrite one another. The first occurrence of a name keeps it unchanged;
    later duplicates get a ``<skill>_`` prefix, plus a numeric suffix if even
    that is already taken.
    """
    taken = set()
    stems = {}
    for img_path, skill in samples:
        stem = os.path.basename(img_path)[:-len('.jpg')]
        prefix = skill.lower()
        candidate = stem
        attempt = 0
        while candidate in taken:
            attempt += 1
            candidate = (f"{prefix}_{stem}" if attempt == 1
                         else f"{prefix}_{stem}_{attempt - 1}")
        taken.add(candidate)
        stems[img_path] = candidate
    return stems


def prepare_yolo_dataset(test_size=0.2, random_state=42):
    """Build a YOLO-style train/val dataset from the raw per-skill folders.

    Reads every ``.jpg`` file from ``<OUTPUT_DIR>/<skill>_cd`` for the skills
    Q, W, E and R, splits them into train and validation subsets, and copies
    each image into ``yolo_dataset/<split>/images``. When a ``.txt`` file with
    the same stem sits next to the image, it is copied into
    ``yolo_dataset/<split>/labels``; images without such a file are copied
    without a label. Finally ``yolo_dataset/dataset.yaml`` is (re)written.

    Existing output folders are reused and never cleaned, so files left over
    from an earlier run stay in place.

    Args:
        test_size: Forwarded to ``train_test_split``; share of the samples
            that goes to the validation split.
        random_state: Forwarded to ``train_test_split``; seed for the shuffle.

    Raises:
        FileNotFoundError: If one of the per-skill raw folders does not exist
            (raised by ``os.listdir``).
    """
    image_ext = '.jpg'

    # Create train/val directories
    for split in ('train', 'val'):
        os.makedirs(f"yolo_dataset/{split}/images", exist_ok=True)
        os.makedirs(f"yolo_dataset/{split}/labels", exist_ok=True)

    # Collect all skill images
    all_files = []
    for skill in ('Q', 'W', 'E', 'R'):
        skill_dir = os.path.join(OUTPUT_DIR, f"{skill.lower()}_cd")
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # seeded split below reproducible across machines and filesystems.
        names = sorted(f for f in os.listdir(skill_dir) if f.endswith(image_ext))
        all_files.extend((os.path.join(skill_dir, f), skill) for f in names)

    # Decide output names up front so the result does not depend on the
    # order in which the two splits are copied.
    dest_stems = _unique_dest_stems(all_files)

    # Split dataset
    train_files, val_files = train_test_split(
        all_files, test_size=test_size, random_state=random_state
    )

    # Copy files to YOLO format
    for files, split in ((train_files, 'train'), (val_files, 'val')):
        images_dir = f"yolo_dataset/{split}/images"
        labels_dir = f"yolo_dataset/{split}/labels"
        for img_path, _skill in files:
            stem = dest_stems[img_path]

            # Copy image
            shutil.copy(img_path, os.path.join(images_dir, stem + image_ext))

            # Copy label: swap only the trailing extension. str.replace()
            # would also rewrite a '.jpg' that appears earlier in the path.
            label_path = img_path[:-len(image_ext)] + '.txt'
            if os.path.exists(label_path):
                shutil.copy(label_path, os.path.join(labels_dir, stem + '.txt'))

    # Create dataset.yaml
    with open("yolo_dataset/dataset.yaml", 'w', encoding="utf-8") as f:
        f.write("""path: ../yolo_dataset
train: train/images
val: val/images

names:
  0: cooldown_digit
""")

# Run the preparation as soon as this cell/script executes; this creates the
# train/val folders, copies the files and writes yolo_dataset/dataset.yaml.
prepare_yolo_dataset()