# 分割原始数据为 --> train-val-test

In [3]:
import os
import shutil
import random
from pathlib import Path

# Configuration
SOURCE_DIR = Path("cropped_objects")  # root of the raw data, one sub-directory per class
SPLITS = {"train": 0.6, "val": 0.2, "test": 0.2}  # split name -> ratio
# Output directory; the split ratios are embedded in its name.
DEST_DIR = Path(f"data/data_split_{SPLITS['train']}_{SPLITS['val']}_{SPLITS['test']}")
# Class names are the sub-directory names of SOURCE_DIR. iterdir() yields entries
# in arbitrary order, so sort them to keep the class order stable between runs
# and machines.
CLASSES = sorted(d.name for d in SOURCE_DIR.iterdir() if d.is_dir())
IMG_EXTS = [".jpg", ".jpeg", ".png"]  # accepted image suffixes (lower-case)
SEED = 42  # seed for the `random` module

# Seed the RNG so that the shuffle performed later is repeatable
random.seed(SEED)

# Build the <DEST_DIR>/<split>/<class> directory tree
for split in SPLITS:
    split_root = DEST_DIR / split
    for cls in CLASSES:
        os.makedirs(split_root / cls, exist_ok=True)

# Split every class into train/val/test and copy the files into DEST_DIR.
# Classes are visited in sorted order because all classes share one RNG stream,
# so the visiting order influences each class's shuffle.
for cls in sorted(CLASSES):
    # iterdir() yields entries in arbitrary, filesystem-dependent order; sorting
    # first makes the seeded shuffle produce the same split on every run.
    # Only regular files are kept, since copy2() cannot copy a directory.
    files = sorted(
        f
        for f in (SOURCE_DIR / cls).iterdir()
        if f.is_file() and f.suffix.lower() in IMG_EXTS
    )
    random.shuffle(files)

    n = len(files)
    n_train = int(SPLITS["train"] * n)
    n_val = int(SPLITS["val"] * n)

    # int() truncates, so "test" receives whatever is left after train and val.
    split_files = {
        "train": files[:n_train],
        "val": files[n_train:n_train + n_val],
        "test": files[n_train + n_val:],
    }

    for split, file_list in split_files.items():
        for file in file_list:
            # copy2 also preserves file metadata such as modification time
            dest = DEST_DIR / split / cls / file.name
            shutil.copy2(file, dest)

print("✅ Dataset successfully split into train/val/test.")


✅ Dataset successfully split into train/val/test.
