In [1]:
import os
import random
import shutil
from tqdm import tqdm

class DataSplitter:
    def __init__(self, data_path, output_path, split_ratio_train, split_ratio_val, split_ratio_test):
        self.data_path = data_path
        self.output_path = output_path
        self.split_ratio_train = split_ratio_train
        self.split_ratio_val = split_ratio_val
        self.split_ratio_test = split_ratio_test

    def split_data(self):
        # 각 데이터 폴더를 생성합니다.
        train_path = os.path.join(self.output_path, "train")
        val_path = os.path.join(self.output_path, "val")
        test_path = os.path.join(self.output_path, "test")
        os.makedirs(train_path, exist_ok=True)
        os.makedirs(val_path, exist_ok=True)
        os.makedirs(test_path, exist_ok=True)

        # 이미지와 JSON 파일을 순서대로 가져와서 train, val, test 폴더로 복사합니다.
        for root, dirs, files in tqdm(os.walk(self.data_path), desc="Splitting data"):
            for file in files:
                if file.endswith((".jpg", ".jpeg", ".png")):
                    img_path = os.path.join(root, file)
                    json_path = os.path.join(root.replace("img", "label"), file.split(".")[0] + ".json")

                    rand_val = random.random()
                    if rand_val < self.split_ratio_train:
                        dest_folder = train_path
                    elif rand_val < self.split_ratio_train + self.split_ratio_val:
                        dest_folder = val_path
                    else:
                        dest_folder = test_path

                    # 이미지와 JSON을 이동시킵니다.
                    img_dest_folder = os.path.join(dest_folder, "IMG")
                    json_dest_folder = os.path.join(dest_folder, "JSON")
                    os.makedirs(img_dest_folder, exist_ok=True)
                    os.makedirs(json_dest_folder, exist_ok=True)

                    shutil.copy(img_path, os.path.join(img_dest_folder, os.path.basename(img_path)))
                    shutil.copy(json_path, os.path.join(json_dest_folder, os.path.basename(json_path)))


In [2]:
# Where the raw dataset lives and where the split copy is written.
DATA_PATH = "C:/Users/user/Desktop/pj/noa/rose/새 폴더"
OUTPUT_PATH = "./dataset"

# Fractions passed to DataSplitter for the train / validation / test subsets.
split_ratio_train, split_ratio_val, split_ratio_test = 0.7, 0.2, 0.1

# Build the splitter with explicit keyword arguments, then run the split.
data_splitter = DataSplitter(
    data_path=DATA_PATH,
    output_path=OUTPUT_PATH,
    split_ratio_train=split_ratio_train,
    split_ratio_val=split_ratio_val,
    split_ratio_test=split_ratio_test,
)
data_splitter.split_data()


Splitting data: 3it [00:05,  1.98s/it]
