In [10]:
import os
from PIL import Image
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
import tqdm

class CustomDatasetYOLO:
    def __init__(self, folder_path, image_size=640, transforms=None):
        self.folder_path = folder_path
        self.transforms = transforms
        self.image_size = image_size
        self.data_pairs = self._load_data_pairs()

    def _load_data_pairs(self):
        image_files = []
        json_files = {}
        data_pairs = []

        for root, _, files in os.walk(self.folder_path):
            for file in files:
                if file.endswith((".jpg", ".jpeg", ".png")):
                    image_files.append(os.path.join(root, file))
                elif file.endswith(".json"):
                    json_files[os.path.splitext(file)[0]] = os.path.join(root, file)

        for image_file_path in tqdm.tqdm(image_files, desc="Loading data pairs"):
            image_name = os.path.splitext(os.path.basename(image_file_path))[0]

            # 해당 이미지 파일과 매칭되는 JSON 파일 찾기
            if image_name in json_files:
                json_file_path = json_files[image_name]
                data_pairs.append((image_file_path, json_file_path))
            else:
                print(f"JSON file not found for image: {image_file_path}")

        return data_pairs

    def resize_image_and_annotations(self, image_path, annotations, categories, target_size):
        img = Image.open(image_path)
        img_resized = img.resize(target_size, Image.LANCZOS)  # 이미지 크기 조정

        width_ratio = target_size[0] / img.width  # 너비 비율 계산
        height_ratio = target_size[1] / img.height  # 높이 비율 계산

        # 한글에서 영어로 라벨을 매핑하는 딕셔너리 (예시)
        label_mapping = {'화방': 1, '줄기': 2, '잎': 3, '열매': 0}

        labels = []
        for ann in annotations:
            x, y, w, h = ann['bbox']
            category_id = ann['category_id']
            category_name = categories.get(category_id, '')

            # 라벨 매핑
            label = label_mapping.get(category_name, -1)
            if label == -1:
                # 처리할 수 없는 경우 스킵
                continue

            # 조정된 이미지의 바운딩 박스 좌표 계산
            x_resized = x * width_ratio
            y_resized = y * height_ratio
            w_resized = w * width_ratio
            h_resized = h * height_ratio

            # YOLO 형식의 정규화된 좌표 계산
            x_center = (x_resized + w_resized / 2) / target_size[0]
            y_center = (y_resized + h_resized / 2) / target_size[1]
            box_width = w_resized / target_size[0]
            box_height = h_resized / target_size[1]

            # YOLO 형식 문자열로 변환
            label_str = f"{label} {x_center} {y_center} {box_width} {box_height}"
            labels.append(label_str)

        return img_resized, labels

    def create_label_files(self):
        def process_image(image_path, json_path, split):
            parent_folder = os.path.join(self.folder_path, split)
            images_folder = os.path.join(parent_folder, 'images')
            labels_folder = os.path.join(parent_folder, 'labels')
            os.makedirs(images_folder, exist_ok=True)
            os.makedirs(labels_folder, exist_ok=True)

            img_name = os.path.splitext(os.path.basename(image_path))[0]
            txt_file = os.path.join(labels_folder, f"{img_name}.txt")
            img_resized_path = os.path.join(images_folder, f"{img_name}.jpg")

            # 이미 라벨 파일이 존재하는 경우 스킵
            if os.path.exists(txt_file) and os.path.exists(img_resized_path):
                print(f"Skipping already processed file: {image_path}")
                return

            with open(json_path, 'r') as json_file:
                json_data = json.load(json_file)

            # 라벨이 없는 경우 패스
            if not json_data.get('annotations'):
                print(f"No annotations found for {image_path}, skipping...")
                return

            # categories 정보를 가져옴
            categories = {cat['id']: cat['name'] for cat in json_data['categories']}

            img_resized, labels = self.resize_image_and_annotations(image_path, json_data['annotations'], categories, (self.image_size, self.image_size))

            img_resized.save(img_resized_path)

            with open(txt_file, 'w') as f:
                for label in labels:
                    f.write(f"{label}\n")

        with ThreadPoolExecutor() as executor:
            futures = []
            for image_path, json_path in self.data_pairs:
                split = image_path.split(os.sep)[-3]  # 'train', 'val', 'test'
                futures.append(executor.submit(process_image, image_path, json_path, split))

            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    print(f"Exception occurred: {e}")

        print("YOLO 라벨 파일 생성이 완료되었습니다.")



Loading data pairs: 0it [00:00, ?it/s]

YOLO 라벨 파일 생성이 완료되었습니다.





In [11]:
if __name__ == "__main__":
    dataset = CustomDatasetYOLO('D:/NOA/시설작물모델/output/datasets', image_size=640)
    dataset.create_label_files()

Loading data pairs: 100%|██████████| 38562/38562 [00:00<00:00, 217219.76it/s]


Skipping already processed file: D:/NOA/시설작물모델/output/datasets\test\images\02_20210104_1895953.jpg
Skipping already processed file: D:/NOA/시설작물모델/output/datasets\test\images\04_20201026_531536.jpg
Skipping already processed file: D:/NOA/시설작물모델/output/datasets\test\images\04_20201026_531324.jpg
Skipping already processed file: D:/NOA/시설작물모델/output/datasets\test\images\04_20201026_531256.jpg
Skipping already processed file: D:/NOA/시설작물모델/output/datasets\test\images\04_20201026_532188.jpg
Skipping already processed file: D:/NOA/시설작물모델/output/datasets\test\images\04_20201026_531561.jpg
Skipping already processed file: D:/NOA/시설작물모델/output/datasets\test\images\04_20201026_532218.jpg
Skipping already processed file: D:/NOA/시설작물모델/output/datasets\test\images\04_20201026_532770.jpg
Skipping already processed file: D:/NOA/시설작물모델/output/datasets\test\images\04_20201026_532549.jpg
Skipping already processed file: D:/NOA/시설작물모델/output/datasets\test\images\04_20201026_532566.jpg
Skipping already pr

#### cpu가 좋을 때 ProcessPoolExecutor

In [6]:
import os
import json
from tqdm import tqdm
import cv2
from concurrent.futures import ProcessPoolExecutor, as_completed

# 전역 함수로 이동
def process_image(image_path, json_path, split, folder_path, image_size):
    try:
        parent_folder = os.path.join(folder_path, split)
        images_folder = os.path.join(parent_folder, 'images')
        labels_folder = os.path.join(parent_folder, 'labels')
        os.makedirs(images_folder, exist_ok=True)
        os.makedirs(labels_folder, exist_ok=True)

        img_name = os.path.splitext(os.path.basename(image_path))[0]
        txt_file = os.path.join(labels_folder, f"{img_name}.txt")
        img_resized_path = os.path.join(images_folder, f"{img_name}.jpg")

        if os.path.exists(txt_file) and os.path.exists(img_resized_path):
            print(f"Skipping already processed file: {image_path}")
            return

        with open(json_path, 'r') as json_file:
            json_data = json.load(json_file)

        if not json_data.get('annotations'):
            print(f"No annotations found for {image_path}, skipping...")
            return

        categories = {cat['id']: cat['name'] for cat in json_data['categories']}
        img_resized, labels = resize_image_and_annotations(image_path, json_data['annotations'], categories, (image_size, image_size))

        cv2.imwrite(img_resized_path, img_resized)

        with open(txt_file, 'w') as f:
            for label in labels:
                f.write(f"{label}\n")

    except Exception as e:
        print(f"Exception occurred while processing {image_path}: {e}")

def resize_image_and_annotations(image_path, annotations, categories, target_size):
    img = cv2.imread(image_path)
    img_resized = cv2.resize(img, target_size, interpolation=cv2.INTER_LANCZOS4)

    width_ratio = target_size[0] / img.shape[1]
    height_ratio = target_size[1] / img.shape[0]

    label_mapping = {'화방': 1, '줄기': 2, '잎': 3, '열매': 0}

    labels = []
    for ann in annotations:
        x, y, w, h = ann['bbox']
        category_id = ann['category_id']
        category_name = categories.get(category_id, '')

        label = label_mapping.get(category_name, -1)
        if label == -1:
            continue

        x_resized = x * width_ratio
        y_resized = y * height_ratio
        w_resized = w * width_ratio
        h_resized = h * height_ratio

        x_center = (x_resized + w_resized / 2) / target_size[0]
        y_center = (y_resized + h_resized / 2) / target_size[1]
        box_width = w_resized / target_size[0]
        box_height = h_resized / target_size[1]

        label_str = f"{label} {x_center} {y_center} {box_width} {box_height}"
        labels.append(label_str)

    return img_resized, labels



In [7]:
class CustomDatasetYOLO:
    def __init__(self, folder_path, image_size=416, transforms=None):
        self.folder_path = folder_path
        self.transforms = transforms
        self.image_size = image_size
        self.data_pairs = self._load_data_pairs()

    def _load_data_pairs(self):
        image_files = []
        json_files = {}
        data_pairs = []

        for root, _, files in os.walk(self.folder_path):
            for file in files:
                if file.endswith((".jpg", ".jpeg", ".png")):
                    image_files.append(os.path.join(root, file))
                elif file.endswith(".json"):
                    json_files[os.path.splitext(file)[0]] = os.path.join(root, file)

        for image_file_path in tqdm(image_files, desc="Loading data pairs"):
            image_name = os.path.splitext(os.path.basename(image_file_path))[0]

            if image_name in json_files:
                json_file_path = json_files[image_name]
                data_pairs.append((image_file_path, json_file_path))
            else:
                print(f"JSON file not found for image: {image_file_path}")

        return data_pairs

    def create_label_files(self, max_workers=4):
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            futures = []
            for image_path, json_path in self.data_pairs:
                split = image_path.split(os.sep)[-3]  # 'train', 'val', 'test'
                futures.append(executor.submit(process_image, image_path, json_path, split, self.folder_path, self.image_size))

            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    print(f"Exception occurred: {e}")

        print("YOLO 라벨 파일 생성이 완료되었습니다.")



In [9]:
if __name__ == "__main__":
    dataset = CustomDatasetYOLO('D:/NOA/시설작물모델/output/datasets', image_size=640)
    dataset.create_label_files(max_workers=2)


Loading data pairs: 100%|██████████| 38562/38562 [00:00<00:00, 222191.81it/s]


Exception occurred: A process in the process pool was terminated abruptly while the future was running or pending.
Exception occurred: A process in the process pool was terminated abruptly while the future was running or pending.
Exception occurred: A process in the process pool was terminated abruptly while the future was running or pending.
Exception occurred: A process in the process pool was terminated abruptly while the future was running or pending.
Exception occurred: A process in the process pool was terminated abruptly while the future was running or pending.
Exception occurred: A process in the process pool was terminated abruptly while the future was running or pending.
Exception occurred: A process in the process pool was terminated abruptly while the future was running or pending.
Exception occurred: A process in the process pool was terminated abruptly while the future was running or pending.
Exception occurred: A process in the process pool was terminated abruptly while 