In [2]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Source data directories
dic_dir = r"C:\Users\user\Desktop\Cell DIC Mask R-CNN\png_convert\DIC"
mask_dir = r"C:\Users\user\Desktop\Cell DIC Mask R-CNN\png_convert\mask"

# Output directories, laid out as <output_dir>/{train,val}/{DIC,mask}
output_dir = r"C:\Users\user\Desktop\Cell DIC Mask R-CNN\png_convert\Training_data_512"
train_dic_dir = os.path.join(output_dir, "train", "DIC")
train_mask_dir = os.path.join(output_dir, "train", "mask")
val_dic_dir = os.path.join(output_dir, "val", "DIC")
val_mask_dir = os.path.join(output_dir, "val", "mask")

# Create the output directories (existing ones are left untouched)
for _out_dir in (train_dic_dir, train_mask_dir, val_dic_dir, val_mask_dir):
    os.makedirs(_out_dir, exist_ok=True)

# List the source files.
# NOTE: each listing is sorted on its own and the two lists are then paired
# purely by position, so both directories must contain the same number of
# entries in the same sort order for an image to meet its own mask.
dic_files = os.listdir(dic_dir)
dic_files.sort()
mask_files = os.listdir(mask_dir)
mask_files.sort()

# Train/validation split (80% train, 20% validation); the fixed random_state
# keeps the split reproducible between runs.
train_dic, val_dic, train_mask, val_mask = train_test_split(
    dic_files,
    mask_files,
    test_size=0.2,
    random_state=42,
)

# Overlapping patch extraction
def split_with_overlap(image, mask, patch_size, overlap):
    """
    Cut an image and its mask into aligned square patches on an overlapping grid.

    Patch origins start at the top-left corner and advance by
    ``patch_size - overlap`` pixels along each axis, rows first. Only windows
    that fit completely inside BOTH the image and the mask are returned, so
    every image patch is paired with a mask patch of the same height and
    width; any border strip too small for a full patch is dropped.

    The returned patches are slices of the inputs, not copies.

    :param image: image array whose first two axes are (height, width)
    :param mask: mask array indexed the same way as ``image``
    :param patch_size: side length of each square patch, in pixels
    :param overlap: pixels shared by neighbouring patches; must be smaller
        than ``patch_size``
    :return: ``(patches_img, patches_mask)``, two equally long lists in
        row-major order
    :raises ValueError: if ``patch_size`` is not positive or ``overlap`` is
        not smaller than ``patch_size``
    """
    step = patch_size - overlap
    if patch_size <= 0 or step <= 0:
        # A non-positive stride would make range() fail with an unrelated
        # message (step == 0) or silently produce nothing useful (step < 0).
        raise ValueError(
            f"patch_size must be positive and larger than overlap "
            f"(got patch_size={patch_size}, overlap={overlap})"
        )

    # Restrict the grid to the area covered by both arrays so that an
    # undersized mask can never yield a short mask patch.
    height = min(image.shape[0], mask.shape[0])
    width = min(image.shape[1], mask.shape[1])

    patches_img = []
    patches_mask = []

    # The "+ 1" keeps the last origin at which a full patch still fits.
    for y in range(0, height - patch_size + 1, step):
        for x in range(0, width - patch_size + 1, step):
            patches_img.append(image[y:y + patch_size, x:x + patch_size])
            patches_mask.append(mask[y:y + patch_size, x:x + patch_size])

    return patches_img, patches_mask

# Minimum mask size filter
def is_mask_large_enough(mask, min_area=50):
    """
    Tell whether the mask contains at least one sufficiently large region.

    The outer contours of the mask are extracted and the check succeeds as
    soon as one of them has a contour area of ``min_area`` or more. The area
    is the value reported by ``cv2.contourArea`` for the contour polygon,
    which is not necessarily the same as the region's pixel count.

    :param mask: mask image passed to ``cv2.findContours``
    :param min_area: smallest contour area that counts as large enough
    :return: True if any outer contour reaches ``min_area``, otherwise False
        (including when no contour is found)
    """
    outlines, _hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return any(cv2.contourArea(outline) >= min_area for outline in outlines)

# Data augmentation
def augment_and_save(image, mask, save_dir_image, save_dir_mask, prefix):
    """
    Write flipped and rotated copies of an image/mask pair to disk.

    The same transform is applied to the image and to the mask so the pair
    stays aligned. Five variants are produced: horizontal flip, vertical
    flip, and ``np.rot90`` with k = 1, 2 and 3. Each variant is saved as
    ``<prefix>_<method>.png`` in ``save_dir_image`` and ``save_dir_mask``.

    A write that fails is reported with a ``[WARN]`` message instead of being
    silently ignored; the remaining variants are still written.

    :param image: image patch to augment
    :param mask: mask patch matching ``image``
    :param save_dir_image: directory receiving the augmented images
    :param save_dir_mask: directory receiving the augmented masks
    :param prefix: file name stem shared by all outputs of this patch
    """
    aug_methods = {
        "hflip": lambda img: cv2.flip(img, 1),
        "vflip": lambda img: cv2.flip(img, 0),
        "rot90": lambda img: np.rot90(img),
        "rot180": lambda img: np.rot90(img, k=2),
        "rot270": lambda img: np.rot90(img, k=3),
    }

    for method_name, aug_func in aug_methods.items():
        file_name = f"{prefix}_{method_name}.png"
        outputs = (
            (os.path.join(save_dir_image, file_name), aug_func(image)),
            (os.path.join(save_dir_mask, file_name), aug_func(mask)),
        )
        for out_path, augmented in outputs:
            # cv2.imwrite signals failure through its return value rather
            # than by raising, so the result has to be checked explicitly.
            if not cv2.imwrite(out_path, augmented):
                print(f"[WARN] Failed to write: {out_path}")

# Data processing and saving
def process_and_save(data_dic, data_mask, dic_dir, mask_dir, save_dic_dir, save_mask_dir, patch_size=512, overlap=256, min_mask_area=50):
    """
    Split each image/mask pair into patches and save the usable ones.

    For every ``(dic_file, mask_file)`` pair the image and the grayscale mask
    are loaded, the pair is skipped when it cannot be used, and otherwise it
    is cut into overlapping patches. Each patch whose mask is non-empty and
    large enough is written as ``<image stem>_patch<idx>.png`` to both output
    directories, followed by its augmented variants.

    A pair is skipped (with a message) when the image or mask cannot be read,
    the mask is entirely zero, the mask has no region of at least
    ``min_mask_area``, or the image and mask differ in height/width.

    :param data_dic: image file names, paired by position with ``data_mask``
    :param data_mask: mask file names
    :param dic_dir: directory containing the image files
    :param mask_dir: directory containing the mask files
    :param save_dic_dir: output directory for image patches
    :param save_mask_dir: output directory for mask patches
    :param patch_size: side length of the square patches, in pixels
    :param overlap: overlap between neighbouring patches, in pixels
    :param min_mask_area: minimum contour area for a mask or patch to be kept
    """
    for dic_file, mask_file in zip(data_dic, data_mask):
        dic_path = os.path.join(dic_dir, dic_file)
        mask_path = os.path.join(mask_dir, mask_file)

        # Load the image and its mask
        img = cv2.imread(dic_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread returns None instead of raising when a file cannot be
        # read; without this guard the patch split would fail on img.shape.
        if img is None:
            print(f"[INFO] Unreadable image, skipping: {dic_file}")
            continue

        # Mask validation: unreadable or all-zero masks carry no annotation
        if mask is None or np.all(mask == 0):
            print(f"[INFO] Empty or invalid mask detected, skipping: {mask_file}")
            continue

        # A mask that does not cover the image exactly cannot be cut into
        # patches that line up with the image patches.
        if img.shape[:2] != mask.shape[:2]:
            print(f"[INFO] Image/mask size mismatch, skipping: {dic_file} / {mask_file}")
            continue

        # Mask size filter on the whole mask
        if not is_mask_large_enough(mask, min_area=min_mask_area):
            print(f"[INFO] Mask too small, skipping: {mask_file}")
            continue

        # Split into patches
        patches_img, patches_mask = split_with_overlap(img, mask, patch_size, overlap)

        # The file name stem is the same for every patch of this image
        stem = os.path.splitext(dic_file)[0]

        for idx, (patch_img, patch_mask) in enumerate(zip(patches_img, patches_mask)):
            # Patch validation: the same emptiness/size checks, per patch
            if np.all(patch_mask == 0) or not is_mask_large_enough(patch_mask, min_area=min_mask_area):
                print(f"[INFO] Empty or small patch detected, skipping: {dic_file}_patch{idx}")
                continue

            prefix = f"{stem}_patch{idx}"

            # Save the original patch pair; cv2.imwrite reports failure via
            # its return value, so check it rather than lose files silently.
            for out_dir, patch in ((save_dic_dir, patch_img), (save_mask_dir, patch_mask)):
                out_path = os.path.join(out_dir, f"{prefix}.png")
                if not cv2.imwrite(out_path, patch):
                    print(f"[WARN] Failed to write: {out_path}")

            # Save the augmented variants
            augment_and_save(patch_img, patch_mask, save_dic_dir, save_mask_dir, prefix)

# Build the training set
process_and_save(
    data_dic=train_dic,
    data_mask=train_mask,
    dic_dir=dic_dir,
    mask_dir=mask_dir,
    save_dic_dir=train_dic_dir,
    save_mask_dir=train_mask_dir,
    min_mask_area=200,
)

# Build the validation set
process_and_save(
    data_dic=val_dic,
    data_mask=val_mask,
    dic_dir=dic_dir,
    mask_dir=mask_dir,
    save_dic_dir=val_dic_dir,
    save_mask_dir=val_mask_dir,
    min_mask_area=200,
)

print("Image splitting and augmentation complete!")


[INFO] Empty or small patch detected, skipping: DIC_MIP_19.png_patch0
[INFO] Empty or small patch detected, skipping: DIC_MIP_30.png_patch6
[INFO] Empty or small patch detected, skipping: DIC_MIP_30.png_patch11
[INFO] Empty or small patch detected, skipping: DIC_MIP_30.png_patch12
[INFO] Empty or small patch detected, skipping: DIC_MIP_33.png_patch38
[INFO] Empty or small patch detected, skipping: DIC_MIP_29.png_patch1
[INFO] Empty or small patch detected, skipping: DIC_MIP_36.png_patch0
[INFO] Empty or small patch detected, skipping: DIC_MIP_4.png_patch6
[INFO] Empty or small patch detected, skipping: DIC_MIP_4.png_patch13
[INFO] Empty or small patch detected, skipping: DIC_MIP_4.png_patch20
[INFO] Empty or small patch detected, skipping: DIC_MIP_4.png_patch27
[INFO] Empty or small patch detected, skipping: DIC_MIP_5.png_patch5
Image splitting and augmentation complete!
