In [1]:
import os
import re
import shutil
import cv2
import numpy as np
from PIL import Image


def sorter(text):
    num = re.findall(r'd\+', text)
    return int(num[0]) if num else 0


data_path = '../../Data/ISBI/'
source_img = 'train-volume.tif'
source_lbl = 'train-labels.tif'
source_test_img = 'test-volume.tif'

make_path_list = ['train/image', 'train/label', 'test']

for i in make_path_list:
    path = data_path + i
    # 만약 경로에 파일 존재 시, 파일 새로 생성
    if os.path.exists(path):
        print(f'{path} is already existed')
    else:
       print(f'{path} is created.')
         
       os.makedirs(path)

print()

tif_img = Image.open(os.path.join(data_path, source_img))
tif_lbl = Image.open(os.path.join(data_path, source_lbl))
tif_test_img = Image.open(os.path.join(data_path, source_test_img))


print(f'[Training data] # of image : {tif_img.n_frames} || # of label : {tif_lbl.n_frames}')
print(f'[Training data] (Hight, Width, "channel") || Image : {tif_img.size} || Label : {tif_lbl.size}')
print()
print(f'[Test data] # of image : {tif_test_img.n_frames} || # of label : {tif_test_img.n_frames}')
print(f'[Test data] (Hight, Width, "channel") || Image : {tif_test_img.size}')

../../Data/ISBI/train/image is already existed
../../Data/ISBI/train/label is already existed
../../Data/ISBI/test is already existed

[Training data] # of image : 30 || # of label : 30
[Training data] (Hight, Width, "channel") || Image : (512, 512) || Label : (512, 512)

[Test data] # of image : 30 || # of label : 30
[Test data] (Hight, Width, "channel") || Image : (512, 512)


In [2]:
# frame에서 랜덤하게 idx를 추출하기 위한 index 값 생성
random_idx = np.arange(tif_img.n_frames)
np.random.shuffle(random_idx)

# Validation data 저장할 path 생성
val_paths = ['valid/image', 'valid/label']

for path in val_paths:
    if os.path.exists(os.path.join(data_path, path)):
        pass
    else:
        os.makedirs(os.path.join(data_path, path))
        print(f'{path} is created.')


# train과 valid의 비율이 8:2가 되도록 설정
crit = int(0.8*tif_img.n_frames)
cnt = 1

for i in random_idx:
    tif_img.seek(i)
    tif_lbl.seek(i)
    tif_test_img.seek(i)

    img = tif_img.copy()
    lbl = tif_lbl.copy()
    test_img = tif_test_img.copy()
    
    if cnt <= crit:
        img_save_path = data_path + '/train/image'
        lbl_save_path = data_path + '/train/label'
    else:
        img_save_path = data_path + '/valid/image'
        lbl_save_path = data_path + '/valid/label'

    img.save(img_save_path+f'/image_{cnt-1}.jpg')    
    lbl.save(lbl_save_path+f'/image_{cnt-1}.png')
    test_img.save(data_path+f'/test/image_{cnt-1}.jpg')

    cnt+=1


In [3]:
lbl_path = '../../Data/ISBI/train/label/'


lbl_list = sorted(os.listdir(lbl_path), key=sorter)

total_class = {}

for lbl_name in lbl_list:
    lbl_file_path = os.path.join(lbl_path, lbl_name)
    lbl = cv2.imread(lbl_file_path, cv2.IMREAD_GRAYSCALE)

    # mask 데이터에 존재하는 class 확인
    uniq = np.unique(lbl)

    # mask에 존재하는 class의 픽셀 갯수 카운팅
    for cls in uniq:
        num = np.count_nonzero(lbl == cls)
        # 만약 처음보는 class의 경우 딕셔너리에 추가한 뒤 갯수 기록
        if not num in total_class:
            total_class[cls] = num
        # 이미 있는 class의 경우 값을 업데이트
        else:
            total_class[cls] += num

total_class

{None: 1, 0: 63444, 255: 198700}

In [4]:
def img2patch(image, patch_size):
    """
    Splits an image (either grayscale or RGB) into non-overlapping patches.

    Parameters:
    - image: The input image (grayscale or RGB) to be split.
    - patch_size: Size of the patches to be created.

    Returns:
    - An array of image patches. Shape(patches, Height, Width, (Channel))
    """

    # RGB 이미지인지 마스크 이미지인지 확인
    if len(image.shape) == 2:  # Mask image
        num_patches_axis = image.shape[0] // patch_size
    elif len(image.shape) == 3:  # RGB image
        num_patches_axis = image.shape[0] // patch_size
    else:
        raise ValueError("Invalid image shape. Image should be either 2D (grayscale) or 3D (RGB).")

    # Prepare an array to hold the patches
    patches = []
    
    for i in range(num_patches_axis):
        for j in range(num_patches_axis):
            # 좌 상단부터 우측으로 이동하며 patch를 분할
            start_i = i * patch_size
            end_i = start_i + patch_size
            start_j = j * patch_size
            end_j = start_j + patch_size
            
            # patch 픽셀들 추출
            if len(image.shape) == 2:  # Grayscale image
                patch = image[start_i:end_i, start_j:end_j]
            else:  # RGB image
                patch = image[start_i:end_i, start_j:end_j, :]
            
            # Add the patch to the list
            patches.append(patch)
    
    # numpy 형태로 변환하여 저장
    patches_array = np.stack(patches)
    
    return patches_array

In [5]:
train_img_path = '../../Data/ISBI/train/image'
train_lbl_path = '../../Data/ISBI/train/label'

valid_img_path = '../../Data/ISBI/valid/image/'
valid_lbl_path = '../../Data/ISBI/valid/label/'


for i in range(2):
    s=0
    idx = 0
    if i == 0:
        img_path = train_img_path
        lbl_path = train_lbl_path
    else:
        img_path = valid_img_path
        lbl_path = valid_lbl_path

    new_img_save_path = img_path+'/aug'
    new_lbl_save_path = lbl_path+'/aug'
    paths = [new_img_save_path, new_lbl_save_path]

    for path in paths:
        if os.path.exists(path):
            shutil.rmtree(path)
        
    img_list = sorted(os.listdir(img_path), key=sorter)
    lbl_list = sorted(os.listdir(lbl_path), key=sorter)

    if s == 0:
        for path in paths:
            if not os.path.exists(path):
                os.makedirs(path)
        s+=1

    for j in range(len(img_list)):
        img = cv2.imread(os.path.join(img_path, img_list[j]), cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        lbl = cv2.imread(os.path.join(lbl_path, lbl_list[j]), cv2.IMREAD_GRAYSCALE)

        img_patches = img2patch(img, 256)
        lbl_patches = img2patch(lbl, 256)

        for k in range(len(img_patches)):
            # input_tile = apply_mirroring(img_patches[k], 32)
            input_tile = img_patches[k]
            new_mask = lbl_patches[k]

            cv2.imwrite(new_img_save_path + f'/image_{idx}.jpg', input_tile)
            cv2.imwrite(new_lbl_save_path + f'/image_{idx}.png', new_mask)
            idx += 1         
