In [None]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os 
from glob import glob
from tqdm import tqdm
import json
import cv2
import shutil


In [None]:
def polygon2mask(polygon, mask, value):
    """Fill one polygon annotation into a mask array.

    Args:
        polygon: Sequence of ``[x, y]`` vertices. ``None``, an empty sequence,
            and ``None`` placeholders inside the sequence are tolerated.
        mask: Array to draw on (the callers in this notebook pass a copy of a
            single 2-D ``uint8`` channel).
        value: Value handed to ``cv2.fillPoly`` as the fill colour.

    Returns:
        The array with the polygon filled in. ``mask`` itself is returned,
        untouched, when the polygon is missing, starts with a ``None`` vertex,
        has fewer than three usable vertices, or is malformed (in the last
        case a warning is printed).
    """
    # Test for a missing/empty polygon BEFORE indexing into it; indexing first
    # would raise IndexError on [] and TypeError on None.
    if polygon is None or len(polygon) == 0:
        return mask

    try:
        # The whole polygon is skipped when its first vertex is None, empty,
        # or has a None x-coordinate.
        first_vertex = polygon[0]
        if first_vertex is None or len(first_vertex) == 0 or first_vertex[0] is None:
            return mask

        # Drop vertices that are None, are not pairs, or contain a None value.
        filtered_polygon = [
            coord
            for coord in polygon
            if coord is not None
            and len(coord) == 2
            and coord[0] is not None
            and coord[1] is not None
        ]

        # At least three vertices are required to form a polygon.
        if len(filtered_polygon) < 3:
            return mask

        # Vertices are reshaped to (N, 1, 2) int32 before being given to
        # cv2.fillPoly; non-integer coordinates are truncated by astype.
        polygon_array = np.array(filtered_polygon).reshape((-1, 1, 2)).astype(np.int32)
        # NOTE(review): np.ascontiguousarray appears to copy only when needed,
        # so an already-contiguous `mask` is likely drawn on in place.
        mask_copy = np.ascontiguousarray(mask)
        cv2.fillPoly(mask_copy, [polygon_array], value)
        return mask_copy

    except (ValueError, TypeError) as e:
        print(f"Warning: Invalid polygon data - {e}")
        return mask

def create_directory(path):
    """Create the directory ``path`` (and any missing parents) if needed.

    Calling this for a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first and creating afterwards.
    os.makedirs(path, exist_ok=True)

In [None]:
# Stomach: turn the polygon annotations stored in the per-image JSON files
# into (height, width, class_count) uint8 mask arrays, one channel per class.

# Channel index assigned to each annotation label.
integrated_class={
    "Background":0,
    "NT_stroma":1,
    "NT_Muscle":2,
    "NT_immune":3,
    "NT_epithelial":4,
    "NT_gastritis":5,
    "NT_intestinal_metaplasia":6,
    "Tumor":7,
    "Tumor_diffuse":8,
    "Tumor_intestinal":9
}
# Overlap priority, highest first. Where polygons of several classes cover the
# same pixel, only the class that appears first in this list is kept.
class_priority=[
    "Tumor_intestinal",
    "Tumor_diffuse",
    "Tumor",
    "NT_gastritis",
    "NT_intestinal_metaplasia",
    "NT_epithelial",
    "NT_immune",
    "NT_Muscle",
    "NT_stroma",
]
image_list=glob('../../data/NIPA/ST*/*.jpeg')
# The annotation path is the image path with only its extension swapped;
# splitext leaves any '.jpeg' occurring elsewhere in the path alone.
json_list=[os.path.splitext(f)[0]+'.json' for f in image_list]
class_count=len(integrated_class)
image_path='../../data/NIPA/stomach/images/'
mask_path='../../data/NIPA/stomach/masks/'
create_directory(image_path)
create_directory(mask_path)
for i in tqdm(range(len(image_list))):
    # Only the image size is needed, so the file is closed again immediately.
    with Image.open(image_list[i]) as image:
        width, height=image.size
    with open(json_list[i]) as annotation_file:
        json_file=json.load(annotation_file)
    mask=np.zeros((height,width,class_count),dtype=np.uint8)
    for annotation in json_file['content']['file']['objects']:
        label_name=annotation['label_nm']
        # Cell nucleus annotations are not part of the mask.
        if label_name=="Cell_nucleus":
            continue
        # A label missing from integrated_class raises KeyError here.
        channel_index=integrated_class[label_name]
        polygon=annotation['coordinate']
        temp_mask = mask[:,:,channel_index].copy()
        mask[:,:,channel_index]=polygon2mask(polygon,temp_mask,1)
    # Resolve overlaps for the whole image at once: walk the classes from
    # highest to lowest priority and let each class keep only the pixels that
    # no higher-priority class has claimed yet.
    claimed=np.zeros((height,width),dtype=bool)
    for class_name in class_priority:
        channel_index=integrated_class[class_name]
        layer=mask[:,:,channel_index].astype(bool)
        mask[:,:,channel_index]=layer&~claimed
        claimed|=layer
    # Background is every pixel not covered by any annotated class.
    mask[:,:,0]=~claimed
    base_name=os.path.basename(image_list[i])
    shutil.copy(image_list[i],os.path.join(image_path,base_name))
    np.save(os.path.join(mask_path,os.path.splitext(base_name)[0]+'.npy'),mask)


In [None]:
# Breast: turn the polygon annotations stored in the per-image JSON files
# into (height, width, class_count) uint8 mask arrays, one channel per class.

# Channel index assigned to each annotation label.
integrated_class={
    "Background":0,
    "NT_stroma": 1,
    "NT_epithelial":2,
    "NT_immune": 3,
    "Tumor": 4,
    "TP_invasive": 5,
    "TP_in_situ": 6,
    
}
# Overlap priority, highest first. Where polygons of several classes cover the
# same pixel, only the class that appears first in this list is kept.
class_priority=[
    "TP_invasive",
    "TP_in_situ",
    "Tumor",
    "NT_epithelial",
    "NT_immune",
    "NT_stroma",
]
image_list=glob('../../data/NIPA/BR*/*.jpeg')
# The annotation path is the image path with only its extension swapped;
# splitext leaves any '.jpeg' occurring elsewhere in the path alone.
json_list=[os.path.splitext(f)[0]+'.json' for f in image_list]
class_count=len(integrated_class)
image_path='../../data/NIPA/breast/images/'
mask_path='../../data/NIPA/breast/masks/'
create_directory(image_path)
create_directory(mask_path)
for i in tqdm(range(len(image_list))):
    # Only the image size is needed, so the file is closed again immediately.
    with Image.open(image_list[i]) as image:
        width, height=image.size
    with open(json_list[i]) as annotation_file:
        json_file=json.load(annotation_file)
    mask=np.zeros((height,width,class_count),dtype=np.uint8)
    for annotation in json_file['content']['file']['objects']:
        label_name=annotation['label_nm']
        # Cell nucleus annotations are not part of the mask.
        if label_name=="Cell_nucleus":
            continue
        # "Stroma" is stored in the same channel as "NT_stroma".
        if label_name=="Stroma":
            label_name="NT_stroma"
        # A label missing from integrated_class raises KeyError here.
        channel_index=integrated_class[label_name]
        polygon=annotation['coordinate']
        temp_mask = mask[:,:,channel_index].copy()
        mask[:,:,channel_index]=polygon2mask(polygon,temp_mask,1)
    # Resolve overlaps for the whole image at once: walk the classes from
    # highest to lowest priority and let each class keep only the pixels that
    # no higher-priority class has claimed yet.
    claimed=np.zeros((height,width),dtype=bool)
    for class_name in class_priority:
        channel_index=integrated_class[class_name]
        layer=mask[:,:,channel_index].astype(bool)
        mask[:,:,channel_index]=layer&~claimed
        claimed|=layer
    # Background is every pixel not covered by any annotated class.
    mask[:,:,0]=~claimed
    base_name=os.path.basename(image_list[i])
    shutil.copy(image_list[i],os.path.join(image_path,base_name))
    np.save(os.path.join(mask_path,os.path.splitext(base_name)[0]+'.npy'),mask)