In [None]:
from glob import glob
import os
from tqdm import tqdm  
import shutil
import pandas as pd
import openslide
import numpy as np
import cv2
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
import json
def create_directory(path):
    """Create the directory ``path``, including any missing parents.

    Calling this on a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate exists() check, which could race with
    # another process creating the same directory between the check and makedirs().
    os.makedirs(path, exist_ok=True)

In [None]:
# Collect the raw slides and annotation files, keep only the pairs that share a
# base name, and move them into a single "total_matched" directory tree.
# NOTE: glob() is called without recursive=True, so '**' behaves like a plain '*'
# and matches exactly one directory level.
tif_list=glob('../../data/beetle/images/**/wsis/*.tif')
json_list=glob('../../data/beetle/annotations/jsons/*.json')
save_path='../../data/beetle/total_matched/'
create_directory(save_path + 'images/')
create_directory(save_path + 'jsons/')
# Base names only (extension removed)
tif_names = {os.path.splitext(os.path.basename(f))[0] for f in tif_list}
json_names = {os.path.splitext(os.path.basename(f))[0] for f in json_list}

# Names present on both sides
common_names = tif_names & json_names

# Keep only the files whose base name is in the intersection
tif_list = [f for f in tif_list if os.path.splitext(os.path.basename(f))[0] in common_names]
json_list = [f for f in json_list if os.path.splitext(os.path.basename(f))[0] in common_names]

print(f"Total matched files: {len(common_names)}")
print(f"TIF files: {len(tif_list)}")
print(f"JSON files: {len(json_list)}")

# Move each list independently. The two lists are filtered separately, so their
# lengths differ when the same slide name appears in more than one wsis/ folder;
# indexing both with one counter would then raise IndexError part-way through.
for src_list, sub_dir in ((tif_list, 'images/'), (json_list, 'jsons/')):
    for src in tqdm(src_list):
        dst = save_path + sub_dir + os.path.basename(src)
        if os.path.exists(dst):
            # Never overwrite a file that was already moved under the same name.
            print(f"Skipping {src}: {dst} already exists")
            continue
        shutil.move(src, dst)

In [None]:
# Preview one matched slide: draw its annotation polygons on a 1/64 thumbnail.
tif_list=glob('../../data/beetle/total_matched/images/*.tif')
json_list=[f.replace('/images/', '/jsons/').replace('.tif', '.json') for f in tif_list]
class_list={'other':1,'non-invasive epithelium':2,'invasive epithelium':3,'necrosis':4}
origin_mpp=0.5 #20x
resize_mpp=2.0 #5x
patch_image_size=512
wsi_info=[]
shrinkage_rate = origin_mpp / resize_mpp
i=9  # index of the slide to preview (glob order is not sorted)
with open(json_list[i]) as f:
    json_data = json.load(f)
tif_path=tif_list[i]
# Open the slide in a context manager so the file handle is released as soon as
# the thumbnail has been read.
with openslide.OpenSlide(tif_path) as slide:
    width, height = slide.dimensions
    thumbslide = slide.get_thumbnail((width//64,height//64))
thumbslide_np = np.array(thumbslide)

# Annotation overlay
overlay = thumbslide_np.copy()
scale_factor = thumbslide_np.shape[1] / width  # thumbnail-to-original scale

# Outline colour per class (RGB)
colors = {
    'other': (255, 255, 0),  # yellow
    'non-invasive epithelium': (0, 255, 0),  # green
    'invasive epithelium': (255, 0, 0),  # red
    'necrosis': (128, 0, 128)  # purple
}

# Extract each annotation from the JSON and draw it
for feature in json_data:

    coords = feature['coordinates']
    # Scale the coordinates down to thumbnail size
    scaled_coords = [(int(x * scale_factor), int(y * scale_factor)) for x, y in coords]
    pts = np.array(scaled_coords, np.int32)
    pts = pts.reshape((-1, 1, 2))

    # Class name; annotations without a name fall back to 'other'
    class_name = feature['label'].get('name', 'other')
    color = colors.get(class_name, (255, 255, 255))

    # Draw the closed polygon outline
    cv2.polylines(overlay, [pts], True, color, 2)

# Show the result
plt.figure(figsize=(15, 15))
plt.imshow(overlay)
plt.title(f'WSI with Annotations: {os.path.basename(tif_path)}')
plt.axis('off')
plt.tight_layout()
plt.show()


In [None]:
# Cut every matched slide into overlapping tiles, rasterise the annotation
# polygons into a 3-channel mask, and save each tile that contains annotation
# together with its mask, both resized to patch_image_size.
patch_save_path='../../data/beetle/total_matched_patches/images/'
patch_mask_save_path='../../data/beetle/total_matched_patches/masks/'
create_directory(patch_save_path)
create_directory(patch_mask_save_path)
tif_list=glob('../../data/beetle/total_matched/images/*.tif')
json_list=[f.replace('/images/', '/jsons/').replace('.tif', '.json') for f in tif_list]
class_list={'non-invasive epithelium':1,'invasive epithelium':2,'necrosis':3}
# NOTE(review): origin_mpp is hard-coded for every slide - confirm it matches the
# resolution recorded in the slide files.
origin_mpp=0.5 #20x
resize_mpp=2.0 #5x
patch_image_size=512
wsi_info=[]
shrinkage_rate = origin_mpp / resize_mpp
overlap_size = 128

# Patch edge length measured in level-0 (original resolution) pixels
patch_size_original = int(patch_image_size / shrinkage_rate)
# Mask channel order follows the class ids in class_list (1, 2, 3)
channel_names = sorted(class_list, key=class_list.get)


def _tile_starts(length, tile, stride):
    """Return tile start offsets that cover ``[0, length)`` along one axis.

    Offsets advance by ``stride``; the last tile is shifted back so that it ends
    exactly at ``length``. When ``length <= tile`` a single tile at offset 0 is
    returned.
    """
    if length <= tile:
        return [0]
    starts = list(range(0, length - tile, stride))
    starts.append(length - tile)
    return starts


for wsi_idx in tqdm(range(len(tif_list))):
    slide = openslide.OpenSlide(tif_list[wsi_idx])
    try:
        width, height = slide.dimensions
        base_filename = os.path.splitext(os.path.basename(tif_list[wsi_idx]))[0]
        # One full-resolution binary mask per class. They are kept separate and
        # only stacked per patch, so no extra full-size 3-channel copy is needed.
        channel_masks = [np.zeros((height, width), dtype=np.uint8) for _ in channel_names]
        with open(json_list[wsi_idx]) as f:
            json_data = json.load(f)

        patch_count = 0

        for feature_idx, feature in enumerate(json_data):
            class_name = feature['label'].get('name', 'other')
            if class_name == 'other' or class_name not in class_list:
                continue

            coords = feature['coordinates']
            pts = np.array(coords, np.int32)
            cv2.fillPoly(channel_masks[channel_names.index(class_name)], [pts], 255)

        stride = patch_size_original - overlap_size
        for row in _tile_starts(height, patch_size_original, stride):
            for col in _tile_starts(width, patch_size_original, stride):
                # Clip to the slide bounds; this only matters for slides smaller
                # than one patch, whose single tile is then stretched on resize.
                end_row = min(row + patch_size_original, height)
                end_col = min(col + patch_size_original, width)
                start_row = max(end_row - patch_size_original, 0)
                start_col = max(end_col - patch_size_original, 0)
                mask_tiles = [m[start_row:end_row, start_col:end_col] for m in channel_masks]
                if not any(tile.any() for tile in mask_tiles):
                    continue  # no annotated region in this tile, so skip it
                # Read the tile from level 0 of the slide
                patch = slide.read_region((start_col, start_row), 0, (end_col - start_col, end_row - start_row)).convert("RGB")
                patch_np = np.array(patch)

                # Matching mask tile, shape (rows, cols, 3)
                patch_mask = np.stack(mask_tiles, axis=-1)

                # Resize; nearest-neighbour keeps the mask strictly binary
                patch_resized = cv2.resize(patch_np, (patch_image_size, patch_image_size), interpolation=cv2.INTER_LINEAR)
                patch_mask_resized = cv2.resize(patch_mask, (patch_image_size, patch_image_size), interpolation=cv2.INTER_NEAREST)

                # Save
                patch_filename = f"{base_filename}/patch_{patch_count:04d}.png"
                mask_filename = f"{base_filename}/patch_{patch_count:04d}_mask.png"
                if patch_count == 0:
                    # Create the per-slide folders once, and only when the slide
                    # actually yields a patch.
                    create_directory(os.path.join(patch_save_path, base_filename))
                    create_directory(os.path.join(patch_mask_save_path, base_filename))
                cv2.imwrite(os.path.join(patch_save_path, patch_filename), cv2.cvtColor(patch_resized, cv2.COLOR_RGB2BGR))
                cv2.imwrite(os.path.join(patch_mask_save_path, mask_filename), patch_mask_resized)

                patch_count += 1
    finally:
        # Release the slide handle before opening the next one.
        slide.close()


In [None]:
# Display the patch image output directory as this cell's value.
patch_save_path

In [None]:
# Count the saved patch PNGs. glob() is called without recursive=True, so '**'
# behaves like a single '*' and matches exactly one directory level under images/.
len(glob('../../data/beetle/total_matched_patches/images/**/*.png'))