In [None]:
import openslide as ops
from glob import glob
import os
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import cv2
import anndata as ad
import pandas as pd
import h5py
import json
from tqdm import tqdm
def create_dir(path):
    """Create directory ``path`` (including missing parents) if it is absent.

    Calling this on a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True lets makedirs itself tolerate an existing directory, so
    # there is no gap between an "exists?" check and the creation in which
    # another process could create the directory and make this call fail.
    os.makedirs(path, exist_ok=True)

In [None]:
# Integer class id -> cell-type label. The id of a label is its position in
# the sequence below.
class_list = dict(enumerate((
    "Tumor epithelial",
    "Non-tumor epithelial",
    "Basal/Myoepithelial",
    "Smooth muscle",
    "Fibroblast",
    "Endothelial",
    "T cell",
    "B cell",
    "Plasma cell",
    "Myeloid",
    "Adipocyte",
    "Other/Unknown",
)))
# Reverse lookup: cell-type label -> integer class id.
class_list_inv = dict(zip(class_list.values(), class_list.keys()))
# Cell-type label -> list of marker gene symbols. The symbols are stored as
# bytes objects; 'Other/Unknown' has no markers.
marker_genes = dict((
    ('Tumor epithelial',
     [b'EPCAM', b'KRT14', b'KRT5', b'KRT23', b'ERBB2', b'MKI67', b'GATA3']),
    ('Non-tumor epithelial',
     [b'EPCAM', b'KRT8', b'KRT18', b'KRT19', b'CDH1']),
    ('Basal/Myoepithelial',
     [b'KRT5', b'KRT14', b'ACTA2', b'MYL9']),
    ('Smooth muscle',
     [b'ACTA2', b'MYH11', b'TAGLN', b'MYLK', b'MYL9']),
    ('Fibroblast',
     [b'PDGFRA', b'PDGFRB', b'DPT', b'LUM', b'SFRP1', b'FBLN1', b'SFRP4']),
    ('Endothelial',
     [b'PECAM1', b'KDR', b'CD93', b'EGFL7', b'VWF', b'CLEC14A']),
    ('T cell',
     [b'CD3D', b'CD3E', b'CD3G', b'NKG7', b'GZMA', b'CCL5', b'TRAC', b'TCF7', b'LTB', b'IL2RG']),
    ('B cell',
     [b'CD79A', b'CD79B', b'MS4A1', b'MZB1', b'CD19', b'PAX5']),
    ('Plasma cell',
     [b'MZB1', b'PRDM1', b'TNFRSF17']),
    ('Myeloid',
     [b'CD68', b'CD14', b'CD163', b'MRC1', b'C1QA', b'AIF1',
      b'S100A8', b'CD86', b'ITGAX', b'TPSAB1', b'CPA3', b'KIT']),
    ('Adipocyte',
     [b'ADIPOQ', b'LPL', b'PPARG']),
    ('Other/Unknown',
     []),
))
# Display colour of each cell-type label as a "#RRGGBB" string.
class_colors_hex = {
    "Tumor epithelial": "#FF0000",      # red - tumor
    "Non-tumor epithelial": "#FFB6C1",  # light pink - normal epithelium
    "Basal/Myoepithelial": "#FFA500",   # orange - basal/myoepithelial
    "Smooth muscle": "#8B4513",         # brown - smooth muscle
    "Fibroblast": "#00FF00",            # green - fibroblast
    "Endothelial": "#0000FF",           # blue - vascular endothelium
    "T cell": "#FFFF00",                # yellow - T cell
    "B cell": "#FF00FF",                # magenta - B cell
    "Plasma cell": "#9400D3",           # purple - plasma cell
    "Myeloid": "#00FFFF",               # cyan - myeloid
    "Adipocyte": "#FFC0CB",             # pink - adipocyte
    "Other/Unknown": "#808080",         # gray - other/unclassified
}
# The same colours as [R, G, B] integer lists, obtained by parsing the three
# two-digit hex pairs that follow the leading '#'.
class_colors = {
    label: [int(hex_code[pos:pos + 2], 16) for pos in (1, 3, 5)]
    for label, hex_code in class_colors_hex.items()
}


In [None]:
def companion_path(wsi_path, subdir, ext):
    """Return the path of the file that accompanies a slide image.

    The companion lives in the directory ``subdir`` next to the directory that
    holds ``wsi_path`` and has the same base name with extension ``ext``.
    Only the last directory component and the final extension are swapped, so
    a '.tif' or '/wsis' occurring elsewhere in the path is left alone.

    Args:
        wsi_path: Path of the slide image, e.g. ``root/wsis/slide.tif``.
        subdir: Name of the sibling directory, e.g. ``'labels'``.
        ext: Extension of the companion file including the dot, e.g. ``'.csv'``.

    Returns:
        The companion path, e.g. ``root/labels/slide.csv``.
    """
    wsi_dir, filename = os.path.split(wsi_path)
    stem = os.path.splitext(filename)[0]
    return os.path.join(os.path.dirname(wsi_dir), subdir, stem + ext)


# glob() yields files in an arbitrary, filesystem-dependent order; sorting
# makes the slide processing order reproducible between runs.
wsi_list=sorted(glob('../../data/Brest_spatialTranscriptome/preprocessed_xenium/wsis/*.tif'))
# Per-slide companion files, index-aligned with wsi_list.
annotation_list=[companion_path(f, 'labels', '.csv') for f in wsi_list]
metadata_list=[companion_path(f, 'metadata', '.json') for f in wsi_list]
coord_list=[companion_path(f, 'patches', '.h5') for f in wsi_list]


In [None]:

# Build the patch-level training data. For every slide this writes, per tile:
#   image/         the tile itself, resized to patch_image_size
#   tissue_image/  a window 4x the tile edge around the tile, resized likewise
#   annotation/    CSV of cell boxes (x, y = box centre, w, h = box size, all
#                  as fractions of the tile edge) plus the integer class id
# A tile is written only if it contains at least one annotated cell.

# Edge length in pixels of every PNG written below.
patch_image_size=512
# Tile edge in level-0 pixels for a '40x' slide; '20x' slides use half of it.
origin_patch_image_size=1024
# metadata['magnification'] -> divisor applied to origin_patch_image_size.
magnification_divisors={'20x': 2, '40x': 1}
patch_data_root='../../data/Brest_spatialTranscriptome/preprocessed_xenium/patch_train_data'

for wsi_path, annotation_file, metadata_file, coord_file in zip(
        wsi_list, annotation_list, metadata_list, coord_list):
    with open(metadata_file, 'r') as f:
        metadata = json.load(f)
    magnification = magnification_divisors.get(metadata.get('magnification'))
    if magnification is None:
        # Skip the slide: without a known divisor the tile size is undefined
        # (or would silently be inherited from the previous slide).
        print("Unsupported magnification", metadata.get('magnification'))
        continue

    with h5py.File(coord_file, "r") as f:
        coords = f["coords"][:]  # indexed below as [:, 0] = x and [:, 1] = y
    annotation_df = pd.read_csv(annotation_file)

    tile_size = origin_patch_image_size // magnification
    context_size = tile_size * 4

    slide_name = os.path.basename(wsi_path).replace(".tif", "")
    save_image_dir = f'{patch_data_root}/{slide_name}/image/'
    save_annotation_dir = f'{patch_data_root}/{slide_name}/annotation/'
    save_tissue_dir = f'{patch_data_root}/{slide_name}/tissue_image/'

    slide = ops.OpenSlide(wsi_path)
    try:
        width, height = slide.dimensions
        n_rows = height // tile_size
        n_cols = width // tile_size

        # NOTE(review): as in the original code, a tile is kept when SOME
        # coord lies in its row band and SOME coord lies in its column band;
        # it is not checked that a coord lies inside the tile itself. Confirm
        # whether the intersection was intended.
        col_has_coords = [
            bool(np.any((coords[:, 0] >= c * tile_size) & (coords[:, 0] < (c + 1) * tile_size)))
            for c in range(n_cols)
        ]

        for row in tqdm(range(n_rows)):
            y0 = row * tile_size
            if not np.any((coords[:, 1] >= y0) & (coords[:, 1] < y0 + tile_size)):
                continue
            # Row-band filter done once per row instead of once per tile.
            # Lower bound is inclusive so a cell whose y1 sits exactly on a
            # tile edge is assigned to exactly one tile instead of none.
            row_cells = annotation_df.loc[
                (annotation_df['y1'] >= y0) & (annotation_df['y1'] < y0 + tile_size)
            ]
            if row_cells.empty:
                continue

            for col in range(n_cols):
                if not col_has_coords[col]:
                    continue
                x0 = col * tile_size
                filter_df = row_cells.loc[
                    (row_cells['x1'] >= x0) & (row_cells['x1'] < x0 + tile_size)
                ]
                if filter_df.empty:
                    continue

                patch = slide.read_region(
                    (x0, y0), 0, (tile_size, tile_size)
                ).convert("RGB")

                # Context window: starts 1.5 tiles before the tile so the tile
                # is centred, then shifted back inside the slide. The max(0,
                # ...) also covers slides smaller than the window, where the
                # upper clamp alone would produce a negative origin.
                tissue_patch_x = max(0, min(x0 - tile_size // 2 - tile_size, width - context_size))
                tissue_patch_y = max(0, min(y0 - tile_size // 2 - tile_size, height - context_size))
                tissue_patch = slide.read_region(
                    (tissue_patch_x, tissue_patch_y), 0, (context_size, context_size)
                ).convert("RGB")

                patch = patch.resize((patch_image_size, patch_image_size))
                tissue_patch = tissue_patch.resize((patch_image_size, patch_image_size))

                # Build the annotation table in one step instead of appending
                # one row at a time.
                x1 = filter_df['x1'].to_numpy()
                x2 = filter_df['x2'].to_numpy()
                y1 = filter_df['y1'].to_numpy()
                y2 = filter_df['y2'].to_numpy()
                pre_df = pd.DataFrame({
                    # Box centre relative to the tile origin, capped at 1.
                    'x': np.minimum(((x1 + x2) // 2 - x0) / tile_size, 1.0),
                    'y': np.minimum(((y1 + y2) // 2 - y0) / tile_size, 1.0),
                    # Box size as a fraction of the tile edge. Equal to the
                    # former "/(2/magnification)/patch_image_size" for the
                    # 1024/512 constants, but stays correct if they change.
                    'w': np.trunc(x2 - x1) / tile_size,
                    'h': np.trunc(y2 - y1) / tile_size,
                    # Plain lookup so an unknown class name raises KeyError.
                    'class': [class_list_inv[name] for name in filter_df['class_name']],
                }, columns=['x', 'y', 'w', 'h', 'class'])

                # Directories are created lazily so slides without any
                # written tile leave no empty folders behind.
                create_dir(save_image_dir)
                create_dir(save_annotation_dir)
                create_dir(save_tissue_dir)
                # File names carry the tile origin as <y>_<x> in level-0 pixels.
                patch.save(f'{save_image_dir}/patch_{y0}_{x0}.png')
                tissue_patch.save(f'{save_tissue_dir}/patch_{y0}_{x0}.png')
                pre_df.to_csv(f'{save_annotation_dir}/patch_{y0}_{x0}.csv', index=False)
    finally:
        # Release the slide handle even if a tile fails.
        slide.close()