In [3]:
import openslide as ops
from glob import glob
import os
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import cv2
import anndata as ad
import pandas as pd
import h5py
import json
from tqdm import tqdm
def create_dir(path):
    """Create the directory ``path`` (including missing parents) if needed.

    Parameters
    ----------
    path : str or os.PathLike
        Directory to create.

    Returns
    -------
    None

    Notes
    -----
    ``exist_ok=True`` makes the call idempotent and removes the window between
    an explicit existence check and the creation call, during which another
    process could create the directory and cause ``FileExistsError``.
    """
    os.makedirs(path, exist_ok=True)

In [4]:
# Integer class id -> cell-type name. The id is the position in this sequence.
class_list = dict(enumerate([
    "epithelial",
    "Basal/Myoepithelial",
    "Smooth muscle",
    "Fibroblast",
    "Endothelial",
    "Lymphocyte",              # T and B lymphocytes merged
    "Plasma cell",
    "Macrophage/Histiocyte",   # merged
    "Neutrophil",
    "Adipocyte",
    "Other/Unknown",
]))
# Cell-type name -> list of marker gene symbols for that type.
# Keys match the names used in `class_list`; "Other/Unknown" has no markers.
marker_genes = {
    "epithelial": [
        "EPCAM", "KRT8", "KRT18", "KRT19",
        "ERBB2", "MKI67", "GATA3", "CDH1",
        "CLDN4",
    ],

    # The first three entries are identical to the "Smooth muscle" list below.
    "Basal/Myoepithelial": [
        "MYH11", "ACTA2", "MYLK",
        "TAGLN", "CNN1", "MYL9",
    ],

    "Smooth muscle": [
        "MYH11", "ACTA2", "MYLK",
    ],

    # NOTE(review): "PDGFR" looks like a truncated symbol (possibly PDGFRB) —
    # confirm against the gene panel before relying on it.
    "Fibroblast": [
        "PDGFRA", "PDGFR", "DPT", "LUM",
        "SFRP1", "FBLN1", "SFRP4", "POSTN",
        "COL1A1", "COL1A2", "COL3A1", "DCN", "THY1",
    ],

    "Endothelial": [
        "PECAM1", "KDR", "CD93", "EGFL7",
        "VWF", "CLEC14A", "MMRN2", "ESM1",
        "CD34", "CDH5",
    ],

    # T and B lymphocytes merged.
    # NOTE(review): "LT" and "CD79" look like truncated symbols (possibly LTB
    # and CD79B) — confirm against the gene panel.
    "Lymphocyte": [
        "CD3E", "CD3G",
        "GZMA", "GZMK", "NKG7", "CCL5",
        "TRAC", "TCF7", "LT", "IL2RG",
        "CD4", "CD8A",
        "CD79A", "CD79", "MS4A1", "CD19",
        "CD69", "CXCR4", "CCR7", "SELL",
    ],

    "Plasma cell": [
        "MZB1", "PRDM1", "TNFRSF17", "SLAMF7",
        "XBP1", "SDC1", "JCHAIN", "IRF4",
    ],

    "Macrophage/Histiocyte": [
        "CD68", "CD163", "MRC1", "C1QA",
        "AIF1", "CD14", "FCGR3A", "CX3CR1",
        "LST1", "CSF1R", "TYROBP",
    ],

    "Neutrophil": [
        "S100A8", "S100A9", "LYZ",
        "CEACAM8", "MPO", "ELANE",
    ],

    "Adipocyte": [
        "ADIPOQ", "LPL", "PPARG",
        "FABP4", "PLIN1", "CEBPA", "LEP",
    ],

    "Other/Unknown": [],
}

# Cell-type name -> display colour as a "#RRGGBB" hex string.
# Each entry encodes the same colour as the matching entry in `class_colors`.
class_colors_hex = {
    # red
    "epithelial": "#FF0000",
    # orange
    "Basal/Myoepithelial": "#FFA500",
    # brown
    "Smooth muscle": "#8B4513",
    # green
    "Fibroblast": "#00FF00",
    # blue
    "Endothelial": "#0000FF",
    # yellow (T and B lymphocytes merged)
    "Lymphocyte": "#FFFF00",
    # violet
    "Plasma cell": "#9400D3",
    # cyan
    "Macrophage/Histiocyte": "#00FFFF",
    # dodger blue (light blue)
    "Neutrophil": "#1E90FF",
    # pink
    "Adipocyte": "#FFC0CB",
    # grey
    "Other/Unknown": "#808080",
}
# Cell-type name -> display colour as an [R, G, B] list of 0-255 integers.
# Each entry encodes the same colour as the matching entry in `class_colors_hex`.
class_colors = {
    # red - tumour epithelium
    "epithelial": [255, 0, 0],
    # orange
    "Basal/Myoepithelial": [255, 165, 0],
    # brown
    "Smooth muscle": [139, 69, 19],
    # green
    "Fibroblast": [0, 255, 0],
    # blue
    "Endothelial": [0, 0, 255],
    # yellow (T and B lymphocytes merged)
    "Lymphocyte": [255, 255, 0],
    # violet
    "Plasma cell": [148, 0, 211],
    # cyan
    "Macrophage/Histiocyte": [0, 255, 255],
    # dodger blue (light blue)
    "Neutrophil": [30, 144, 255],
    # pink
    "Adipocyte": [255, 192, 203],
    # grey
    "Other/Unknown": [128, 128, 128],
}



# Reverse lookup: cell-type name -> integer class id.
class_list_inv = {type_name: class_id for class_id, type_name in class_list.items()}


In [None]:
# Collect every slide and derive the parallel per-slide file paths.
# `glob` returns matches in arbitrary (filesystem-dependent) order, so the list
# is sorted to make the slide order - and therefore `metadata_list[0]` and the
# processing order below - reproducible across machines and runs.
wsi_list=sorted(glob('../../data/spatialTranscriptome/preprocessed_xenium/wsis/TENX*.tif'))
# The sibling files share the slide's base name and live in directories that
# replace the '/wsis' path component; index i refers to the same slide in all lists.
annotation_list=[f.replace('/wsis','/labels').replace('.tif','.csv') for f in wsi_list]
metadata_list=[f.replace('/wsis','/metadata').replace('.tif','.json') for f in wsi_list]
coord_list=[f.replace('/wsis','/patches').replace('.tif','.h5') for f in wsi_list]

In [None]:

# Peek at the first slide's metadata and display its estimated pixel size.
with open(metadata_list[0], 'r') as f:
    metadata = json.load(f)
metadata['pixel_size_um_estimated']

In [None]:

# ---------------------------------------------------------------------------
# Patch extraction: for every slide, tile level 0 into a regular grid and, for
# each tile that lies inside the trimmed coordinate extent and overlaps enough
# annotated boxes, save
#   * the tile resized to patch_image_size (high-magnification image),
#   * a patch_image_size crop of the low-magnification thumbnail around it,
#   * a CSV of the overlapping boxes in patch-relative coordinates.
# ---------------------------------------------------------------------------
x20_mpp=0.4250           # microns per pixel of the saved high-magnification patch
x5_mpp=1.7000            # microns per pixel of the low-magnification thumbnail
patch_image_size=512     # side length (pixels) of both saved images
edge_margin_px=1000      # margin trimmed from each side of the `coords` extent
min_cells_per_patch=10   # tiles overlapping fewer annotation boxes are skipped
patch_output_root='../../data/spatialTranscriptome/preprocessed_xenium/patch_train_data'

for i in range(len(wsi_list)):
    slide_name=os.path.basename(wsi_list[i]).replace(".tif","")
    save_image_dir=f'{patch_output_root}/{slide_name}/image/'
    save_annotation_dir=f'{patch_output_root}/{slide_name}/annotation/'
    save_tissue_dir=f'{patch_output_root}/{slide_name}/tissue_image/'

    metadata_file=metadata_list[i]
    annotation_file=annotation_list[i]
    coord_file=coord_list[i]
    with h5py.File(coord_file, "r") as f:
        coords = f["coords"][:]
    with open(metadata_file, 'r') as f:
        metadata = json.load(f)
    annotation_df=pd.read_csv(annotation_file)

    # Columns 0 and 1 of `coords` are used as x and y; they are compared
    # directly with level-0 tile positions below.
    x_min=coords[:,0].min()+edge_margin_px
    y_min=coords[:,1].min()+edge_margin_px
    x_max=coords[:,0].max()-edge_margin_px
    y_max=coords[:,1].max()-edge_margin_px

    # Only a missing key triggers the fallback; any other error propagates.
    try:
        slide_mpp=metadata['pixel_size']
    except KeyError:
        slide_mpp=metadata['pixel_size_um_embedded']

    # Number of level-0 pixels per output pixel at each target resolution.
    x5_magnification=x5_mpp/slide_mpp
    x20_magnification=x20_mpp/slide_mpp
    # Side length, in level-0 pixels, of one tile.
    x20_patch_image_size=int(patch_image_size*x20_magnification)

    # The context manager closes the slide handle once this slide is done.
    with ops.OpenSlide(wsi_list[i]) as slide:
        width, height = slide.dimensions
        tissue_slide=np.array(slide.get_thumbnail((int(width//x5_magnification), int(height//x5_magnification))))
        thumb_height, thumb_width = tissue_slide.shape[:2]
        # Largest crop origin that still yields a full patch_image_size crop
        # (0 when the thumbnail itself is smaller than one crop).
        max_thumb_x=max(0, thumb_width-patch_image_size)
        max_thumb_y=max(0, thumb_height-patch_image_size)

        dirs_ready=False
        for row in tqdm(range(height//x20_patch_image_size), desc=slide_name):
            top=row*x20_patch_image_size
            bottom=top+x20_patch_image_size
            if top<y_min or bottom>y_max:
                continue

            # Boxes overlapping this row of tiles; computed once per row. A
            # tile's boxes are a subset, so a sparse row can be skipped whole.
            row_df=annotation_df.loc[(annotation_df['y2']>top) & (annotation_df['y1']<bottom)]
            if row_df.shape[0]<min_cells_per_patch:
                continue

            for col in range(width//x20_patch_image_size):
                left=col*x20_patch_image_size
                right=left+x20_patch_image_size
                if left<x_min or right>x_max:
                    continue

                filter_df=row_df.loc[(row_df['x2']>left) & (row_df['x1']<right)]
                if filter_df.shape[0]<min_cells_per_patch:
                    continue

                patch=slide.read_region(
                    (left, top),
                    0,
                    (x20_patch_image_size, x20_patch_image_size)
                ).convert("RGB")
                patch=patch.resize((patch_image_size,patch_image_size))

                # Context crop: starts 1.5 tile sides before the tile origin,
                # converted to thumbnail pixels. The origin is clamped in
                # thumbnail coordinates so the crop stays inside the thumbnail
                # (the previous clamp compared level-0 and output pixels).
                thumb_x=int((left-x20_patch_image_size//2-x20_patch_image_size)//x5_magnification)
                thumb_y=int((top-x20_patch_image_size//2-x20_patch_image_size)//x5_magnification)
                thumb_x=max(0, min(thumb_x, max_thumb_x))
                thumb_y=max(0, min(thumb_y, max_thumb_y))
                tissue_patch=tissue_slide[thumb_y:thumb_y+patch_image_size, thumb_x:thumb_x+patch_image_size, :]

                # Box centres relative to the tile origin, as fractions of the
                # tile side and clipped to [0, 1]; widths/heights are fractions
                # of the tile side and are not clipped.
                center_x=(filter_df['x1']+filter_df['x2'])//2-left
                center_y=(filter_df['y1']+filter_df['y2'])//2-top
                pre_df=pd.DataFrame({
                    'x': (center_x/x20_patch_image_size).clip(lower=0, upper=1).to_numpy(),
                    'y': (center_y/x20_patch_image_size).clip(lower=0, upper=1).to_numpy(),
                    'w': (np.trunc(filter_df['x2']-filter_df['x1'])/x20_patch_image_size).to_numpy(),
                    'h': (np.trunc(filter_df['y2']-filter_df['y1'])/x20_patch_image_size).to_numpy(),
                    # Plain dict lookup so an unknown class name raises KeyError
                    # instead of silently becoming NaN.
                    'class': [class_list_inv[name] for name in filter_df['class_name']],
                }, columns=['x','y','w','h','class'])

                # Output directories are created only once a slide produces
                # its first patch.
                if not dirs_ready:
                    create_dir(save_image_dir)
                    create_dir(save_annotation_dir)
                    create_dir(save_tissue_dir)
                    dirs_ready=True

                # File names carry the tile's level-0 origin as <y>_<x>.
                patch.save(f'{save_image_dir}/patch_{top}_{left}.png')
                Image.fromarray(tissue_patch).save(f'{save_tissue_dir}/patch_{top}_{left}.png')
                pre_df.to_csv(f'{save_annotation_dir}/patch_{top}_{left}.csv', index=False)

SyntaxError: expected 'except' or 'finally' block (1631284936.py, line 22)