In [1]:
import openslide as ops
from glob import glob
import os
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import cv2
import anndata as ad
import pandas as pd
import h5py
import json
def create_dir(path):
    """Create the directory ``path`` (and any missing parents) if needed.

    Calling this for a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
        OSError: If the directory cannot be created (e.g. permissions).
    """
    # exist_ok=True avoids the check-then-create race in which another
    # process creates the directory between an exists() test and makedirs().
    os.makedirs(path, exist_ok=True)

In [2]:
# Integer class index -> cell-type name. The index of each name is its
# position in the list below (0 through 11).
class_list = dict(enumerate([
    "Tumor epithelial",
    "Non-tumor epithelial",
    "Basal/Myoepithelial",
    "Smooth muscle",
    "Fibroblast",
    "Endothelial",
    "T cell",
    "B cell",
    "Plasma cell",
    "Myeloid",
    "Adipocyte",
    "Other/Unknown",
]))
# Cell-type name -> list of marker gene symbols, each stored as a byte string.
# The symbols are written as whitespace-separated text and encoded to bytes
# when the dict is built; an empty string yields an empty marker list.
# NOTE(review): presumably bytes so they compare equal to gene names read
# from an HDF5 file -- confirm against the code that consumes this table.
marker_genes = {
    cell_type: [symbol.encode("ascii") for symbol in symbols.split()]
    for cell_type, symbols in {
        "Tumor epithelial": "EPCAM KRT14 KRT5 KRT23 ERBB2 MKI67 GATA3",
        "Non-tumor epithelial": "EPCAM KRT8 KRT18 KRT19 CDH1",
        "Basal/Myoepithelial": "KRT5 KRT14 ACTA2 MYL9",
        "Smooth muscle": "ACTA2 MYH11 TAGLN MYLK MYL9",
        "Fibroblast": "PDGFRA PDGFRB DPT LUM SFRP1 FBLN1 SFRP4",
        "Endothelial": "PECAM1 KDR CD93 EGFL7 VWF CLEC14A",
        "T cell": "CD3D CD3E CD3G NKG7 GZMA CCL5 TRAC TCF7 LTB IL2RG",
        "B cell": "CD79A CD79B MS4A1 MZB1 CD19 PAX5",
        "Plasma cell": "MZB1 PRDM1 TNFRSF17",
        "Myeloid": (
            "CD68 CD14 CD163 MRC1 C1QA AIF1 "
            "S100A8 CD86 ITGAX TPSAB1 CPA3 KIT"
        ),
        "Adipocyte": "ADIPOQ LPL PPARG",
        "Other/Unknown": "",
    }.items()
}
# Cell-type name -> display colour as a "#RRGGBB" hex string.
# NOTE(review): "Non-tumor epithelial" (#FFB6C1) and "Adipocyte" (#FFC0CB)
# differ by only 10 units in the green and blue channels, so the two classes
# may be hard to tell apart in an overlay -- confirm this is intended.
class_colors_hex = {
    "Tumor epithelial": "#FF0000",      # red - tumor
    "Non-tumor epithelial": "#FFB6C1",  # light pink - normal epithelium
    "Basal/Myoepithelial": "#FFA500",   # orange - basal / myoepithelial
    "Smooth muscle": "#8B4513",         # brown - smooth muscle
    "Fibroblast": "#00FF00",            # green - fibroblast
    "Endothelial": "#0000FF",           # blue - vascular endothelium
    "T cell": "#FFFF00",                # yellow - T cell
    "B cell": "#FF00FF",                # magenta - B cell
    "Plasma cell": "#9400D3",           # purple - plasma cell
    "Myeloid": "#00FFFF",               # cyan - myeloid lineage
    "Adipocyte": "#FFC0CB",             # pink - adipocyte
    "Other/Unknown": "#808080",         # gray - other / unclassified
}

# The same palette as [R, G, B] lists of ints in 0-255. Each channel is the
# two hex digits at offsets 1, 3 and 5 of the "#RRGGBB" string, so this table
# always agrees with class_colors_hex.
class_colors = {
    cell_type: [int(hex_code[pos:pos + 2], 16) for pos in (1, 3, 5)]
    for cell_type, hex_code in class_colors_hex.items()
}


In [3]:
def sibling_path(wsi_path, subdir, ext):
    """Return the companion file of a slide that lives in a sibling directory.

    Maps ``<root>/<slide_dir>/<stem><old_ext>`` to ``<root>/<subdir>/<stem><ext>``.
    Only the last directory component and the last extension are swapped, so
    a stem or parent directory that happens to contain ``.tif`` or ``/wsis``
    is left untouched.

    Args:
        wsi_path: Path to the slide image file.
        subdir: Name of the sibling directory holding the companion file.
        ext: Extension of the companion file, including the leading dot.

    Returns:
        The companion file path as a string.
    """
    wsi_dir, filename = os.path.split(wsi_path)
    stem = os.path.splitext(filename)[0]
    return os.path.join(os.path.dirname(wsi_dir), subdir, stem + ext)


# glob() yields matches in arbitrary, filesystem-dependent order; sorting makes
# positional indexing into these parallel lists reproducible across runs.
wsi_list = sorted(glob('../../data/Brest_spatialTranscriptome/preprocessed_xenium/wsis/*.tif'))
# Parallel lists: entry k of each list belongs to the slide at wsi_list[k].
annotation_list = [sibling_path(wsi, 'labels', '.csv') for wsi in wsi_list]
metadata_list = [sibling_path(wsi, 'metadata', '.json') for wsi in wsi_list]
coord_list = [sibling_path(wsi, 'patches', '.h5') for wsi in wsi_list]


In [4]:
# Position of the slide to load in the parallel path lists.
i = 0
# NOTE: the OpenSlide handle is not closed in this cell.
slide = ops.OpenSlide(wsi_list[i])
metadata_file = metadata_list[i]
annotation_file = annotation_list[i]
coord_file = coord_list[i]

# Slice with [:] inside the `with` block so `coords` is read while the HDF5
# file is still open.
with h5py.File(coord_file, "r") as coord_h5:
    coords = coord_h5["coords"][:]

# Decode explicitly as UTF-8 instead of the platform default encoding, which
# can mis-decode non-ASCII metadata on systems with a non-UTF-8 locale.
with open(metadata_file, 'r', encoding='utf-8') as metadata_fp:
    metadata = json.load(metadata_fp)

# NOTE(review): the displayed frame has an "Unnamed: 0" column, which suggests
# the CSV was written with its index; `index_col=0` would drop it -- confirm
# that nothing downstream relies on column positions before changing this.
annotation_df = pd.read_csv(annotation_file)

In [5]:
# Preview the loaded annotation table; as the cell's last expression it is
# rendered by the notebook (pandas elides the middle rows of a long frame).
annotation_df

Unnamed: 0,x1,y1,x2,y2,class_name
0,17071,6624,17104,6643,T cell
1,17077,6669,17110,6684,Fibroblast
2,17155,6681,17190,6706,Other/Unknown
3,17144,6696,17175,6719,T cell
4,17129,6700,17154,6721,Other/Unknown
...,...,...,...,...,...
141809,9324,30011,9357,30037,Fibroblast
141810,9386,31218,9451,31382,Fibroblast
141811,9327,31198,9432,31368,Fibroblast
141812,9316,31108,9342,31125,Fibroblast
