In [1]:
# Enable IPython's autoreload extension so that edited modules are re-imported before each cell runs.
%reload_ext autoreload
%autoreload 2

In [2]:
import warnings
warnings.filterwarnings('ignore')
import os
from pathlib import Path
from tqdm import tqdm
import yaml
import pickle
import re

import numpy as np
import pandas as pd
import scanpy as sc
from tifffile import imread, imwrite

from pprint import pprint
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams.update({
    "pgf.texsystem": "xelatex",      # use XeLaTeX for the pgf backend; can be omitted if LaTeX rendering is not needed
    'font.family': 'serif',          # use a serif font family
    'text.usetex': False,            # disable LaTeX; use Matplotlib's built-in text rendering
    'pgf.rcfonts': False,            # disable pgf's default font management
    'pdf.fonttype': 42,              # embed TrueType fonts so text stays editable in Illustrator
    'ps.fonttype': 42,               # use TrueType fonts for EPS output as well
    'figure.dpi': 300,               # figure resolution
    'savefig.dpi': 300,              # resolution of saved figures
    'axes.unicode_minus': False,     # avoid minus-sign rendering problems
})

In [3]:
# Root directory that holds all spatial data
BASE_DIR = Path(r'G:\spatial_data')

# Processed per-run data
processed_dir = BASE_DIR.joinpath('processed')

# Inputs and outputs of the current analysis project
CRT_PJT = '20250222_combined_analysis_of_pseudo_HCC3D'
analysis_dir = BASE_DIR.joinpath('analysis', CRT_PJT)
typ_path = analysis_dir.joinpath("cell_typing")
proj_path = analysis_dir.joinpath("projection")
# Only the projection directory is created here; its parent must already exist.
proj_path.mkdir(exist_ok=True)

In [4]:
# Read the project-wide parameters from the analysis directory and show the available keys.
with (analysis_dir / "project_params.yaml").open("r") as file:
    project_params = yaml.safe_load(file)
project_params.keys()

dict_keys(['RUN_ID_LIST', 'image_shape'])

# Imaris merge channels

In [5]:
import os
import shutil
import numpy as np
import subprocess
import tifffile
import xml.etree.ElementTree as ET
from pathlib import Path

def create_ome_xml(channels, size_x, size_y, size_z=1, size_t=1, dtype='uint16'):
    """
    Build a minimal OME-XML metadata string describing one multi-channel image.

    Parameters
    ----------
    channels : sequence of dict
        One dict per channel. The optional ``'name'`` entry becomes the channel
        name; channels without it are named ``Channel_<1-based index>``.
    size_x, size_y : int
        Image width and height in pixels.
    size_z, size_t : int, optional
        Number of Z planes and time points (default 1 each).
    dtype : str, optional
        Pixel type written to the ``Type`` attribute; it must also be accepted by
        ``numpy.dtype`` because ``SignificantBits`` is derived from its item size.

    Returns
    -------
    str
        The serialized XML document (UTF-8 declaration included).
    """
    OME_NAMESPACE = "http://www.openmicroscopy.org/Schemas/OME/2016-06"
    ET.register_namespace('ome', OME_NAMESPACE)

    ome = ET.Element("{%s}OME" % OME_NAMESPACE, xmlns=OME_NAMESPACE)
    image = ET.SubElement(ome, "Image", ID="Image:0")
    pixels = ET.SubElement(image, "Pixels",
                           DimensionOrder="XYZCT",
                           Type=dtype,  # adjust to the actual pixel type
                           SizeX=str(size_x),
                           SizeY=str(size_y),
                           SizeZ=str(size_z),
                           SizeC=str(len(channels)),
                           SizeT=str(size_t),
                           BigEndian="false",
                           SignificantBits=str(np.dtype(dtype).itemsize * 8))

    for i, channel in enumerate(channels):
        ET.SubElement(pixels, "Channel",
                      ID=f"Channel:0:{i}",
                      # ElementTree can only serialize str attribute values, so
                      # path-like or numeric names are converted explicitly.
                      Name=str(channel.get('name', f'Channel_{i+1}')),
                      SamplesPerPixel="1")

    return ET.tostring(ome, encoding='utf-8', method='xml').decode('utf-8')

def ImarisFileConverter(chn_files, output_path, temp_merge_dir=None, tiles=None, 
                        imaris_convert_path=r"C:\Program Files\Bitplane\ImarisFileConverter 9.6.0\ImarisConvert.exe", ):
    """
    Merge single-channel TIFF files into one OME-TIFF and convert it to an Imaris ``.ims`` file.

    Parameters
    ----------
    chn_files : sequence of path-like
        One TIFF per channel. All images must share the same shape, which may be
        2-D (Y, X), 3-D (Z, Y, X) or 4-D (T, Z, Y, X).
    output_path : path-like
        Destination ``.ims`` file; its parent directory is created if needed.
    temp_merge_dir : path-like, optional
        Scratch directory for the intermediate OME-TIFF. Defaults to
        ``<output directory>/temp_merge``. The directory is deleted together with
        everything in it when the function exits, whether it succeeds or fails.
    tiles : unused
        Not read by the function; kept so existing calls keep working.
    imaris_convert_path : path-like
        Location of the ``ImarisConvert.exe`` executable.

    Raises
    ------
    FileNotFoundError
        If one of the channel files does not exist.
    ValueError
        If no channel file is given, the channel shapes differ, the dimensionality
        is unsupported, or building the OME-XML / writing the OME-TIFF fails.

    Notes
    -----
    A failing ImarisConvert run is reported on stdout and does not raise.
    """
    chn_files = list(chn_files)
    if not chn_files:
        raise ValueError("No channel files were given.")

    dir_name = os.path.dirname(output_path)
    # dirname is '' for a bare file name; os.makedirs('') would raise.
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)
    if temp_merge_dir is None:
        temp_merge_dir = os.path.join(dir_name, "temp_merge")
    os.makedirs(temp_merge_dir, exist_ok=True)
    combined_tif_path = os.path.join(temp_merge_dir, "combined.ome.tif")

    # Everything below runs under try/finally so the scratch directory is removed
    # even when reading, writing or converting fails.
    try:
        # Read one image per channel
        channel_images = []
        for filepath in chn_files:
            if not os.path.exists(filepath):
                raise FileNotFoundError(f"Missing file: {filepath}.")
            img = tifffile.imread(filepath)
            # img[img > 65000] = 0  # repair abnormal pixel values
            channel_images.append(img)

        # All channels must have the same shape
        shapes = [img.shape for img in channel_images]
        if len(set(shapes)) != 1:
            raise ValueError(f"Image shapes do not match: {shapes}.")

        shape = shapes[0]
        ndim = len(shape)
        if ndim not in (2, 3, 4):
            raise ValueError(f"Unsupported image dimensions: {shape}.")
        size_y, size_x = shape[-2], shape[-1]
        size_z = shape[-3] if ndim >= 3 else 1
        size_t = shape[0] if ndim == 4 else 1
        # Channels are stacked in front: 'CYX', 'CZYX' or 'CTZYX'
        axes = 'C' + 'TZYX'[-ndim:]
        combined_image = np.stack(channel_images, axis=0)

        dtype = str(combined_image.dtype)

        # Build the OME-XML metadata. A failure is fatal: the writer below needs it.
        try:
            ome_xml = create_ome_xml(
                channels=[{'name': ch} for ch in chn_files],
                size_x=size_x,
                size_y=size_y,
                size_z=size_z,
                size_t=size_t,
                dtype=dtype
            )
        except Exception as e:
            raise ValueError(f"Error creating OME-XML: {e}.") from e

        # Save the merged image as OME-TIFF
        try:
            tifffile.imwrite(
                combined_tif_path,
                combined_image,
                photometric='minisblack',
                metadata={'axes': axes, 'ome': ome_xml},
                ome=True
            )
        except Exception as e:
            raise ValueError(f"Error writing OME-TIFF: {e}.") from e

        # Convert to an Imaris file
        cmd = [
            str(imaris_convert_path),
            '-if', 'OmeTiff',
            '-i', str(combined_tif_path),
            '-of', 'Imaris5',
            '-o', str(output_path),
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode != 0:
                print(f"Error converting:")
                print(result.stdout)
                print(result.stderr)
        except Exception as e:
            # Best effort, as before: report the problem without raising.
            print(f"Error running ImarisConvert.exe: {e}")
    finally:
        # Removes the intermediate OME-TIFF together with the scratch directory.
        if os.path.exists(temp_merge_dir):
            shutil.rmtree(temp_merge_dir)

In [None]:
import cv2

def color_cells_uint16(tif_shape, convex_dict, df, key, resize=(1, 1)):
    """
    Rasterise cell hull polygons into a uint16 label image.

    Parameters
    ----------
    tif_shape : tuple
        Shape of the output image, (height, width).
    convex_dict : dict
        Maps a cell id to its hull vertices, an (N, 2) array whose rows are (y, x).
    df : pandas.DataFrame
        Indexed by cell id; column ``key`` holds the value painted into each cell.
    key : str
        Name of the column of ``df`` to read.
    resize : sequence of two numbers, optional
        Factors multiplied onto the (x, y) vertex coordinates before drawing.

    Returns
    -------
    numpy.ndarray
        ``uint16`` image of shape ``tif_shape``; pixels not covered by a polygon stay 0.
    """
    canvas = np.zeros(tif_shape, dtype=np.uint16)
    scale = list(resize)

    # Value to paint for each cell id, cast to uint16
    levels = df[key].astype(np.uint16).to_dict()

    for cell_id, level in levels.items():
        if cell_id in convex_dict:
            # Flip (y, x) to (x, y), shape to the (N, 1, 2) layout OpenCV expects,
            # rescale, then truncate to integer pixel coordinates.
            xy = convex_dict[cell_id][:, ::-1].reshape((-1, 1, 2))
            polygon = (xy * scale).astype(np.int32)
            cv2.fillPoly(canvas, [polygon], color=int(level))
    return canvas

# create hulls

In [None]:
# from scipy.spatial import ConvexHull

# def create_hull(rna_labeled):
#     hulls = {}
#     df_group = rna_labeled.groupby("Cell Index")
#     for group in tqdm(df_group, desc="hull"):
#         coordinates = group[1][["Y", "X"]].values
#         try: hull = ConvexHull(coordinates)
#         except: continue
#         coordinate_path = np.vstack((coordinates[hull.vertices, 0], coordinates[hull.vertices, 1])).T
#         hulls[group[0]] = coordinate_path
#     return hulls

In [None]:
# hulls_all = dict()
# for RUN_ID in project_params['RUN_ID_LIST']:
#     slice = RUN_ID.split("_")[-1]
#     rna_labeled = pd.read_csv(processed_dir / RUN_ID / 'segmented' / "rna_labeled.csv")
#     hulls_slice = create_hull(rna_labeled)
#     # hulls all is hulls_slice with each key add an f'-{slice}'
#     hulls_all.update({f"{key}-{slice}": value for key, value in hulls_slice.items()})

In [None]:
# with open(proj_path/ "cell_hulls.pkl", "wb") as f: pickle.dump(hulls_all, f)

# cell type

In [None]:
# Output directory for the cell-type projections, created inside the projection directory.
cell_type_proj_path = proj_path / 'celltype'
cell_type_proj_path.mkdir(exist_ok=True)

In [None]:
# Load the pickled cell hulls.
# NOTE(review): pickle.load executes arbitrary code from the file; only load files produced by this project.
# Presumably a dict of '<cell>-<slice>' -> hull vertex array, as built by the commented-out hull cells — verify.
with open(proj_path/ "cell_hulls.pkl", "rb") as f: hulls_all = pickle.load(f)
adata = sc.read_h5ad(typ_path / 'combine_adata_st.h5ad')
# Drop cells whose type is 'other'.
adata = adata[adata.obs['type']!='other']

In [None]:
# Load the cell-typing parameters and list their top-level keys.
# NOTE(review): yaml.FullLoader is used here while project_params.yaml is read with yaml.safe_load;
# safe_load is the safer choice if this file only contains plain data — confirm before switching.
with open(os.path.join(analysis_dir, 'cell_typing_params.yaml'), 'r') as f:
    annotaiton_params = yaml.load(f, Loader=yaml.FullLoader)
print(annotaiton_params.keys())

## type

In [None]:
downsample = 0.125
image_shape = project_params['image_shape'][downsample]
key = 'leiden_type'
save_dir = cell_type_proj_path / f"{key}"
save_dir.mkdir(exist_ok=True)

# Rasterise every slice; labels are shifted by +1 so that 0 stays reserved for background.
stack_image = np.zeros(shape=(20, image_shape[0], image_shape[1]), dtype=np.uint16)
for slice_idx in range(1, 21):  # not named `slice`, to avoid shadowing the builtin
    slice_df = adata.obs.loc[adata.obs['slice'] == slice_idx, [key]].astype(np.uint16) + 1
    # Hull coordinates are scaled by the same factor that selected image_shape.
    stack_image[slice_idx - 1, :, :] = color_cells_uint16(image_shape, hulls_all, slice_df, key, resize=(downsample, downsample))
# The stack is laid out as (slice, row, column), i.e. Z, Y, X.
imwrite(save_dir / f"{key}.tif", stack_image, metadata={'axes': 'ZYX'})

# Save each label as its own stack so it can become a separate channel.
for gray_scale in np.unique(stack_image):
    if gray_scale == 0: continue
    mask = (stack_image == gray_scale).astype(np.uint16) * gray_scale
    imwrite(save_dir / f"{key}_{gray_scale}.tif", mask, metadata={'axes': 'ZYX'})

In [None]:
import re

key = 'leiden_type'
save_dir = cell_type_proj_path / f"{key}"
# Per-label stacks are named "<key>_<label>.tif"; collect them and order them by numeric label.
chn_files = [
    str(path) for path in save_dir.iterdir()
    if re.search(r'_\d+\.tif$', path.name) and path.is_file()
]
chn_files.sort(key=lambda x: int(re.search(r'_(\d+)\.tif$', x).group(1)))
ImarisFileConverter(chn_files, output_path=save_dir/'combined.ims')

In [None]:
# The leiden_type and type columns are assumed to correspond one-to-one:
# print the type name for every leiden_type value, in ascending numeric order.
for i in sorted([int(_) for _ in adata.obs['leiden_type'].unique()]):
    name = list(adata.obs.loc[adata.obs['leiden_type'] == str(i), 'type'].unique())[0]
    print(f"\'{name}\'", end=',')
    # print(f"{i+1}\t{name}")

## subtype

In [None]:
downsample = 0.125
image_shape = project_params['image_shape'][downsample]
key = 'leiden_subtype'
save_dir = cell_type_proj_path / f"{key}"
save_dir.mkdir(exist_ok=True)

# Rasterise every slice; labels are shifted by +1 so that 0 stays reserved for background.
stack_image = np.zeros(shape=(20, image_shape[0], image_shape[1]), dtype=np.uint16)
for slice_idx in range(1, 21):  # not named `slice`, to avoid shadowing the builtin
    slice_df = adata.obs.loc[adata.obs['slice'] == slice_idx, [key]].astype(np.uint16) + 1
    # Hull coordinates are scaled by the same factor that selected image_shape.
    stack_image[slice_idx - 1, :, :] = color_cells_uint16(image_shape, hulls_all, slice_df, key, resize=(downsample, downsample))
# The stack is laid out as (slice, row, column), i.e. Z, Y, X.
imwrite(save_dir / f"{key}.tif", stack_image, metadata={'axes': 'ZYX'})

# Save each label as its own stack so it can become a separate channel.
for gray_scale in np.unique(stack_image):
    if gray_scale == 0: continue
    mask = (stack_image == gray_scale).astype(np.uint16) * gray_scale
    imwrite(save_dir / f"{key}_{gray_scale}.tif", mask, metadata={'axes': 'ZYX'})

In [None]:
key = 'leiden_subtype'
save_dir = cell_type_proj_path / f"{key}"
# Per-label stacks are named "<key>_<label>.tif"; collect them and order them by numeric label.
chn_files = [
    str(path) for path in save_dir.iterdir()
    if re.search(r'_\d+\.tif$', path.name) and path.is_file()
]
chn_files.sort(key=lambda x: int(re.search(r'_(\d+)\.tif$', x).group(1)))
ImarisFileConverter(chn_files, output_path=save_dir/'combined.ims')

In [None]:
# The leiden_subtype and subtype columns are assumed to correspond one-to-one:
# print the subtype name for every leiden_subtype value, in ascending numeric order.
for i in sorted([int(_) for _ in adata.obs['leiden_subtype'].unique()]):
    name = list(adata.obs.loc[adata.obs['leiden_subtype'] == str(i), 'subtype'].unique())[0]
    # print(f"{i+1}\t{name}")
    print(f"\'{name}\'", end=',')


# gene expression

In [47]:
# Output directory for the gene-expression projections.
output_path = proj_path / "gene_expression"
output_path.mkdir(exist_ok=True)
# NOTE(review): pickle.load executes arbitrary code from the file; only load files produced by this project.
with open(proj_path/ "cell_hulls.pkl", "rb") as f: hulls_all = pickle.load(f)
# This rebinds `adata` to a different file than the one used in the cell-type section above.
adata = sc.read_h5ad(typ_path / 'adata.h5ad')

## HBV

In [89]:
gene = 'HBV'
# Name of the obs column that receives the level; the HBV projection cell below reads this variable.
obs_ksy = f'{gene}_level'
gene_exp = adata.raw[:, gene].X
# Split the expression into levels 1-8 using seven percentile cut points.
percentile_bins = np.percentile(gene_exp, [70, 90, 95, 99, 99.5, 99.9, 99.99])
print(percentile_bins)

gene_exp = gene_exp.flatten()
# np.digitize returns 0..7 for seven bins; +1 shifts that to 1..8.
# right=True puts a value equal to a cut point into the lower level.
mapped_levels = np.digitize(gene_exp, bins=percentile_bins, right=True) + 1
adata.obs[obs_ksy] = mapped_levels

[ 1.  3.  5.  9. 11. 17. 29.]


In [93]:
# List the distinct cell types present in adata.obs['type'].
adata.obs.type.unique().tolist()

['CD4+',
 'other',
 'Mait',
 'Monocyte',
 'Macrophage',
 'DC',
 'T_reg',
 'Neutrophil',
 'Tumor',
 'NK',
 'CD8+',
 'B',
 'Endo',
 'Ep',
 'Mast',
 'Liver',
 'CAF']

In [94]:
downsample = 0.125
image_shape = project_params['image_shape'][downsample]
key = obs_ksy
save_dir = output_path / f"{key}"
save_dir.mkdir(exist_ok=True)

# Only these cell types are drawn.
adata_use = adata[adata.obs.type.isin(['Tumor', 'Liver', 'Ep', 'Endo', 'CAF'])].copy()
stack_image = np.zeros(shape=(20, image_shape[0], image_shape[1]), dtype=np.uint16)
for slice_idx in range(1, 21):  # not named `slice`, to avoid shadowing the builtin
    slice_df = adata_use.obs.loc[adata_use.obs['slice'] == slice_idx, [key]].astype(np.uint16)
    stack_image[slice_idx - 1, :, :] = color_cells_uint16(image_shape, hulls_all, slice_df, key, resize=(downsample, downsample))
# The stack is laid out as (slice, row, column), i.e. Z, Y, X.
imwrite(save_dir / f"{key}.tif", stack_image, metadata={'axes': 'ZYX'})

# Save each level as its own stack so it can become a separate channel.
for gray_scale in np.unique(stack_image):
    if gray_scale == 0: continue
    mask = (stack_image == gray_scale).astype(np.uint16) * gray_scale
    imwrite(save_dir / f"{key}_{gray_scale}.tif", mask, metadata={'axes': 'ZYX'})

# Combine all per-level stacks, ordered by numeric level, into one .ims file.
chn_files = sorted([str(save_dir/f) for f in os.listdir(save_dir) if re.search(r'_\d+\.tif$', f) and os.path.isfile(os.path.join(save_dir, f))],
    key=lambda x: int(re.search(r'_(\d+)\.tif$', x).group(1)))
ImarisFileConverter(chn_files, output_path=output_path/f'{key}.ims')
# The .ims is written next to save_dir, so the whole scratch directory can go.
shutil.rmtree(save_dir)

## AFP

In [None]:
# Randomly subsample the 'Tumor_AFP+' cells to 45730 cells.
# NOTE(review): 45730 is a magic number — presumably chosen to match another group's size; confirm.
adata_tumor_plot_AFP_subsample = sc.pp.subsample(adata[adata.obs.subtype == 'Tumor_AFP+'],
                                                 n_obs=45730,
                                                 copy=True)
adata_tumor_plot_GPC3 = adata[adata.obs.subtype == 'Tumor_GPC3+']
# Keep the subsampled AFP+ cells together with all GPC3+ cells.
adata_tumor_plot = adata[adata.obs.index.isin(list(adata_tumor_plot_AFP_subsample.obs.index)+list(adata_tumor_plot_GPC3.obs.index))]
# NOTE(review): hard-coded absolute path on another drive, outside BASE_DIR / analysis_dir.
adata_tumor_plot.write(r'E:\TMC\cell_typing\results\2023.9.28-_PRISM_HCC_final_downstream_analysis\2023.10.6_AFPsubsample\adata_AFPsubsampled_and_GPC3+.h5ad')

In [None]:
from collections import Counter


def _layer_order(layer_name):
    """Sort key for 'layer<N>' labels: the integer N."""
    return int(layer_name.replace('layer', ''))


# Cells per layer in the two groups being compared.
# NOTE: despite their names, `before_filter` counts the subsampled AFP+ tumor cells and
# `after_filter` counts the GPC3+ tumor cells; the names are kept because they are notebook globals.
before_filter = Counter(list(adata_tumor_plot_AFP_subsample.obs['layer']))
after_filter = Counter(list(adata_tumor_plot_GPC3.obs['layer']))

# Subsampled AFP+ cells per layer
data = dict(sorted(before_filter.items(), key=lambda d: _layer_order(d[0])))
courses = list(data.keys())
values = list(data.values())
fig = plt.figure(figsize = (15, 3))
plt.bar(courses, values)
plt.title('Subsampled Tumor_AFP+ cells per layer')

# GPC3+ cells per layer
data = dict(sorted(after_filter.items(), key=lambda d: _layer_order(d[0])))
courses = list(data.keys())
values = list(data.values())
fig = plt.figure(figsize = (15, 3))
plt.bar(courses, values)
plt.title('Tumor_GPC3+ cells per layer')

# AFP+ / GPC3+ ratio per layer. Counts are matched by layer name, not by position,
# so the ratio stays correct when a layer is missing from one of the groups
# (a Counter returns 0 for a missing layer).
courses = list(data.keys())
values = [before_filter[layer] / after_filter[layer] for layer in courses]
fig = plt.figure(figsize = (15, 3))
plt.bar(courses, values)
plt.title('Tumor_AFP+ / Tumor_GPC3+ cell ratio per layer')

In [None]:
# AFP_content = AFP expression (adata.X) for cells in layer0..layer19, 0 for every other cell.
# The original loop selected the same per-layer subset twice and wrote the values back through
# chained indexing (obs[col][idx] = ...), which pandas does not guarantee to modify the frame.
layer_names = [f'layer{layer_idx}' for layer_idx in range(20)]
in_layers = adata.obs['layer'].isin(layer_names).to_numpy()

afp_expr = adata[:, 'AFP'].X
if hasattr(afp_expr, 'toarray'):  # sparse matrix -> dense
    afp_expr = afp_expr.toarray()
afp_expr = np.asarray(afp_expr).reshape(-1)

adata.obs['AFP_content'] = np.where(in_layers, afp_expr, 0)

In [None]:
# fig, ax = plt.subplots(ncols=1,nrows=1,figsize=(20,5))
# a=adata[adata.obs.AFP_content!=0].obs['AFP_content']
# sns.histplot(a, bins=100, stat='count', alpha=1, kde=True,
#             edgecolor='white', linewidth=0.5,
#             # log=True,
#             ax=ax,
#             line_kws=dict(color='black', alpha=0.7, linewidth=1.5, label='KDE'),
#             # binrange=[0,100]
#             )
# y=ax.get_lines()[0].get_ydata()
# maxima = [float(j/len(y)*(max(a)-min(a))+min(a)) for j in argrelextrema(-np.array(y), np.less)[0]]

# for submaxima in maxima:
#     ax.axvline(x=submaxima, color='r', alpha=0.5, linestyle='--')
    
# plt.tight_layout()
# plt.show()

### percentile

In [None]:
# Ascending AFP content of every cell with a non-zero value.
tmp = adata[adata.obs.AFP_content != 0]
# tmp = tmp[tmp.obs.tmp_leiden != '-2']
content = sorted(tmp.obs.AFP_content)

In [None]:
# For each integer level 1..20, print the fraction of values in `content` that are below it.
# `content` is sorted, so the insertion position of a level equals content.index(level) when the
# level is present, but unlike list.index it does not raise when no cell has exactly that value.
for i in range(20):
    print(i+1, int(np.searchsorted(content, i+1, side='left'))/len(content))

In [None]:
# Lower bound of AFP content for each grade: a cell receives the highest grade whose bound it reaches.
# NOTE: the dict is called HBV_grade although it grades AFP content here; the name is kept
# because it is a notebook global.
HBV_grade = {
    0: 1,
    1: 2,
    2: 3,
    3: 4,
    4: 5,
    5: 6,
    6: 7,
    7: 9,
    8: 11,
    9: 15,
    10: 20,
}

# Vectorised replacement for the per-cell loop, which assigned through chained indexing
# (obs[col].loc[cell] = ...) and is not guaranteed by pandas to modify the frame.
grade_ids = np.array(list(HBV_grade.keys()))
grade_bounds = np.array(list(HBV_grade.values()), dtype=float)
assert np.all(np.diff(grade_bounds) > 0), "grade bounds must be strictly increasing"

afp_content = adata.obs['AFP_content'].to_numpy(dtype=float)
# Position of the last bound that is <= the value; -1 means the value is below every bound.
bound_pos = np.searchsorted(grade_bounds, afp_content, side='right') - 1
has_grade = (bound_pos >= 0) & ~np.isnan(afp_content)
# Cells below the lowest bound (or with missing content) keep the sentinel grade -2.
adata.obs["AFP_grade"] = np.where(has_grade, grade_ids[np.clip(bound_pos, 0, None)], -2)

In [None]:
# Persist the annotated object.
# NOTE(review): hard-coded absolute path on another drive, outside BASE_DIR / analysis_dir.
adata.write(r'e:\TMC\cell_typing\results\2023.9.28-_PRISM_HCC_final_downstream_analysis\2023.10.12_AFP_content\adata_AFP_content.h5ad')

### detailed percentile

In [None]:
# Ascending HBV content of every cell with a non-zero value.
# NOTE(review): no visible cell of this notebook creates adata.obs['HBV_content']
# (only '<gene>_level' and 'AFP_content' are written) — confirm where this column comes from.
tmp = adata[adata.obs.HBV_content != 0]
# tmp = tmp[tmp.obs.tmp_leiden != '-2']
content = list(tmp.obs.HBV_content)
content = sorted(content)
# np.percentile(content, [34,57,74,83,85,90,93,96,97,98,98.5,99])

In [None]:
# For each integer level 1..20, print the fraction of values in `content` that are below it.
# `content` is sorted, so the insertion position of a level equals content.index(level) when the
# level is present, but unlike list.index it does not raise when no cell has exactly that value.
for i in range(20):
    print(i+1, int(np.searchsorted(content, i+1, side='left'))/len(content))

In [None]:
# Lower bound of HBV content for each grade: a cell receives the highest grade whose bound it reaches.
HBV_grade = {
    0: 1,
    1: 2,
    2: 3,
    3: 4,
    4: 5,
    5: 6,
    6: 7,
    7: 8,
    8: 11,
}

# Vectorised replacement for the per-cell loop, which assigned through chained indexing
# (obs[col].loc[cell] = ...) and is not guaranteed by pandas to modify the frame.
grade_ids = np.array(list(HBV_grade.keys()))
grade_bounds = np.array(list(HBV_grade.values()), dtype=float)
assert np.all(np.diff(grade_bounds) > 0), "grade bounds must be strictly increasing"

hbv_content = adata.obs['HBV_content'].to_numpy(dtype=float)
# Position of the last bound that is <= the value; -1 means the value is below every bound.
bound_pos = np.searchsorted(grade_bounds, hbv_content, side='right') - 1
has_grade = (bound_pos >= 0) & ~np.isnan(hbv_content)
# Cells below the lowest bound (or with missing content) keep the sentinel grade -2.
adata.obs["HBV_grade_detailed"] = np.where(has_grade, grade_ids[np.clip(bound_pos, 0, None)], -2)

# STAGATE projection

In [None]:
import cv2

def color_cells_uint16(tif_shape, convex_dict, df, key, resize=(1, 1)):
    """
    Rasterise cell hull polygons into a uint16 label image.

    NOTE: a function with this name is already defined earlier in this notebook;
    this later definition replaces it when the cell runs.

    Parameters
    ----------
    tif_shape : tuple
        Shape of the output image, (height, width).
    convex_dict : dict
        Maps a cell id to its hull vertices, an (N, 2) array whose rows are (y, x).
    df : pandas.DataFrame
        Indexed by cell id; column ``key`` holds the value painted into each cell.
    key : str
        Name of the column of ``df`` to read.
    resize : sequence of two numbers, optional
        Factors multiplied onto the (x, y) vertex coordinates before drawing.

    Returns
    -------
    numpy.ndarray
        ``uint16`` image of shape ``tif_shape``; pixels not covered by a polygon stay 0.
    """
    canvas = np.zeros(tif_shape, dtype=np.uint16)
    scale = list(resize)

    # Value to paint for each cell id, cast to uint16
    levels = df[key].astype(np.uint16).to_dict()

    for cell_id, level in levels.items():
        if cell_id in convex_dict:
            # Flip (y, x) to (x, y), shape to the (N, 1, 2) layout OpenCV expects,
            # rescale, then truncate to integer pixel coordinates.
            xy = convex_dict[cell_id][:, ::-1].reshape((-1, 1, 2))
            polygon = (xy * scale).astype(np.int32)
            cv2.fillPoly(canvas, [polygon], color=int(level))
    return canvas

In [None]:
# STAGATE results are read from the analysis directory; projections are written under the projection directory.
# NOTE: this rebinds `output_path`, which the gene-expression section above also uses.
STAGATE_path = analysis_dir / "STAGATE"
output_path = proj_path / "STAGATE"
output_path.mkdir(exist_ok=True)

In [None]:
# NOTE(review): pickle.load executes arbitrary code from the file; only load files produced by this project.
with open(proj_path/ "cell_hulls.pkl", "rb") as f: hulls_all = pickle.load(f)
adata_STAGATE = sc.read_h5ad(STAGATE_path / 'adata_STAGATE.h5ad')

## mclust_11

In [None]:
# Paint the `key` column as gray levels, one image per slice.
from tifffile import imwrite

downsample = 0.25
image_shape = project_params['image_shape'][downsample]
key = 'mclust_11'
stack_image = np.zeros(shape=(20, image_shape[0], image_shape[1]), dtype=np.uint16)
for slice_idx in range(1, 21):  # not named `slice`, to avoid shadowing the builtin
    slice_df = adata_STAGATE.obs.loc[adata_STAGATE.obs['slice'] == slice_idx, [key]]
    stack_image[slice_idx - 1, :, :] = color_cells_uint16(image_shape, hulls_all, slice_df, key, resize=(downsample, downsample))
# The stack is laid out as (slice, row, column), i.e. Z, Y, X.
imwrite(output_path / f"{key}_downsample.tif", stack_image, metadata={'axes': 'ZYX'})

# Save each gray level as its own stack.
from tifffile import imread

stack_image = imread(output_path / f"{key}_downsample.tif")
print(stack_image.shape)
for gray_scale in np.unique(stack_image):
    if gray_scale == 0: continue
    mask = (stack_image == gray_scale).astype(np.uint16) * gray_scale
    imwrite(output_path / f"{key}_{gray_scale}_downsample.tif", mask, metadata={'axes': 'ZYX'})

### annotated

In [None]:
with open(analysis_dir / "STAGATE_analysis_params.yaml", "r") as f:
    STAGATE_params = yaml.load(f, Loader=yaml.FullLoader)
STAGATE_annotate = STAGATE_params['STAGATE_mclust_11_annotate']
pprint(STAGATE_annotate, sort_dicts=False)
# Each annotation gets a 1-based number in file order; every mclust id listed under it maps to that number.
map_dict = {
    str(mclust): str(domain_num)
    for domain_num, mclust_list in enumerate(STAGATE_annotate.values(), start=1)
    for mclust in mclust_list
}
mclust_as_str = adata_STAGATE.obs['mclust_11'].astype(str)
adata_STAGATE.obs['mclust_11_annotated'] = mclust_as_str.map(map_dict).astype(str)

{'Tumor_AFP+': [1],
 'Tumor_GPC3+': [3],
 'Tumor_pro_CXCL13': [4],
 'Tumor_Liver': [10],
 'Liver': [11],
 'CAF': [5, 9],
 'Epthelial_Neutrophil': [2, 6, 7],
 'B': [8]}


In [None]:
downsample = 0.125
image_shape = project_params['image_shape'][downsample]
key = 'mclust_11_annotated'
save_dir = output_path / f"{key}"
save_dir.mkdir(exist_ok=True)

stack_image = np.zeros(shape=(20, image_shape[0], image_shape[1]), dtype=np.uint16)
for slice_idx in range(1, 21):  # not named `slice`, to avoid shadowing the builtin
    slice_df = adata_STAGATE.obs.loc[adata_STAGATE.obs['slice'] == slice_idx, [key]].astype(np.uint16)
    stack_image[slice_idx - 1, :, :] = color_cells_uint16(image_shape, hulls_all, slice_df, key, resize=(downsample, downsample))
# The stack is laid out as (slice, row, column), i.e. Z, Y, X.
imwrite(save_dir / f"{key}.tif", stack_image, metadata={'axes': 'ZYX'})

# Save each label as its own stack so it can become a separate channel.
for gray_scale in np.unique(stack_image):
    if gray_scale == 0: continue
    mask = (stack_image == gray_scale).astype(np.uint16) * gray_scale
    imwrite(save_dir / f"{key}_{gray_scale}.tif", mask, metadata={'axes': 'ZYX'})

# Combine all per-label stacks, ordered by numeric label, into one .ims file.
chn_files = sorted([str(save_dir/f) for f in os.listdir(save_dir) if re.search(r'_\d+\.tif$', f) and os.path.isfile(os.path.join(save_dir, f))],
    key=lambda x: int(re.search(r'_(\d+)\.tif$', x).group(1)))
ImarisFileConverter(chn_files, output_path=save_dir/f'{key}.ims')
# The .ims is written inside save_dir, so only the full label stack is deleted.
# It is a regular file: shutil.rmtree would raise on it, os.remove is the right call.
os.remove(save_dir / f"{key}.tif")

In [None]:
import shutil

# Move each "<...>leiden_subtype<...>/combined.ims" up into output_path and delete its folder.
for folder in os.listdir(output_path):
    if 'leiden_subtype' not in folder: continue
    folder_path = os.path.join(output_path, folder)
    if os.path.isfile(folder_path): continue
    # The second '_'-separated token of the folder name is used as the cluster number.
    num = folder.split('_')[1]
    try:
        shutil.move(os.path.join(folder_path, 'combined.ims'), os.path.join(output_path, f"mclust_11_{num}_subtype.ims"))
        shutil.rmtree(folder_path)
    except OSError as err:
        # Still best effort, but only file-system errors are skipped (shutil.Error is an OSError)
        # and the reason is reported instead of being silently dropped.
        print(f"Skipping {folder}: {err}")
        continue

### type

In [None]:
downsample = 0.125
image_shape = project_params['image_shape'][downsample]
key = 'leiden_type'

for clust in tqdm([7, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], desc="STAGATE_mclust"):
    save_dir = output_path / f"mclust_{clust}_{key}"
    save_dir.mkdir(exist_ok=True)
    # Only the cells of this spatial cluster are drawn.
    adata_tmp = adata_STAGATE[adata_STAGATE.obs['mclust_11'] == str(clust)]
    stack_image = np.zeros(shape=(20, image_shape[0], image_shape[1]), dtype=np.uint16)
    for slice_idx in range(1, 21):  # not named `slice`, to avoid shadowing the builtin
        slice_df = adata_tmp.obs.loc[adata_tmp.obs['slice'] == slice_idx, [key]].astype(np.uint16)
        slice_df += 1  # shift labels so that 0 stays reserved for background
        stack_image[slice_idx - 1, :, :] = color_cells_uint16(image_shape, hulls_all, slice_df, key, resize=(downsample, downsample))
    # The stack is laid out as (slice, row, column), i.e. Z, Y, X.
    imwrite(save_dir / f"{key}.tif", stack_image, metadata={'axes': 'ZYX'})
    # Save each label as its own stack so it can become a separate channel.
    for gray_scale in np.unique(stack_image):
        if gray_scale == 0: continue
        mask = (stack_image == gray_scale).astype(np.uint16) * gray_scale
        imwrite(save_dir / f"{key}_{gray_scale}.tif", mask, metadata={'axes': 'ZYX'})

In [None]:
# Load the cell-typing parameters and list the annotation names.
# NOTE(review): yaml.FullLoader is used here while project_params.yaml is read with yaml.safe_load;
# safe_load is the safer choice if this file only contains plain data — confirm before switching.
with open(os.path.join(analysis_dir, 'cell_typing_params.yaml'), 'r') as f:
    annotaiton_params = yaml.load(f, Loader=yaml.FullLoader)
print(annotaiton_params.keys())
print(list(annotaiton_params['leiden_annotation'].keys()))

### subtype

In [None]:
def _slice_number(value):
    """Turn a 0-based 'slice<N>' label into the 1-based integer N+1; pass through values that are already numeric."""
    text = str(value)
    if 'slice' in text:
        return int(text.split('slice')[1]) + 1
    return int(text)


# Both steps can be re-run safely: already converted slice numbers are left unchanged, and the
# obs index is rebuilt from its part before the first '-' plus the slice number.
# NOTE: the projection cells above this one already compare obs['slice'] with integers, so this
# cell has to run before them on a freshly loaded adata_STAGATE.
adata_STAGATE.obs['slice'] = adata_STAGATE.obs['slice'].map(_slice_number)
base_names = adata_STAGATE.obs.index.map(lambda x: x.split('-')[0])
adata_STAGATE.obs.index = base_names + '-' + adata_STAGATE.obs['slice'].astype(str).to_numpy()

In [None]:
import re

downsample = 0.125
image_shape = project_params['image_shape'][downsample]
key = 'leiden_subtype'

for clust in tqdm(list(range(1, 12)), desc="STAGATE_mclust"):
    save_dir = output_path / f"mclust_{clust}_{key}"
    save_dir.mkdir(exist_ok=True)
    # Only the cells of this spatial cluster are drawn.
    adata_tmp = adata_STAGATE[adata_STAGATE.obs['mclust_11'] == str(clust)]
    stack_image = np.zeros(shape=(20, image_shape[0], image_shape[1]), dtype=np.uint16)
    for slice_idx in range(1, 21):  # not named `slice`, to avoid shadowing the builtin
        slice_df = adata_tmp.obs.loc[adata_tmp.obs['slice'] == slice_idx, [key]].astype(np.uint16)
        slice_df += 1  # shift labels so that 0 stays reserved for background
        stack_image[slice_idx - 1, :, :] = color_cells_uint16(image_shape, hulls_all, slice_df, key, resize=(downsample, downsample))
    # The stack is laid out as (slice, row, column), i.e. Z, Y, X.
    imwrite(save_dir / f"{key}.tif", stack_image, metadata={'axes': 'ZYX'})

    # Save each label as its own stack so it can become a separate channel.
    for gray_scale in np.unique(stack_image):
        if gray_scale == 0: continue
        mask = (stack_image == gray_scale).astype(np.uint16) * gray_scale
        imwrite(save_dir / f"{key}_{gray_scale}.tif", mask, metadata={'axes': 'ZYX'})

    # Combine all per-label stacks, ordered by numeric label, into one .ims file.
    chn_files = sorted([str(save_dir/f) for f in os.listdir(save_dir) if re.search(r'_\d+\.tif$', f) and os.path.isfile(os.path.join(save_dir, f))],
        key=lambda x: int(re.search(r'_(\d+)\.tif$', x).group(1)))
    ImarisFileConverter(chn_files, output_path=output_path/f'mclust_{clust}_{key}.ims')

## mclust_10

In [None]:
# Paint the `key` column as gray levels, one image per slice.
from tifffile import imwrite

downsample = 0.125
image_shape = project_params['image_shape'][downsample]

key = 'mclust_10'
save_dir = output_path / f"{key}"
save_dir.mkdir(exist_ok=True)
stack_image = np.zeros(shape=(20, image_shape[0], image_shape[1]), dtype=np.uint16)
for slice_idx in range(1, 21):  # not named `slice`, to avoid shadowing the builtin
    slice_df = adata_STAGATE.obs.loc[adata_STAGATE.obs['slice'] == slice_idx, [key]]
    stack_image[slice_idx - 1, :, :] = color_cells_uint16(image_shape, hulls_all, slice_df, key, resize=(downsample, downsample))
# The full stack is not written here: as f"{key}.tif" it would be named 'mclust_10.tif', which the
# channel pattern below ('_<digits>.tif') would pick up as label 10.
# imwrite(save_dir / f"{key}.tif", stack_image, metadata={'axes': 'ZYX'})
# stack_image = imread(save_dir / f"{key}_downsample.tif")
print(stack_image.shape)
# Save each gray level as its own stack, laid out as (slice, row, column), i.e. Z, Y, X.
for gray_scale in np.unique(stack_image):
    if gray_scale == 0: continue
    mask = (stack_image == gray_scale).astype(np.uint16) * gray_scale
    imwrite(save_dir / f"{key}_{gray_scale}.tif", mask, metadata={'axes': 'ZYX'})

# Combine all per-label stacks, ordered by numeric label, into one .ims file.
chn_files = sorted([str(save_dir/f) for f in os.listdir(save_dir) if re.search(r'_\d+\.tif$', f) and os.path.isfile(os.path.join(save_dir, f))],
    key=lambda x: int(re.search(r'_(\d+)\.tif$', x).group(1)))
ImarisFileConverter(chn_files, output_path=output_path/f'{key}.ims')
shutil.rmtree(save_dir)

(20, 5000, 5625)


### annotated

In [None]:
with open(analysis_dir / "STAGATE_analysis_params.yaml", "r") as f:
    STAGATE_params = yaml.load(f, Loader=yaml.FullLoader)
STAGATE_annotate = STAGATE_params['STAGATE_mclust_10_annotate']
pprint(STAGATE_annotate, sort_dicts=False)
# Each annotation gets a 1-based number in file order; every mclust id listed under it maps to that number.
map_dict = {
    str(mclust): str(domain_num)
    for domain_num, mclust_list in enumerate(STAGATE_annotate.values(), start=1)
    for mclust in mclust_list
}
mclust_as_str = adata_STAGATE.obs['mclust_10'].astype(str)
adata_STAGATE.obs['mclust_10_annotated'] = mclust_as_str.map(map_dict).astype(str)

{'Tumor_AFP+': [1],
 'Tumor_GPC3+': [3],
 'Tumor_pro_CXCL13': [4],
 'pioneer_immune': [5],
 'CAF': [9],
 'B': [8],
 'Liver': [10],
 'Epthelial_Neutrophil': [2, 6, 7]}


In [None]:
downsample = 0.125
image_shape = project_params['image_shape'][downsample]
key = 'mclust_10_annotated'
save_dir = output_path / f"{key}"
save_dir.mkdir(exist_ok=True)

stack_image = np.zeros(shape=(20, image_shape[0], image_shape[1]), dtype=np.uint16)
for slice_idx in range(1, 21):  # not named `slice`, to avoid shadowing the builtin
    slice_df = adata_STAGATE.obs.loc[adata_STAGATE.obs['slice'] == slice_idx, [key]].astype(np.uint16)
    stack_image[slice_idx - 1, :, :] = color_cells_uint16(image_shape, hulls_all, slice_df, key, resize=(downsample, downsample))
# The stack is laid out as (slice, row, column), i.e. Z, Y, X.
imwrite(save_dir / f"{key}.tif", stack_image, metadata={'axes': 'ZYX'})

# Save each label as its own stack so it can become a separate channel.
for gray_scale in np.unique(stack_image):
    if gray_scale == 0: continue
    mask = (stack_image == gray_scale).astype(np.uint16) * gray_scale
    imwrite(save_dir / f"{key}_{gray_scale}.tif", mask, metadata={'axes': 'ZYX'})

# Combine all per-label stacks, ordered by numeric label, into one .ims file.
chn_files = sorted([str(save_dir/f) for f in os.listdir(save_dir) if re.search(r'_\d+\.tif$', f) and os.path.isfile(os.path.join(save_dir, f))],
    key=lambda x: int(re.search(r'_(\d+)\.tif$', x).group(1)))
ImarisFileConverter(chn_files, output_path=output_path/f'{key}.ims')
# The .ims is written next to save_dir, so the whole scratch directory can go.
shutil.rmtree(save_dir)

### type

In [None]:
downsample = 0.125
image_shape = project_params['image_shape'][downsample]
key = 'leiden_type'

for clust in tqdm(list(range(1, 11)), desc="STAGATE_mclust_10_type"):
    # Scratch directory named after the mclust_10 output, so it can never pick up per-label
    # files left behind in the "mclust_<n>_<key>" directories of the mclust_11 section.
    save_dir = output_path / f"mclust_10_{clust}_{key}"
    save_dir.mkdir(exist_ok=True)

    # Only the cells of this mclust_10 cluster are drawn (the original filtered on 'mclust_11'
    # here, which did not match the mclust_10 output name).
    adata_tmp = adata_STAGATE[adata_STAGATE.obs['mclust_10'] == str(clust)]
    stack_image = np.zeros(shape=(20, image_shape[0], image_shape[1]), dtype=np.uint16)
    for slice_idx in range(1, 21):  # not named `slice`, to avoid shadowing the builtin
        slice_df = adata_tmp.obs.loc[adata_tmp.obs['slice'] == slice_idx, [key]].astype(np.uint16)
        slice_df += 1  # shift labels so that 0 stays reserved for background
        stack_image[slice_idx - 1, :, :] = color_cells_uint16(image_shape, hulls_all, slice_df, key, resize=(downsample, downsample))

    # Save each label as its own stack, laid out as (slice, row, column), i.e. Z, Y, X.
    for gray_scale in np.unique(stack_image):
        if gray_scale == 0: continue
        mask = (stack_image == gray_scale).astype(np.uint16) * gray_scale
        imwrite(save_dir / f"{key}_{gray_scale}.tif", mask, metadata={'axes': 'ZYX'})

    # Combine all per-label stacks, ordered by numeric label, into one .ims file.
    chn_files = sorted([str(save_dir/f) for f in os.listdir(save_dir) if re.search(r'_\d+\.tif$', f) and os.path.isfile(os.path.join(save_dir, f))],
        key=lambda x: int(re.search(r'_(\d+)\.tif$', x).group(1)))
    ImarisFileConverter(chn_files, output_path=output_path/f'mclust_10_{clust}_{key}.ims')
    shutil.rmtree(save_dir)

STAGATE_mclust_10_type: 100%|██████████| 10/10 [53:42<00:00, 322.23s/it] 


In [None]:
# The leiden_type and type columns are assumed to correspond one-to-one:
# print "<leiden_type + 1>\t<type name>" for every leiden_type value, in ascending numeric order.
for i in sorted([int(_) for _ in adata_STAGATE.obs['leiden_type'].unique()]):
    name = list(adata_STAGATE.obs.loc[adata_STAGATE.obs['leiden_type'] == str(i), 'type'].unique())[0]
    print(f"{i+1}\t{name}")
    # print(f"\'{name}\'", end=',')

-1	other
1	Liver
2	Tumor
3	Endo
4	Ep
5	CAF
6	DC
7	Mait
8	Mast
9	Monocyte
10	Neutrophil
11	Macrophage
12	CD4+
13	CD8+
14	T_reg
15	B
16	NK


### subtype

In [None]:
import re

downsample = 0.125
image_shape = project_params['image_shape'][downsample]
key = 'leiden_subtype'

for clust in tqdm(list(range(1, 11)), desc="STAGATE_mclust_10_subtype"):
    # Scratch directory named after the mclust_10 output, so it can never pick up per-label
    # files left behind in the "mclust_<n>_<key>" directories of the mclust_11 section.
    save_dir = output_path / f"mclust_10_{clust}_{key}"
    save_dir.mkdir(exist_ok=True)

    # Only the cells of this mclust_10 cluster are drawn.
    adata_tmp = adata_STAGATE[adata_STAGATE.obs['mclust_10'] == str(clust)]
    stack_image = np.zeros(shape=(20, image_shape[0], image_shape[1]), dtype=np.uint16)
    for slice_idx in range(1, 21):  # not named `slice`, to avoid shadowing the builtin
        slice_df = adata_tmp.obs.loc[adata_tmp.obs['slice'] == slice_idx, [key]].astype(np.uint16)
        slice_df += 1  # shift labels so that 0 stays reserved for background
        stack_image[slice_idx - 1, :, :] = color_cells_uint16(image_shape, hulls_all, slice_df, key, resize=(downsample, downsample))

    # Save each label as its own stack, laid out as (slice, row, column), i.e. Z, Y, X.
    for gray_scale in np.unique(stack_image):
        if gray_scale == 0: continue
        mask = (stack_image == gray_scale).astype(np.uint16) * gray_scale
        imwrite(save_dir / f"{key}_{gray_scale}.tif", mask, metadata={'axes': 'ZYX'})

    # Combine all per-label stacks, ordered by numeric label, into one .ims file.
    chn_files = sorted([str(save_dir/f) for f in os.listdir(save_dir) if re.search(r'_\d+\.tif$', f) and os.path.isfile(os.path.join(save_dir, f))],
        key=lambda x: int(re.search(r'_(\d+)\.tif$', x).group(1)))
    ImarisFileConverter(chn_files, output_path=output_path/f'mclust_10_{clust}_{key}.ims')
    shutil.rmtree(save_dir)

STAGATE_mclust_10_subtype: 100%|██████████| 10/10 [2:15:30<00:00, 813.08s/it] 


In [None]:
# The leiden_subtype and subtype columns are assumed to correspond one-to-one:
# print "<leiden_subtype + 1>\t<subtype name>" for every leiden_subtype value, in ascending numeric order.
for i in sorted([int(_) for _ in adata_STAGATE.obs['leiden_subtype'].unique()]):
    name = list(adata_STAGATE.obs.loc[adata_STAGATE.obs['leiden_subtype'] == str(i), 'subtype'].unique())[0]
    print(f"{i+1}\t{name}")
    # print(f"\'{name}\'", end=',')

-1	other
1	Liver
2	Tumor_AFP+
3	Tumor_GPC3+
4	Tumor_proliferation
5	Endo_PECAM1+
6	Ep_EPCAM+
7	CAF_ACTA2+
8	cDC1_CLEC9A+
9	cDC2_CD1C+
10	pDC_LILRA4+
11	Mait_SLC4A10+
12	Mast_CPA3+
13	Monocyte_CD14+
14	Monocyte_CD14+, CD16+
15	Monocyte_CD16+
16	Neutrophil_CSF3R+
17	Neutrophil_CSF3R+, S100A8+
18	Macrophage_LYVE1+
19	Cyto_T_CD4+
20	T_CD4+, CTLA4+
21	T_CD4+, CXCL13+
22	T_CD4+, PD1+
23	T_CD4+, PD1+, CTLA4+
24	Cyto_T_CD8+
25	T_CD8+, CXCL13+
26	T_CD8+, PD1+
27	T_reg
28	B_CD79A+
29	B_CD79A+, MS4A1+
30	B_MS4A1+
31	Plasma_B_CD79A+, MZB1+
32	NK_NCAM1+
