# nhood_enrichment

In [2]:
# Loading the Packages
%reload_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')
import os
from pathlib import Path
from tqdm import tqdm

import numpy as np
import pandas as pd
import scanpy as sc

import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams.update({
    "pgf.texsystem": "xelatex",      # 使用 XeLaTeX，如果不需要 LaTeX 公式渲染，可以省略
    'font.family': 'serif',          # 字体设置为衬线字体
    'text.usetex': False,            # 禁用 LaTeX，使用 Matplotlib 内置文字渲染
    'pgf.rcfonts': False,            # 禁用 pgf 的默认字体管理
    'pdf.fonttype': 42,              # 确保字体为 TrueType 格式，可被 Illustrator 编辑
    'ps.fonttype': 42,               # EPS 文件也使用 TrueType 格式
    'figure.dpi': 300,               # 设置图形分辨率
    'savefig.dpi': 300,              # 保存的图形文件分辨率
    'axes.unicode_minus': False,     # 避免负号问题
})

# workdir 
BASE_DIR = Path(r'G:\spatial_data\analysis')
RUN_ID = '20250222_combined_analysis_of_pseudo_HCC3D'

# Load one slide exp
base_path = BASE_DIR / f'{RUN_ID}'
data_path = base_path / "segmented"
typ_path = base_path / "cell_typing"
output_path = base_path / "nhood_enrichment"
output_path.mkdir(parents=True, exist_ok=True)

## load data

In [3]:
combine_adata_st = sc.read_h5ad(typ_path / 'combine_adata_st.h5ad')
combine_adata_st.obsm['spatial'] = np.array([combine_adata_st.obs.X_pos,combine_adata_st.obs.Y_pos]).T
combine_adata_st.obsm['spatial3d'] = np.array([
    combine_adata_st.obs.X_pos,combine_adata_st.obs.Y_pos,
    [int(_.replace('slice',''))*10/0.1625 for _ in combine_adata_st.obs.slice]]).T
combine_adata_st = combine_adata_st[combine_adata_st.obs.type!='other']
print(combine_adata_st)
combine_adata_st.obs.head()

View of AnnData object with n_obs × n_vars = 1218279 × 30
    obs: 'dataset', 'slice', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'n_genes', 'n_counts', 'type', 'leiden', 'sample', 'tissue', 'leiden_res=4', 'leiden_res=8', 'leiden_res=5', 'leiden_res=6', 'leiden_res=7', 'tmp_leiden', 'leiden_subtype', 'subtype', 'leiden_type', 'Y_pos', 'X_pos', 'region', 'ROI'
    var: 'n_cells_by_counts-CNP0000650', 'mean_counts-CNP0000650', 'log1p_mean_counts-CNP0000650', 'pct_dropout_by_counts-CNP0000650', 'total_counts-CNP0000650', 'log1p_total_counts-CNP0000650', 'n_cells-CNP0000650', 'mean-CNP0000650', 'std-CNP0000650', 'n_cells_by_counts-GSE140228', 'mean_counts-GSE140228', 'log1p_mean_counts-GSE140228', 'pct_dropout_by_counts-GSE140228', 'total_counts-GSE140228', 'log1p_total_counts-GSE140228', 'n_cells-GSE140228', 'mean-GSE140228', 'std-GSE140228', 'n_cells_by_counts-GSE151530', 'mean_counts-GSE151530', 'log1p_mean_counts-GSE151530', 'pct_dropout_by_c

Unnamed: 0,dataset,slice,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,n_genes,n_counts,type,leiden,...,leiden_res=6,leiden_res=7,tmp_leiden,leiden_subtype,subtype,leiden_type,Y_pos,X_pos,region,ROI
3,PRISM_HCC,slice0,7,2.079442,9.0,2.302585,7,9.0,CD4+,18,...,16,14,19,19,"T_CD4+, CTLA4+",11,600.2792,26378.43502,other,other
6,PRISM_HCC,slice0,12,2.564949,20.0,3.044522,12,20.0,CD4+,3,...,12,0,4,20,"T_CD4+, CXCL13+",11,670.27147,25614.760921,other,other
12,PRISM_HCC,slice0,7,2.079442,11.0,2.484907,7,11.0,CD4+,3,...,20,85,61,20,"T_CD4+, CXCL13+",11,710.849615,25682.656401,other,other
15,PRISM_HCC,slice0,6,1.94591,7.0,2.079442,6,7.0,CD4+,3,...,12,129,27,20,"T_CD4+, CXCL13+",11,745.758777,26846.66926,other,other
16,PRISM_HCC,slice0,8,2.197225,18.0,2.944439,8,18.0,Mait,2,...,40,40,40,10,Mait_SLC4A10+,6,799.203877,25874.766723,other,other


In [4]:
import yaml
with open(base_path / 'nhood_enrichment_params.yaml') as f:
    params = yaml.load(f, Loader=yaml.FullLoader)

type_reorder = params['type_reorder']
subtype_reorder = params['subtype_reorder']

print('type')
print('not in:', set(type_reorder)-set(combine_adata_st.obs.type.unique()))
print('not plot:', set(combine_adata_st.obs.type.unique())-set(type_reorder))

print('subtype')
print('not in:', set(subtype_reorder)-set(combine_adata_st.obs.subtype.unique()))
print('not plot:', set(combine_adata_st.obs.subtype.unique())-set(subtype_reorder))

type
not in: set()
not plot: set()
subtype
not in: set()
not plot: set()


## cal neighbors

In [5]:
import squidpy as sq

sq.gr.spatial_neighbors(combine_adata_st, coord_type="generic", spatial_key="spatial3d")

## plot all

In [None]:
combine_adata_st.obs.type = pd.Categorical(combine_adata_st.obs.type, categories=type_reorder)
sq.gr.nhood_enrichment(combine_adata_st, cluster_key="type", n_perms=3000)
combine_adata_st.obs.subtype = pd.Categorical(combine_adata_st.obs.subtype, categories=subtype_reorder)
sq.gr.nhood_enrichment(combine_adata_st, cluster_key="subtype", n_perms=3000)

In [None]:
fig,ax = plt.subplots(figsize=(20,10), ncols=2, nrows=1)
sq.pl.nhood_enrichment(combine_adata_st, cluster_key="type", method='ward', cmap="coolwarm", ax=ax[0], vmin=-100, vmax=100)
sq.pl.nhood_enrichment(combine_adata_st, cluster_key="subtype", method='ward', cmap="coolwarm", ax=ax[1], vmin=-80, vmax=80)
plt.tight_layout()
plt.show()

In [None]:
import scipy.cluster.hierarchy as sch

fig, ax = plt.subplots(figsize=(23, 10),ncols=2, nrows=1)
order = type_reorder
enrichment_matrix = pd.DataFrame(combine_adata_st.uns["type_nhood_enrichment"]['zscore'], index=order, columns=order)
# linkage = sch.linkage(enrichment_matrix, method='ward')
# dendrogram = sch.dendrogram(linkage, no_plot=True)
# order = [int(i) for i in dendrogram['leaves']]
# sorted_matrix = enrichment_matrix.iloc[order, order]
sns.heatmap(enrichment_matrix, cmap="coolwarm", vmin=-80, vmax=80, ax=ax[0])

order = subtype_reorder
enrichment_matrix = pd.DataFrame(combine_adata_st.uns["subtype_nhood_enrichment"]['zscore'], index=order, columns=order)
# linkage = sch.linkage(enrichment_matrix, method='ward')
# dendrogram = sch.dendrogram(linkage, no_plot=True)
# order = [int(i) for i in dendrogram['leaves']]
# sorted_matrix = enrichment_matrix.iloc[order, order]
sns.heatmap(enrichment_matrix, cmap="coolwarm", vmin=-80, vmax=80, ax=ax[1])

plt.tight_layout()
plt.savefig(os.path.join(output_path, 'nhood_enrichment.png'))
plt.close()

## plot by regions

In [13]:
# show the cell in different ROI in one plot with different color
fig, ax = plt.subplots(figsize=(5, 5))
sc.pl.spatial(combine_adata_st[combine_adata_st.obs.slice=='slice10'], color='region', ax=ax, show=False, spot_size=100)
# reverse y axis
plt.gca().invert_yaxis()
plt.savefig(output_path / 'region_projection.png')
plt.close()

In [10]:
plot_range = {
    'ROI1': [-50, 50],
    'ROI2': [-40, 40],
    'ROI3': [-50, 50],
    'ROI4': [-50, 50],
    'other': [-50, 50],
    }
for roi in combine_adata_st.obs.region.unique():
    adata_roi = combine_adata_st[combine_adata_st.obs.region == roi]
    vmin, vmax = plot_range[roi]
    # vmin, vmax = None, None
    sq.gr.nhood_enrichment(adata_roi, cluster_key="type", n_perms=3000)
    sq.gr.nhood_enrichment(adata_roi, cluster_key="subtype", n_perms=3000)

    fig, ax = plt.subplots(figsize=(23, 10),ncols=2, nrows=1)
    sq.pl.nhood_enrichment(adata_roi, cluster_key="type", method='ward', cmap="coolwarm", vmin=vmin, vmax=vmax, ax=ax[0])
    sq.pl.nhood_enrichment(adata_roi, cluster_key="subtype", method='ward', cmap="coolwarm", vmin=vmin, vmax=vmax, ax=ax[1])
    plt.tight_layout()
    plt.savefig(os.path.join(output_path, f'nhood_enrichment_cluster_region_{roi}.png'))
    plt.close()

100%|██████████| 3000/3000 [00:28<00:00, 104.37/s]
100%|██████████| 3000/3000 [00:37<00:00, 81.06/s]
100%|██████████| 3000/3000 [01:29<00:00, 33.52/s]
100%|██████████| 3000/3000 [02:13<00:00, 22.40/s]
100%|██████████| 3000/3000 [01:11<00:00, 42.24/s]
100%|██████████| 3000/3000 [01:36<00:00, 30.94/s]
100%|██████████| 3000/3000 [00:48<00:00, 62.12/s]
100%|██████████| 3000/3000 [01:04<00:00, 46.19/s]
100%|██████████| 3000/3000 [00:13<00:00, 224.59/s]
100%|██████████| 3000/3000 [00:15<00:00, 194.20/s]


## plot by rois

In [16]:
# show the cell in different ROI in one plot with different color
fig, ax = plt.subplots(figsize=(5, 5))
sc.pl.spatial(combine_adata_st[combine_adata_st.obs.slice=='slice10'], color='ROI', ax=ax, show=False, spot_size=100)
# reverse y axis
plt.gca().invert_yaxis()
plt.savefig(output_path / 'ROI_projection.png')
plt.close()

In [8]:
plot_range = {
    'ROI_1': [-20, 20],
    'ROI_2': [-10, 10],
    'ROI_3': [-15, 15],
    'ROI_4': [-25, 25],
    'ROI_5': [-20, 20],
    'other': [-100, 200],
    }
for roi in combine_adata_st.obs.ROI.unique():
    adata_roi = combine_adata_st[combine_adata_st.obs.ROI == roi]
    vmin, vmax = plot_range[roi]
    # vmin, vmax = None, None
    sq.gr.nhood_enrichment(adata_roi, cluster_key="type", n_perms=3000)
    sq.gr.nhood_enrichment(adata_roi, cluster_key="subtype", n_perms=3000)

    fig, ax = plt.subplots(figsize=(23, 10),ncols=2, nrows=1)
    sq.pl.nhood_enrichment(adata_roi, cluster_key="type", method='ward', cmap="coolwarm", vmin=vmin, vmax=vmax, ax=ax[0])
    sq.pl.nhood_enrichment(adata_roi, cluster_key="subtype", method='ward', cmap="coolwarm", vmin=vmin, vmax=vmax, ax=ax[1])
    plt.tight_layout()
    plt.savefig(os.path.join(output_path, f'nhood_enrichment_cluster_ROI_{roi}.png'))
    plt.close()

100%|██████████| 3000/3000 [03:10<00:00, 15.77/s]
100%|██████████| 3000/3000 [04:32<00:00, 11.02/s]
100%|██████████| 3000/3000 [00:13<00:00, 223.75/s]
100%|██████████| 3000/3000 [00:15<00:00, 197.21/s]
100%|██████████| 3000/3000 [00:11<00:00, 251.72/s]
100%|██████████| 3000/3000 [00:13<00:00, 228.86/s]
100%|██████████| 3000/3000 [00:17<00:00, 171.02/s]
100%|██████████| 3000/3000 [00:21<00:00, 137.28/s]
100%|██████████| 3000/3000 [00:13<00:00, 225.06/s]
100%|██████████| 3000/3000 [00:14<00:00, 212.54/s]
100%|██████████| 3000/3000 [00:11<00:00, 257.03/s]
100%|██████████| 3000/3000 [00:13<00:00, 225.39/s]
