In [15]:
# Loading the Packages
%reload_ext autoreload
%autoreload 2

import os
# important for gpd.sjoin
os.environ["USE_PYGEOS"] = "0"

from pathlib import Path
import pickle
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import scanpy as sc
import squidpy as sq
import igraph
import random
import math
from sklearn.preprocessing import MinMaxScaler

import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams.update({
    "pgf.texsystem": "xelatex",      # use XeLaTeX for pgf; can be omitted if LaTeX formula rendering is not needed
    'font.family': 'serif',          # use a serif font family
    'text.usetex': False,            # disable LaTeX; use Matplotlib's built-in text rendering
    'pgf.rcfonts': False,            # disable pgf's default font management
    'pdf.fonttype': 42,              # TrueType fonts in PDFs, so text stays editable in Illustrator
    'ps.fonttype': 42,               # TrueType fonts for EPS files as well
    'figure.dpi': 300,               # figure resolution
    'savefig.dpi': 300,              # resolution of saved figure files
    'axes.unicode_minus': False,     # avoid minus-sign rendering problems
})

In [16]:
# Root of the analysis tree and the identifier of the run analysed in this notebook.
BASE_DIR = Path(r'G:\spatial_data\analysis')
RUN_ID = '20250222_combined_analysis_of_pseudo_HCC3D'

# Per-run directories: two input folders and the folder this notebook writes to.
base_path = BASE_DIR.joinpath(RUN_ID)
data_path, typ_path, output_path = (
    base_path.joinpath(folder)
    for folder in ("segmented", "cell_typing", "interaction_graph")
)
# The run directory itself must already exist; only the output folder is created.
output_path.mkdir(exist_ok=True)

In [17]:
import matplotlib.colors as clr

# Anchor colours of the "Zissou" palette, listed in the order they are interpolated.
zissou = [
    "#3A9AB2", "#6FB2C1", "#91BAB6", "#A5C2A3", "#BDC881", "#DCCB4E",
    "#E3B710", "#E79805", "#EC7A05", "#EF5703", "#F11B00",
]

# Continuous colormaps through the anchors: forward order and reversed order.
colormap = clr.LinearSegmentedColormap.from_list("Zissou", zissou)
colormap_r = clr.LinearSegmentedColormap.from_list("Zissou", list(reversed(zissou)))

In [18]:
def get_interaction_matrix(adata, cluster_key, spatial_key, normalized):
    """
    Compute the cluster-by-cluster interaction matrix of one set of cells with Squidpy.

    The caller's object is not modified: the spatial neighbor graph and the
    interaction matrix are computed on a private copy of `adata`.

    Parameters
    - adata (anndata): The anndata object containing the cells to find interactions between.
    - cluster_key (str): The key in adata.obs that defines the discrete interacting groups.
    - spatial_key (str): The key in adata.obsm that holds the spatial coordinates.
    - normalized (bool): Passed through to `sq.gr.interaction_matrix`.

    Returns
    - interaction_matrix (np.ndarray): The matrix Squidpy stores under
      `uns["<cluster_key>_interactions"]`.
    """

    working = adata.copy()

    # Build the neighbor graph first; the interaction matrix is derived from it.
    sq.gr.spatial_neighbors(working, spatial_key=spatial_key)
    sq.gr.interaction_matrix(working, cluster_key=cluster_key, normalized=normalized)

    result_key = f"{cluster_key}_interactions"
    return working.uns[result_key]


def mean_interaction_matrix(adata, batches, cluster_key, spatial_key="X_spatial", normalized=True, batch_key="batch"):
    """
    Get the mean interaction matrix for a set of batches of cells using Squidpy.

    One interaction matrix is computed per batch (on the cells whose
    `adata.obs[batch_key]` equals the batch name) and the matrices are averaged
    element-wise.

    Parameters
    - adata (anndata): The anndata object containing the cells to find interactions between.
    - batches (iterable): The batch names to use in the interaction analysis. Must not be empty.
    - cluster_key (str): The key in adata.obs to use for defining discrete interacting groups.
    - spatial_key (str): The key in adata.obsm to use for spatial coordinates.
      NOTE: the default here is "X_spatial", unlike the "spatial" default of the
      graph helpers in this notebook; it is kept unchanged for backward compatibility.
    - normalized (bool): Whether to normalize the interaction matrix.
    - batch_key (str): The column in adata.obs that holds the batch name of each cell.

    Returns
    - mean_interactions (np.ndarray): The mean interaction matrix.

    Raises
    - ValueError: If `batches` is empty (there is nothing to average).
    """

    # Materialise once so generators and other one-shot iterables are supported.
    batches = list(batches)
    if not batches:
        raise ValueError("`batches` must contain at least one batch name")

    # Averaging assumes every per-batch matrix has the same shape, i.e. that all
    # batches share the same set of cluster categories -- TODO confirm for new data.
    interactions = [
        get_interaction_matrix(
            adata=adata[adata.obs[batch_key] == b],
            spatial_key=spatial_key,
            cluster_key=cluster_key,
            normalized=normalized,
        )
        for b in batches
    ]

    mean_interactions = sum(interactions) / len(interactions)

    return mean_interactions


def create_mean_interaction_graph(adata, batches, cluster_key, spatial_key="spatial", interaction_cutoff=0.05):
    """
    Build a weighted igraph graph from the mean interaction matrix of several batches.

    Parameters
    - adata (anndata): The anndata object containing the cells to find interactions between.
    - batches (list): The list of batch names to use in the interaction analysis.
    - cluster_key (str): The key in adata.obs to use for defining discrete interacting groups.
    - spatial_key (str): The key in adata.obsm to use for spatial coordinates.
    - interaction_cutoff (float): Interactions below this value are set to 0 before
      the graph is built.

    Returns
    - g (igraph.Graph): The interaction graph; vertices are labelled with the
      categories of `adata.obs[cluster_key]`.
    """

    weights = mean_interaction_matrix(
        adata=adata,
        batches=batches,
        cluster_key=cluster_key,
        spatial_key=spatial_key,
    )

    # Zero out weak interactions so they do not become edges.
    weak = weights < interaction_cutoff
    weights[weak] = 0

    graph = igraph.Graph.Weighted_Adjacency(weights)
    graph.vs["label"] = adata.obs[cluster_key].cat.categories
    return graph


def get_mean_expression(adata, batches, gene, cluster_key):
    """
    Mean expression of one gene per cluster, averaged over batches and scaled by its maximum.

    The gene is first averaged within every (batch, cluster) pair, then the
    per-batch means are averaged per cluster, and finally all values are divided
    by the largest of them.

    Parameters:
    - adata (AnnData): Annotated data matrix with observations (rows) and variables (columns).
      Must provide a "batch" column in `adata.obs`.
    - batches (list): Batch names; only cells whose batch is in this list are used.
    - gene (str): A gene name for which mean expression is calculated.
    - cluster_key (str): The key in adata.obs that defines the clusters.

    Returns:
    - Array of scaled mean expression values, one per cluster, in the order
      produced by grouping on `cluster_key`.
    """
    adata = adata[adata.obs.batch.isin(batches)]
    keys = [gene, "batch", cluster_key]
    df = sc.get.obs_df(adata, keys=keys)

    # Average within each batch first so that every batch carries the same weight.
    per_batch = df.groupby(["batch", cluster_key]).mean().reset_index()
    per_cluster = per_batch[[cluster_key, gene]].groupby(cluster_key).mean().reset_index()

    expression = per_cluster[gene].tolist()
    # Scale by the maximum. No guard is applied when the maximum is 0 or NaN.
    expression = expression / np.max(expression)
    return expression

In [19]:
def plot_graph(g, vertex_colors, ax, layout="kk", highlight="Cd8_T-Cell_P14"):
    """
    Plot an igraph graph with specified vertex colors and layout.

    Edges touching the `highlight` vertex are drawn in the RGB colour "90,10,0",
    all other edges in black. Edge opacity is taken from the edge weights,
    rescaled with `scale_numbers`.

    Parameters
    - g (igraph.Graph): The graph to plot. Its edges must carry a "weight" attribute.
    - vertex_colors (list): The list of colors to use for the vertices.
    - ax (matplotlib.axes.Axes): The axes to plot the graph on.
    - layout (str): The layout to use for the graph.
    - highlight (str): The label of the vertex to highlight. If no vertex carries
      this label, nothing is highlighted.

    Returns
    - None

    Raises
    - Whatever `scale_numbers` raises for the edge weights, e.g. ZeroDivisionError
      when all weights are equal.
    """

    # Seed Python's RNG before plotting -- presumably so that the layout computed
    # by igraph is reproducible; TODO confirm igraph draws from `random`.
    random.seed(42)

    try:
        node = g.vs["label"].index(highlight)
    except (KeyError, ValueError):
        # KeyError: the graph has no "label" attribute.
        # ValueError: no vertex is labelled `highlight`.
        highlight_edges = set()
    else:
        # A set keeps the per-edge membership test below cheap.
        highlight_edges = set(g.incident(node, "all"))

    edge_color = [
        "90,10,0" if i in highlight_edges else "0,0,0" for i in range(len(g.es))
    ]
    igraph.plot(
        g,
        target=ax,
        layout=layout,
        edge_color=[
            f"rgba({c}, {w})" for c, w in zip(edge_color, scale_numbers(g.es["weight"]))
        ],
        edge_arrow_size=0.005,
        edge_width=1,
        vertex_color=vertex_colors,
        vertex_label_dist=-1,
        vertex_label_size=8,
    )


def scale_numbers(input_list, target_min=0.4, target_max=0.9):
    """
    Linearly rescale numbers so the smallest maps to `target_min` and the largest to `target_max`.

    Parameters
    - input_list (list): The list of numbers to scale.
    - target_min (float): The minimum value of the target range.
    - target_max (float): The maximum value of the target range.

    Returns
    - scaled_list (list): The list of scaled numbers, in input order.

    Raises
    - ValueError: If `input_list` is empty.
    - ZeroDivisionError: If all (plain Python) numbers in `input_list` are equal.
    """

    lowest, highest = min(input_list), max(input_list)

    scaled_list = []
    for value in input_list:
        # Position of the value inside the input range (0 .. 1) ...
        fraction = (value - lowest) / (highest - lowest)
        # ... stretched onto the target range.
        scaled_list.append(fraction * (target_max - target_min) + target_min)

    return scaled_list


def ceil_division(numerator, denominator):
    """
    Divide and round the quotient up to the next integer.

    Parameters
    - numerator (int | float): The dividend.
    - denominator (int | float): The divisor.

    Returns
    - int: The smallest integer greater than or equal to numerator / denominator.

    Raises
    - ZeroDivisionError: If `denominator` is 0.
    """
    if isinstance(numerator, int) and isinstance(denominator, int):
        # Pure integer arithmetic: exact for arbitrarily large ints, whereas going
        # through float division loses precision (or overflows) for big values.
        return -(-numerator // denominator)
    return int(math.ceil(numerator / denominator))


def global_layout(adata, cluster_key, batches, spatial_key='spatial', layout="kk", interaction_cutoff=0.05):
    """
    Compute a vertex layout from the mean interaction graph of the given batches.

    Parameters
    - adata (anndata): The AnnData object containing the cells to create a layout for.
    - cluster_key (str): The key in adata.obs to use for defining discrete interacting groups.
    - batches (list): The list of batch names to use in the interaction analysis.
    - spatial_key (str): The key in adata.obsm to use for spatial coordinates.
    - layout (str): The name of the igraph layout algorithm to use.
    - interaction_cutoff (float): Interactions below this value are dropped from the graph.

    Returns
    - layout (igraph.Layout): The layout computed on the interaction graph.
    """

    graph = create_mean_interaction_graph(
        adata,
        batches,
        cluster_key,
        spatial_key=spatial_key,
        interaction_cutoff=interaction_cutoff,
    )

    # Fixed seed, set immediately before the layout is computed.
    random.seed(42)
    return graph.layout(layout)

## general neighborhood

In [20]:
adata_direct = sc.read_h5ad(typ_path / 'adata.h5ad')
combine_adata_st = sc.read_h5ad(typ_path / 'combine_adata_st.h5ad')

# Keep only the cells that also appear in the combined object. Take a real copy so
# the assignments below operate on an independent object, not a view of adata_direct.
adata = adata_direct[adata_direct.obs.index.isin(combine_adata_st.obs.index)].copy()

# Attach the annotations in the cell order of `adata`. Assigning `.obs` takes the
# rows in the order given, so an annotation table sorted differently from the
# expression matrix would otherwise pair metadata with the wrong cells.
adata.obs = combine_adata_st.obs.loc[adata.obs_names].copy()

# format for later analysis
adata.obs = adata.obs.rename(columns={'X_pos':'X', 'Y_pos':'Y'})
adata.obsm['spatial'] = adata.obs.loc[:, ['X', 'Y']].values

# Third coordinate derived from the slice number ("slice<k>" -> k * 10 / 0.1625).
# NOTE(review): presumably a 10 um slice spacing expressed in pixels of 0.1625 um,
# so that z is on the same scale as X/Y -- confirm the units.
slice_z = [int(name.replace('slice', '')) * 10 / 0.1625 for name in adata.obs.slice]
adata.obsm['spatial3d'] = np.array([adata.obs.X, adata.obs.Y, slice_z]).T

# The helper functions in this notebook select cells through adata.obs["batch"].
adata.obs['batch'] = adata.obs['dataset']
adata = adata[adata.obs['type'] != 'other']
print(adata)
adata.obs.head()

View of AnnData object with n_obs × n_vars = 1218279 × 31
    obs: 'dataset', 'slice', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'n_genes', 'n_counts', 'type', 'leiden', 'sample', 'tissue', 'leiden_res=4', 'leiden_res=8', 'leiden_res=5', 'leiden_res=6', 'leiden_res=7', 'tmp_leiden', 'leiden_subtype', 'subtype', 'leiden_type', 'Y', 'X', 'region', 'ROI', 'batch'
    var: 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'mean', 'std'
    uns: 'leiden', 'log1p', 'neighbors', 'pca', 'umap'
    obsm: 'X_pca', 'X_umap', 'spatial', 'spatial3d'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'


Unnamed: 0,dataset,slice,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,n_genes,n_counts,type,leiden,...,leiden_res=7,tmp_leiden,leiden_subtype,subtype,leiden_type,Y,X,region,ROI,batch
3,PRISM_HCC,slice0,7,2.079442,9.0,2.302585,7,9.0,CD4+,18,...,14,19,19,"T_CD4+, CTLA4+",11,600.2792,26378.43502,other,other,PRISM_HCC
6,PRISM_HCC,slice0,12,2.564949,20.0,3.044522,12,20.0,CD4+,3,...,0,4,20,"T_CD4+, CXCL13+",11,670.27147,25614.760921,other,other,PRISM_HCC
12,PRISM_HCC,slice0,7,2.079442,11.0,2.484907,7,11.0,CD4+,3,...,85,61,20,"T_CD4+, CXCL13+",11,710.849615,25682.656401,other,other,PRISM_HCC
15,PRISM_HCC,slice0,6,1.94591,7.0,2.079442,6,7.0,CD4+,3,...,129,27,20,"T_CD4+, CXCL13+",11,745.758777,26846.66926,other,other,PRISM_HCC
16,PRISM_HCC,slice0,8,2.197225,18.0,2.944439,8,18.0,Mait,2,...,40,40,10,Mait_SLC4A10+,6,799.203877,25874.766723,other,other,PRISM_HCC


In [21]:
# Named groups of batches: group label -> list of batch names. The batch names are
# matched against adata.obs["batch"] by mean_interaction_matrix.
batches = {"pseudo3D": ["PRISM_HCC"]}
# Flat list of every batch name across all groups.
all_batches = list(np.concatenate(list(batches.values())))

Build the interaction graph, with nodes colored by cell type.

In [22]:
import yaml
# Read the mapping stored under the 'type_colormap' key of params.yaml
# (cell type name -> list of three colour components, see the output below).
# NOTE(review): yaml.FullLoader can construct more than plain data; if params.yaml
# could ever come from an untrusted source, prefer yaml.safe_load.
with open(typ_path / 'params.yaml') as file:
    type_colormap = yaml.load(file, Loader=yaml.FullLoader)['type_colormap']
type_colormap

{'Liver': [1, 0.392, 0],
 'Tumor': [0.751, 0.491, 0],
 'Endo': [1, 0, 1],
 'Ep': [0, 1, 0],
 'CAF': [0, 0, 1],
 'DC': [1, 0.259, 0],
 'Mait': [1, 0, 0.434],
 'Mast': [1, 0, 0],
 'Monocyte': [0, 0.471, 1],
 'Neutrophil': [1, 1, 0],
 'Macrophage': [0.7, 1, 0],
 'CD4+': [0.5, 0.5, 0.5],
 'CD8+': [1, 0.8, 0],
 'T_reg': [0, 1, 0.672],
 'B': [0, 1, 1],
 'NK': [1, 0, 0],
 'other': [0.9, 0.9, 0.9]}

In [23]:
# Colors are defined for the type annotation but not for the subtype annotation. Hence we build
# a lookup from subtype to type, so that every subtype can be drawn in the color of its type.
# Count the cells of every (subtype, type) pair ...
subtype_to_type = (adata.obs.groupby(["subtype", "type"]).size().reset_index().rename(columns={0: "count"}))
# ... and keep only the pairs that actually occur, indexed by subtype.
subtype_to_type = subtype_to_type[subtype_to_type["count"] > 0].set_index("subtype")

In [None]:
# Draw the subtype interaction graph of every batch group at cutoffs 0.00 .. 0.20
# (steps of 0.01) and save each graph as a PNG under <output_path>/general.
figure_path = output_path / "general"
figure_path.mkdir(exist_ok=True)

subtype_categories = adata.obs['subtype'].cat.categories
# Vertex colours: every subtype is drawn in the colour of its type. This does not
# depend on the batch or the cutoff, so it is computed once.
node_colors = [type_colormap[subtype_to_type["type"].loc[t]] for t in subtype_categories]

for batch in batches.keys():
    M = mean_interaction_matrix(adata=adata, batches=batches[batch], cluster_key='subtype', spatial_key='spatial3d')
    for step in range(21):
        interaction_cutoff = 0.01 * step
        fig, ax = plt.subplots(1, 1, figsize=(7.5, 6))
        # Work on a copy so the thresholding never alters the full matrix.
        m = M.copy()
        m[m < interaction_cutoff] = 0
        g = igraph.Graph.Weighted_Adjacency(m)
        g.vs["label"] = subtype_categories
        try:
            plot_graph(g=g, vertex_colors=node_colors, ax=ax, layout='kk')
        except ZeroDivisionError:
            # Raised by scale_numbers when all remaining edge weights are equal.
            # Skip this cutoff, but close the figure so skipped iterations do not
            # accumulate open figures.
            plt.close(fig)
            continue
        ax.set_title(f"{batch}: subtype, interaction_cutoff={interaction_cutoff}")
        fig.tight_layout()
        fig.savefig(figure_path / f"{batch}_subtype_inte_g_cutoff={step}x0.01.png", bbox_inches='tight')
        plt.close(fig)

## neighborhood region

In [31]:
# Named groups of batches: group label -> list of batch names. The group label is
# used in figure titles and file names by the plotting loop of this section.
# NOTE(review): the group is labelled "2D" although the loop below passes
# spatial_key='spatial3d' -- confirm the label is intended.
batches = {"2D": ["PRISM_HCC"]}
# Flat list of every batch name across all groups.
all_batches = list(np.concatenate(list(batches.values())))

In [33]:
import yaml
# Read the mapping stored under the 'type_colormap' key of params.yaml
# (cell type name -> list of three colour components, see the output below).
# This repeats the load already done in the "general neighborhood" section.
# NOTE(review): yaml.FullLoader can construct more than plain data; if params.yaml
# could ever come from an untrusted source, prefer yaml.safe_load.
with open(typ_path / 'params.yaml') as file:
    type_colormap = yaml.load(file, Loader=yaml.FullLoader)['type_colormap']
type_colormap

{'Liver': [1, 0.392, 0],
 'Tumor': [0.751, 0.491, 0],
 'Endo': [1, 0, 1],
 'Ep': [0, 1, 0],
 'CAF': [0, 0, 1],
 'DC': [1, 0.259, 0],
 'Mait': [1, 0, 0.434],
 'Mast': [1, 0, 0],
 'Monocyte': [0, 0.471, 1],
 'Neutrophil': [1, 1, 0],
 'Macrophage': [0.7, 1, 0],
 'CD4+': [0.5, 0.5, 0.5],
 'CD8+': [1, 0.8, 0],
 'T_reg': [0, 1, 0.672],
 'B': [0, 1, 1],
 'NK': [1, 0, 0],
 'other': [0.9, 0.9, 0.9]}

In [34]:
# Colors are defined for the type annotation but not for the subtype annotation. Hence we build
# a lookup from subtype to type, so that every subtype can be drawn in the color of its type.
# Count the cells of every (subtype, type) pair ...
subtype_to_type = (adata.obs.groupby(["subtype", "type"]).size().reset_index().rename(columns={0: "count"}))
# ... and keep only the pairs that actually occur, indexed by subtype.
subtype_to_type = subtype_to_type[subtype_to_type["count"] > 0].set_index("subtype")

In [None]:
# For every region except the catch-all 'other', draw the subtype interaction graph
# at cutoffs 0.00 .. 0.20 (steps of 0.01) and save each graph as a PNG under
# <output_path>/<region>_interaction_graph.
for roi in adata.obs.region.unique():
    if roi == 'other':
        continue
    adata_tmp = adata[adata.obs['region'] == roi]
    figure_path = output_path / f'{roi}_interaction_graph'
    figure_path.mkdir(exist_ok=True)

    subtype_categories = adata_tmp.obs['subtype'].cat.categories
    # Vertex colours: every subtype is drawn in the colour of its type. This does
    # not depend on the batch or the cutoff, so it is computed once per region.
    node_colors = [type_colormap[subtype_to_type["type"].loc[t]] for t in subtype_categories]

    for batch in batches.keys():
        M = mean_interaction_matrix(adata=adata_tmp, batches=batches[batch], cluster_key='subtype', spatial_key='spatial3d')
        for step in range(21):
            interaction_cutoff = 0.01 * step
            fig, ax = plt.subplots(1, 1, figsize=(7.5, 6))
            # Work on a copy so the thresholding never alters the full matrix.
            m = M.copy()
            m[m < interaction_cutoff] = 0
            g = igraph.Graph.Weighted_Adjacency(m)
            g.vs["label"] = subtype_categories
            try:
                plot_graph(g=g, vertex_colors=node_colors, ax=ax, layout='kk')
            except ZeroDivisionError:
                # Raised by scale_numbers when all remaining edge weights are
                # equal; skipping such a cutoff is expected.
                plt.close(fig)
                continue
            except Exception as exc:
                # Still best-effort (the loop carries on), but the failure is
                # reported instead of silently swallowed, and KeyboardInterrupt
                # is no longer caught.
                print(f"Skipping {roi} / {batch} at cutoff step {step}: {exc!r}")
                plt.close(fig)
                continue
            ax.set_title(f"{batch}: subtype, interaction_cutoff={interaction_cutoff}")
            fig.tight_layout()
            fig.savefig(figure_path / f"{batch}_subtype_inte_g_cutoff={step}x0.01.png", bbox_inches='tight')
            plt.close(fig)

## neighborhood roi

In [38]:
# Named groups of batches: group label -> list of batch names. The group label is
# used in figure titles and file names by the plotting loop of this section.
# NOTE(review): the group is labelled "2D" although the loop below passes
# spatial_key='spatial3d' -- confirm the label is intended.
batches = {"2D": ["PRISM_HCC"]}
# Flat list of every batch name across all groups.
all_batches = list(np.concatenate(list(batches.values())))

In [39]:
import yaml
# Read the mapping stored under the 'type_colormap' key of params.yaml
# (cell type name -> list of three colour components, see the output below).
# This repeats the load already done in the earlier sections.
# NOTE(review): yaml.FullLoader can construct more than plain data; if params.yaml
# could ever come from an untrusted source, prefer yaml.safe_load.
with open(typ_path / 'params.yaml') as file:
    type_colormap = yaml.load(file, Loader=yaml.FullLoader)['type_colormap']
type_colormap

{'Liver': [1, 0.392, 0],
 'Tumor': [0.751, 0.491, 0],
 'Endo': [1, 0, 1],
 'Ep': [0, 1, 0],
 'CAF': [0, 0, 1],
 'DC': [1, 0.259, 0],
 'Mait': [1, 0, 0.434],
 'Mast': [1, 0, 0],
 'Monocyte': [0, 0.471, 1],
 'Neutrophil': [1, 1, 0],
 'Macrophage': [0.7, 1, 0],
 'CD4+': [0.5, 0.5, 0.5],
 'CD8+': [1, 0.8, 0],
 'T_reg': [0, 1, 0.672],
 'B': [0, 1, 1],
 'NK': [1, 0, 0],
 'other': [0.9, 0.9, 0.9]}

In [40]:
# Colors are defined for the type annotation but not for the subtype annotation. Hence we build
# a lookup from subtype to type, so that every subtype can be drawn in the color of its type.
# Count the cells of every (subtype, type) pair ...
subtype_to_type = (adata.obs.groupby(["subtype", "type"]).size().reset_index().rename(columns={0: "count"}))
# ... and keep only the pairs that actually occur, indexed by subtype.
subtype_to_type = subtype_to_type[subtype_to_type["count"] > 0].set_index("subtype")

In [None]:
# For every ROI except the catch-all 'other', draw the subtype interaction graph
# at cutoffs 0.00 .. 0.20 (steps of 0.01) and save each graph as a PNG under
# <output_path>/<ROI>_interaction_graph.
# NOTE(review): the folder name pattern is the same as in the region loop, so an
# ROI that shares its name with a region would write into the same folder.
for roi in adata.obs.ROI.unique():
    if roi == 'other':
        continue
    adata_tmp = adata[adata.obs['ROI'] == roi]
    figure_path = output_path / f'{roi}_interaction_graph'
    figure_path.mkdir(exist_ok=True)

    subtype_categories = adata_tmp.obs['subtype'].cat.categories
    # Vertex colours: every subtype is drawn in the colour of its type. This does
    # not depend on the batch or the cutoff, so it is computed once per ROI.
    node_colors = [type_colormap[subtype_to_type["type"].loc[t]] for t in subtype_categories]

    for batch in batches.keys():
        M = mean_interaction_matrix(adata=adata_tmp, batches=batches[batch], cluster_key='subtype', spatial_key='spatial3d')
        for step in range(21):
            interaction_cutoff = 0.01 * step
            fig, ax = plt.subplots(1, 1, figsize=(7.5, 6))
            # Work on a copy so the thresholding never alters the full matrix.
            m = M.copy()
            m[m < interaction_cutoff] = 0
            g = igraph.Graph.Weighted_Adjacency(m)
            g.vs["label"] = subtype_categories
            try:
                plot_graph(g=g, vertex_colors=node_colors, ax=ax, layout='kk')
            except ZeroDivisionError:
                # Raised by scale_numbers when all remaining edge weights are
                # equal; skipping such a cutoff is expected.
                plt.close(fig)
                continue
            except Exception as exc:
                # Still best-effort (the loop carries on), but the failure is
                # reported instead of silently swallowed, and KeyboardInterrupt
                # is no longer caught.
                print(f"Skipping {roi} / {batch} at cutoff step {step}: {exc!r}")
                plt.close(fig)
                continue
            ax.set_title(f"{batch}: subtype, interaction_cutoff={interaction_cutoff}")
            fig.tight_layout()
            fig.savefig(figure_path / f"{batch}_subtype_inte_g_cutoff={step}x0.01.png", bbox_inches='tight')
            plt.close(fig)

## Figure 3c

In [None]:
# Polygon gates, keyed by gate name. For every gate:
#   "edges"          -- polygon vertices as [x, y] pairs; classify_cells passes the x
#                       value of each vertex through `transformation` before building
#                       the polygon, the y value is used as given.
#   "label_position",
#   "fill", "stroke" -- not read by any code visible in this notebook; presumably
#                       plotting metadata (label anchor and colours) -- TODO confirm.
gates = {
    "Top": {
        "edges": [
            [0.15, 0.5],
            [0.6, 0.7],
            [0.8, 0.7],
            [0.8, 1.03],
            [0.15, 1.03],
        ],
        "label_position": {"x": 0.16, "y": 0.9},
        "fill": "#3A9AB244",
        "stroke": "#3A9AB2",
    },
    "Crypt": {
        "edges": [
            [0.15, 0.48],
            [0.6, 0.68],
            [0.8, 0.68],
            [0.8, 0.25],
            [0.2, 0],
            [0.15, 0],
        ],
        "label_position": {"x": 0.16, "y": 0.05},
        "fill": "#F11B0044",
        "stroke": "#F11B00",
    },
    "Muscularis": {
        "edges": [[0.22, 0], [0.8, 0.23], [6, 0.23], [6, 0], [0.22, 0]],
        "label_position": {"x": 0.6, "y": 0.05},
        "fill": "#BDC88155",
        "stroke": "#BDC881",
    },
}

In [None]:
# Custom biexponential transformation.
def transformation(x, a=0.1, b=0.1, c=0.5, d=2.5, f=4, w=1):
    """
    Biexponential transform: a * exp(b * (x - w)) - c * exp(-d * (x - w)) + f.

    `x` may be a scalar or any array-like; it is converted with `np.array` and the
    transform is applied element-wise.
    """
    shifted = np.array(x) - w
    rising = a * np.exp(b * shifted)
    decaying = c * np.exp(-d * shifted)
    return rising - decaying + f


def classify_cells(adata, gates, transformation=transformation):
    """
    Classify cells based on the IMAP gates.

    Every cell becomes a point (transformed epithelial distance, crypt-villi axis)
    and is spatially joined against the gate polygons.

    Side effects: `adata.obs` is modified in place. The column
    "epithelial_distance_transformed" is written and "gate" is reset to False.

    Parameters:
    - adata (anndata): The anndata object containing the cells to classify. Must
      provide "epithelial_distance" and "crypt_villi_axis" in `adata.obs`.
    - gates (dict): A dictionary containing the gates to classify the cells with.
      Each gate needs an "edges" list of [x, y] vertices.
    - transformation (function): A function to transform the x values of the gates
      and the epithelial distance of the cells.

    Returns:
    - result (geopandas dataframe): The left join of cells against gates; the gate
      of a cell is in the "index_right" column.
    """
    from shapely.geometry.polygon import Polygon
    import geopandas as gpd

    adata.obs["epithelial_distance_transformed"] = transformation(
        adata.obs["epithelial_distance"]
    )
    adata.obs["gate"] = False

    print("Creating polygons")
    # Only the x coordinate of every vertex is transformed, matching the cells,
    # whose x coordinate is the transformed epithelial distance.
    polygons = {
        name: Polygon(
            [[transformation(vertex[0])] + vertex[1:] for vertex in spec["edges"]]
        )
        for name, spec in gates.items()
    }
    polygons = gpd.GeoSeries(polygons)
    gpd_poly = gpd.GeoDataFrame({"gates": polygons}, geometry="gates")

    print("Creating cells")
    cells = gpd.GeoSeries.from_xy(
        adata.obs["epithelial_distance_transformed"], adata.obs["crypt_villi_axis"]
    )
    gpd_cells = gpd.GeoDataFrame({"cells": cells}, geometry="cells")

    print("Joining cells and polygons")
    # Left join: every cell is kept, including cells that fall into no gate.
    result = gpd.sjoin(
        gpd_cells,
        gpd_poly,
        how="left",
    )
    return result


# Assign every cell to a gate. The call also writes the columns
# "epithelial_distance_transformed" and "gate" into adata.obs.
# NOTE(review): classify_cells reads adata.obs["epithelial_distance"] and
# adata.obs["crypt_villi_axis"], which are not among the obs columns printed when
# the data was loaded in this notebook -- confirm this section applies to this dataset.
classification = classify_cells(adata, gates)
classification

In [None]:
# Store the gate of every cell: "index_right" is the column of the join result
# that refers to the matched gate polygon.
adata.obs["gate"] = classification["index_right"]

In [None]:
def make_name(gate, cell):
    """
    Build the combined subtype/gate label of one cell.

    Cells of subtype "Cd8_T-Cell_P14" are renamed after the gate they fall into;
    every other cell keeps its subtype name unchanged.
    """
    if cell != "Cd8_T-Cell_P14":
        return cell

    # Known gates and the label each one produces.
    gate_labels = (
        ("Top", "P14 top"),
        ("Crypt", "P14 crypt"),
        ("Muscularis", "P14 muscularis"),
    )
    for gate_name, label in gate_labels:
        if gate == gate_name:
            return label

    # Any other gate value (including a missing one).
    return "P14 undeterminded"


# Combined label per cell: gate-specific names for "Cd8_T-Cell_P14" cells, the
# plain subtype name for all other cells.
# NOTE(review): this reads adata.obs["Subtype"] (capital S), while the obs columns
# printed when the data was loaded contain 'subtype' -- confirm the column name.
adata.obs["Subtype_gate"] = [
    make_name(gate, cell) for gate, cell in zip(adata.obs["gate"], adata.obs["Subtype"])
]
# Drop the cells that could not be assigned to a gate. The string below must stay
# identical to the one returned by make_name.
adata = adata[~(adata.obs["Subtype_gate"] == "P14 undeterminded")]
# Categorical dtype: the cells below read .cat.categories of this column.
adata.obs["Subtype_gate"] = adata.obs["Subtype_gate"].astype("category")

Heatmap for locations

In [None]:
# heatmap: mean interaction matrix between all "Subtype_gate" groups over all batches
# NOTE(review): no spatial_key is passed, so the function default "X_spatial" is used;
# the obsm keys printed when the data was loaded are 'spatial' and 'spatial3d' -- confirm.
m = mean_interaction_matrix(adata, batches=all_batches, cluster_key="Subtype_gate")

In [None]:
# Groups whose interaction profiles are shown in the heat map.
population_of_intertest = ["P14 top", "P14 crypt", "P14 muscularis"]
# Position of each of these groups among the categories of "Subtype_gate" ...
positions = [
    list(adata.obs["Subtype_gate"].cat.categories).index(element)
    for element in population_of_intertest
]
# ... used to pick the matching rows of the interaction matrix.
m_p14 = m[positions]

In [None]:
# Scale each row from min to max.
# The matrix is transposed before and after scaling, so the scaler is applied
# along each row of m_p14.
scaler = MinMaxScaler()
normalized_array = scaler.fit_transform(m_p14.T).T

In [None]:
# Heat map of the row-scaled interaction profiles: one row per population of
# interest, one column per "Subtype_gate" category, drawn with the Zissou colormap.
fig, ax = plt.subplots(1, 1, figsize=(10, 1))
sns.heatmap(
    normalized_array,
    xticklabels=adata.obs["Subtype_gate"].cat.categories,
    yticklabels=population_of_intertest,
    ax=ax,
    cmap=colormap,
    linecolor="white",
    linewidths=0.5,
)