In [2]:
import numpy as np
import anndata as ad
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
import anndata
import seaborn as sns
import matplotlib.colors as mcolors
from scipy.stats import zscore
import scvelo as scv
import bbknn

In [180]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import OrderedDict

# Spatial expression maps: one row per gene set, one column per slice.
# Requires `l23_PC1_shifted` (AnnData) and `ordered_genesets` (name -> gene list)
# to already exist in the kernel.

# --- Data preparation ---
# NOTE: `adata` is an alias of `l23_PC1_shifted`, so the assignment below
# overwrites X on that shared object (NaN -> 0, +/-inf -> large finite values).
adata = l23_PC1_shifted
adata.X = np.nan_to_num(adata.X)

genesets = ordered_genesets
slice_categories = ['NR1_1', 'NR2_1', 'DR1_1', 'DR2_1']

# One AnnData view per slice, reused by both the filter and the plotting pass.
slice_views = {
    slice_name: adata[adata.obs['slice'] == slice_name]
    for slice_name in slice_categories
}

# --- 1. Filter gene sets ---
# A gene set is kept only if every slice has cells and its per-cell mean
# expression, clipped to the 5th-95th percentile, is not all zero.
# The loop variable is called `slice_name` (not `sc`) so the `scanpy as sc`
# alias from the import cell is not overwritten.
valid_genesets = OrderedDict()
clipped_scores = {}  # geneset name -> {slice name -> clipped per-cell mean expression}
for name, geneset in genesets.items():
    if not geneset:
        continue

    per_slice = {}
    for slice_name in slice_categories:
        view = slice_views[slice_name]
        if view.n_obs == 0:
            break

        # np.asarray(...).ravel() yields a 1-D array whether X.mean returns
        # an ndarray or a 2-D matrix.
        g = np.asarray(view[:, geneset].X.mean(axis=1)).ravel()
        vmin, vmax = np.percentile(g, [5, 95])
        g_clipped = np.clip(g, vmin, vmax)
        if np.all(g_clipped == 0):
            # No signal at all on this slice: drop the whole gene set.
            break
        per_slice[slice_name] = g_clipped
    else:
        # Reached only when no slice triggered a `break` above.
        valid_genesets[name] = geneset
        clipped_scores[name] = per_slice

# --- 2. Build the canvas from the filtered gene sets ---
n_rows = len(valid_genesets)
n_cols = len(slice_categories)
if n_rows == 0:
    raise ValueError("No gene set passed the filter; nothing to plot.")

# squeeze=False keeps `axs` two-dimensional even when only one gene set remains,
# so axs[row_idx, col_idx] is always valid. Each panel is 6 x 2.5 inches.
fig, axs = plt.subplots(
    n_rows, n_cols, figsize=(6 * n_cols, 2.5 * n_rows), squeeze=False
)

# --- 3. Draw ---
for row_idx, geneset_name in enumerate(valid_genesets):
    for col_idx, slice_name in enumerate(slice_categories):
        ax = axs[row_idx, col_idx]
        g_clipped = clipped_scores[geneset_name][slice_name]

        # Min-max normalise to [0, 1]; a flat slice is drawn as all zeros.
        g_min, g_max = g_clipped.min(), g_clipped.max()
        if g_max > g_min:
            g_norm = (g_clipped - g_min) / (g_max - g_min)
        else:
            g_norm = np.zeros_like(g_clipped)

        coords = slice_views[slice_name].obsm['stereo_rotated']
        ax.scatter(coords[:, 0], coords[:, 1], c=g_norm, cmap='seismic', s=20, vmin=0, vmax=1)
        ax.axis('off')

fig.tight_layout()
output_path = "F:/spatial/RSP/figures/RSP_l23_TF_adjusted_g.pdf"
fig.savefig(output_path, format='pdf', bbox_inches='tight')
plt.close(fig)
print(f"PDF saved as {output_path}")

PDF saved as F:/spatial/RSP/figures/RSP_l23_TF_adjusted_g.pdf


In [179]:
from collections import OrderedDict

# Layer profiles: for every gene set, one panel showing the smoothed, rescaled
# expression along `digital_layer` for the pooled NR slices and the pooled DR slices.
# Requires `l23_PC1_shifted` (AnnData) and `ordered_genesets` (name -> gene list)
# to already exist in the kernel.
#
# NOTE(review): `lowess` is called below but is not imported in this cell or in the
# visible import cell, so it must already be defined in the kernel. The call
# signature (frac=, return_sorted=) looks like
# statsmodels.nonparametric.smoothers_lowess.lowess - TODO confirm and import it
# explicitly in the import cell.

# NOTE: `adata` is an alias of `l23_PC1_shifted`, so the assignment below
# overwrites X on that shared object (NaN -> 0, +/-inf -> large finite values).
adata = l23_PC1_shifted
adata.X = np.nan_to_num(adata.X)
genesets = ordered_genesets
slice_categories = ['NR1_1', 'NR2_1', 'DR1_1', 'DR2_1']

# Which slices are pooled into each plotted condition, and the line colours.
condition_slices = OrderedDict([
    ('NR', ['NR1_1', 'NR2_1']),
    ('DR', ['DR1_1', 'DR2_1']),
])
NR_color = '#E7241D'
DR_color = '#2372A9'
condition_colors = {'NR': NR_color, 'DR': DR_color}

smooth_frac = 0.66   # larger -> smoother curve
bin_width = 0.5      # width of the digital_layer bins
band_scale = 0.05    # shaded band is +/- band_scale * smoothed std

# One AnnData view per slice, reused by the filter and the plotting pass.
slice_views = {
    slice_name: adata[adata.obs['slice'] == slice_name]
    for slice_name in slice_categories
}

# --- Filter: drop gene sets that, on any slice, have no cells or whose clipped
# --- expression is entirely zero.
# The loop variable is called `slice_name` (not `sc`) so the `scanpy as sc`
# alias from the import cell is not overwritten.
valid_genesets = OrderedDict()
clipped_scores = {}  # geneset name -> {slice name -> (clipped scores, vmax - vmin)}
for name, geneset in genesets.items():
    if not geneset:
        continue
    per_slice = OrderedDict()
    for slice_name in slice_categories:
        view = slice_views[slice_name]
        if view.n_obs == 0:
            break
        # Per-cell mean expression over the gene set, clipped to the 5th-95th percentile.
        # np.asarray(...).ravel() yields a 1-D array whether X.mean returns
        # an ndarray or a 2-D matrix.
        g = np.asarray(view[:, geneset].X.mean(axis=1)).ravel()
        vmin, vmax = np.percentile(g, [5, 95])
        g_clipped = np.clip(g, vmin, vmax)
        if np.all(g_clipped == 0):
            break
        per_slice[slice_name] = (g_clipped, vmax - vmin)
    else:
        # Reached only when no slice triggered a `break` above.
        valid_genesets[name] = geneset
        clipped_scores[name] = per_slice

# Plot only the gene sets that survived the filter.
genesets = valid_genesets

# --- Plotting ---
n_rows = len(genesets)
if n_rows == 0:
    raise ValueError("No gene set passed the filter; nothing to plot.")

# squeeze=False always returns a 2-D array, so a single surviving gene set no
# longer breaks `axs[row_idx]`; column 0 restores the 1-D layout used below.
fig, axs_grid = plt.subplots(
    n_rows, 1, figsize=(4, 4 * n_rows), constrained_layout=True, squeeze=False
)
axs = axs_grid[:, 0]

for row_idx, geneset_name in enumerate(genesets):
    ax = axs[row_idx]
    per_slice = clipped_scores[geneset_name]

    # Step 1: min-max normalise every slice and remember its value range.
    slice_g_values = {}
    slice_gmin_gmax = {}
    vmax_vmin_diffs = {}
    for slice_category, (g_clipped, percentile_span) in per_slice.items():
        g_min, g_max = np.min(g_clipped), np.max(g_clipped)
        if g_max > g_min:
            g_norm = (g_clipped - g_min) / (g_max - g_min)
        else:
            # Flat slice: avoid 0/0 (same convention as the scatter-plot cell).
            g_norm = np.zeros_like(g_clipped)
        layer = slice_views[slice_category].obs['digital_layer']
        slice_g_values[slice_category] = (g_norm, layer)
        slice_gmin_gmax[slice_category] = (g_min, g_max)
        vmax_vmin_diffs[slice_category] = percentile_span

    # Step 2: the reference slice is the one with the widest 5-95 percentile span.
    base_slice = max(vmax_vmin_diffs, key=vmax_vmin_diffs.get)
    base_gmin, base_gmax = slice_gmin_gmax[base_slice]
    base_span = base_gmax - base_gmin

    # Step 3: rescale every slice around 0.5 in proportion to its range relative
    # to the reference slice's range.
    adjusted_g_values = {}
    for slice_category, (g_norm, layer) in slice_g_values.items():
        g_min, g_max = slice_gmin_gmax[slice_category]
        if base_span > 0:
            g_adjusted = 0.5 + (g_norm - 0.5) * (g_max - g_min) / base_span
        else:
            # Reference slice is flat, so there is no range to scale by:
            # put every cell on the 0.5 midline instead of producing NaN.
            g_adjusted = np.full_like(g_norm, 0.5)
        adjusted_g_values[slice_category] = (g_adjusted, layer)

    # Steps 4-5: pool the slices of each condition, bin along digital_layer,
    # smooth the binned statistics and draw them.
    for label, member_slices in condition_slices.items():
        members = [adjusted_g_values[s] for s in member_slices if s in adjusted_g_values]
        if not members:
            # No slice available for this condition; nothing to draw.
            continue
        g_values = np.concatenate([g for g, _ in members])
        x_values = np.concatenate([x for _, x in members])
        color = condition_colors[label]

        # NOTE(review): np.arange excludes the stop value and the last bin is
        # half-open, so cells at or above the final edge (including the maximum
        # layer value) are not binned - confirm this is intended.
        bins = np.arange(x_values.min(), x_values.max(), bin_width)
        if bins.size < 2:
            # Layer range narrower than one bin: no bin centres to plot.
            continue
        bin_centers = 0.5 * (bins[1:] + bins[:-1])

        # Per-bin mean and standard deviation. Empty bins are set to NaN
        # explicitly, which is the value np.mean/np.std give for an empty
        # selection, but without the RuntimeWarning.
        binned_means = np.full(bin_centers.size, np.nan)
        binned_std = np.full(bin_centers.size, np.nan)
        for i in range(bin_centers.size):
            in_bin = (x_values >= bins[i]) & (x_values < bins[i + 1])
            if in_bin.any():
                binned_means[i] = np.mean(g_values[in_bin])
                binned_std[i] = np.std(g_values[in_bin])

        # return_sorted=False is passed so the result lines up with bin_centers.
        means_smooth = lowess(
            binned_means,
            bin_centers,
            frac=smooth_frac,
            return_sorted=False
        )
        std_smooth = lowess(
            binned_std,
            bin_centers,
            frac=smooth_frac,
            return_sorted=False
        )

        # Smoothed mean curve plus a shaded band of +/- band_scale * smoothed std.
        ax.plot(bin_centers, means_smooth, color=color, label=label)
        ax.fill_between(
            bin_centers,
            means_smooth - std_smooth * band_scale,
            means_smooth + std_smooth * band_scale,
            color=color, alpha=0.1
        )

    # Panel title, axis labels and legend.
    ax.set_title(f"{geneset_name}", fontsize=14)
    ax.set_xlabel('Digital Layer', fontsize=12)
    ax.set_ylabel('Normalized Gene Expression', fontsize=12)
    ax.legend(title="Conditions", fontsize=10, loc="upper right")

# Figure-level title.
fig.suptitle("Line Plots of Gene Expression across Conditions", fontsize=16, y=1.02)
output_path = "F:/spatial/RSP/figures/RSP_l23_TF_adjusted_g_lineplot.pdf"
fig.savefig(output_path, format='pdf', bbox_inches='tight')
plt.close(fig)
print(f"PDF saved as {output_path}")

PDF saved as F:/spatial/RSP/figures/RSP_l23_TF_adjusted_g_lineplot.pdf
