In [None]:
import os

import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
import matplotlib as mpl

# Font handling for vector exports: SVG text is written as text ('none')
# and PDF fonts are embedded as Type 42 (TrueType), so labels stay editable.
mpl.rcParams.update({
    'svg.fonttype': 'none',
    'pdf.fonttype': 42,
})

In [None]:
# Project root; the notebook reads from {wd}/data and writes to {wd}/objects
# and {wd}/figures. Set the SAGA_PROJECT_DIR environment variable to run the
# notebook on another machine; the original cluster path stays the default.
wd = os.environ.get(
    "SAGA_PROJECT_DIR",
    "/public/home/liwang/project/xu_lab/Nature_QiangShi_2025_CMs",
)

In [None]:
# Display the installed scanpy version as the cell output (provenance record).
sc.__version__

# Nature_QiangShi_2025_CMs

In [None]:
# Load the pan-cancer AnnData object fully into memory.
# NOTE(review): the file name suggests X holds raw counts — confirm.
PanCancer_AnnData = sc.read_h5ad(f"{wd}/data/PanCancer_igt_s9_fine_counts.h5ad")

In [None]:
# Show the AnnData summary (dimensions and annotation keys) as the cell output.
PanCancer_AnnData

In [None]:
# Normalization (in place on PanCancer_AnnData).
# NOTE: this cell is not idempotent. Re-running it on the same object would
# store the already-normalized X under 'counts' and normalize a second time;
# reload the data before re-running.
# Keep a copy of the current X under layers['counts'] before X is overwritten.
PanCancer_AnnData.layers['counts'] = PanCancer_AnnData.X.copy()
# Scale every cell to a total of 1e4, then apply log(1 + x).
sc.pp.normalize_total(PanCancer_AnnData, target_sum=1e4)
sc.pp.log1p(PanCancer_AnnData)

In [None]:
# Quick sanity check after normalization: largest X value among rows 1-9.
PanCancer_AnnData[1:10].X.max()

In [None]:
# Show the AnnData summary again; the 'counts' layer added above should now be listed.
PanCancer_AnnData

In [None]:
# Gene symbols used as the SAGA complex gene set throughout the notebook.
# Rows keep the author's original grouping (presumably SAGA modules — confirm).
SAGA_complex = (
    "TRRAP SGF29 KAT2A KAT2B TADA2B TADA3 "
    "SUPT20H SUPT3H SUPT7L TADA1 TAF5L TAF6L TAF9B TAF10 TAF12 "
    "SF3B3 SF3B5 "
    "ATXN7 ATXN7L3 ENY2 USP22"
).split()

In [None]:
# One SAGA dot plot per cancer type, cells grouped by major cluster
# (expression shown without per-gene scaling).
cancer_type_labels = PanCancer_AnnData.obs['cancerType']
for CancerType in cancer_type_labels.cat.categories:
    # Boolean-mask view of the cells belonging to this cancer type.
    CancerType_AnnData = PanCancer_AnnData[cancer_type_labels == CancerType]
    sc.pl.dotplot(
        CancerType_AnnData,
        var_names=SAGA_complex,
        groupby='majorCluster',
        title=CancerType,
    )

In [None]:
# Same per-cancer-type dot plots, with standard_scale='var' so each gene is
# rescaled across the groups of its own plot.
cancer_type_labels = PanCancer_AnnData.obs['cancerType']
for CancerType in cancer_type_labels.cat.categories:
    # Boolean-mask view of the cells belonging to this cancer type.
    CancerType_AnnData = PanCancer_AnnData[cancer_type_labels == CancerType]
    sc.pl.dotplot(
        CancerType_AnnData,
        var_names=SAGA_complex,
        groupby='majorCluster',
        standard_scale='var',
        title=CancerType,
    )

In [None]:
# UMAP of the CRC cells, coloured by major cluster.
crc_cells = PanCancer_AnnData.obs['cancerType'] == 'CRC'
sc.pl.umap(PanCancer_AnnData[crc_cells], color='majorCluster')

In [None]:
# UMAP of the CRC cells, coloured by KAT2A expression.
crc_cells = PanCancer_AnnData.obs['cancerType'] == 'CRC'
sc.pl.umap(PanCancer_AnnData[crc_cells], color='KAT2A')

In [None]:
# UMAP of the CRC cells, coloured by SUPT20H expression.
crc_cells = PanCancer_AnnData.obs['cancerType'] == 'CRC'
sc.pl.umap(PanCancer_AnnData[crc_cells], color='SUPT20H')

In [None]:
# UMAP of the CRC cells, coloured by SGF29 expression.
crc_cells = PanCancer_AnnData.obs['cancerType'] == 'CRC'
sc.pl.umap(PanCancer_AnnData[crc_cells], color='SGF29')

In [None]:
# SAGA complex score.
# For every cancer type except "LYM": z-score each SAGA gene across the cells
# of that cancer type, average the z-scores per cell, then average the
# per-cell scores within each major cluster.
# Result: Complex_Score_Dict maps cancer type -> DataFrame with columns
# cancerType, majorCluster, SAGA_Zscore (one row per observed cluster).
Complex_Score_Dict = {}

# Gene subsetting does not depend on the cancer type, so take the SAGA-gene
# view once. Working on views also avoids deep-copying the full expression
# matrix (all genes and layers) for every cancer type.
SAGA_AnnData = PanCancer_AnnData[:, SAGA_complex]

for CancerType in PanCancer_AnnData.obs['cancerType'].cat.categories:
    if CancerType == "LYM":  # remove hematologic cancer
        continue

    # Subset to the cells of this cancer type (view on the SAGA-gene view).
    CancerType_SAGA_AnnData = SAGA_AnnData[PanCancer_AnnData.obs['cancerType'] == CancerType]

    # Per-cell SAGA z-score: column-wise (per-gene) z-score, then row mean.
    # A gene with zero variance yields NaN, which the row mean skips.
    CancerType_SAGA_Zscore = (CancerType_SAGA_AnnData
                              .to_df()
                              .transform(func=lambda x: (x - x.mean()) / x.std(), axis=0)
                              .mean(axis=1)
                              .rename("SAGA_Zscore"))

    # Attach the scores to the cell metadata positionally (rows of to_df()
    # and obs are in the same cell order), then average per major cluster.
    df = (CancerType_SAGA_AnnData.obs[["cancerType", "majorCluster"]]
          .assign(SAGA_Zscore=CancerType_SAGA_Zscore.to_numpy())
          .groupby(['cancerType', 'majorCluster'], observed=True)['SAGA_Zscore']
          .mean()
          .reset_index())

    Complex_Score_Dict[CancerType] = df

In [None]:
# Stack the per-cancer-type score tables (solid tumors) into one frame.
PanCancer_SAGA_ZScore_df = pd.concat(Complex_Score_Dict.values(), ignore_index=True)

# Min-max rescale the cluster scores within each cancer type:
# (score - group min) / (group max - group min).
scores_by_type = PanCancer_SAGA_ZScore_df.groupby('cancerType', observed=True)['SAGA_Zscore']
type_min = scores_by_type.transform('min')
type_max = scores_by_type.transform('max')
PanCancer_SAGA_ZScore_df['SAGA_Zscore_minmax'] = (
    (PanCancer_SAGA_ZScore_df['SAGA_Zscore'] - type_min) / (type_max - type_min)
)

# Persist the table; the row index is written under the column name 'index'.
PanCancer_SAGA_ZScore_df.to_csv(f"{wd}/objects/PanCancer_SAGA_ZScore_df.csv", index_label = 'index')

In [None]:
# Reload the score table from the CSV path written by the previous cell,
# restoring the 'index' column as the row index.
PanCancer_SAGA_ZScore_df = pd.read_csv(f"{wd}/objects/PanCancer_SAGA_ZScore_df.csv", index_col = 'index')

In [None]:
# Preview the first rows of the reloaded score table.
PanCancer_SAGA_ZScore_df.head()

In [None]:

# Bubble plot of the min-max-scaled SAGA score: one dot per
# (cancer type, major cluster) pair; dot size and colour both encode the score.
df = PanCancer_SAGA_ZScore_df.copy()

# Axis ordering. CancerType_order / MajorCluster_order are not defined above
# this cell in the notebook as reviewed, so fall back to alphabetical order
# when they are missing instead of raising NameError on a fresh kernel.
try:
    cancer_type_order = list(CancerType_order)
except NameError:
    cancer_type_order = sorted(df["cancerType"].dropna().unique())
try:
    major_cluster_order = list(MajorCluster_order)
except NameError:
    major_cluster_order = sorted(df["majorCluster"].dropna().unique())

# Values absent from the chosen order become NaN in the categorical columns.
df["cancerType"] = pd.Categorical(df["cancerType"], categories=cancer_type_order, ordered=True)
df["majorCluster"] = pd.Categorical(df["majorCluster"], categories=major_cluster_order, ordered=True)

# plot
fig, ax = plt.subplots(figsize=(14, 4))
sns.scatterplot(
    data=df,
    x="cancerType",
    y="majorCluster",
    size="SAGA_Zscore_minmax",
    hue="SAGA_Zscore_minmax",
    sizes=(20, 200),
    palette="Reds",
    edgecolor="none",
    ax=ax,
)

# Tick and axis labels. x carries cancerType and y carries majorCluster, so
# the labels must read CancerType (x) and MajorCluster (y).
plt.xticks(rotation=90, fontsize=10)
plt.yticks(fontsize=10)
ax.set_xlabel("CancerType", fontsize=12, weight="bold")
ax.set_ylabel("MajorCluster", fontsize=12, weight="bold")

# Replace seaborn's combined hue/size legend with an empty one; the colour
# scale is shown by the colorbar below instead.
ax.legend([], [], frameon=False)

# Colorbar spanning the observed score range, using the same "Reds" colormap.
norm = plt.Normalize(df["SAGA_Zscore_minmax"].min(), df["SAGA_Zscore_minmax"].max())
sm = plt.cm.ScalarMappable(cmap="Reds", norm=norm)
sm.set_array([])
cbar = ax.figure.colorbar(sm, ax=ax)
cbar.set_label("SAGA_Zscore_minmax", fontsize=12)

plt.tight_layout()

# Save before plt.show(): some backends release the figure once it is shown.
fig.savefig(f"{wd}/figures/PanCancer_SAGA_Complex_ZScore_Dotplot2.pdf")
plt.show()

In [None]:
# Re-open the pan-cancer file in backed read-only mode and pull only the CRC
# cells into memory. Rebinding PanCancer_AnnData replaces the in-memory,
# normalized object used by the earlier cells.
PanCancer_AnnData = sc.read_h5ad(
    f"{wd}/data/PanCancer_igt_s9_fine_counts.h5ad",
    backed='r',
)
crc_cells = PanCancer_AnnData.obs['cancerType'] == 'CRC'
CancerType_AnnData = PanCancer_AnnData[crc_cells].to_memory()

# Cell output: whether the CRC subset is still file-backed.
CancerType_AnnData.isbacked

In [None]:
# Normalization (in place on the CRC subset).
# NOTE: this cell is not idempotent. Re-running it on the same object would
# store the already-normalized X under 'counts' and normalize a second time.
# Keep a copy of the current X under layers['counts'] before X is overwritten.
CancerType_AnnData.layers['counts'] = CancerType_AnnData.X.copy()
# Scale every cell to a total of 1e4, then apply log(1 + x).
sc.pp.normalize_total(CancerType_AnnData, target_sum=1e4)
sc.pp.log1p(CancerType_AnnData)

In [None]:
# Gene symbols used as the SAGA complex gene set (same list as defined earlier
# in the notebook). Rows keep the author's original grouping.
SAGA_complex = (
    "TRRAP SGF29 KAT2A KAT2B TADA2B TADA3 "
    "SUPT20H SUPT3H SUPT7L TADA1 TAF5L TAF6L TAF9B TAF10 TAF12 "
    "SF3B3 SF3B5 "
    "ATXN7 ATXN7L3 ENY2 USP22"
).split()


# Display order of the major clusters in the CRC dot plot.
MajorCluster_order = "Epithelial Endothelial Stromal ILC CD8T CD4T Myeloid B".split()

In [None]:
# CRC dot plot of the SAGA genes by major cluster, per-gene scaled, saved as PDF.
# With return_fig=True scanpy returns a DotPlot object that builds its own
# figure, so no plt.figure(...) is created here: the previous one was
# immediately shadowed, never drawn on, and rendered as an empty figure.
# To control the plot size, pass figsize=... to sc.pl.dotplot.
# `fig` is therefore a scanpy DotPlot, not a matplotlib Figure.
fig = sc.pl.dotplot(
    CancerType_AnnData,
    var_names=SAGA_complex,
    groupby='majorCluster',
    categories_order=MajorCluster_order,
    standard_scale='var',
    title='CRC',
    return_fig=True,
)
fig.savefig(f"{wd}/figures/CRC_SAGA_Complex_Exp_Dotplot.pdf")