In [None]:
import numpy as np
import pandas as pd
import pathlib as pl
import scanpy as sc
import seaborn as sns

In [None]:
def score_sig(adata, signature, score_name, target_sum=10000):
    """Score one gene signature on normalised, log-transformed expression.

    ``adata.X`` is snapshotted into ``adata.layers["counts"]``, normalised with
    ``sc.pp.normalize_total`` and ``sc.pp.log1p``, scored with
    ``sc.tl.score_genes`` and then restored from the snapshot, so the function
    can be called once per signature on the same object.

    Parameters
    ----------
    adata
        Object handed to the scanpy calls; it is modified in place.
        Presumably ``adata.X`` holds raw counts on entry -- confirm for the
        dataset being scored.
    signature
        Gene list forwarded to ``sc.tl.score_genes``.
    score_name
        Name forwarded to ``sc.tl.score_genes`` for the resulting score.
    target_sum
        Per-cell total forwarded to ``sc.pp.normalize_total`` (default 10000).

    Returns
    -------
    The same ``adata`` object that was passed in.
    """
    # NOTE: any existing "counts" layer is overwritten by this snapshot.
    adata.layers["counts"] = adata.X.copy()
    try:
        sc.pp.normalize_total(adata, target_sum=target_sum)
        sc.pp.log1p(adata)
        sc.tl.score_genes(adata, gene_list=signature, score_name=score_name)
    finally:
        # Always undo the transformation, even if scoring raised; otherwise the
        # next call would normalise and log-transform already transformed data.
        adata.X = adata.layers["counts"]
        # Drop the "log1p" entry if one was recorded (presumably so later
        # log1p calls do not treat the restored data as already logged);
        # pop() tolerates the key being absent.
        adata.uns.pop("log1p", None)

    return adata

In [None]:
def _read_signature_dir(basedir, suffix):
    """Read every visible file in ``basedir`` with ``pd.read_csv``, keyed ``<file stem><suffix>``."""
    tables = {}
    # Sorted so the key order (and the column order downstream) is reproducible.
    for f in sorted(basedir.iterdir()):
        # Skip sub-directories and hidden files (e.g. notebook checkpoints).
        if not f.is_file() or f.name.startswith("."):
            continue
        tables[f.stem + suffix] = pd.read_csv(f, index_col=0)
    return tables


def _split_colour(name):
    """Row colour for a score name, chosen by the suffix added while loading."""
    if name.endswith("_split1"):
        return "green"
    if name.endswith("_split2"):
        return "tab:orange"
    return "purple"


def get_clustermap_stability(basedir_1, basedir_2, basedir_full, data_path, threshold=0.65, n_genes=50):
    """Cluster the correlations between signature scores from two splits and the full data.

    Every file in the three directories is read as a signature table. The first
    ``n_genes`` entries of each table are scored on the dataset at ``data_path``
    with ``score_sig``, the scores are correlated across observations, and the
    correlation matrix is drawn with ``sns.clustermap``.

    Parameters
    ----------
    basedir_1, basedir_2, basedir_full
        ``pathlib.Path`` directories with the signature files of split 1,
        split 2 and the full dataset. Score names are the file stem plus
        ``_split1``, ``_split2`` or ``_full``.
    data_path
        File passed to ``sc.read_h5ad``.
    threshold
        Correlations below this value are left unannotated (default 0.65).
    n_genes
        Number of leading entries of each signature table that are scored
        (default 50).

    Returns
    -------
    The object returned by ``sns.clustermap``.
    """
    metasignatures = {}
    for basedir, suffix in ((basedir_1, "_split1"), (basedir_2, "_split2"), (basedir_full, "_full")):
        metasignatures.update(_read_signature_dir(basedir, suffix))

    adata = sc.read_h5ad(data_path)
    for name, table in metasignatures.items():
        adata = score_sig(adata, table.values.ravel()[:n_genes], score_name=name)

    df_corr = adata.obs[list(metasignatures)].corr()

    # One colour per row: green = split 1, orange = split 2, purple = full data.
    labels = df_corr.index.to_series().map(_split_colour).to_frame("Split")

    # Show the rounded value only where the correlation reaches the threshold;
    # every other cell (including NaN) gets an empty annotation.
    annot = df_corr.round(2).astype(str).where(df_corr >= threshold, "")

    g = sns.clustermap(df_corr, cmap="vlag", row_colors=labels, annot=annot, fmt="")
    return g

In [None]:
# Inputs for the split-stability analysis (placeholder paths).
# The three directories are iterated file by file in get_clustermap_stability
# and every file found is read with pd.read_csv.
basedir_1 = pl.Path("/path/to/metasignatures/split1")
basedir_2 = pl.Path("/path/to/metasignatures/split2")
basedir_full = pl.Path("/path/to/metasignatures/full")
# Dataset passed to sc.read_h5ad.
# NOTE(review): relative, unlike the absolute placeholders above -- confirm.
data_path = pl.Path("path/to/data")

In [None]:
# Build the stability clustermap from the split1 / split2 / full signature
# folders (the variable name suggests the esophageal dataset).
g_esophag = get_clustermap_stability(
    basedir_1=basedir_1,
    basedir_2=basedir_2,
    basedir_full=basedir_full,
    data_path=data_path,
)

In [None]:
# savefig does not create missing folders, so make sure "figures/" exists
# before writing (otherwise a fresh checkout fails here).
pl.Path("figures").mkdir(parents=True, exist_ok=True)
g_esophag.figure.savefig("figures/stability_esophag.svg", bbox_inches="tight")

# Comparing stability across datasets

In [None]:
# Inputs for the CRC vs. CRC-iCMS comparison (placeholder paths).
# basedir_1 / basedir_2: signature directories, read file by file with
# pd.read_csv in the next cell.
basedir_1 = pl.Path("/path/to/signatures/crc")
basedir_2 = pl.Path("/path/to/signatures/crc_icms")
# data_path_1 / data_path_2: datasets loaded with sc.read_h5ad; each one is
# scored with the signatures of both directories.
data_path_1 = pl.Path("/path/to/crc/data")
data_path_2 = pl.Path("/path/to/crc_icms/data")

In [None]:
# Collect the signature tables of both directories, keyed "<file stem>_<tag>".
# The listings are sorted so the column order is reproducible across runs.
metasignatures = {}
for f in sorted(basedir_1.iterdir()):
    name = f.stem + "_crc"
    metasignatures[name] = pd.read_csv(f, index_col=0)
for f in sorted(basedir_2.iterdir()):
    name = f.stem + "_crc_icms"
    metasignatures[name] = pd.read_csv(f, index_col=0)

# Score the first 50 entries of every signature table on the first dataset.
adata = sc.read_h5ad(data_path_1)
for sig in metasignatures:
    adata = score_sig(adata, metasignatures[sig].values.ravel()[:50], score_name=sig)

# Correlate the scores across observations.
df_corr = adata.obs[list(metasignatures.keys())].corr()

# Row colours: green for "_crc_icms" signatures, orange for "_crc" ones.
# The suffix added above is tested, so file stems cannot flip the colour.
labels = df_corr.index.to_frame()
labels.columns = ["Dataset"]
labels["Dataset"] = labels["Dataset"].apply(lambda x: "green" if x.endswith("_crc_icms") else "tab:orange")

# Annotate only correlations of at least 0.65 (rounded to two decimals);
# all other cells, including NaN, get an empty string.
annot = df_corr.round(2).astype(str).where(df_corr >= 0.65, "")

g1 = sns.clustermap(df_corr, cmap="vlag", row_colors=labels, annot=annot, fmt="")

In [None]:
# savefig does not create missing folders, so make sure "figures/" exists
# before writing.
pl.Path("figures").mkdir(parents=True, exist_ok=True)
g1.figure.savefig("figures/stability_crc_vs_icms_scored_crc.svg", bbox_inches="tight")

In [None]:
# Repeat the scoring on the second dataset, reusing the signature tables
# collected in `metasignatures`.
adata = sc.read_h5ad(data_path_2)
for sig in metasignatures:
    adata = score_sig(adata, metasignatures[sig].values.ravel()[:50], score_name=sig)

# Correlate the scores across observations.
df_corr = adata.obs[list(metasignatures.keys())].corr()

# Row colours: green for "_crc_icms" signatures, orange otherwise. The name
# suffix is tested, so file stems cannot flip the colour.
labels = df_corr.index.to_frame()
labels.columns = ["Dataset"]
labels["Dataset"] = labels["Dataset"].apply(lambda x: "green" if x.endswith("_crc_icms") else "tab:orange")

# Annotate only correlations of at least 0.65 (rounded to two decimals);
# all other cells, including NaN, get an empty string.
annot = df_corr.round(2).astype(str).where(df_corr >= 0.65, "")

g2 = sns.clustermap(df_corr, cmap="vlag", row_colors=labels, annot=annot, fmt="")

In [None]:
# savefig does not create missing folders, so make sure "figures/" exists
# before writing.
pl.Path("figures").mkdir(parents=True, exist_ok=True)
g2.figure.savefig("figures/stability_crc_vs_icms_scored_icms.svg", bbox_inches="tight")

In [None]:
# Inputs for the GBM vs. HGG comparison (placeholder paths).
# basedir_1 / basedir_2: signature directories, read file by file with
# pd.read_csv in the next cell.
basedir_1 = pl.Path("/path/to/gbm/signatures")
basedir_2 = pl.Path("/path/to/hgg/signatures")
# Datasets loaded with sc.read_h5ad. Note the crossed numbering: basedir_1
# holds the GBM signatures while data_path_1 is the HGG dataset (and the
# reverse for *_2); the figure file names below follow the dataset.
data_path_1 = pl.Path("/path/to/hgg/data")
data_path_2 = pl.Path("/path/to/gbm/data")

In [None]:
# Collect the signature tables of both directories, keyed "<file stem>_<tag>".
# The listings are sorted so the column order is reproducible across runs.
metasignatures = {}
for f in sorted(basedir_1.iterdir()):
    name = f.stem + "_gbm"
    metasignatures[name] = pd.read_csv(f, index_col=0)
for f in sorted(basedir_2.iterdir()):
    name = f.stem + "_hgg"
    metasignatures[name] = pd.read_csv(f, index_col=0)

# Score the first 50 entries of every signature table on the first dataset.
adata = sc.read_h5ad(data_path_1)
for sig in metasignatures:
    adata = score_sig(adata, metasignatures[sig].values.ravel()[:50], score_name=sig)

# Correlate the scores across observations.
df_corr = adata.obs[list(metasignatures.keys())].corr()

# Row colours: green for "_gbm" signatures, orange for "_hgg" ones. The
# suffix added above is tested, so file stems cannot flip the colour.
labels = df_corr.index.to_frame()
labels.columns = ["Dataset"]
labels["Dataset"] = labels["Dataset"].apply(lambda x: "green" if x.endswith("_gbm") else "tab:orange")

# Annotate only correlations of at least 0.65 (rounded to two decimals);
# all other cells, including NaN, get an empty string.
annot = df_corr.round(2).astype(str).where(df_corr >= 0.65, "")

g1 = sns.clustermap(df_corr, cmap="vlag", row_colors=labels, annot=annot, fmt="")

In [None]:
# savefig does not create missing folders, so make sure "figures/" exists
# before writing.
pl.Path("figures").mkdir(parents=True, exist_ok=True)
g1.figure.savefig("figures/stability_gbm_vs_hgg_scored_hgg.svg", bbox_inches="tight")

In [None]:
# Repeat the scoring on the second dataset, reusing the signature tables
# collected in `metasignatures`.
adata = sc.read_h5ad(data_path_2)
for sig in metasignatures:
    adata = score_sig(adata, metasignatures[sig].values.ravel()[:50], score_name=sig)

# Correlate the scores across observations.
df_corr = adata.obs[list(metasignatures.keys())].corr()

# Row colours: green for "_gbm" signatures, orange otherwise. The name
# suffix is tested, so file stems cannot flip the colour.
labels = df_corr.index.to_frame()
labels.columns = ["Dataset"]
labels["Dataset"] = labels["Dataset"].apply(lambda x: "green" if x.endswith("_gbm") else "tab:orange")

# Annotate only correlations of at least 0.65 (rounded to two decimals);
# all other cells, including NaN, get an empty string.
annot = df_corr.round(2).astype(str).where(df_corr >= 0.65, "")

g2 = sns.clustermap(df_corr, cmap="vlag", row_colors=labels, annot=annot, fmt="")

In [None]:
# savefig does not create missing folders, so make sure "figures/" exists
# before writing.
pl.Path("figures").mkdir(parents=True, exist_ok=True)
g2.figure.savefig("figures/stability_gbm_vs_hgg_scored_gbm.svg", bbox_inches="tight")

# Comparing stability across datasets - Neftel

In [None]:
# Inputs for the Neftel comparison on the HGG dataset.
# basedir_1: directory whose files are read with pd.read_csv and tagged "_hgg".
# NOTE(review): this placeholder lacks the "/path/to" prefix used by the other
# paths in this notebook -- confirm.
basedir_1 = pl.Path("/neftel/results/hgg")
# data_path_1: dataset loaded with sc.read_h5ad.
data_path_1 = pl.Path("/path/to/hgg/data")

In [None]:
def rename_gbm_score(adata):
    """Collapse and rename the GBM state score columns of ``adata.obs`` in place.

    Two new columns are added, each the row-wise maximum of a pair of existing
    score columns: ``MESlike`` (from the two mesenchymal scores) and
    ``NPClike`` (from the two neural-precursor scores). The
    oligodendrocytic-precursor and astrocyte score columns are then renamed to
    ``OPClike`` and ``AClike``.

    Returns the same ``adata`` object.
    """
    # New column -> pair of existing columns whose row-wise maximum it holds.
    paired_scores = {
        "MESlike": ["mesenchymal1_score", "mesenchymal2_score"],
        "NPClike": ["neural_precursor1_score", "neural_precursor2_score"],
    }
    for state, score_columns in paired_scores.items():
        adata.obs[state] = adata.obs[score_columns].max(axis=1)

    # Existing single-score columns -> state-style names.
    single_scores = {
        "oligodendrocytic_precursor_score": "OPClike",
        "astrocyte_score": "AClike",
    }
    adata.obs.rename(columns=single_scores, inplace=True)
    return adata

In [None]:
# Read every file of the signature directory, keyed "<file stem>_hgg".
metasignatures = {}
for f in basedir_1.iterdir():
    name = f"{f.stem}_hgg"
    metasignatures[name] = pd.read_csv(f, index_col=0)

# Score the first 50 entries of each signature table on the dataset.
adata = sc.read_h5ad(data_path_1)
for sig, table in metasignatures.items():
    top_genes = table.values.ravel()[:50]
    adata = score_sig(adata, top_genes, score_name=sig)

# Derive the MESlike / NPClike / OPClike / AClike columns in adata.obs.
adata = rename_gbm_score(adata)

In [None]:
# Give the four state score columns metaprogram-style names so that they can
# be selected alongside the signature scores.
state_to_metaprogram = {
    "AClike": "metaprogram1_gbm",
    "OPClike": "metaprogram2_gbm",
    "NPClike": "metaprogram3_gbm",
    "MESlike": "metaprogram4_gbm",
}
adata.obs = adata.obs.rename(columns=state_to_metaprogram)

In [None]:
# metaprogram1_gbm ... metaprogram4_gbm: the renamed state score columns.
metaprograms_gbm = [f"metaprogram{i+1}_gbm" for i in range(4)]

# Correlate the signature scores and the state scores across observations.
score_columns = [*metasignatures.keys(), *metaprograms_gbm]
df_corr = adata.obs[score_columns].corr()

In [None]:
# Row colours: green for the "_gbm" metaprogram columns, orange for the
# "_hgg" signatures. The name suffix is tested, so a file stem that happens to
# contain "gbm" cannot flip the colour.
labels = df_corr.index.to_frame()
labels.columns = ["Dataset"]
labels["Dataset"] = labels["Dataset"].apply(lambda x: "green" if x.endswith("_gbm") else "tab:orange")

# Annotate only correlations of at least 0.65 (rounded to two decimals);
# all other cells, including NaN, get an empty string.
annot = df_corr.round(2).astype(str).where(df_corr >= 0.65, "")

g1 = sns.clustermap(df_corr, cmap="vlag", row_colors=labels, annot=annot, fmt="")

In [None]:
# savefig does not create missing folders, so make sure "figures/" exists
# before writing.
pl.Path("figures").mkdir(parents=True, exist_ok=True)
g1.figure.savefig("figures/stability_gbm_vs_hgg_scored_hgg_neftel.svg", bbox_inches="tight")

In [None]:
# Inputs for the Neftel comparison on the CRC-iCMS dataset (placeholder paths).
# basedir_1: directory holding the "crc<metaprogram>.csv" and
# "crc_icms<metaprogram>.csv" files read in the next cell.
basedir_1 = pl.Path("/path/to/neftel/results/crc")
# data_path_1: dataset loaded with sc.read_h5ad.
data_path_1 = pl.Path("/path/to/crc_icms/")

In [None]:
# Read metaprograms 1-5 of both cohorts from the same results directory,
# keyed "metaprogram<i>_crc" and "metaprogram<i>_crc_icms".
metasignatures = {}
for mprog in [f"metaprogram{i+1}" for i in range(5)]:
    name = mprog + "_crc"
    metasignatures[name] = pd.read_csv(basedir_1 / f"crc{mprog}.csv", index_col=0)
for mprog in [f"metaprogram{i+1}" for i in range(5)]:
    name = mprog + "_crc_icms"
    metasignatures[name] = pd.read_csv(basedir_1 / f"crc_icms{mprog}.csv", index_col=0)

# Score the first 50 entries of every signature table on the dataset.
adata = sc.read_h5ad(data_path_1)
for sig in metasignatures:
    adata = score_sig(adata, metasignatures[sig].values.ravel()[:50], score_name=sig)

# Correlate the scores across observations.
df_corr = adata.obs[list(metasignatures.keys())].corr()

# Row colours: green for "_crc_icms" signatures, orange for "_crc" ones.
labels = df_corr.index.to_frame()
labels.columns = ["Dataset"]
labels["Dataset"] = labels["Dataset"].apply(lambda x: "green" if x.endswith("_crc_icms") else "tab:orange")

# Annotate only correlations of at least 0.65 (rounded to two decimals);
# all other cells, including NaN, get an empty string.
annot = df_corr.round(2).astype(str).where(df_corr >= 0.65, "")

g1 = sns.clustermap(df_corr, cmap="vlag", row_colors=labels, annot=annot, fmt="")

In [None]:
# savefig does not create missing folders, so make sure "figures/" exists
# before writing.
pl.Path("figures").mkdir(parents=True, exist_ok=True)
g1.figure.savefig("figures/stability_crc_vs_crc_icms_scored_crc_icms_neftel.svg", bbox_inches="tight")