In [None]:
import anndata
import os
import pathlib

import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt

from matplotlib.patches import Rectangle

In [None]:
def rename_gbm_score(adata):
    """Expose the glioblastoma state scores in ``adata.obs`` under short names.

    ``MESlike`` and ``NPClike`` are added as the row-wise maximum of their two
    sub-program score columns. The oligodendrocytic-precursor and astrocyte
    score columns are renamed to ``OPClike`` and ``AClike``.

    ``adata.obs`` is modified in place; the same ``adata`` object is returned.
    """
    # New column name -> the two sub-program score columns it summarises.
    merged_scores = {
        "MESlike": ['mesenchymal1_score', 'mesenchymal2_score'],
        "NPClike": ['neural_precursor1_score', 'neural_precursor2_score'],
    }
    for merged_name, sub_scores in merged_scores.items():
        adata.obs[merged_name] = adata.obs[sub_scores].max(axis=1)

    short_names = {
        "oligodendrocytic_precursor_score": "OPClike",
        "astrocyte_score": "AClike",
    }
    adata.obs.rename(columns=short_names, inplace=True)
    return adata

In [None]:
def gbm_neftel(tpm_path="/path/to/TPM/data/from/Neftel",
               tsne_path="/path/to/tsne/coord/from/Neftel",
               meta_path="/path/to/meta/data/from/Neftel"):
    """Load the Neftel glioblastoma data and keep fully annotated malignant cells.

    Parameters
    ----------
    tpm_path : str or path-like
        Text file with the expression table, read with ``anndata.read_text``
        and transposed afterwards.
    tsne_path : str or path-like
        Tab-separated table of t-SNE coordinates, indexed by its first column.
    meta_path : str or path-like
        Tab-separated metadata table, indexed by its first column. It must
        provide ``CellAssignment`` and the six state-score columns used below.

    Returns
    -------
    anndata.AnnData
        Rows whose ``CellAssignment`` is ``"Malignant"`` and whose ``obs``
        contains no missing value, with ``MESlike`` and ``NPClike`` added as
        the row-wise maximum of their two sub-scores.
    """
    score_cols = ['MESlike2', 'MESlike1', 'AClike', 'OPClike', 'NPClike1', 'NPClike2']

    # Transposed so that ``obs`` lines up with the index of the two tables
    # joined below (presumably the file is genes x cells -- TODO confirm).
    adata = anndata.read_text(tpm_path).transpose()

    # Both tables carry a row labelled "TYPE" that is not an observation.
    tsne_coords = pd.read_csv(tsne_path, delimiter="\t", index_col=0).drop("TYPE")
    meta_data = pd.read_csv(meta_path, delimiter="\t", index_col=0).drop("TYPE")
    adata.obs = adata.obs.join(meta_data).join(tsne_coords)

    # The score columns must be numeric before taking maxima or correlations.
    adata.obs[score_cols] = adata.obs[score_cols].astype(float)

    adata = adata[adata.obs["CellAssignment"] == "Malignant"].copy()
    # Drop every row that has a missing value in any ``obs`` column.
    adata = adata[adata.obs.notna().all(axis=1)].copy()

    adata.obs["MESlike"] = adata.obs[['MESlike2', 'MESlike1']].max(axis=1)
    adata.obs["NPClike"] = adata.obs[['NPClike1', 'NPClike2']].max(axis=1)

    return adata

In [None]:
def get_high_corr(x, threshold=0.65):
    """Return ``x`` unchanged unless it is below ``threshold``.

    Parameters
    ----------
    x : float
        A single correlation value.
    threshold : float, optional
        Values strictly below this are masked. Defaults to 0.65.

    Returns
    -------
    float
        ``np.nan`` when ``x < threshold``, otherwise ``x``. A NaN input is
        returned as NaN because the comparison is false for NaN.
    """
    return np.nan if x < threshold else x

In [None]:
# Known signature columns (looked up in ``adata.obs``) for each dataset key.
# The three glioma entries use the same four state names; each key still gets
# its own list object.
cancer_known_sigs = {
    **{glioma_key: ["MESlike", "AClike", "NPClike", "OPClike"]
       for glioma_key in ("gbm", "hgg_gbm", "hgg")},
    "scc_red": ["Basal", "Differentiated", "TSK", "Cycling"],
}

In [None]:
# Dataset keys, in the order they are processed by the correlation loop and
# drawn as figure panels.
cancer_types = ["gbm","hgg","hgg_gbm","scc_red"]

In [None]:
# Root folder of the preprocessed datasets and, per dataset key, the location
# of its data underneath that root.
dpath_basedir = pathlib.Path("/path/to/the/preprocessed/data")
cancer_data_path = {
    dataset_key: dpath_basedir / relative_path
    for dataset_key, relative_path in [
        ("hgg", "glioblastoma/2022-10-11_17-17-22/data/malignant.h5ad"),
        ("gbm", "glioblastoma_ss_neftel/_LAST"),
        ("scc_red", "scc_red/malignant_old.h5ad"),
    ]
}

In [None]:
# Root results folder; the metaprogram CSVs are read from its "glioblastoma"
# and "scc_red" sub-folders.
base_dir = pathlib.Path("/path/to/results/folder/from/scalop")

In [None]:
# Build one correlation table per dataset key in ``cancer_types``:
#   * "gbm"     -> correlations among the known Neftel scores themselves;
#   * otherwise -> known scores (rows) x metaprogram scores (columns).
corr_metasig = {}
for cancer in cancer_types:
    print(cancer)
    known_sigs = cancer_known_sigs[cancer]

    if cancer == "gbm":
        # No metaprogram is scored for this entry.
        adata = gbm_neftel()
        print("Downloaded adata")
        corr_metasig[cancer] = adata.obs[known_sigs].corr()
        continue

    if cancer == "hgg_gbm":
        # Neftel cells scored with the metaprograms stored under "glioblastoma".
        adata = gbm_neftel()
        print("Downloaded adata")
        sig_dir = base_dir / "glioblastoma"
    else:
        adata = sc.read_h5ad(cancer_data_path[cancer])
        sc.pp.normalize_total(adata)
        sc.pp.log1p(adata)
        print("Downloaded adata")

        if cancer == "hgg":
            adata = rename_gbm_score(adata)
            sig_dir = base_dir / "glioblastoma"
        else:
            sig_dir = base_dir / "scc_red"

    # Find the metaprogram files by name instead of counting every entry in
    # the folder: an unrelated file or sub-folder would otherwise inflate the
    # count and make the loop ask for a metaprogram CSV that does not exist.
    prefix = "metaprogram"
    metaprogram_files = sorted(
        (int(path.stem[len(prefix):]), path)
        for path in sig_dir.glob(f"{prefix}*.csv")
        if path.stem[len(prefix):].isdecimal()
    )
    if not metaprogram_files:
        raise FileNotFoundError(f"No {prefix}<N>.csv files found in {sig_dir}")

    metasig_names = []
    for metaprogram_id, sig_path in metaprogram_files:
        score_name = f"{prefix}{metaprogram_id}"
        # Each signature is the first 50 entries of the file's first data column.
        sig = pd.read_csv(sig_path, index_col=0).iloc[:50, 0].tolist()
        sc.tl.score_genes(adata, gene_list=sig, score_name=score_name)
        metasig_names.append(score_name)

    col_to_compare = known_sigs + metasig_names
    corr_metasig[cancer] = adata.obs[col_to_compare].corr().loc[known_sigs, metasig_names]

In [None]:
# Relabel the columns of the "gbm" table as numbered meta-signatures.
gbm_column_labels = {
    "MESlike": "Meta-sig. 1",
    "AClike": "Meta-sig. 2",
    "NPClike": "Meta-sig. 3",
    "OPClike": "Meta-sig. 4",
}
corr_metasig["gbm"] = corr_metasig["gbm"].rename(columns=gbm_column_labels)

In [None]:
# One heatmap per dataset key: known signatures (rows) against meta-signatures
# (columns). High correlations are printed in the cell, and the strongest
# entry of each column is outlined in red.
cancer_names = {"gbm": "GBM", "hgg": "HGG", "hgg_gbm": "HGG, scored on GBM",
                "scc_red": "SCC", }
high_corr = 0.65  # correlations at or above this are annotated and may be boxed

fig, ax = plt.subplots(1, 4, figsize=(12, 2.4), gridspec_kw={'width_ratios': [3, 3, 3, 4]})
flatax = ax.flatten()
# Only the last panel draws the colour bar.
show_cbar = [False] * (len(cancer_types) - 1) + [True]
for i, cancer in enumerate(cancer_types):
    panel = flatax[i]

    df = corr_metasig[cancer].copy()
    df.columns = df.columns.str.replace("metaprogram", "Meta-sig. ")
    df.index = df.index.str.replace("like", "-like")

    # Text shown in each cell: the rounded value where it reaches the
    # threshold, an empty string elsewhere (NaN fails the comparison).
    # Equivalent to the element-wise ``applymap`` masking, which pandas has
    # deprecated.
    annot = df.round(2).astype(str).where(df >= high_corr, "")

    sns.heatmap(data=df, cmap="vlag", center=0.0, vmin=-1, vmax=1,
                annot=annot, fmt='', ax=panel, cbar=show_cbar[i])

    # Row label of the largest correlation in every column (NaN ranks lowest).
    column_max = df.fillna(-np.inf).idxmax(axis=0)

    for col, variable in enumerate(df.columns):
        # ``not (max >= threshold)`` also skips all-NaN columns, which a plain
        # ``max < threshold`` test would let through and box on the first row.
        if not df[variable].max() >= high_corr:
            continue
        position = df.index.get_loc(column_max[variable])
        panel.add_patch(Rectangle((col, position), 1, 1, fill=False, edgecolor='tab:red', lw=2))

    # Rotation must be numeric: recent Matplotlib rejects numeric strings.
    panel.set_xticklabels(panel.get_xticklabels(), rotation=45, horizontalalignment="right")
    panel.set_yticklabels(panel.get_yticklabels(), rotation=0, verticalalignment="center")
    panel.set_title(cancer_names[cancer], fontsize=15)

fig.tight_layout()
# NOTE(review): both calls target the same placeholder path, so the second
# overwrites the first unless distinct file names are filled in.
fig.savefig("path/to/save", bbox_inches="tight", dpi=300)
fig.savefig("path/to/save", bbox_inches="tight")