In [None]:
from typing import Tuple, Dict, List

import anndata
import infercnvpy
import pathlib


import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
def get_high_cnv_metasig(diffCNV: pd.DataFrame, metasig: str, qval: float=0.05, threshold_perc: float=0.2) -> Tuple[pd.Series, pd.Series]:
    """Rank the regions whose gain/loss frequency is elevated for one metasignature.

    Args:
        diffCNV: table with one row per region and, for the requested
            metasignature, the columns ``<metasig>_qvalues``,
            ``<metasig>_patients_gain``, ``<metasig>_patients_loss``,
            ``<metasig>_perc_gains``, ``<metasig>_rest_gains``,
            ``<metasig>_perc_losses`` and ``<metasig>_rest_losses``.
        metasig: column-name prefix identifying the metasignature.
        qval: rows are kept only when ``<metasig>_qvalues`` is strictly below this.
        threshold_perc: minimum ``perc - rest`` difference for a region to be returned.

    Returns:
        ``(high_gains, high_losses)``: two Series of ``perc - rest`` differences,
        indexed by region and sorted in descending order.
    """
    # Restrict to this metasignature's columns, then to rows passing the q-value cut.
    own_columns = diffCNV.columns.str.startswith(metasig)
    significant = diffCNV.loc[:, own_columns]
    significant = significant[significant[f"{metasig}_qvalues"] < qval]

    def _enriched(patients_col: str, inside_col: str, outside_col: str) -> pd.Series:
        # Keep rows whose patient count exceeds 1, rank them by the
        # inside-minus-outside difference, then apply the threshold.
        recurrent = significant[significant[patients_col] > 1]
        excess = recurrent[inside_col] - recurrent[outside_col]
        ranked = excess.sort_values(ascending=False)
        return ranked[ranked >= threshold_perc]

    high_gains = _enriched(
        f"{metasig}_patients_gain", f"{metasig}_perc_gains", f"{metasig}_rest_gains"
    )
    high_losses = _enriched(
        f"{metasig}_patients_loss", f"{metasig}_perc_losses", f"{metasig}_rest_losses"
    )
    return high_gains, high_losses

def get_high_cnv(diffCNV: pd.DataFrame, metasigs: np.ndarray, threshold_perc: float=0.25, qval: float=0.05) -> Dict[str, List[pd.Series]]:
    """Run ``get_high_cnv_metasig`` for every metasignature.

    Args:
        diffCNV: differential CNV table passed through to ``get_high_cnv_metasig``.
        metasigs: metasignature names (column-name prefixes in ``diffCNV``).
        threshold_perc: minimum ``perc - rest`` difference; defaults to the
            0.25 this function has always used.
        qval: q-value cut-off forwarded to ``get_high_cnv_metasig``.

    Returns:
        Mapping from metasignature name to ``[high_gains, high_losses]``.
    """
    high_cnvs = {}
    for metasig in metasigs:
        high_gains, high_losses = get_high_cnv_metasig(
            diffCNV, metasig, qval=qval, threshold_perc=threshold_perc
        )
        high_cnvs[metasig] = [high_gains, high_losses]
    return high_cnvs


def get_gained_genes(most_diff_region: pd.DataFrame, adata: anndata.AnnData) -> List[str]:
    """List the genes that lie entirely inside each region of ``most_diff_region``.

    Region labels are read from ``most_diff_region.index`` and parsed as
    ``<chromosome>:<start>-<end>``. A gene from ``adata.var`` is selected when its
    ``chromosome`` equals the region's chromosome, its ``start`` is not before the
    region start and its ``end`` is not after the region end.

    Returns:
        Gene identifiers (the index of ``adata.var``), concatenated region by
        region; a gene matched by several regions appears once per match.
    """
    collected = []
    for parts in most_diff_region.index.str.split(":"):
        region_chromosome = parts[0]
        bounds = parts[1].split("-")
        region_start, region_end = int(bounds[0]), int(bounds[1])

        gene_table = adata.var
        inside = (
            (gene_table.chromosome == region_chromosome)
            & (gene_table.start >= region_start)
            & (gene_table.end <= region_end)
        )
        collected.extend(list(gene_table[inside].index))
    return collected

In [None]:
def get_dotplot(adata, marker_genes, figsize=(10,5)):
    """Draw a dot plot of ``marker_genes`` grouped by ``metamembership``.

    ``adata.X`` is copied into ``adata.layers["counts"]``, temporarily normalised
    to a total of 10000 per cell and log1p-transformed for plotting, and then
    restored from that layer.

    Args:
        adata: annotated data object; needs a ``metamembership`` column in ``obs``.
        marker_genes: genes passed to ``sc.pl.dotplot``.
        figsize: figure size forwarded to ``sc.pl.dotplot``.

    Returns:
        The same ``adata`` object, with ``X`` holding its original values.
    """
    adata.layers["counts"] = adata.X.copy()
    # scanpy's log1p records a "log1p" entry in adata.uns; remember the previous
    # state so that X and its "already log-transformed" flag stay consistent.
    not_set = object()
    previous_log1p = adata.uns.get("log1p", not_set)
    try:
        sc.pp.normalize_total(adata, target_sum=10000)
        sc.pp.log1p(adata)
        sc.pl.dotplot(adata, marker_genes, swap_axes=True, groupby="metamembership", save="dotplotmarker.svg", figsize=figsize)
    finally:
        # Restore X even when normalisation or plotting raises, so a failed call
        # never leaves the caller with a silently transformed matrix.
        adata.X = adata.layers["counts"].copy()
        if previous_log1p is not_set:
            adata.uns.pop("log1p", None)
        else:
            adata.uns["log1p"] = previous_log1p
    return adata

In [None]:
basedir = pathlib.Path("/path/to/esophag/metasignatures/from/cansig")

In [None]:
diffCNV = pd.read_csv(basedir / "diff-cnvs.csv",index_col=0)

In [None]:
adata = sc.read_h5ad("/path/to/esophag/preprocessed/data")

In [None]:
metamembership = pd.read_csv(basedir / "cell-metamembership.csv",index_col=0).replace({"-2.0": "undecided"})

In [None]:
# Attach the metamembership columns to adata.obs, aligned on adata's own cell order.
# Reindexing keeps the obs index (order and length) exactly as it is even when the
# CSV lists cells differently, and dropping same-named columns first means that
# re-running this cell does not create duplicate columns.
aligned_membership = metamembership.reindex(adata.obs.index)
adata.obs = adata.obs.drop(columns=aligned_membership.columns, errors="ignore").join(aligned_membership)

In [None]:
metasigs = np.unique(diffCNV.columns.str.split("_").str[0])
metasigs = np.setdiff1d(metasigs, ["outlier"])

In [None]:
high_cnvs = get_high_cnv(diffCNV, metasigs)

In [None]:
high_cnvs['metasig5'][0].to_csv("sign_gains_metasig5_escc.csv")

In [None]:
df = pd.Series(high_cnvs['metasig5'][0].index.str.split(":").str[0]).value_counts()
sign_gained_chromosomes = df.index
df

In [None]:
msdiffCNV = diffCNV.loc[high_cnvs['metasig5'][0].index,diffCNV.columns.str.startswith("metasig5")]

In [None]:
# Per-chromosome summary of the metasig5 high-gain regions: statistics of the
# gain difference and of the number of patients carrying the gain.
metasig5_gains = high_cnvs['metasig5'][0]
# Match on the parsed chromosome name. A prefix test such as startswith("chr1")
# would also select regions on chr10-chr19.
gain_chromosomes = metasig5_gains.index.str.split(":").str[0]
patient_chromosomes = msdiffCNV.index.str.split(":").str[0]

per_chromosome = []
for chrom in sign_gained_chromosomes:
    df1 = metasig5_gains[gain_chromosomes == chrom].describe().loc[["mean","25%","75%"]]
    df1.index = ["Difference mean","Difference 25%","Difference 75%"]
    df2 = msdiffCNV.loc[patient_chromosomes == chrom, "metasig5_patients_gain"].describe().loc[["mean","min","max"]]
    df2.index = ["Patient mean","Patient min","Patient max"]
    chrom_summary = pd.concat([df1,df2])
    chrom_summary.name = chrom
    per_chromosome.append(chrom_summary)
pc_gain_description = pd.concat(per_chromosome,axis=1).round(2).T


def _chromosome_order(name):
    # Numbered chromosomes sort numerically; anything else (e.g. "chrX") follows, alphabetically.
    suffix = name[3:] if name.startswith("chr") else name
    return (0, int(suffix), "") if suffix.isdigit() else (1, 0, suffix)


# Keep the reordered table (previously the sorted selection was computed and discarded).
pc_gain_description = pc_gain_description.loc[sorted(sign_gained_chromosomes, key=_chromosome_order)]
# Append the column-wise mean over chromosomes as a final "mean" row.
pc_gain_description = pd.concat([pc_gain_description,pc_gain_description.describe().loc[["mean"]]])
pc_gain_description

In [None]:
cnv_genes = {}
for ms in high_cnvs:
    cnv_genes[ms] = [[],[]]
    gains = high_cnvs[ms][0]
    if gains.shape[0] != 0:
        cnv_genes[ms][0].append(get_gained_genes(gains,adata))
    losses = high_cnvs[ms][1]
    if losses.shape[0] != 0:
        cnv_genes[ms][1].append(get_gained_genes(losses,adata))

In [None]:
cnv_genes

In [None]:
# Chromosome heatmap summary with cells grouped by their metamembership label;
# `save` gives the file name used for the exported figure.
infercnvpy.pl.chromosome_heatmap_summary(adata, groupby="metamembership", save="summary_heatmap_escc_usecase.svg")

In [None]:
# Load every signature file into a flat array, keyed by the file name without extension.
metasignatures = {}
sigdir = basedir / "signatures/"
# Iterate in sorted order so the dictionary is built deterministically, and skip
# sub-directories and hidden entries (e.g. a ".ipynb_checkpoints" folder), which
# pd.read_csv cannot read as a signature table.
for f in sorted(sigdir.iterdir()):
    if not f.is_file() or f.name.startswith("."):
        continue
    metasignatures[f.stem] = pd.read_csv(f,index_col=0).values.ravel()

In [None]:
# Number of genes collected from the metasig5 high-gain regions
# ([0] selects the gains side, the second [0] the single gene list stored there).
len(cnv_genes["metasig5"][0][0])

In [None]:
# Genes that are both among the first 100 entries of the metasig5 signature and
# located in one of its high-gain regions.
upreg_genes = np.intersect1d(metasignatures['metasig5'][:100],cnv_genes["metasig5"][0][0])
# Show the overlap and its size (the previous first element was an undefined name).
upreg_genes, len(upreg_genes)

In [None]:
gsea_df = pd.read_csv(basedir / 'gsea-dataframe.csv', index_col=0).set_index("Term")

In [None]:
ms_gsea = gsea_df[gsea_df["cluster"]=="metasig5"]
ms_gsea = ms_gsea[ms_gsea["FDR q-val"]<0.05].dropna()

In [None]:
ms_gsea

In [None]:
wnt_genes = ms_gsea.loc["HALLMARK_WNT_BETA_CATENIN_SIGNALING"].Lead_genes.split(";")

In [None]:
np.intersect1d(upreg_genes,wnt_genes)

In [None]:
marker_genes = metasignatures["metasig5"][:20]

In [None]:
marker_genes

In [None]:
esophag = get_dotplot(adata, marker_genes, figsize=(7,7))

In [None]:
obs_df = adata.obs[adata.obs.metamembership=="metasig5"].copy()
obs_df = obs_df.groupby("sample_id").count()["batch"]

In [None]:
cell_counts = adata.obs["sample_id"].value_counts()

In [None]:
prop_patients_stem = (obs_df/cell_counts).round(2)*100
prop_patients_stem = prop_patients_stem.sort_values(ascending=False).to_frame().reset_index()
prop_patients_stem.columns = ["Sample ID","Fraction highly expressed"]

In [None]:
# Bar plot of the per-sample percentage, in the (descending) order of the table.
plt.rcParams.update({'font.size': 22})
fig, ax = plt.subplots(1,1,figsize=(30,5))
sns.barplot(data=prop_patients_stem, x="Sample ID", y="Fraction highly expressed",
            order=prop_patients_stem["Sample ID"].tolist(), ax=ax, color="red")
ax.set_title("Fraction highly expressed")
# Rotate the tick labels in place rather than re-setting the label texts.
ax.tick_params(axis="x", labelrotation=45)
ax.set_xlabel("")
ax.set_ylabel("%")
ax.set_ylim([0,100])
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines["bottom"].set_linewidth(1.5)
ax.spines["left"].set_linewidth(1.5)
# Reference line at 25 %. axhline's xmin/xmax are axes fractions in [0, 1] and the
# defaults already span the full width, so no bar count is passed here.
ax.axhline(25, color="gray")
# savefig does not create missing directories.
pathlib.Path("figures").mkdir(parents=True, exist_ok=True)
fig.savefig("figures/stemness_fractionpatients.png")
fig.savefig("figures/stemness_fractionpatients.svg")

In [None]:
# Summary statistics of the per-sample table built above.
prop_patients_stem.describe()