In [None]:
import pandas as pd
import numpy as np
import scanpy as sc

import matplotlib.pyplot as plt
import seaborn as sns

from statannotations.Annotator import Annotator

from tqdm.notebook import tqdm

from scipy.stats import fisher_exact

In [None]:
import pathlib as pl
import os

In [None]:
def pretty_ax(ax):
    """Apply the notebook's minimal axes style to ``ax`` (modified in place).

    The top and right spines are hidden, the bottom and left spines are
    thickened, and the tick configuration is set so that bottom ticks are
    drawn while top and left ticks are not; both bottom and left tick
    labels stay visible.
    """
    # Hide the frame lines that carry no scale information.
    for hidden_side in ("right", "top"):
        ax.spines[hidden_side].set_visible(False)

    tick_settings = {
        "axis": "both",
        "which": "both",
        "bottom": True,
        "top": False,
        "left": False,
        "labelbottom": True,
        "labelleft": True,
    }
    ax.tick_params(**tick_settings)

    # Emphasise the two remaining spines.
    for kept_side in ("bottom", "left"):
        ax.spines[kept_side].set_linewidth(1.5)

In [None]:
from typing import Dict, List, Tuple


def _joint_highmt_markers(highmal_df: pd.DataFrame, within_df: pd.DataFrame,
                          highmal_positive: bool, qval_lim: float) -> pd.DataFrame:
    """Merge two differential-expression tables and keep genes significant in both.

    Both inputs are tables with a ``names`` column plus ``scores`` and
    ``pvals_adj`` columns (as produced by ``sc.get.rank_genes_groups_df``).
    Columns of ``highmal_df`` get the suffix ``_HighMal`` and columns of
    ``within_df`` the suffix ``_TMEMal`` (suffixes kept as-is so that saved
    CSVs keep their historical column names).

    A gene is kept when ``scores_TMEMal > 0`` and ``scores_HighMal`` has the
    requested sign (positive if ``highmal_positive`` else negative). The joint
    p-value is the larger of the two adjusted p-values, stored under
    ``pvals_BergerUnionTest``; genes with a joint p-value below ``qval_lim``
    are returned sorted by that value.
    """
    df1 = highmal_df.set_index("names").add_suffix("_HighMal")
    df2 = within_df.set_index("names").add_suffix("_TMEMal")
    merged = pd.concat([df1, df2], axis=1)

    if highmal_positive:
        direction = merged["scores_HighMal"] > 0
    else:
        direction = merged["scores_HighMal"] < 0

    # .copy() makes the filtered frame independent, so the column assignment
    # below does not trigger pandas' SettingWithCopyWarning.
    merged = merged[direction & (merged["scores_TMEMal"] > 0)].copy()
    merged["pvals_BergerUnionTest"] = merged[["pvals_adj_HighMal", "pvals_adj_TMEMal"]].max(axis=1)

    significant = merged[merged["pvals_BergerUnionTest"] < qval_lim]
    return significant.sort_values("pvals_BergerUnionTest")


def get_markers_high_mt(full_results: pd.DataFrame, adata: sc.AnnData,
                        sample_col: str, malignant_cells: List[str],
                        tme_cells: List[str], qval_lim: float = 0.05,
                        celltype_col: str = "cleaned_celltype",
                        ) -> Tuple[pd.DataFrame, pd.DataFrame, Dict[str, pd.DataFrame]]:
    """Find genes associated with high mitochondrial content in "Case" samples.

    Parameters
    ----------
    full_results
        Per-sample table indexed by sample name with a ``Category`` column;
        only samples whose category is ``"Case"`` are analysed.
    adata
        Annotated data whose ``obs`` contains ``sample_col``, ``celltype_col``
        and the 0/1 columns ``HighMT`` and ``Malignant``.
    sample_col
        Name of the ``obs`` column holding the sample identifier.
    malignant_cells, tme_cells
        Values of ``celltype_col`` that define the malignant and the TME
        (microenvironment) subsets respectively.
    qval_lim
        Upper bound (exclusive) on the joint adjusted p-value.
    celltype_col
        Name of the ``obs`` column holding the cell-type label.

    Returns
    -------
    malhighmarker
        Genes scoring higher in HighMT vs. other malignant cells *and* higher
        in malignant vs. non-malignant cells among HighMT cells.
    tmehighmarker
        Genes scoring higher in HighMT vs. other TME cells *and* lower in
        malignant vs. non-malignant cells among HighMT cells.
    highmt_markers
        The three raw ``rank_genes_groups`` tables, keyed ``"Malignant"``,
        ``"TME"`` and ``"HighMal"``.
    """
    case_samples = full_results[full_results["Category"] == "Case"].index

    casadata = adata[adata.obs[sample_col].isin(case_samples)].copy()
    # rank_genes_groups is given categorical grouping columns.
    casadata.obs["HighMT"] = casadata.obs["HighMT"].astype("category")
    casadata.obs["Malignant"] = casadata.obs["Malignant"].astype("category")

    maladata = casadata[casadata.obs[celltype_col].isin(malignant_cells)].copy()
    tmeadata = casadata[casadata.obs[celltype_col].isin(tme_cells)].copy()
    highmtadata = casadata[casadata.obs.HighMT == 1].copy()

    # Three comparisons (scanpy defaults): HighMT vs. rest within malignant
    # cells, HighMT vs. rest within TME cells, malignant vs. rest within
    # HighMT cells. Group "1" is the positive class in each.
    highmt_markers = {}
    sc.tl.rank_genes_groups(maladata, groupby="HighMT")
    highmt_markers["Malignant"] = sc.get.rank_genes_groups_df(maladata, group="1")
    sc.tl.rank_genes_groups(tmeadata, groupby="HighMT")
    highmt_markers["TME"] = sc.get.rank_genes_groups_df(tmeadata, group="1")
    sc.tl.rank_genes_groups(highmtadata, groupby="Malignant")
    highmt_markers["HighMal"] = sc.get.rank_genes_groups_df(highmtadata, group="1")

    malhighmarker = _joint_highmt_markers(
        highmt_markers["HighMal"], highmt_markers["Malignant"],
        highmal_positive=True, qval_lim=qval_lim,
    )
    tmehighmarker = _joint_highmt_markers(
        highmt_markers["HighMal"], highmt_markers["TME"],
        highmal_positive=False, qval_lim=qval_lim,
    )

    return malhighmarker, tmehighmarker, highmt_markers

In [None]:
full_resdir = pl.Path("/add/path/here/markers_highmt")

In [None]:
color_mapping = {"Case": "r", "Positive Control": "b", "Negative Control": "y"}

In [None]:
def plot_boxplot(adata, y, ax=None, name=None):
    """Boxplot of ``adata.obs[y]`` by ``Malignant`` (x) and ``HighMT`` (hue), with tests.

    Draws on ``ax`` (a new 3x2-inch figure is created when ``ax`` is None),
    applies ``pretty_ax``, and annotates four pairwise Mann-Whitney
    comparisons (no multiple-testing correction) with significance stars.
    When ``name`` is given it becomes the title and the y-label is cleared.
    """
    # Groups are written as (Malignant, HighMT) - presumably matching the
    # (x value, hue value) convention of statannotations; verify if changed.
    group_pairs = [
        ((0, 0), (0, 1)),
        ((1, 0), (1, 1)),
        ((0, 0), (1, 1)),
        ((0, 1), (1, 0)),
    ]
    # Shared between the seaborn call and the annotator so both see the same data layout.
    plot_spec = dict(data=adata.obs, x="Malignant", y=y, hue="HighMT")

    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(3, 2))

    sns.boxplot(ax=ax, **plot_spec)
    pretty_ax(ax)
    ax.legend(frameon=False, title="HighMT", bbox_to_anchor=(1, 1, 0, 0))
    if name is not None:
        ax.set_ylabel("")
        ax.set_title(name)

    annotator = Annotator(ax, pairs=group_pairs, **plot_spec)
    annotator.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,
        fontsize=10,
    )
    annotator.apply_test()
    annotator.annotate()

# Load stress/dissociation signatures

In [None]:
# Core stress signature; genes with positive logFC are kept for the full list,
# and the first 40 rows of the table form the reduced list.
# NOTE: Series.ravel() is deprecated in recent pandas; .to_numpy() returns the
# same 1-D NumPy array.
core_genes_stress = pd.read_csv("/add/path/here/auxiliary_data/coregene_df-FALSE-v3.csv")

list_core_genes = core_genes_stress[core_genes_stress["logFC"]>0].gene_symbol.to_numpy()

red_core_genes = core_genes_stress.head(40).gene_symbol.to_numpy()

# Dissociation signatures: single-column, header-less gene lists, upper-cased
# so they can be intersected with the other signatures.
dissociation_genes = pd.read_csv("/add/path/here/auxiliary_data/dissociation_genes-vanDenBrink2017.csv",header=None).astype(str)
dissociation_genes = dissociation_genes[0].str.upper().to_numpy()

dissociation_genes_machado = pd.read_csv("/add/path/here/auxiliary_data/dissociation_Machado2021.csv",header=None).astype(str)
dissociation_genes_machado = dissociation_genes_machado[0].str.upper().to_numpy()

dissociation_prostate_specific = ["JUN","FOS","EGR1","ATF3","JUNB","GADD45B","IER2","ZFP36",
"DNAJB1","RHOB","NR4A1","UBC","HES1"]

In [None]:
# Consensus dissociation signature: genes present in all three lists
# (core stress genes, van den Brink 2017, Machado 2021).
core_and_vandenbrink = np.intersect1d(
    list_core_genes.astype(str),
    dissociation_genes.astype(str),
)
common_disso_genes = np.intersect1d(
    core_and_vandenbrink,
    dissociation_genes_machado.astype(str),
)

# Uveal melanoma Durante

In [None]:
resdir = full_resdir / "UvealMelanoma_Durante"
os.makedirs(resdir, exist_ok=True)

In [None]:
adata = sc.read_h5ad("/add/path/here/filtered_data/UvealMelanoma_Durante_10X/filtered_adata.h5ad")

In [None]:
all_samples = adata.obs["sample"].unique()

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

In [None]:
adata.obs["HighMT"] = (adata.obs.pct_counts_mt>15).astype(int)
adata.obs["Malignant"] = (adata.obs.cleaned_celltype=="Malignant").astype(int)

# Normalize without the MT genes

In [None]:
adata.X = adata.layers["counts"].copy()

adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()

In [None]:
sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

## Check dissociation stress

In [None]:
sc.tl.score_genes(adata, gene_list=common_disso_genes, score_name="Dissociation stress")

# Remove patients with high pct MT in healthy cells

In [None]:
pct_counts_values = adata.obs.groupby(["sample","Malignant"])["pct_counts_mt"].median().unstack()

pct_counts_values.columns = ["TME","Malignant"]

In [None]:
pct_counts_values.sort_values("TME",ascending=False)

In [None]:
maladata = adata[adata.obs.Malignant==1].copy()

pct_high_mt = maladata.obs[["sample","HighMT"]].value_counts().unstack()

pct_high_mt.columns = ["LowMT","HighMT"]

pct_high_mt = pct_high_mt["HighMT"]/pct_high_mt.sum(axis=1)
pct_high_mt.name = "Pct_HighMT"

In [None]:
all_vc = adata.obs[["sample","HighMT","Malignant"]].value_counts().unstack()

In [None]:
# Per-sample Fisher's exact test of HighMT status (rows) vs. malignancy (columns).
all_fisher_OR, all_fisher_p = {}, {}
for sample in all_vc.index.get_level_values(0).unique():
    # Full 2x2 table: reindexing guarantees both HighMT rows and both
    # Malignant columns exist, so a missing combination counts as 0 instead
    # of raising a KeyError. Counts are cast back to int for the test.
    contingency = (
        all_vc.loc[sample]
        .reindex(index=[0, 1], columns=[0, 1])
        .fillna(0)
        .astype(int)
    )

    # Skip samples with fewer than 30 TME cells or fewer than 30 malignant cells.
    if (contingency.sum(axis=0) < 30).any():
        print(sample, "does not have enough malignant/TME cells")
        continue
    # With fewer than 20 HighMT cells, record a null result (OR=0, p=1)
    # instead of running the test.
    if contingency.sum(axis=1).loc[1] < 20:
        print(sample, "does not have enough HighMT cells")
        OR, p = 0, 1
    else:
        OR, p = fisher_exact(contingency)
    all_fisher_OR[sample] = [OR]
    all_fisher_p[sample] = [p]

In [None]:
fisher_results = pd.concat([pd.DataFrame(all_fisher_p), pd.DataFrame(all_fisher_OR)]).T

fisher_results.columns = ["p","OR"]

fisher_results = fisher_results.sort_values("OR")

#fisher_results = fisher_results[fisher_results["p"]<0.05].sort_values("OR")

In [None]:
full_results = pd.concat([fisher_results,pct_counts_values,pct_high_mt],axis=1).dropna()

full_results["Category"] = "Case"
full_results.loc[(full_results.TME>15),"Category"] = "Negative Control"
full_results.loc[(full_results.TME<15) & (full_results.OR<2),"Category"] = "Positive Control"
full_results.loc[(full_results.TME<15) & (full_results.OR>2) & (full_results.Pct_HighMT<0.15),"Category"] = "Positive Control"

#plot_order = (full_results.Malignant - full_results.TME).sort_values().index
plot_order = full_results.sort_values("TME").index
full_results = full_results.loc[plot_order]

# One tick-label colour per sample, in plot order (Series.ravel() is deprecated; use to_numpy()).
colors = full_results["Category"].replace(color_mapping).to_numpy()

fig, ax = plt.subplots(1,1,figsize=(3.5,5))
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", hue="Malignant", 
               palette = {0: "tab:blue", 1: "tab:red"}, 
               order=plot_order,
               ax=ax)
ax.legend(frameon=False, title="Malignant")
ax.vlines(x=15, ymin=ax.get_ylim()[0], ymax=ax.get_ylim()[1], color="grey", linestyle="--")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

for xtick, color in zip(ax.get_yticklabels(), colors):
    xtick.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/UvealMelanoma_Durante.png", 
            dpi=200, bbox_inches="tight")

In [None]:
fig, ax = plt.subplots(1,1,figsize=(3.5,5))
sns.boxplot(data=adata.obs, y="sample", x="Dissociation stress", hue="Malignant", 
               palette = {0: "tab:blue", 1: "tab:red"}, 
               order=plot_order,
               ax=ax)
ax.legend(frameon=False, title="Malignant")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

for xtick, color in zip(ax.get_yticklabels(), colors):
    xtick.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("Dissociation stress")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/UvealMelanoma_Durante_dissociation.png", 
            dpi=200, bbox_inches="tight")

In [None]:
adata.obs[["sample","pct_counts_mt","cleaned_celltype","HighMT","Malignant","Dissociation stress"]].to_csv("/add/path/here/info-pct-counts-full/uvealmelanoma_adata.csv")

# Find markers of pct counts MT in cases

In [None]:
case_samples = full_results[full_results["Category"]=="Case"].index

In [None]:
malhighmarker, tmehighmarker, fullmarkers = get_markers_high_mt(full_results=full_results, adata=adata, 
                        sample_col="sample", malignant_cells=["Malignant"], 
                        tme_cells=['T_cell', 'Monocyte', 'Plasma_cell', 'Endothelial', 'B_cell'], qval_lim = 0.1) 

In [None]:
fullmarkers["Malignant"].to_csv(resdir / "full_highmt_vs_lowmt_mal_dgex.csv")

# SCLC Chan

In [None]:
resdir = full_resdir / "SCLC_Chan_10X"
os.makedirs(resdir, exist_ok=True)

In [None]:
adata = sc.read_h5ad("/add/path/here/filtered_data/SCLC_Chan_10X/filtered_adata.h5ad")

In [None]:
adata = adata[adata.obs.source!="pleural_effusion"].copy()

In [None]:
all_samples = adata.obs["sample"].unique()

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

In [None]:
adata.obs["HighMT"] = (adata.obs.pct_counts_mt>15).astype(int)
adata.obs["Malignant"] = (adata.obs.cleaned_celltype=="Malignant").astype(int)

# Normalize without the MT genes

In [None]:
adata.X = adata.layers["counts"].copy()

adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()

In [None]:
sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

## Score dissociation stress

In [None]:
sc.tl.score_genes(adata, gene_list=common_disso_genes, score_name="Dissociation stress")

# Remove patients with high pct MT in healthy cells

In [None]:
# Median % MT counts per sample, split by malignant status. The column is
# selected *before* aggregating: calling .median() on the whole grouped frame
# raises a TypeError on non-numeric obs columns in pandas >= 2.0.
pct_counts_values = adata.obs.groupby(["sample","Malignant"])["pct_counts_mt"].median().unstack()

# Malignant == 0 -> "TME", Malignant == 1 -> "Malignant"
pct_counts_values.columns = ["TME","Malignant"]

In [None]:
maladata = adata[adata.obs.Malignant==1].copy()

pct_high_mt = maladata.obs[["sample","HighMT"]].value_counts().unstack()

pct_high_mt.columns = ["LowMT","HighMT"]

pct_high_mt = pct_high_mt["HighMT"]/pct_high_mt.sum(axis=1)
pct_high_mt.name = "Pct_HighMT"

In [None]:
adata.obs["highlevel_celltype"] = adata.obs.cleaned_celltype.apply(lambda x: "TME" if x not in ["Malignant","Epithelial"] else x)

In [None]:
pct_counts_values.sort_values("TME",ascending=False)

In [None]:
all_vc = adata.obs[["sample","HighMT","Malignant"]].value_counts().unstack()

In [None]:
# Per-sample Fisher's exact test of HighMT status (rows) vs. malignancy (columns).
all_fisher_OR, all_fisher_p = {}, {}
for sample in all_vc.index.get_level_values(0).unique():
    # Full 2x2 table: reindexing guarantees both HighMT rows and both
    # Malignant columns exist, so a missing combination counts as 0 instead
    # of raising a KeyError. Counts are cast back to int for the test.
    contingency = (
        all_vc.loc[sample]
        .reindex(index=[0, 1], columns=[0, 1])
        .fillna(0)
        .astype(int)
    )

    # Skip samples with fewer than 30 TME cells or fewer than 30 malignant cells.
    if (contingency.sum(axis=0) < 30).any():
        print(sample, "does not have enough malignant/TME cells")
        continue
    # With fewer than 20 HighMT cells, record a null result (OR=0, p=1)
    # instead of running the test.
    if contingency.sum(axis=1).loc[1] < 20:
        print(sample, "does not have enough HighMT cells")
        OR, p = 0, 1
    else:
        OR, p = fisher_exact(contingency)
    all_fisher_OR[sample] = [OR]
    all_fisher_p[sample] = [p]

In [None]:
fisher_results = pd.concat([pd.DataFrame(all_fisher_p), pd.DataFrame(all_fisher_OR)]).T

fisher_results.columns = ["p","OR"]

fisher_results = fisher_results.sort_values("OR")

In [None]:
full_results = pd.concat([fisher_results,pct_counts_values,pct_high_mt],axis=1).dropna()

full_results["Category"] = "Case"
full_results.loc[(full_results.TME>15),"Category"] = "Negative Control"
full_results.loc[(full_results.TME<15) & (full_results.OR<2),"Category"] = "Positive Control"
full_results.loc[(full_results.TME<15) & (full_results.OR>2) & (full_results.Pct_HighMT<0.15),"Category"] = "Positive Control"

plot_order = full_results.sort_values("TME").index
full_results = full_results.loc[plot_order]

# One tick-label colour per sample, in plot order (Series.ravel() is deprecated; use to_numpy()).
colors = full_results["Category"].replace(color_mapping).to_numpy()

fig, ax = plt.subplots(1,1,figsize=(3.5,7))
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", hue="Malignant", 
               palette = {0: "tab:blue", 1: "tab:red"}, 
               order=plot_order,
               ax=ax)
ax.legend(frameon=False, title="Malignant", bbox_to_anchor=(1,1,0,0))
ax.vlines(x=15, ymin=ax.get_ylim()[0], ymax=ax.get_ylim()[1], color="grey", linestyle="--")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

for xtick, color in zip(ax.get_yticklabels(), colors):
    xtick.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/SCLC_Chan.png", 
            dpi=200, bbox_inches="tight")

In [None]:
# % MT counts per sample, split by high-level cell type (TME / Epithelial / Malignant).
fig, ax = plt.subplots(1,1,figsize=(3.5,7))
# Draw on the axes created above explicitly rather than on the implicit current axes.
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", order=plot_order,
            hue="highlevel_celltype",
            hue_order=["TME","Epithelial","Malignant"],
            palette={"TME": "tab:blue", "Epithelial": "tab:orange", "Malignant": "tab:red"},
            ax=ax)

ax.legend(frameon=False, title="High-level cell type", bbox_to_anchor=(1,1,0,0))
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")
# Colour each sample label by its category (same order as plot_order).
for ticklabel, color in zip(ax.get_yticklabels(), colors):
    ticklabel.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/SCLC_Chan_wNormal-Epithelial.png",
            dpi=200, bbox_inches="tight")

In [None]:
# Dissociation-stress score per sample, split by malignant status.
fig, ax = plt.subplots(1,1,figsize=(3.5,7))
sns.boxplot(data=adata.obs, y="sample", x="Dissociation stress", hue="Malignant",
            palette={0: "tab:blue", 1: "tab:red"},
            order=plot_order,
            ax=ax)
ax.legend(frameon=False, title="Malignant")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

# Colour each sample label by its category (same order as plot_order).
for ticklabel, color in zip(ax.get_yticklabels(), colors):
    ticklabel.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("Dissociation stress")
pretty_ax(ax)
# File name spelled "SCLC" (was "SLCL"), consistent with the dataset's other figures.
fig.savefig("/add/path/here/figures/case_control/SCLC_Chan_dissociation.png",
            dpi=200, bbox_inches="tight")

In [None]:
adata.obs[["sample","pct_counts_mt","cell_type","HighMT","Malignant","Dissociation stress"]].to_csv("/add/path/here/info-pct-counts-full/sclc_adata.csv")

## Find high MT markers

In [None]:
malhighmarker, tmehighmarker, fullmarkers = get_markers_high_mt(full_results=full_results, adata=adata, 
                        sample_col="sample", malignant_cells=["Malignant"], 
                        tme_cells=["B_cell", "T_cell", "Dendritic", "Macrophage", "Plasma",
                                   "Fibroblast", "Endothelial", "Epithelial", "Mast"]) 

In [None]:
fullmarkers["Malignant"].to_csv(resdir / "full_highmt_vs_lowmt_mal_dgex.csv")

In [None]:
malhighmarker.to_csv(resdir / "mal_high_markers.csv")
tmehighmarker.to_csv(resdir / "tme_high_markers.csv")

# Pancreas Steele 10X

In [None]:
resdir = full_resdir / "Steele_Pancreas_10X"
os.makedirs(resdir, exist_ok=True)

In [None]:
adata = sc.read_h5ad("/add/path/here/filtered_data/Steele_Pancreas_10X/filtered_adata.h5ad")

In [None]:
all_samples = adata.obs["sample"].unique()

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

In [None]:
adata.obs["HighMT"] = (adata.obs.pct_counts_mt>15).astype(int)
adata.obs["Malignant"] = (adata.obs.cleaned_celltype=="Malignant").astype(int)

# Normalize without the MT genes

In [None]:
adata.X = adata.layers["counts"].copy()

adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()

sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

## Score dissociation stress

In [None]:
sc.tl.score_genes(adata, gene_list=common_disso_genes, score_name="Dissociation stress")

# Remove patients with high pct MT in healthy cells

In [None]:
# Median % MT counts per sample, split by malignant status. The column is
# selected *before* aggregating: calling .median() on the whole grouped frame
# raises a TypeError on non-numeric obs columns in pandas >= 2.0.
pct_counts_values = adata.obs.groupby(["sample","Malignant"])["pct_counts_mt"].median().unstack()

# Malignant == 0 -> "TME", Malignant == 1 -> "Malignant"
pct_counts_values.columns = ["TME","Malignant"]

In [None]:
maladata = adata[adata.obs.Malignant==1].copy()

pct_high_mt = maladata.obs[["sample","HighMT"]].value_counts().unstack()

pct_high_mt.columns = ["LowMT","HighMT"]

pct_high_mt = pct_high_mt["HighMT"]/pct_high_mt.sum(axis=1)
pct_high_mt.name = "Pct_HighMT"

In [None]:
adata.obs["highlevel_celltype"] = adata.obs.cleaned_celltype.apply(lambda x: "TME" if x not in ["Malignant","Epithelial"] else x)

In [None]:
pct_counts_values.sort_values("TME",ascending=False)

In [None]:
all_vc = adata.obs[["sample","HighMT","Malignant"]].value_counts().unstack()

In [None]:
# Per-sample Fisher's exact test of HighMT status (rows) vs. malignancy (columns).
all_fisher_OR, all_fisher_p = {}, {}
for sample in all_vc.index.get_level_values(0).unique():
    # Full 2x2 table: reindexing guarantees both HighMT rows and both
    # Malignant columns exist, so a missing combination counts as 0 instead
    # of raising a KeyError. Counts are cast back to int for the test.
    contingency = (
        all_vc.loc[sample]
        .reindex(index=[0, 1], columns=[0, 1])
        .fillna(0)
        .astype(int)
    )

    # Skip samples with fewer than 30 TME cells or fewer than 30 malignant cells.
    if (contingency.sum(axis=0) < 30).any():
        print(sample, "does not have enough malignant/TME cells")
        continue
    # With fewer than 20 HighMT cells, record a null result (OR=0, p=1)
    # instead of running the test.
    if contingency.sum(axis=1).loc[1] < 20:
        print(sample, "does not have enough HighMT cells")
        OR, p = 0, 1
    else:
        OR, p = fisher_exact(contingency)
    all_fisher_OR[sample] = [OR]
    all_fisher_p[sample] = [p]

In [None]:
fisher_results = pd.concat([pd.DataFrame(all_fisher_p), pd.DataFrame(all_fisher_OR)]).T

fisher_results.columns = ["p","OR"]

fisher_results = fisher_results.sort_values("OR")

In [None]:
full_results = pd.concat([fisher_results,pct_counts_values,pct_high_mt],axis=1).dropna()

full_results["Category"] = "Case"
full_results.loc[(full_results.TME>15),"Category"] = "Negative Control"
full_results.loc[(full_results.TME<15) & (full_results.OR<2),"Category"] = "Positive Control"
full_results.loc[(full_results.TME<15) & (full_results.OR>2) & (full_results.Pct_HighMT<0.15),"Category"] = "Positive Control"

plot_order = full_results.sort_values("TME").index
full_results = full_results.loc[plot_order]

# One tick-label colour per sample, in plot order (Series.ravel() is deprecated; use to_numpy()).
colors = full_results["Category"].replace(color_mapping).to_numpy()

fig, ax = plt.subplots(1,1,figsize=(3.5,7))
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", hue="Malignant", 
               palette = {0: "tab:blue", 1: "tab:red"}, 
               order=plot_order,
               ax=ax)
ax.legend(frameon=False, title="Malignant", bbox_to_anchor=(1,1,0,0))
ax.vlines(x=15, ymin=ax.get_ylim()[0], ymax=ax.get_ylim()[1], color="grey", linestyle="--")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

for xtick, color in zip(ax.get_yticklabels(), colors):
    xtick.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/Pancreas_Steele.png", 
            dpi=200, bbox_inches="tight")

In [None]:
# % MT counts per sample, split by high-level cell type (TME / Epithelial / Malignant).
fig, ax = plt.subplots(1,1,figsize=(3.5,7))
# Draw on the axes created above explicitly rather than on the implicit current axes.
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", order=plot_order,
            hue="highlevel_celltype",
            hue_order=["TME","Epithelial","Malignant"],
            palette={"TME": "tab:blue", "Epithelial": "tab:orange", "Malignant": "tab:red"},
            ax=ax)

ax.legend(frameon=False, title="High-level cell type", bbox_to_anchor=(1,1,0,0))
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")
# Colour each sample label by its category (same order as plot_order).
for ticklabel, color in zip(ax.get_yticklabels(), colors):
    ticklabel.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/Pancreas_Steele_wNormal-Epithelial.png",
            dpi=200, bbox_inches="tight")

In [None]:
fig, ax = plt.subplots(1,1,figsize=(3.5,7))
sns.boxplot(data=adata.obs, y="sample", x="Dissociation stress", hue="Malignant", 
               palette = {0: "tab:blue", 1: "tab:red"}, 
               order=plot_order,
               ax=ax)
ax.legend(frameon=False, title="Malignant")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

for xtick, color in zip(ax.get_yticklabels(), colors):
    xtick.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("Dissociation stress")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/Pancreas_Steele_dissociation.png", 
            dpi=200, bbox_inches="tight")

In [None]:
adata.obs[["sample","pct_counts_mt","cell_type","HighMT","Malignant","Dissociation stress"]].to_csv("/add/path/here/info-pct-counts-full/pancreas_adata.csv")

# Find markers of pct counts MT in cases

In [None]:
malhighmarker, tmehighmarker, fullmarkers = get_markers_high_mt(full_results=full_results, adata=adata, 
                        sample_col="sample", malignant_cells=["Malignant"], 
                        tme_cells=["B_cell", "T_cell", "Dendritic", "Macrophage", "Plasma",
                                   "Fibroblast", "Endothelial", "Epithelial", "Granulocyte", "Mast"]) 

In [None]:
fullmarkers["Malignant"].to_csv(resdir / "full_highmt_vs_lowmt_mal_dgex.csv")

In [None]:
malhighmarker.to_csv(resdir / "mal_high_markers.csv")
tmehighmarker.to_csv(resdir / "tme_high_markers.csv")

# Metastatic pancreas Raghavan 10X

In [None]:
resdir = full_resdir / "Raghavan_Pancreas_10X"
os.makedirs(resdir, exist_ok=True)

In [None]:
adata = sc.read_h5ad("/add/path/here/filtered_data/Raghavan_Pancreas_10X/filtered_adata.h5ad")

In [None]:
all_samples = adata.obs["sample"].unique()

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

In [None]:
adata.obs["HighMT"] = (adata.obs.pct_counts_mt>15).astype(int)
adata.obs["Malignant"] = (adata.obs.cleaned_celltype=="Malignant").astype(int)

# Normalize without the MT genes

In [None]:
adata.X = adata.layers["counts"].copy()

adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()

sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

## Score dissociation stress

In [None]:
sc.tl.score_genes(adata, gene_list=common_disso_genes, score_name="Dissociation stress")

# Remove patients with high pct MT in healthy cells

In [None]:
# Median % MT counts per sample, split by malignant status. The column is
# selected *before* aggregating: calling .median() on the whole grouped frame
# raises a TypeError on non-numeric obs columns in pandas >= 2.0.
pct_counts_values = adata.obs.groupby(["sample","Malignant"])["pct_counts_mt"].median().unstack()

# Malignant == 0 -> "TME", Malignant == 1 -> "Malignant"
pct_counts_values.columns = ["TME","Malignant"]

In [None]:
maladata = adata[adata.obs.Malignant==1].copy()

pct_high_mt = maladata.obs[["sample","HighMT"]].value_counts().unstack()

pct_high_mt.columns = ["LowMT","HighMT"]

pct_high_mt = pct_high_mt["HighMT"]/pct_high_mt.sum(axis=1)
pct_high_mt.name = "Pct_HighMT"

In [None]:
adata.obs["highlevel_celltype"] = adata.obs.cleaned_celltype.apply(lambda x: "TME" if x not in ["Malignant","Epithelial"] else x)

In [None]:
pct_counts_values.sort_values("TME",ascending=False)

In [None]:
all_vc = adata.obs[["sample","HighMT","Malignant"]].value_counts().unstack()

In [None]:
# Per-sample Fisher's exact test of HighMT status (rows) vs. malignancy (columns).
all_fisher_OR, all_fisher_p = {}, {}
for sample in all_vc.index.get_level_values(0).unique():
    # Full 2x2 table: reindexing guarantees both HighMT rows and both
    # Malignant columns exist, so a missing combination counts as 0 instead
    # of raising a KeyError. Counts are cast back to int for the test.
    contingency = (
        all_vc.loc[sample]
        .reindex(index=[0, 1], columns=[0, 1])
        .fillna(0)
        .astype(int)
    )

    # Skip samples with fewer than 30 TME cells or fewer than 30 malignant cells.
    if (contingency.sum(axis=0) < 30).any():
        print(sample, "does not have enough malignant/TME cells")
        continue
    # With fewer than 20 HighMT cells, record a null result (OR=0, p=1)
    # instead of running the test.
    if contingency.sum(axis=1).loc[1] < 20:
        print(sample, "does not have enough HighMT cells")
        OR, p = 0, 1
    else:
        OR, p = fisher_exact(contingency)
    all_fisher_OR[sample] = [OR]
    all_fisher_p[sample] = [p]

In [None]:
fisher_results = pd.concat([pd.DataFrame(all_fisher_p), pd.DataFrame(all_fisher_OR)]).T

fisher_results.columns = ["p","OR"]

fisher_results = fisher_results.sort_values("OR")

In [None]:
full_results = pd.concat([fisher_results,pct_counts_values,pct_high_mt],axis=1).dropna()

full_results["Category"] = "Case"
full_results.loc[(full_results.TME>15),"Category"] = "Negative Control"
full_results.loc[(full_results.TME<15) & (full_results.OR<2),"Category"] = "Positive Control"
full_results.loc[(full_results.TME<15) & (full_results.OR>2) & (full_results.Pct_HighMT<0.15),"Category"] = "Positive Control"

plot_order = full_results.sort_values("TME").index
full_results = full_results.loc[plot_order]

# One tick-label colour per sample, in plot order (Series.ravel() is deprecated; use to_numpy()).
colors = full_results["Category"].replace(color_mapping).to_numpy()

fig, ax = plt.subplots(1,1,figsize=(3.5,9))
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", hue="Malignant", 
               palette = {0: "tab:blue", 1: "tab:red"}, 
               order=plot_order,
               ax=ax)
ax.legend(frameon=False, title="Malignant", bbox_to_anchor=(1,1,0,0))
ax.vlines(x=15, ymin=ax.get_ylim()[0], ymax=ax.get_ylim()[1], color="grey", linestyle="--")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

for xtick, color in zip(ax.get_yticklabels(), colors):
    xtick.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/MetPancreas_Raghavan.png", 
            dpi=200, bbox_inches="tight")

In [None]:
# % MT counts per sample, split by high-level cell type (TME / Epithelial / Malignant).
fig, ax = plt.subplots(1,1,figsize=(3.5,9))
# Draw on the axes created above explicitly rather than on the implicit current axes.
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", order=plot_order,
            hue="highlevel_celltype",
            hue_order=["TME","Epithelial","Malignant"],
            palette={"TME": "tab:blue", "Epithelial": "tab:orange", "Malignant": "tab:red"},
            ax=ax)

ax.legend(frameon=False, title="High-level cell type", bbox_to_anchor=(1,1,0,0))
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")
# Colour each sample label by its category (same order as plot_order).
for ticklabel, color in zip(ax.get_yticklabels(), colors):
    ticklabel.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/MetPancreas_Raghavan_wNormal-Epithelial.png",
            dpi=200, bbox_inches="tight")

In [None]:
fig, ax = plt.subplots(1,1,figsize=(3.5,9))
sns.boxplot(data=adata.obs, y="sample", x="Dissociation stress", hue="Malignant", 
               palette = {0: "tab:blue", 1: "tab:red"}, 
               order=plot_order,
               ax=ax)
ax.legend(frameon=False, title="Malignant")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

for xtick, color in zip(ax.get_yticklabels(), colors):
    xtick.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("Dissociation stress")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/MetPancreas_Raghavan_dissociation.png", 
            dpi=200, bbox_inches="tight")

In [None]:
adata.obs[["sample","pct_counts_mt","cell_type","HighMT","Malignant","Dissociation stress"]].to_csv("/add/path/here/info-pct-counts-full/met_pancreas_adata.csv")

# Find markers of pct counts MT in cases

In [None]:
malhighmarker, tmehighmarker, fullmarkers = get_markers_high_mt(full_results=full_results, adata=adata, 
                        sample_col="sample", malignant_cells=["Malignant"], 
                        tme_cells=["B_cell", "T_cell", "Dendritic", "Macrophage", "Plasma",
                                   "Fibroblast", "Endothelial", "Epithelial"]) 

In [None]:
fullmarkers["Malignant"].to_csv(resdir / "full_highmt_vs_lowmt_mal_dgex.csv")

In [None]:
malhighmarker.to_csv(resdir / "mal_high_markers.csv")
tmehighmarker.to_csv(resdir / "tme_high_markers.csv")

# Prostate Song SeqWell

In [None]:
resdir = full_resdir / "Song_Prostate_SeqWell"
os.makedirs(resdir, exist_ok=True)

In [None]:
adata = sc.read_h5ad("/add/path/here/filtered_data/Song_Prostate_SeqWell/filtered_adata.h5ad")

In [None]:
all_samples = adata.obs["sample"].unique()

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

In [None]:
adata.obs["HighMT"] = (adata.obs.pct_counts_mt>15).astype(int)
adata.obs["Malignant"] = (adata.obs.cleaned_celltype=="Malignant").astype(int)

# Normalize without the MT genes

In [None]:
adata.X = adata.layers["counts"].copy()

adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()

sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

## Score dissociation stress

In [None]:
sc.tl.score_genes(adata, gene_list=common_disso_genes, score_name="Dissociation stress")

# Remove patients with high pct MT in healthy cells

In [None]:
adata = adata[adata.obs.source=="Tumor"].copy()

In [None]:
# Median % MT counts per sample, split by malignant status. The column is
# selected *before* aggregating: calling .median() on the whole grouped frame
# raises a TypeError on non-numeric obs columns in pandas >= 2.0.
pct_counts_values = adata.obs.groupby(["sample","Malignant"])["pct_counts_mt"].median().unstack()

# Malignant == 0 -> "TME", Malignant == 1 -> "Malignant"
pct_counts_values.columns = ["TME","Malignant"]

In [None]:
maladata = adata[adata.obs.Malignant==1].copy()

pct_high_mt = maladata.obs[["sample","HighMT"]].value_counts().unstack()

pct_high_mt.columns = ["LowMT","HighMT"]

pct_high_mt = pct_high_mt["HighMT"]/pct_high_mt.sum(axis=1)
pct_high_mt.name = "Pct_HighMT"

In [None]:
adata.obs["highlevel_celltype"] = adata.obs.cleaned_celltype.apply(lambda x: "TME" if x not in ["Malignant","Epithelial"] else x)

In [None]:
pct_counts_values.sort_values("TME",ascending=False)

In [None]:
all_vc = adata.obs[["sample","HighMT","Malignant"]].value_counts().unstack()

In [None]:
# Per-sample Fisher's exact test of HighMT status (rows) vs. malignancy (columns).
all_fisher_OR, all_fisher_p = {}, {}
for sample in all_vc.index.get_level_values(0).unique():
    # Full 2x2 table: reindexing guarantees both HighMT rows and both
    # Malignant columns exist, so a missing combination counts as 0 instead
    # of raising a KeyError. Counts are cast back to int for the test.
    contingency = (
        all_vc.loc[sample]
        .reindex(index=[0, 1], columns=[0, 1])
        .fillna(0)
        .astype(int)
    )

    # Skip samples with fewer than 30 TME cells or fewer than 30 malignant cells.
    if (contingency.sum(axis=0) < 30).any():
        print(sample, "does not have enough malignant/TME cells")
        continue
    # With fewer than 20 HighMT cells, record a null result (OR=0, p=1)
    # instead of running the test.
    if contingency.sum(axis=1).loc[1] < 20:
        print(sample, "does not have enough HighMT cells")
        OR, p = 0, 1
    else:
        OR, p = fisher_exact(contingency)
    all_fisher_OR[sample] = [OR]
    all_fisher_p[sample] = [p]

In [None]:
fisher_results = pd.concat([pd.DataFrame(all_fisher_p), pd.DataFrame(all_fisher_OR)]).T

fisher_results.columns = ["p","OR"]

fisher_results = fisher_results.sort_values("OR")

In [None]:
full_results = pd.concat([fisher_results,pct_counts_values,pct_high_mt],axis=1).dropna()

full_results["Category"] = "Case"
full_results.loc[(full_results.TME>15),"Category"] = "Negative Control"
full_results.loc[(full_results.TME<15) & (full_results.OR<2),"Category"] = "Positive Control"
full_results.loc[(full_results.TME<15) & (full_results.OR>2) & (full_results.Pct_HighMT<0.15),"Category"] = "Positive Control"

plot_order = full_results.sort_values("TME").index
full_results = full_results.loc[plot_order]

# One tick-label colour per sample, in plot order (Series.ravel() is deprecated; use to_numpy()).
colors = full_results["Category"].replace(color_mapping).to_numpy()

fig, ax = plt.subplots(1,1,figsize=(3.5,5))
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", hue="Malignant", 
               palette = {0: "tab:blue", 1: "tab:red"}, 
               order=plot_order,
               ax=ax)
ax.legend(frameon=False, title="Malignant", bbox_to_anchor=(1,1,0,0))
ax.vlines(x=15, ymin=ax.get_ylim()[0], ymax=ax.get_ylim()[1], color="grey", linestyle="--")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

for xtick, color in zip(ax.get_yticklabels(), colors):
    xtick.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/Prostate_Song.png", 
            dpi=200, bbox_inches="tight")

In [None]:
# % MT counts per sample, split by high-level cell type (TME / Epithelial / Malignant).
fig, ax = plt.subplots(1,1,figsize=(3.5,6))
# Draw on the axes created above explicitly rather than on the implicit current axes.
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", order=plot_order,
            hue="highlevel_celltype",
            hue_order=["TME","Epithelial","Malignant"],
            palette={"TME": "tab:blue", "Epithelial": "tab:orange", "Malignant": "tab:red"},
            ax=ax)

ax.legend(frameon=False, title="High-level cell type", bbox_to_anchor=(1,1,0,0))
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")
# Colour each sample label by its category (same order as plot_order).
for ticklabel, color in zip(ax.get_yticklabels(), colors):
    ticklabel.set_color(color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/Prostate_Song_wNormal-Epithelial.png",
            dpi=200, bbox_inches="tight")

In [None]:
# Dissociation-stress score per sample, split by malignant status.
fig, ax = plt.subplots(figsize=(3.5, 6))
sns.boxplot(
    data=adata.obs,
    x="Dissociation stress",
    y="sample",
    hue="Malignant",
    order=plot_order,
    palette={0: "tab:blue", 1: "tab:red"},
    ax=ax,
)
ax.legend(frameon=False, title="Malignant")

# Re-apply the current ticks/labels so the labels can be rotated.
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")
# Colour each sample label according to its category.
for tick_label, tick_color in zip(ax.get_yticklabels(), colors):
    tick_label.set_color(tick_color)

ax.set(xlabel="Dissociation stress", ylabel="Sample")
pretty_ax(ax)
fig.savefig(
    "/add/path/here/figures/case_control/Prostate_Song_dissociation.png",
    dpi=200,
    bbox_inches="tight",
)

In [None]:
adata.obs[["sample","pct_counts_mt","cell_type","HighMT","Malignant","Dissociation stress"]].to_csv("/add/path/here/info-pct-counts-full/prostate_song_adata.csv")

# RCC Bi 10X

In [None]:
resdir = full_resdir / "Bi_RCC_10X"
os.makedirs(resdir, exist_ok=True)

In [None]:
# Load the filtered dataset and flag every cell.
adata = sc.read_h5ad("/add/path/here/filtered_data/Bi_RCC_10X/filtered_adata.h5ad")

# NOTE(review): samples are listed from "biosample_id" here, while the rest of this
# section groups by "sample" — confirm both columns identify the same samples.
all_samples = adata.obs["biosample_id"].unique()

# HighMT = 1 when pct_counts_mt is above 15, otherwise 0.
adata.obs["HighMT"] = adata.obs["pct_counts_mt"].gt(15).astype(int)
# Malignant = 1 for the tumour cell types listed below, otherwise 0.
adata.obs["Malignant"] = adata.obs["cleaned_celltype"].isin(["Tumor","TP1","TP2"]).astype(int)

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

# Normalize without the MT genes

In [None]:
# Start again from the values stored in the "counts" layer.
adata.X = adata.layers["counts"].copy()

# Keep only the genes whose name does not start with "MT-", so those genes
# do not contribute to the per-cell totals used for normalisation.
is_mt_gene = adata.var_names.str.startswith("MT-")
non_mt_genes = adata.var_names[~is_mt_gene]
adata = adata[:, non_mt_genes].copy()

# Scale every cell to a total of 10,000, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

## Score dissociation stress

In [None]:
sc.tl.score_genes(adata, gene_list=common_disso_genes, score_name="Dissociation stress")

# Remove patients with high pct MT in healthy cells

In [None]:
# Median %MT per sample, split by malignant status (0 = TME, 1 = malignant).
# The column is selected *before* aggregating: calling `.median()` on the whole
# grouped obs table also tries to aggregate the non-numeric columns, which
# fails on pandas >= 2.0 and does needless work on older versions.
pct_counts_values = adata.obs.groupby(["sample","Malignant"])["pct_counts_mt"].median().unstack()

pct_counts_values.columns = ["TME","Malignant"]

In [None]:
# Malignant cells only.
maladata = adata[adata.obs.Malignant==1].copy()

# Number of low-/high-MT malignant cells per sample. Reindexing guarantees that both
# HighMT levels exist as columns, and absent combinations count as 0 cells instead of
# NaN, so a sample without any high-MT malignant cell gets a fraction of 0 rather than
# NaN (which would make the later `dropna()` silently discard the sample).
pct_high_mt = (
    maladata.obs[["sample","HighMT"]]
    .value_counts()
    .unstack()
    .reindex(columns=[0, 1])
    .fillna(0)
)

pct_high_mt.columns = ["LowMT","HighMT"]

# Fraction of malignant cells flagged HighMT (NaN only when a sample has no malignant cell).
pct_high_mt = pct_high_mt["HighMT"]/pct_high_mt.sum(axis=1)
pct_high_mt.name = "Pct_HighMT"

In [None]:
# Two high-level groups: the tumour cell types become "Malignant", everything else "TME".
adata.obs["highlevel_celltype"] = adata.obs.cleaned_celltype.apply(
    lambda celltype: "Malignant" if celltype in ("TP1", "TP2", "Tumor") else "TME"
)

In [None]:
pct_counts_values.sort_values("TME",ascending=False)

In [None]:
all_vc = adata.obs[["sample","HighMT","Malignant"]].value_counts().unstack()

In [None]:
# Per-sample Fisher exact test of HighMT status against malignant status.
all_fisher_OR, all_fisher_p = {},{}
for sample in all_vc.index.get_level_values(0).unique():

    # 2x2 table for this sample: rows = HighMT (0/1), columns = Malignant (0/1).
    # Reindexing turns absent levels into explicit zero counts, so a sample that lacks
    # e.g. any high-MT cell no longer raises a KeyError in the lookups below.
    table = all_vc.loc[sample].reindex(index=[0,1], columns=[0,1]).fillna(0)

    # Skip samples with fewer than 30 TME cells or fewer than 30 malignant cells.
    if (table.sum(axis=0)<30).any():
        print(sample, "does not have enough malignant/TME cells")
        continue
    if table.sum(axis=1).loc[1]<20:
        # Fewer than 20 high-MT cells: no test is run, a placeholder result is stored.
        print(sample, "does not have enough HighMT cells")
        OR, p = 0, 1
    else:
        OR, p = fisher_exact(table)
    # Stored as one-element lists so the dicts can be turned into DataFrames directly.
    all_fisher_OR[sample] = [OR]
    all_fisher_p[sample] = [p]

In [None]:
# Stack the per-sample p-values (first row) on the odds ratios (second row),
# transpose so every sample becomes one row, label the columns and sort by OR.
fisher_results = (
    pd.concat([pd.DataFrame(all_fisher_p), pd.DataFrame(all_fisher_OR)])
    .T
    .set_axis(["p", "OR"], axis=1)
    .sort_values("OR")
)

In [None]:
# Combine the Fisher statistics, the per-sample median %MT (TME / Malignant) and the
# fraction of high-MT malignant cells; samples missing any of these values are dropped.
full_results = pd.concat([fisher_results,pct_counts_values,pct_high_mt],axis=1).dropna()

# Categorise every sample:
#   median TME %MT > 15                                 -> "Negative Control"
#   median TME %MT < 15 and OR < 2                       -> "Positive Control"
#   median TME %MT < 15, OR > 2 and Pct_HighMT < 0.15    -> "Positive Control"
#   anything else (including values exactly on a cut-off) stays "Case"
full_results["Category"] = "Case"
full_results.loc[(full_results.TME>15),"Category"] = "Negative Control"
full_results.loc[(full_results.TME<15) & (full_results.OR<2),"Category"] = "Positive Control"
full_results.loc[(full_results.TME<15) & (full_results.OR>2) & (full_results.Pct_HighMT<0.15),"Category"] = "Positive Control"

# Samples are plotted by increasing median TME %MT.
plot_order = full_results.sort_values("TME").index
full_results = full_results.loc[plot_order]

# One colour per sample, in plotting order (`Series.ravel()` is deprecated; use `to_numpy()`).
colors = full_results["Category"].replace(color_mapping).to_numpy()

fig, ax = plt.subplots(1,1,figsize=(3.5,3.5))
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", hue="Malignant", 
               palette = {0: "tab:blue", 1: "tab:red"}, 
               order=plot_order,
               ax=ax)
ax.legend(frameon=False, title="Malignant", bbox_to_anchor=(1,1,0,0))
# Dashed reference line at the 15 %MT cut-off; axvline spans the whole axes height
# without having to read back the current y-limits.
ax.axvline(x=15, color="grey", linestyle="--")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

# Colour each sample label according to its category.
for tick_label, tick_color in zip(ax.get_yticklabels(), colors):
    tick_label.set_color(tick_color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/RCC_Bi.png", 
            dpi=200, bbox_inches="tight")

In [None]:
# Dissociation-stress score per sample, split by malignant status.
fig, ax = plt.subplots(figsize=(3.5, 3.5))
sns.boxplot(
    data=adata.obs,
    x="Dissociation stress",
    y="sample",
    hue="Malignant",
    order=plot_order,
    palette={0: "tab:blue", 1: "tab:red"},
    ax=ax,
)
ax.legend(frameon=False, title="Malignant")

# Re-apply the current ticks/labels so the labels can be rotated.
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")
# Colour each sample label according to its category.
for tick_label, tick_color in zip(ax.get_yticklabels(), colors):
    tick_label.set_color(tick_color)

ax.set(xlabel="Dissociation stress", ylabel="Sample")
pretty_ax(ax)
fig.savefig(
    "/add/path/here/figures/case_control/RCC_Bi_dissociation.png",
    dpi=200,
    bbox_inches="tight",
)

In [None]:
adata.obs[["sample","pct_counts_mt","cell_type_higher","HighMT","Malignant","Dissociation stress"]].to_csv("/add/path/here/info-pct-counts-full/RCC_adata.csv")

# Find markers of pct counts MT in cases

In [None]:
malhighmarker, tmehighmarker, fullmarkers = get_markers_high_mt(full_results=full_results, adata=adata, 
                        sample_col="sample", malignant_cells=["TP1","TP2","Tumor"], 
                        tme_cells=["T cell","TAM","T-Helper","NK","CD8+ T cell","Myeloid","Monocyte",
             "B cell","NKT","T-Reg","Plasma cell","Macrophage","DC","Endothelial","Fibroblast","Mast cell"]) 

In [None]:
fullmarkers["Malignant"].to_csv(resdir / "full_highmt_vs_lowmt_mal_dgex.csv")

In [None]:
malhighmarker.to_csv(resdir / "mal_high_markers.csv")
tmehighmarker.to_csv(resdir / "tme_high_markers.csv")

# NasoCarcinoma Chen 10X

In [None]:
resdir = full_resdir / "Chen_NasoCarcinoma_10X"
os.makedirs(resdir, exist_ok=True)

In [None]:
# Load the filtered dataset and flag every cell.
adata = sc.read_h5ad("/add/path/here/filtered_data/Chen_NasoCarcinoma_10X/filtered_adata.h5ad")

all_samples = adata.obs["sample"].unique()

# HighMT = 1 when pct_counts_mt is above 15, otherwise 0.
adata.obs["HighMT"] = adata.obs["pct_counts_mt"].gt(15).astype(int)
# Malignant = 1 for cells annotated "Malignant", otherwise 0.
adata.obs["Malignant"] = adata.obs["cleaned_celltype"].isin(["Malignant"]).astype(int)

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

# Normalize without the MT genes

In [None]:
# Start again from the values stored in the "counts" layer.
adata.X = adata.layers["counts"].copy()

# Keep only the genes whose name does not start with "MT-", so those genes
# do not contribute to the per-cell totals used for normalisation.
is_mt_gene = adata.var_names.str.startswith("MT-")
non_mt_genes = adata.var_names[~is_mt_gene]
adata = adata[:, non_mt_genes].copy()

# Scale every cell to a total of 10,000, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

## Score dissociation stress

In [None]:
sc.tl.score_genes(adata, gene_list=common_disso_genes, score_name="Dissociation stress")

# Remove patients with high pct MT in healthy cells

In [None]:
# Median %MT per sample, split by malignant status (0 = TME, 1 = malignant).
# The column is selected *before* aggregating: calling `.median()` on the whole
# grouped obs table also tries to aggregate the non-numeric columns, which
# fails on pandas >= 2.0 and does needless work on older versions.
pct_counts_values = adata.obs.groupby(["sample","Malignant"])["pct_counts_mt"].median().unstack()

pct_counts_values.columns = ["TME","Malignant"]

In [None]:
# Malignant cells only.
maladata = adata[adata.obs.Malignant==1].copy()

# Number of low-/high-MT malignant cells per sample. Reindexing guarantees that both
# HighMT levels exist as columns, and absent combinations count as 0 cells instead of
# NaN, so a sample without any high-MT malignant cell gets a fraction of 0 rather than
# NaN (which would make the later `dropna()` silently discard the sample).
pct_high_mt = (
    maladata.obs[["sample","HighMT"]]
    .value_counts()
    .unstack()
    .reindex(columns=[0, 1])
    .fillna(0)
)

pct_high_mt.columns = ["LowMT","HighMT"]

# Fraction of malignant cells flagged HighMT (NaN only when a sample has no malignant cell).
pct_high_mt = pct_high_mt["HighMT"]/pct_high_mt.sum(axis=1)
pct_high_mt.name = "Pct_HighMT"

In [None]:
# Three high-level groups: "Malignant" and "Epithelial" are kept as they are,
# every other cell type becomes "TME".
adata.obs["highlevel_celltype"] = adata.obs.cleaned_celltype.apply(
    lambda celltype: celltype if celltype in ("Malignant", "Epithelial") else "TME"
)

In [None]:
pct_counts_values.sort_values("TME",ascending=False)

In [None]:
all_vc = adata.obs[["sample","HighMT","Malignant"]].value_counts().unstack()

In [None]:
# Per-sample Fisher exact test of HighMT status against malignant status.
all_fisher_OR, all_fisher_p = {},{}
for sample in all_vc.index.get_level_values(0).unique():

    # 2x2 table for this sample: rows = HighMT (0/1), columns = Malignant (0/1).
    # Reindexing turns absent levels into explicit zero counts, so a sample that lacks
    # e.g. any high-MT cell no longer raises a KeyError in the lookups below.
    table = all_vc.loc[sample].reindex(index=[0,1], columns=[0,1]).fillna(0)

    # Skip samples with fewer than 30 TME cells or fewer than 30 malignant cells.
    if (table.sum(axis=0)<30).any():
        print(sample, "does not have enough malignant/TME cells")
        continue
    if table.sum(axis=1).loc[1]<20:
        # Fewer than 20 high-MT cells: no test is run, a placeholder result is stored.
        print(sample, "does not have enough HighMT cells")
        OR, p = 0, 1
    else:
        OR, p = fisher_exact(table)
    # Stored as one-element lists so the dicts can be turned into DataFrames directly.
    all_fisher_OR[sample] = [OR]
    all_fisher_p[sample] = [p]

In [None]:
# Stack the per-sample p-values (first row) on the odds ratios (second row),
# transpose so every sample becomes one row, label the columns and sort by OR.
fisher_results = (
    pd.concat([pd.DataFrame(all_fisher_p), pd.DataFrame(all_fisher_OR)])
    .T
    .set_axis(["p", "OR"], axis=1)
    .sort_values("OR")
)

In [None]:
# Combine the Fisher statistics, the per-sample median %MT (TME / Malignant) and the
# fraction of high-MT malignant cells; samples missing any of these values are dropped.
full_results = pd.concat([fisher_results,pct_counts_values,pct_high_mt],axis=1).dropna()

# Categorise every sample:
#   median TME %MT > 15                                 -> "Negative Control"
#   median TME %MT < 15 and OR < 2                       -> "Positive Control"
#   median TME %MT < 15, OR > 2 and Pct_HighMT < 0.15    -> "Positive Control"
#   anything else (including values exactly on a cut-off) stays "Case"
full_results["Category"] = "Case"
full_results.loc[(full_results.TME>15),"Category"] = "Negative Control"
full_results.loc[(full_results.TME<15) & (full_results.OR<2),"Category"] = "Positive Control"
full_results.loc[(full_results.TME<15) & (full_results.OR>2) & (full_results.Pct_HighMT<0.15),"Category"] = "Positive Control"

# Samples are plotted by increasing median TME %MT.
plot_order = full_results.sort_values("TME").index
full_results = full_results.loc[plot_order]

# One colour per sample, in plotting order (`Series.ravel()` is deprecated; use `to_numpy()`).
colors = full_results["Category"].replace(color_mapping).to_numpy()

fig, ax = plt.subplots(1,1,figsize=(3.5,3))
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", hue="Malignant", 
               palette = {0: "tab:blue", 1: "tab:red"}, 
               order=plot_order,
               ax=ax)
ax.legend(frameon=False, title="Malignant", bbox_to_anchor=(1,1,0,0))
# Dashed reference line at the 15 %MT cut-off; axvline spans the whole axes height
# without having to read back the current y-limits.
ax.axvline(x=15, color="grey", linestyle="--")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

# Colour each sample label according to its category.
for tick_label, tick_color in zip(ax.get_yticklabels(), colors):
    tick_label.set_color(tick_color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/NasoCarcinoma_Chen.png", 
            dpi=200, bbox_inches="tight")

In [None]:
# %MT per sample, split into TME / normal epithelial / malignant cells.
fig, ax = plt.subplots(1,1,figsize=(3.5,3))
# Draw on the axes created above explicitly (like the sibling plots) instead of
# relying on it being matplotlib's "current" axes.
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", order=plot_order,
            hue="highlevel_celltype", 
            hue_order=["TME","Epithelial","Malignant"], 
            palette={"TME": "tab:blue", "Epithelial": "tab:orange", "Malignant": "tab:red"},
            ax=ax)

ax.legend(frameon=False, title="High-level cell type", bbox_to_anchor=(1,1,0,0))
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")
# Colour each sample label according to its category.
for tick_label, tick_color in zip(ax.get_yticklabels(), colors):
    tick_label.set_color(tick_color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
# Styling is applied once, after all labels are set.
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/NasoCarcinoma_Chen_wNormal-Epithelial.png", 
            dpi=200, bbox_inches="tight")

In [None]:
# Dissociation-stress score per sample, split by malignant status.
fig, ax = plt.subplots(figsize=(3.5, 3))
sns.boxplot(
    data=adata.obs,
    x="Dissociation stress",
    y="sample",
    hue="Malignant",
    order=plot_order,
    palette={0: "tab:blue", 1: "tab:red"},
    ax=ax,
)
ax.legend(frameon=False, title="Malignant")

# Re-apply the current ticks/labels so the labels can be rotated.
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")
# Colour each sample label according to its category.
for tick_label, tick_color in zip(ax.get_yticklabels(), colors):
    tick_label.set_color(tick_color)

ax.set(xlabel="Dissociation stress", ylabel="Sample")
pretty_ax(ax)
fig.savefig(
    "/add/path/here/figures/case_control/NasoCarcinoma_Chen_dissociation.png",
    dpi=200,
    bbox_inches="tight",
)

In [None]:
adata.obs[["sample","pct_counts_mt","cell_type","HighMT","Malignant","Dissociation stress"]].to_csv("/add/path/here/info-pct-counts-full/NasoCarcinoma_adata.csv")

# Find markers of pct counts MT in cases

In [None]:
malhighmarker, tmehighmarker, fullmarkers = get_markers_high_mt(full_results=full_results, adata=adata, 
                        sample_col="sample", malignant_cells=["Malignant"], 
                        tme_cells=["B_cell","T_cell","NK_cell","Macrophage",
                                   "Plasma","Epithelial","Endothelial","Dendritic",
                                   "Lymphovascular","Fibroblast","Myofibroblast","Mast"]) 

In [None]:
fullmarkers["Malignant"].to_csv(resdir / "full_highmt_vs_lowmt_mal_dgex.csv")

# Breast Wu 10X

In [None]:
resdir = full_resdir / "Wu_Breast_10X"
os.makedirs(resdir, exist_ok=True)

In [None]:
# Load the filtered dataset and flag every cell.
adata = sc.read_h5ad("/add/path/here/filtered_data/Breast_Wu_10X/filtered_adata.h5ad")

all_samples = adata.obs["Patient"].unique()

# HighMT = 1 when pct_counts_mt is above 15, otherwise 0.
adata.obs["HighMT"] = adata.obs["pct_counts_mt"].gt(15).astype(int)
# Malignant = 1 for cells annotated "Cancer Epithelial", otherwise 0.
adata.obs["Malignant"] = adata.obs["cleaned_celltype"].isin(["Cancer Epithelial"]).astype(int)

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

# Normalize without the MT genes

In [None]:
# Start again from the values stored in the "counts" layer.
adata.X = adata.layers["counts"].copy()

# Keep only the genes whose name does not start with "MT-", so those genes
# do not contribute to the per-cell totals used for normalisation.
is_mt_gene = adata.var_names.str.startswith("MT-")
non_mt_genes = adata.var_names[~is_mt_gene]
adata = adata[:, non_mt_genes].copy()

# Scale every cell to a total of 10,000, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

## Score dissociation stress

In [None]:
sc.tl.score_genes(adata, gene_list=common_disso_genes, score_name="Dissociation stress")

# Remove patients with high pct MT in healthy cells

In [None]:
# Median %MT per patient, split by malignant status (0 = TME, 1 = malignant).
# The column is selected *before* aggregating: calling `.median()` on the whole
# grouped obs table also tries to aggregate the non-numeric columns, which
# fails on pandas >= 2.0 and does needless work on older versions.
pct_counts_values = adata.obs.groupby(["Patient","Malignant"])["pct_counts_mt"].median().unstack()

pct_counts_values.columns = ["TME","Malignant"]

In [None]:
# Malignant cells only.
maladata = adata[adata.obs.Malignant==1].copy()

# Number of low-/high-MT malignant cells per patient. Reindexing guarantees that both
# HighMT levels exist as columns, and absent combinations count as 0 cells instead of
# NaN, so a patient without any high-MT malignant cell gets a fraction of 0 rather than
# NaN (which would make the later `dropna()` silently discard the patient).
pct_high_mt = (
    maladata.obs[["Patient","HighMT"]]
    .value_counts()
    .unstack()
    .reindex(columns=[0, 1])
    .fillna(0)
)

pct_high_mt.columns = ["LowMT","HighMT"]

# Fraction of malignant cells flagged HighMT (NaN only when a patient has no malignant cell).
pct_high_mt = pct_high_mt["HighMT"]/pct_high_mt.sum(axis=1)
pct_high_mt.name = "Pct_HighMT"

In [None]:
# Three high-level groups: "Cancer Epithelial" -> "Malignant",
# "Normal Epithelial" -> "Epithelial", every other cell type -> "TME".
adata.obs["highlevel_celltype"] = adata.obs.cleaned_celltype.apply(
    lambda celltype: {
        "Cancer Epithelial": "Malignant",
        "Normal Epithelial": "Epithelial",
    }.get(celltype, "TME")
)

In [None]:
pct_counts_values.sort_values("TME",ascending=False)

In [None]:
all_vc = adata.obs[["Patient","HighMT","Malignant"]].value_counts().unstack()

In [None]:
# Per-sample Fisher exact test of HighMT status against malignant status.
all_fisher_OR, all_fisher_p = {},{}
for sample in all_vc.index.get_level_values(0).unique():

    # 2x2 table for this sample: rows = HighMT (0/1), columns = Malignant (0/1).
    # Reindexing turns absent levels into explicit zero counts, so a sample that lacks
    # e.g. any high-MT cell no longer raises a KeyError in the lookups below.
    table = all_vc.loc[sample].reindex(index=[0,1], columns=[0,1]).fillna(0)

    # Skip samples with fewer than 30 TME cells or fewer than 30 malignant cells.
    if (table.sum(axis=0)<30).any():
        print(sample, "does not have enough malignant/TME cells")
        continue
    if table.sum(axis=1).loc[1]<20:
        # Fewer than 20 high-MT cells: no test is run, a placeholder result is stored.
        print(sample, "does not have enough HighMT cells")
        OR, p = 0, 1
    else:
        OR, p = fisher_exact(table)
    # Stored as one-element lists so the dicts can be turned into DataFrames directly.
    all_fisher_OR[sample] = [OR]
    all_fisher_p[sample] = [p]

In [None]:
# Stack the per-sample p-values (first row) on the odds ratios (second row),
# transpose so every sample becomes one row, label the columns and sort by OR.
fisher_results = (
    pd.concat([pd.DataFrame(all_fisher_p), pd.DataFrame(all_fisher_OR)])
    .T
    .set_axis(["p", "OR"], axis=1)
    .sort_values("OR")
)

In [None]:
# Combine the Fisher statistics, the per-patient median %MT (TME / Malignant) and the
# fraction of high-MT malignant cells; patients missing any of these values are dropped.
full_results = pd.concat([fisher_results,pct_counts_values,pct_high_mt],axis=1).dropna()

# Categorise every patient:
#   median TME %MT > 15                                 -> "Negative Control"
#   median TME %MT < 15 and OR < 2                       -> "Positive Control"
#   median TME %MT < 15, OR > 2 and Pct_HighMT < 0.15    -> "Positive Control"
#   anything else (including values exactly on a cut-off) stays "Case"
full_results["Category"] = "Case"
full_results.loc[(full_results.TME>15),"Category"] = "Negative Control"
full_results.loc[(full_results.TME<15) & (full_results.OR<2),"Category"] = "Positive Control"
full_results.loc[(full_results.TME<15) & (full_results.OR>2) & (full_results.Pct_HighMT<0.15),"Category"] = "Positive Control"

# Patients are plotted by increasing median TME %MT.
plot_order = full_results.sort_values("TME").index
full_results = full_results.loc[plot_order]

# One colour per patient, in plotting order (`Series.ravel()` is deprecated; use `to_numpy()`).
colors = full_results["Category"].replace(color_mapping).to_numpy()

fig, ax = plt.subplots(1,1,figsize=(3.5,9))
sns.boxplot(data=adata.obs, y="Patient", x="pct_counts_mt", hue="Malignant", 
               palette = {0: "tab:blue", 1: "tab:red"}, 
               order=plot_order,
               ax=ax)
ax.legend(frameon=False, title="Malignant", bbox_to_anchor=(1,1,0,0))
# Dashed reference line at the 15 %MT cut-off; axvline spans the whole axes height
# without having to read back the current y-limits.
ax.axvline(x=15, color="grey", linestyle="--")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

# Colour each patient label according to its category.
for tick_label, tick_color in zip(ax.get_yticklabels(), colors):
    tick_label.set_color(tick_color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/Breast_Wu.png", 
            dpi=200, bbox_inches="tight")

In [None]:
# %MT per patient, split into TME / normal epithelial / malignant cells.
fig, ax = plt.subplots(1,1,figsize=(3.5,9))
# Draw on the axes created above explicitly (like the sibling plots) instead of
# relying on it being matplotlib's "current" axes.
sns.boxplot(data=adata.obs, y="Patient", x="pct_counts_mt", order=plot_order,
            hue="highlevel_celltype", 
            hue_order=["TME","Epithelial","Malignant"], 
            palette={"TME": "tab:blue", "Epithelial": "tab:orange", "Malignant": "tab:red"},
            ax=ax)

ax.legend(frameon=False, title="High-level cell type", bbox_to_anchor=(1,1,0,0))
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")
# Colour each patient label according to its category.
for tick_label, tick_color in zip(ax.get_yticklabels(), colors):
    tick_label.set_color(tick_color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
# Styling is applied once, after all labels are set.
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/Breast_Wu_wNormal-Epithelial.png", 
            dpi=200, bbox_inches="tight")

In [None]:
# Dissociation-stress score per patient, split by malignant status.
fig, ax = plt.subplots(figsize=(3.5, 9))
sns.boxplot(
    data=adata.obs,
    x="Dissociation stress",
    y="Patient",
    hue="Malignant",
    order=plot_order,
    palette={0: "tab:blue", 1: "tab:red"},
    ax=ax,
)
ax.legend(frameon=False, title="Malignant")

# Re-apply the current ticks/labels so the labels can be rotated.
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")
# Colour each patient label according to its category.
for tick_label, tick_color in zip(ax.get_yticklabels(), colors):
    tick_label.set_color(tick_color)

ax.set(xlabel="Dissociation stress", ylabel="Sample")
pretty_ax(ax)
fig.savefig(
    "/add/path/here/figures/case_control/Breast_Wu_dissociation.png",
    dpi=200,
    bbox_inches="tight",
)

In [None]:
adata.obs[["Patient","pct_counts_mt","cell_type","HighMT","Malignant","Dissociation stress"]].to_csv("/add/path/here/info-pct-counts-full/breast_adata.csv")

# Find markers of pct counts MT in cases

In [None]:
malhighmarker, tmehighmarker, fullmarkers = get_markers_high_mt(full_results=full_results, adata=adata, 
                        sample_col="Patient", malignant_cells=["Cancer Epithelial"], 
                        tme_cells=['Endothelial', 'CAFs', 'PVL', 'B-cells', 'T-cells', 'Myeloid', 'Normal Epithelial', 'Plasmablasts',]) 

In [None]:
fullmarkers["Malignant"].to_csv(resdir / "full_highmt_vs_lowmt_mal_dgex.csv")

In [None]:
malhighmarker.to_csv(resdir / "mal_high_markers.csv")
tmehighmarker.to_csv(resdir / "tme_high_markers.csv")

# LUAD Bischoff 10X

In [None]:
resdir = full_resdir / "LUAD_Bischoff_10X"
os.makedirs(resdir, exist_ok=True)

In [None]:
# Load the filtered dataset and flag every cell.
adata = sc.read_h5ad("/add/path/here/filtered_data/LUAD_Bischoff_10X/filtered_adata.h5ad")

all_samples = adata.obs["sample"].unique()

# HighMT = 1 when pct_counts_mt is above 15, otherwise 0.
adata.obs["HighMT"] = adata.obs["pct_counts_mt"].gt(15).astype(int)
# Malignant = 1 for cells annotated "Malignant", otherwise 0.
adata.obs["Malignant"] = adata.obs["cleaned_celltype"].isin(["Malignant"]).astype(int)

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

# Normalize without the MT genes

In [None]:
# Start again from the values stored in the "counts" layer.
adata.X = adata.layers["counts"].copy()

# Keep only the genes whose name does not start with "MT-", so those genes
# do not contribute to the per-cell totals used for normalisation.
is_mt_gene = adata.var_names.str.startswith("MT-")
non_mt_genes = adata.var_names[~is_mt_gene]
adata = adata[:, non_mt_genes].copy()

# Scale every cell to a total of 10,000, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=10000)
sc.pp.log1p(adata)

## Score dissociation stress

In [None]:
sc.tl.score_genes(adata, gene_list=common_disso_genes, score_name="Dissociation stress")

# Remove patients with high pct MT in healthy cells

In [None]:
# Median %MT per sample, split by malignant status; after unstacking, the
# Malignant levels (0, 1) become the columns "TME" and "Malignant".
pct_counts_values = (
    adata.obs
    .groupby(["sample", "Malignant"])["pct_counts_mt"]
    .median()
    .unstack()
    .set_axis(["TME", "Malignant"], axis=1)
)

In [None]:
# Malignant cells only.
maladata = adata[adata.obs.Malignant==1].copy()

# Number of low-/high-MT malignant cells per sample. Reindexing guarantees that both
# HighMT levels exist as columns, and absent combinations count as 0 cells instead of
# NaN, so a sample without any high-MT malignant cell gets a fraction of 0 rather than
# NaN (which would make the later `dropna()` silently discard the sample).
pct_high_mt = (
    maladata.obs[["sample","HighMT"]]
    .value_counts()
    .unstack()
    .reindex(columns=[0, 1])
    .fillna(0)
)

pct_high_mt.columns = ["LowMT","HighMT"]

# Fraction of malignant cells flagged HighMT (NaN only when a sample has no malignant cell).
pct_high_mt = pct_high_mt["HighMT"]/pct_high_mt.sum(axis=1)
pct_high_mt.name = "Pct_HighMT"

In [None]:
# Three high-level groups: "Malignant" and "Epithelial" are kept as they are,
# every other cell type becomes "TME".
adata.obs["highlevel_celltype"] = adata.obs.cleaned_celltype.apply(
    lambda celltype: celltype if celltype in ("Malignant", "Epithelial") else "TME"
)

In [None]:
pct_counts_values.sort_values("TME",ascending=False)

In [None]:
all_vc = adata.obs[["sample","HighMT","Malignant"]].value_counts().unstack()

In [None]:
all_vc

In [None]:
# Per-sample Fisher exact test of HighMT status against malignant status.
all_fisher_OR, all_fisher_p = {},{}
for sample in all_vc.index.get_level_values(0).unique():

    # 2x2 table for this sample: rows = HighMT (0/1), columns = Malignant (0/1).
    # Reindexing turns absent levels into explicit zero counts, so a sample that lacks
    # e.g. any high-MT cell no longer raises a KeyError in the lookups below.
    table = all_vc.loc[sample].reindex(index=[0,1], columns=[0,1]).fillna(0)

    # Skip samples with fewer than 30 TME cells or fewer than 30 malignant cells.
    if (table.sum(axis=0)<30).any():
        print(sample, "does not have enough malignant/TME cells")
        continue
    if table.sum(axis=1).loc[1]<20:
        # Fewer than 20 high-MT cells: no test is run, a placeholder result is stored.
        print(sample, "does not have enough HighMT cells")
        OR, p = 0, 1
    else:
        OR, p = fisher_exact(table)
    # Stored as one-element lists so the dicts can be turned into DataFrames directly.
    all_fisher_OR[sample] = [OR]
    all_fisher_p[sample] = [p]

In [None]:
# Stack the per-sample p-values (first row) on the odds ratios (second row),
# transpose so every sample becomes one row, label the columns and sort by OR.
fisher_results = (
    pd.concat([pd.DataFrame(all_fisher_p), pd.DataFrame(all_fisher_OR)])
    .T
    .set_axis(["p", "OR"], axis=1)
    .sort_values("OR")
)

In [None]:
# NOTE(review): `full_results` is only rebuilt for this dataset in the next cell, so on a
# top-to-bottom run this displays the table left over from the previous dataset section.
full_results

In [None]:
# Combine the Fisher statistics, the per-sample median %MT (TME / Malignant) and the
# fraction of high-MT malignant cells; samples missing any of these values are dropped.
full_results = pd.concat([fisher_results,pct_counts_values,pct_high_mt],axis=1).dropna()

# Categorise every sample:
#   median TME %MT > 15                                 -> "Negative Control"
#   median TME %MT < 15 and OR < 2                       -> "Positive Control"
#   median TME %MT < 15, OR > 2 and Pct_HighMT < 0.15    -> "Positive Control"
#   anything else (including values exactly on a cut-off) stays "Case"
full_results["Category"] = "Case"
full_results.loc[(full_results.TME>15),"Category"] = "Negative Control"
full_results.loc[(full_results.TME<15) & (full_results.OR<2),"Category"] = "Positive Control"
full_results.loc[(full_results.TME<15) & (full_results.OR>2) & (full_results.Pct_HighMT<0.15),"Category"] = "Positive Control"

# Samples are plotted by increasing median TME %MT.
plot_order = full_results.sort_values("TME").index
full_results = full_results.loc[plot_order]

# One colour per sample, in plotting order (`Series.ravel()` is deprecated; use `to_numpy()`).
colors = full_results["Category"].replace(color_mapping).to_numpy()

fig, ax = plt.subplots(1,1,figsize=(3.5,5))
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", hue="Malignant", 
               palette = {0: "tab:blue", 1: "tab:red"}, 
               order=plot_order,
               ax=ax)
ax.legend(frameon=False, title="Malignant", bbox_to_anchor=(1,1,0,0))
# Dashed reference line at the 15 %MT cut-off; axvline spans the whole axes height
# without having to read back the current y-limits.
ax.axvline(x=15, color="grey", linestyle="--")
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")

# Colour each sample label according to its category.
for tick_label, tick_color in zip(ax.get_yticklabels(), colors):
    tick_label.set_color(tick_color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/LUAD_Bischoff.png", 
            dpi=200, bbox_inches="tight")

In [None]:
# %MT per sample, split into TME / normal epithelial / malignant cells.
fig, ax = plt.subplots(1,1,figsize=(3.5,5))
# Draw on the axes created above explicitly (like the sibling plots) instead of
# relying on it being matplotlib's "current" axes.
sns.boxplot(data=adata.obs, y="sample", x="pct_counts_mt", order=plot_order,
            hue="highlevel_celltype", 
            hue_order=["TME","Epithelial","Malignant"], 
            palette={"TME": "tab:blue", "Epithelial": "tab:orange", "Malignant": "tab:red"},
            ax=ax)

ax.legend(frameon=False, title="High-level cell type", bbox_to_anchor=(1,1,0,0))
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")
# Colour each sample label according to its category.
for tick_label, tick_color in zip(ax.get_yticklabels(), colors):
    tick_label.set_color(tick_color)

ax.set_ylabel("Sample")
ax.set_xlabel("% MT counts")
# Styling is applied once, after all labels are set.
pretty_ax(ax)
fig.savefig("/add/path/here/figures/case_control/LUAD_Bischoff_wNormal-Epithelial.png", 
            dpi=200, bbox_inches="tight")

In [None]:
# Dissociation-stress score per sample, split by malignant status.
fig, ax = plt.subplots(figsize=(3.5, 5))
sns.boxplot(
    data=adata.obs,
    x="Dissociation stress",
    y="sample",
    hue="Malignant",
    order=plot_order,
    palette={0: "tab:blue", 1: "tab:red"},
    ax=ax,
)
ax.legend(frameon=False, title="Malignant")

# Re-apply the current ticks/labels so the labels can be rotated.
ax.set_yticks(ax.get_yticks(), ax.get_yticklabels(), rotation=45, ha="right")
# Colour each sample label according to its category.
for tick_label, tick_color in zip(ax.get_yticklabels(), colors):
    tick_label.set_color(tick_color)

ax.set(xlabel="Dissociation stress", ylabel="Sample")
pretty_ax(ax)
fig.savefig(
    "/add/path/here/figures/case_control/LUAD_Bischoff_dissociation.png",
    dpi=200,
    bbox_inches="tight",
)

In [None]:
adata.obs[["sample","pct_counts_mt","cell_type","HighMT","Malignant","Dissociation stress"]].to_csv("/add/path/here/info-pct-counts-full/LUADBischoff_adata.csv")

# Find markers of pct counts MT in cases

In [None]:
malhighmarker, tmehighmarker, fullmarkers = get_markers_high_mt(full_results=full_results, adata=adata, 
                        sample_col="sample", malignant_cells=["Malignant"], 
                        tme_cells=['T_cell','B_cell','NK_cell','Macrophage',
                                   'Fibroblast','Endothelial','Mast','Dendritic']) 

In [None]:
fullmarkers["Malignant"].to_csv(resdir / "full_highmt_vs_lowmt_mal_dgex.csv")

# Find markers of pct counts MT in cases

In [None]:
# NOTE(review): second marker call in this section on the same `adata` / `full_results`;
# it differs from the previous call only in `tme_cells`, and its "Malignant" table is then
# written to the same `resdir` file as before. Looks like a leftover from another
# dataset section — confirm whether this cell (and the export below it) should remain.
malhighmarker, tmehighmarker, fullmarkers = get_markers_high_mt(full_results=full_results, adata=adata, 
                        sample_col="sample", malignant_cells=["Malignant"], 
                        tme_cells=['Fibroblast','Macrophage','Endothelial','T_cell',
                                   'Myocyte','B_cell','Mast',]) 

In [None]:
fullmarkers["Malignant"].to_csv(resdir / "full_highmt_vs_lowmt_mal_dgex.csv")