In [None]:
import pandas as pd
import scipy.stats as stats
import pandas as pd
import numpy as np
import scanpy as sc

import matplotlib.pyplot as plt
import seaborn as sns

from statannotations.Annotator import Annotator

from tqdm.notebook import tqdm

from scipy.stats import fisher_exact
import pathlib as pl
import os
from typing import List, Tuple
from scipy.stats import mannwhitneyu
import re
from matplotlib.legend_handler import HandlerTuple

# Metastatic pancreas Raghavan 10X

In [None]:
# Load the filtered AnnData object for the Raghavan pancreas 10X dataset.
pancreas_h5ad = "/add/path/here/filtered_data/Raghavan_Pancreas_10X/filtered_adata.h5ad"
adata = sc.read_h5ad(pancreas_h5ad)

In [None]:
# Snapshot the distinct sample labels before `sample` is re-typed below.
all_samples = adata.obs["sample"].unique()

# Gene filter threshold: 1% of the number of cells (rows of `adata`).
min_cells_per_gene = int(0.01 * adata.shape[0])
sc.pp.filter_genes(adata, min_cells=min_cells_per_gene)

# Per-cell 0/1 flags: more than 15% mitochondrial counts, and malignant cell type.
adata.obs["HighMT"] = (adata.obs["pct_counts_mt"] > 15).astype(int)
adata.obs["Malignant"] = (adata.obs["cleaned_celltype"] == "Malignant").astype(int)

# Work with plain string sample labels from here on.
adata.obs["sample"] = adata.obs["sample"].astype(str)

In [None]:
# Median mitochondrial-count percentage per sample, split by malignant status.
# The column is selected *before* aggregating so the median is only computed on
# `pct_counts_mt`; aggregating the whole `obs` frame fails on non-numeric
# columns in pandas >= 2.0.
pct_counts_values = (
    adata.obs.groupby(["sample", "Malignant"])["pct_counts_mt"]
    .median()
    .unstack()
    # Malignant is a 0/1 flag: 0 -> tumour microenvironment, 1 -> malignant cells.
    .rename(columns={0: "TME", 1: "Malignant"})
)

# Restrict to malignant cells for the high-MT fraction.
maladata = adata[adata.obs.Malignant == 1].copy()

# Fraction of malignant cells flagged HighMT in each sample. HighMT is a 0/1
# flag, so its group-wise mean equals HighMT / (LowMT + HighMT). Unlike counting
# and unstacking, a sample without any high-MT cell gets 0.0 instead of NaN and
# is therefore not silently dropped downstream.
pct_high_mt = maladata.obs.groupby("sample", observed=True)["HighMT"].mean()
pct_high_mt.name = "Pct_HighMT"


Associate the percentage of high-MT malignant cells per sample with clinical features

In [None]:
# Read the tab-separated clinical table for the metastatic pancreas cohort.
metpan_clin = pd.read_table("/add/path/here/auxiliary_data/MetPan_clinical.txt")

In [None]:
# Short column names for the clinical table (the assignment requires the file
# to have exactly these seven columns, in this order).
metpan_clin.columns = [
    "Patient", "Age", "Stage", "Treatment", "MetTreatment", "Status", "SurvTime",
]

# Clinical variables carried into the combined table.
features = ["Stage", "Status", "Age", "SurvTime"]

# Index the table by patient ID while keeping the "Patient" column.
metpan_clin = metpan_clin.set_index("Patient", drop=False)

# Relabel the second row as "PANFR0473".
# NOTE(review): this is positional — presumably the second patient ID in the
# file differs from the sequencing sample name; confirm against the file.
second_patient_label = metpan_clin.index[1]
metpan_clin = metpan_clin.rename(index={second_patient_label: "PANFR0473"})

# Normalise the stage label that carries a trailing space.
has_trailing_space = metpan_clin["Stage"] == "Locally advanced "
metpan_clin.loc[has_trailing_space, "Stage"] = "Locally advanced"

# Strip the "_Biopsy_None" part from the sample names so they can be aligned
# with the clinical table index.
biopsy_suffix = re.compile("_Biopsy_None")
pct_high_mt.index = [biopsy_suffix.sub("", sample) for sample in pct_high_mt.index]

In [None]:
# Align the per-sample high-MT fraction with the selected clinical features
# on their shared index.
combined = pd.concat([pct_high_mt, metpan_clin[features]], axis=1)

# Keep only rows that have a high-MT fraction. The explicit copy makes
# `combined` an independent frame, so the `.loc` assignment below does not
# write into a filtered slice (SettingWithCopyWarning).
combined = combined.loc[combined["Pct_HighMT"].notna()].copy()

# Both of these samples take their stage from the single clinical row labelled
# "PANFR0489 and PANFR0489R2".
combined.loc[["PANFR0489R", "PANFR0489"], "Stage"] = metpan_clin.loc[
    "PANFR0489 and PANFR0489R2", "Stage"
]

In [None]:
# Box + swarm plot of the per-sample high-MT fraction by stage, annotated with
# pairwise Mann-Whitney tests and saved as a PDF.
fig, ax = plt.subplots(1, 1, figsize=(5, 2.5))

order = ["Localized", "Locally advanced", "Metastatic"]

# Each comparison is a pair of (x, hue) tuples; x and hue are both "Stage",
# so every group label appears twice.
compared_stages = [
    ("Metastatic", "Localized"),
    ("Localized", "Locally advanced"),
    ("Metastatic", "Locally advanced"),
]
pairs = [((first, first), (second, second)) for first, second in compared_stages]

# Arguments shared by the boxplot, the swarmplot and the annotator.
stage_plot_kwargs = dict(
    data=combined,
    x="Stage",
    y="Pct_HighMT",
    hue="Stage",
    order=order,
    hue_order=order,
)

ax = sns.boxplot(boxprops={'alpha': 0.4}, dodge=False, width=0.4, **stage_plot_kwargs)
sns.swarmplot(ax=ax, linewidth=1, dodge=False, **stage_plot_kwargs)

handles, labels = ax.get_legend_handles_labels()

annotator = Annotator(ax, pairs, **stage_plot_kwargs)
annotator.configure(
    test='Mann-Whitney',
    text_format='simple',
    loc='inside',
    text_offset=1,
    show_test_name=False,
)
annotator.apply_and_annotate()

# The plotted quantity is a fraction, so pin the y-axis to [0, 1].
ax.set(ylim=(0, 1))
fig.show()
fig.savefig("/add/path/here/metpan_stage.pdf", format="pdf")

# Breast Wu 10X

In [None]:
# Load the filtered AnnData object for the Wu breast 10X dataset.
breast_h5ad = "/add/path/here/filtered_data/Breast_Wu_10X/filtered_adata.h5ad"
adata = sc.read_h5ad(breast_h5ad)

In [None]:
# Distinct patient labels present in the dataset.
all_samples = adata.obs["Patient"].unique()

# Per-cell 0/1 flags: more than 15% mitochondrial counts, and malignant
# ("Cancer Epithelial") cell type.
adata.obs["HighMT"] = (adata.obs.pct_counts_mt > 15).astype(int)
adata.obs["Malignant"] = (adata.obs.cleaned_celltype == "Cancer Epithelial").astype(int)

# Median mitochondrial-count percentage per patient, split by malignant status.
# The column is selected *before* aggregating so the median is only computed on
# `pct_counts_mt`; aggregating the whole `obs` frame fails on non-numeric
# columns in pandas >= 2.0.
pct_counts_values = (
    adata.obs.groupby(["Patient", "Malignant"])["pct_counts_mt"]
    .median()
    .unstack()
    # Malignant is a 0/1 flag: 0 -> tumour microenvironment, 1 -> malignant cells.
    .rename(columns={0: "TME", 1: "Malignant"})
)

# Restrict to malignant cells for the high-MT fraction.
maladata = adata[adata.obs.Malignant == 1].copy()

# Fraction of malignant cells flagged HighMT for each patient. HighMT is a 0/1
# flag, so its group-wise mean equals HighMT / (LowMT + HighMT). Unlike counting
# and unstacking, a patient without any high-MT cell gets 0.0 instead of NaN and
# is therefore not silently dropped downstream.
pct_high_mt = maladata.obs.groupby("Patient", observed=True)["HighMT"].mean()
pct_high_mt.name = "Pct_HighMT"

Associate the percentage of high-MT malignant cells per patient with clinical features

In [None]:
# Read the tab-separated breast clinical table.
brca_clin = pd.read_csv("/add/path/here/auxiliary_data/brca_clinical.txt", sep='\t')

# Build sample labels by prefixing each case ID with "CID", then index the
# table by them (the "Sample" column is kept).
brca_clin["Sample"] = brca_clin["Case ID"].map(lambda case_id: "CID" + case_id)
brca_clin = brca_clin.set_index("Sample", drop=False)

# Align the high-MT fraction with the clinical table on the shared index and
# keep only rows that have a high-MT fraction.
combined = pd.concat([pct_high_mt, brca_clin], axis=1)
missing_fraction = np.isnan(combined["Pct_HighMT"])
combined = combined[~missing_fraction]

# Complete cases only: drop every row that has any missing value.
combined2 = combined.dropna(axis=0)

In [None]:

# Box + swarm plot of the per-patient high-MT fraction by IHC subtype,
# annotated with pairwise Mann-Whitney tests and saved as a PDF.
fig, ax = plt.subplots(1, 1, figsize=(5, 2.5))

order = ["TNBC", "HER2+/ER+", "ER+", "HER2+"]

# Each comparison is a pair of (x, hue) tuples; x and hue are the same column,
# so every subtype label appears twice.
compared_subtypes = [
    ("ER+", "TNBC"),
    ("ER+", "HER2+"),
    ("ER+", "HER2+/ER+"),
    ("HER2+/ER+", "TNBC"),
    ("HER2+/ER+", "HER2+"),
    ("HER2+", "TNBC"),
]
pairs = [((first, first), (second, second)) for first, second in compared_subtypes]

# Arguments shared by the boxplot, the swarmplot and the annotator.
subtype_plot_kwargs = dict(
    data=combined2,
    x="Subtype by IHC",
    y="Pct_HighMT",
    hue="Subtype by IHC",
    order=order,
    hue_order=order,
)

ax = sns.boxplot(boxprops={'alpha': 0.4}, dodge=False, width=0.4, **subtype_plot_kwargs)
sns.swarmplot(ax=ax, linewidth=1, dodge=False, **subtype_plot_kwargs)

handles, labels = ax.get_legend_handles_labels()

annotator = Annotator(ax, pairs, **subtype_plot_kwargs)
annotator.configure(
    test='Mann-Whitney',
    text_format='simple',
    loc='inside',
    text_offset=1,
    show_test_name=False,
)
annotator.apply_and_annotate()

# The plotted quantity is a fraction, so pin the y-axis to [0, 1].
ax.set(ylim=(0, 1))
fig.show()
fig.savefig("/add/path/here/brca_subtype.pdf", format="pdf")


# SCLC Chan

In [None]:
# Load the filtered AnnData object for the Chan SCLC 10X dataset.
sclc_h5ad = "/add/path/here/filtered_data/SCLC_Chan_10X/filtered_adata.h5ad"
adata = sc.read_h5ad(sclc_h5ad)

# Exclude cells whose `source` is "pleural_effusion".
not_pleural_effusion = adata.obs["source"] != "pleural_effusion"
adata = adata[not_pleural_effusion].copy()

# Distinct sample labels remaining after the exclusion.
all_samples = adata.obs["sample"].unique()

In [None]:
# Per-cell 0/1 flags: more than 15% mitochondrial counts, and malignant cell type.
adata.obs["HighMT"] = (adata.obs.pct_counts_mt > 15).astype(int)
adata.obs["Malignant"] = (adata.obs.cleaned_celltype == "Malignant").astype(int)

# Median mitochondrial-count percentage per sample, split by malignant status.
# The column is selected *before* aggregating so the median is only computed on
# `pct_counts_mt`; aggregating the whole `obs` frame fails on non-numeric
# columns in pandas >= 2.0.
pct_counts_values = (
    adata.obs.groupby(["sample", "Malignant"])["pct_counts_mt"]
    .median()
    .unstack()
    # Malignant is a 0/1 flag: 0 -> tumour microenvironment, 1 -> malignant cells.
    .rename(columns={0: "TME", 1: "Malignant"})
)

# Restrict to malignant cells for the high-MT fraction.
maladata = adata[adata.obs.Malignant == 1].copy()

# Fraction of malignant cells flagged HighMT in each sample. HighMT is a 0/1
# flag, so its group-wise mean equals HighMT / (LowMT + HighMT). Unlike counting
# and unstacking, a sample without any high-MT cell gets 0.0 instead of NaN and
# is therefore not silently dropped downstream.
pct_high_mt = maladata.obs.groupby("sample", observed=True)["HighMT"].mean()
pct_high_mt.name = "Pct_HighMT"

Associate the percentage of high-MT malignant cells per sample with clinical features

In [None]:
# Read the tab-separated SCLC clinical table.
sclc_clin = pd.read_csv("/add/path/here/auxiliary_data/sclc_clinical.txt", sep='\t')

# Clinical variables carried into the combined table.
features = ["Gender", "Vital Status", "Stage at Dx", "Overall Survival (months)"]

# Index the table by "Lab ID" and keep only the selected features.
sclc_clin.index = sclc_clin["Lab ID"]
sclc_clin = sclc_clin[features]

# Relabel sequencing sample names so they align with the clinical index.
# NOTE(review): "Ru1322A" uses a lowercase "u" unlike the other labels —
# presumably this mirrors the spelling in the clinical file; confirm.
pct_high_mt = pct_high_mt.rename({"RU426B":"RU426",
"RU1080C": "RU1080",
"RU1124A_LN":"RU1124",
"RU1181C":"RU1181_T",
"RU1229A_Frozen":"RU1229A",
"RU1322A_LN":"Ru1322A"})

# Align the high-MT fraction with the clinical table on the shared index and
# keep only rows that have a high-MT fraction.
combined = pd.concat([pct_high_mt, sclc_clin], axis=1)
combined = combined[~np.isnan(combined["Pct_HighMT"])]

# Complete cases only. The explicit copy makes `combined2` an independent
# frame, so adding the "Stage" column below does not write into a filtered
# slice (SettingWithCopyWarning).
combined2 = combined.dropna(axis=0).copy()

# Prepare stage: remove the "A"/"B" characters from the stage at diagnosis
# (e.g. "IIIA" becomes "III").
combined2["Stage"] = combined2["Stage at Dx"].str.replace("A|B", "", regex=True)

In [None]:
# All pairwise stage comparisons, in ascending stage order. Each comparison is
# a pair of (x, hue) tuples in which the stage label appears twice.
stage_levels = ["I", "II", "III", "IV"]
pairs = [
    ((lower, lower), (higher, higher))
    for position, lower in enumerate(stage_levels)
    for higher in stage_levels[position + 1:]
]

In [None]:
# Box + swarm plot of the per-sample high-MT fraction by stage, annotated with
# pairwise Mann-Whitney tests (using the `pairs` defined earlier in the
# notebook) and saved as a PDF.
fig, ax = plt.subplots(1, 1, figsize=(5, 2.5))

order = ["I", "II", "III", "IV"]

# Arguments shared by the boxplot, the swarmplot and the annotator.
sclc_plot_kwargs = dict(
    data=combined2,
    x="Stage",
    y="Pct_HighMT",
    hue="Stage",
    order=order,
    hue_order=order,
)

ax = sns.boxplot(boxprops={'alpha': 0.4}, dodge=False, width=0.4, **sclc_plot_kwargs)
sns.swarmplot(ax=ax, linewidth=1, dodge=False, **sclc_plot_kwargs)

handles, labels = ax.get_legend_handles_labels()

annotator = Annotator(ax, pairs, **sclc_plot_kwargs)
annotator.configure(
    test='Mann-Whitney',
    text_format='simple',
    loc='inside',
    text_offset=1,
    show_test_name=False,
)
annotator.apply_and_annotate()

# The plotted quantity is a fraction, so pin the y-axis to [0, 1].
ax.set(ylim=(0, 1))

fig.show()
fig.savefig("/add/path/here/sclc_stage.pdf", format="pdf")