In [None]:
import pandas as pd
import numpy as np
import scanpy as sc

import matplotlib.pyplot as plt
import seaborn as sns

from statannotations.Annotator import Annotator

In [None]:
def pretty_ax(ax):
    """Apply the notebook's shared axis styling to ``ax`` in place.

    Hides the top and right spines, configures tick marks and tick labels,
    and thickens the bottom and left spines to 1.5.

    Parameters
    ----------
    ax : object exposing ``spines`` and ``tick_params`` (a matplotlib Axes).

    Returns
    -------
    None
    """
    # Remove the frame on the sides that carry no axis.
    for side in ("right", "top"):
        ax.spines[side].set_visible(False)

    # Tick marks are drawn on the bottom only; labels stay on bottom and left.
    tick_settings = {
        "axis": "both",
        "which": "both",
        "bottom": True,
        "top": False,
        "left": False,
        "labelbottom": True,
        "labelleft": True,
    }
    ax.tick_params(**tick_settings)

    # Emphasize the two remaining spines.
    for side in ("bottom", "left"):
        ax.spines[side].set_linewidth(1.5)

In [None]:
# Table of core stress genes; must provide `logFC` and `gene_symbol` columns.
core_genes_stress = pd.read_csv("/add/path/here/auxiliary_data/coregene_df-FALSE-v3.csv")

# Gene symbols with a positive log fold change.
# Series.ravel() is deprecated since pandas 2.2; to_numpy() returns the same 1-D array.
list_core_genes = core_genes_stress[core_genes_stress["logFC"]>0].gene_symbol.to_numpy()

# Gene symbols of the first 40 rows of the table.
red_core_genes = core_genes_stress.head(40).gene_symbol.to_numpy()

# Two published dissociation signatures, one gene per row and no header;
# symbols are upper-cased before comparison.
dissociation_genes = pd.read_csv("/add/path/here/auxiliary_data/dissociation_genes-vanDenBrink2017.csv",header=None).astype(str)
dissociation_genes = dissociation_genes[0].str.upper().to_numpy()

dissociation_genes_machado = pd.read_csv("/add/path/here/auxiliary_data/dissociation_Machado2021.csv",header=None).astype(str)
dissociation_genes_machado = dissociation_genes_machado[0].str.upper().to_numpy()

# Genes shared by the positive-logFC core set and both dissociation signatures.
common_disso_genes = np.intersect1d(list_core_genes.astype(str),dissociation_genes.astype(str))
common_disso_genes = np.intersect1d(common_disso_genes,dissociation_genes_machado.astype(str))

In [None]:
# Parse the tab-separated GO library: field 0 is the term name, field 1 is
# skipped, and the remaining fields are the member genes.
gocc = {}
with open("/add/path/here/auxiliary_data/GO_Cellular_Component_2013.txt", "r") as f:
    for line in f:
        vals = line.rstrip("\r\n").split("\t")
        if not vals[0]:
            # Blank line: nothing to register.
            continue
        # Strip the newline and drop empty fields instead of slicing off the
        # last element: `vals[2:-1]` loses the final gene whenever a line does
        # not end with a trailing tab.
        gocc[vals[0]] = [gene for gene in vals[2:] if gene]

# GO terms scored in every dataset below.
pathways_gocc = ["mitochondrion (GO:0005739)",
                "cytoplasm (GO:0005737)"]

# Term name -> gene list, restricted to the selected terms (KeyError if a term is absent).
go_sigs = {}
for path in pathways_gocc:
    go_sigs[path] = gocc[path]

# LUAD Bischoff 

In [None]:
# Load the LUAD (Bischoff) AnnData object; presumably the cells before in-house filtering — TODO confirm.
adata = sc.read_h5ad("/add/path/here/LUAD_PrimaryTumor_Bischoff.h5ad")

In [None]:
# Load the LUAD object stored under filtered_data/; it is treated below as the set of kept cells.
filtered_adata = sc.read_h5ad("/add/path/here/filtered_data/LUAD_Bischoff_10X/filtered_adata.h5ad")

In [None]:
# Flag genes whose symbol starts with "MT-" and let scanpy add per-cell QC
# columns (including the mitochondrial ones) to adata in place.
is_mito = adata.var_names.str.startswith("MT-")
adata.var["mt"] = is_mito
sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], percent_top=None, log1p=True, inplace=True)

### Normalize w/o MT genes

In [None]:
# Drop genes whose symbol starts with "MT-" from both objects so that the
# normalization and every score below ignore mitochondrial transcripts.
adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()
filtered_adata = filtered_adata[:,filtered_adata.var_names[~filtered_adata.var_names.str.startswith("MT-")]].copy()

for dataset in (adata, filtered_adata):
    # Scale every cell to 10,000 total counts, then log1p-transform.
    sc.pp.normalize_total(dataset, target_sum=10000)
    sc.pp.log1p(dataset)

    # Per-cell variance of the normalized expression across genes.
    dataset.obs["Transcriptome variance"] = dataset.to_df().var(axis=1)

    # One score column per GO gene set. str.capitalize() lower-cases the
    # "GO:" part, hence column names such as 'Cytoplasm (go:0005737)'.
    for sig in go_sigs:
        sc.tl.score_genes(dataset, gene_list=go_sigs[sig], score_name=sig.capitalize())

    # The dissociation signature does not depend on the GO sets: score it
    # once instead of once per GO term.
    sc.tl.score_genes(dataset, gene_list=common_disso_genes, score_name="Dissociation stress")

In [None]:
# De-duplicate cell names in place so the label-based .loc lookups on adata.obs are unambiguous.
adata.obs_names_make_unique()

In [None]:
# Cells present in adata but absent from filtered_adata. Names in filtered_adata
# are cut down to their first two "-"-separated fields before the comparison —
# presumably to strip a suffix added during filtering; verify against the data.
filtered_cells = np.setdiff1d(adata.obs_names.to_numpy(), ["-".join(nm) for nm in filtered_adata.obs_names.str.split("-").str[:2]])

In [None]:
# Safeguard only: np.setdiff1d already returns sorted, unique values.
filtered_cells = np.unique(filtered_cells)

In [None]:
# obs columns compared between removed and retained cells.
obs_columns = [
    'log1p_n_genes_by_counts', 'Dissociation stress',
    'log1p_total_counts', "pct_counts_mt",
    "Transcriptome variance",
    'Mitochondrion (go:0005739)', 'Cytoplasm (go:0005737)',
]

# Cells missing from filtered_adata: annotations plus MALAT1 expression,
# index suffixed with "_pre".
removed_obs = adata.obs.loc[filtered_cells, obs_columns]
removed_malat1 = adata[filtered_cells, "MALAT1"].to_df()
filtered_df = pd.concat([removed_obs, removed_malat1], axis=1)
filtered_df.index = filtered_df.index + "_pre"
filtered_df["Condition"] = "Filtered"

# Cells in filtered_adata: same columns, index suffixed with "_post".
retained_obs = filtered_adata.obs[obs_columns]
retained_malat1 = filtered_adata[:, "MALAT1"].to_df()
kept_df = pd.concat([retained_obs, retained_malat1], axis=1)
kept_df.index = kept_df.index + "_post"
kept_df["Condition"] = "Kept"

In [None]:
# Stack removed and kept cells into one long table; the "Condition" column tells them apart.
df = pd.concat([filtered_df,kept_df])

In [None]:
# MALAT1 expression density, one curve per Condition; common_norm=False
# normalizes each curve on its own.
fig, ax = plt.subplots(1,1,figsize=(3,2))
sns.kdeplot(data=df, x="MALAT1", hue="Condition", ax=ax, common_norm=False, hue_order=["Kept","Filtered"])
# NOTE(review): this replaces seaborn's automatic legend with hand-written labels
# listed in the reverse of hue_order — confirm that labels and curve colors match.
plt.legend(["Filtered","Kept"], bbox_to_anchor=(1,1,0,0),frameon=False)
pretty_ax(ax)

In [None]:
# 2x3 grid: one box plot per metric comparing "Filtered" and "Kept" cells,
# each annotated with an uncorrected Mann-Whitney test.
fig, ax = plt.subplots(2,3,figsize=(5,3))
flatax = ax.flatten()

pairs = [("Filtered","Kept")]

# Column in df -> panel title.
panel_titles = {
    "pct_counts_mt": "% MT counts",
    'log1p_total_counts': 'log1p(Total counts)',
    "Dissociation stress": "Dissociation stress",
    "Transcriptome variance": "Transcriptome\nvariance",
    'Mitochondrion (go:0005739)': 'Mitochondrion\n(GO:0005739)',
    'Cytoplasm (go:0005737)': 'Cytoplasm\n(GO:0005737)',
}
features = list(panel_titles)
names = list(panel_titles.values())

for panel, feature, title in zip(flatax, features, names):
    sns.boxplot(data=df, x="Condition", y=feature, ax=panel)
    pretty_ax(panel)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.set_title(title)

    # Significance stars drawn inside the panel.
    annot = Annotator(panel, pairs=pairs, data=df, x="Condition", y=feature)
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,
        fontsize=10,
    )
    annot.apply_test()
    _, test_results = annot.annotate()

fig.tight_layout()
fig.savefig("/add/path/here/figures/pre_vs_post/luad.svg",
            dpi=200, bbox_inches="tight")

In [None]:
# Cells of filtered_adata with more than 15 % mitochondrial counts, renamed
# to the first two "-"-separated fields of their barcode.
highmt = filtered_adata.obs[filtered_adata.obs.pct_counts_mt>15].index.str.split("-").str[:2]
highmt = ["-".join(nm) for nm in highmt]

# Three-way label: default "Kept", then cells absent from filtered_adata,
# then the high-%MT cells.
adata.obs["Condition"] = "Kept"
adata.obs.loc[filtered_cells,"Condition"] = "Filtered (in-house)"
# Restrict to barcodes that exist in adata so an unmatched name cannot raise
# KeyError (same approach as the SCLC section).
adata.obs.loc[adata.obs_names.intersection(highmt),"Condition"] = "Additional %MT filtering"

condition_order = ["Filtered (in-house)",
                   "Additional %MT filtering",
                   "Kept"]

fig, ax = plt.subplots(1,1,figsize=(2,1))
# Draw on the axes created above rather than on the implicit current axes.
sns.boxplot(data=adata.obs, x="Condition", y="Dissociation stress",
            order=condition_order, ax=ax)
pretty_ax(ax)
ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha="right")
ax.set_xlabel("")
ax.set_ylabel("")
ax.set_title("Dissociation stress")
fig.savefig("/add/path/here/figures/pre_vs_post/luad_disso_stress.svg", dpi=200,
            bbox_inches="tight")

# SCLC Chan

In [None]:
# Load the SCLC (Chan) AnnData object; presumably the cells before in-house filtering — TODO confirm.
adata = sc.read_h5ad("/add/path/here/SCLC_Chan_10X.h5ad")

In [None]:
# Load the SCLC object that is treated below as the set of kept cells.
# NOTE(review): unlike the other datasets, this path has no "filtered_data/" component — confirm it is correct.
filtered_adata = sc.read_h5ad("/add/path/here/SCLC_Chan_10X/filtered_adata.h5ad")

In [None]:
# Flag genes whose symbol starts with "MT-" and let scanpy add per-cell QC
# columns (including the mitochondrial ones) to adata in place.
is_mito = adata.var_names.str.startswith("MT-")
adata.var["mt"] = is_mito
sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], percent_top=None, log1p=True, inplace=True)

### Normalize w/o MT genes

In [None]:
# Drop genes whose symbol starts with "MT-" from both objects so that the
# normalization and every score below ignore mitochondrial transcripts.
adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()
filtered_adata = filtered_adata[:,filtered_adata.var_names[~filtered_adata.var_names.str.startswith("MT-")]].copy()

for dataset in (adata, filtered_adata):
    # Scale every cell to 10,000 total counts, then log1p-transform.
    sc.pp.normalize_total(dataset, target_sum=10000)
    sc.pp.log1p(dataset)

    # Per-cell variance of the normalized expression across genes.
    dataset.obs["Transcriptome variance"] = dataset.to_df().var(axis=1)

    # One score column per GO gene set. str.capitalize() lower-cases the
    # "GO:" part, hence column names such as 'Cytoplasm (go:0005737)'.
    for sig in go_sigs:
        sc.tl.score_genes(dataset, gene_list=go_sigs[sig], score_name=sig.capitalize())

    # The dissociation signature does not depend on the GO sets: score it
    # once instead of once per GO term.
    sc.tl.score_genes(dataset, gene_list=common_disso_genes, score_name="Dissociation stress")

In [None]:
# Cells present in adata but absent from filtered_adata; filtered names are
# compared by the part before their first "-".
filtered_cells = np.setdiff1d(adata.obs_names.to_numpy(), filtered_adata.obs_names.str.split("-").str[0].to_numpy())

In [None]:
# obs columns compared between removed and retained cells.
obs_columns = [
    "cell_type", 'log1p_n_genes_by_counts', 'Dissociation stress',
    'log1p_total_counts', "pct_counts_mt",
    "Transcriptome variance",
    'Mitochondrion (go:0005739)', 'Cytoplasm (go:0005737)',
]

# Cells missing from filtered_adata: annotations plus MALAT1 expression,
# index suffixed with "_pre".
removed_obs = adata.obs.loc[filtered_cells, obs_columns]
removed_malat1 = adata[filtered_cells, "MALAT1"].to_df()
filtered_df = pd.concat([removed_obs, removed_malat1], axis=1)
filtered_df.index = filtered_df.index + "_pre"
filtered_df["Condition"] = "Filtered"

# Cells in filtered_adata: same columns, index suffixed with "_post".
retained_obs = filtered_adata.obs[obs_columns]
retained_malat1 = filtered_adata[:, "MALAT1"].to_df()
kept_df = pd.concat([retained_obs, retained_malat1], axis=1)
kept_df.index = kept_df.index + "_post"
kept_df["Condition"] = "Kept"

In [None]:
# Stack removed and kept cells into one long table; the "Condition" column tells them apart.
df = pd.concat([filtered_df,kept_df])

In [None]:
# MALAT1 expression density, one curve per Condition; common_norm=False
# normalizes each curve on its own.
fig, ax = plt.subplots(1,1,figsize=(3,2))
sns.kdeplot(data=df, x="MALAT1", hue="Condition", ax=ax, common_norm=False, hue_order=["Kept","Filtered"])
# NOTE(review): this replaces seaborn's automatic legend with hand-written labels
# listed in the reverse of hue_order — confirm that labels and curve colors match.
plt.legend(["Filtered","Kept"], bbox_to_anchor=(1,1,0,0),frameon=False)
pretty_ax(ax)

In [None]:
# 2x3 grid: one box plot per metric comparing "Filtered" and "Kept" cells,
# each annotated with an uncorrected Mann-Whitney test.
fig, ax = plt.subplots(2,3,figsize=(5,3))
flatax = ax.flatten()

pairs = [("Filtered","Kept")]

# Column in df -> panel title.
panel_titles = {
    "pct_counts_mt": "% MT counts",
    'log1p_total_counts': 'log1p(Total counts)',
    "Dissociation stress": "Dissociation stress",
    "Transcriptome variance": "Transcriptome\nvariance",
    'Mitochondrion (go:0005739)': 'Mitochondrion\n(GO:0005739)',
    'Cytoplasm (go:0005737)': 'Cytoplasm\n(GO:0005737)',
}
features = list(panel_titles)
names = list(panel_titles.values())

for panel, feature, title in zip(flatax, features, names):
    sns.boxplot(data=df, x="Condition", y=feature, ax=panel)
    pretty_ax(panel)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.set_title(title)

    # Significance stars drawn inside the panel.
    annot = Annotator(panel, pairs=pairs, data=df, x="Condition", y=feature)
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,
        fontsize=10,
    )
    annot.apply_test()
    _, test_results = annot.annotate()

fig.tight_layout()
fig.savefig("/add/path/here/figures/pre_vs_post/sclc.svg",
            dpi=200, bbox_inches="tight")

In [None]:
# Cells of filtered_adata with more than 15 % mitochondrial counts, renamed
# to the part of their barcode before the first "-".
highmt = filtered_adata.obs[filtered_adata.obs.pct_counts_mt>15].index.str.split("-").str[0]

# Three-way label: default "Kept", then cells absent from filtered_adata,
# then the high-%MT cells (only barcodes that exist in adata are relabelled).
adata.obs["Condition"] = "Kept"
adata.obs.loc[filtered_cells,"Condition"] = "Filtered (in-house)"
adata.obs.loc[adata.obs_names.intersection(highmt),"Condition"] = "Additional %MT filtering"

condition_order = ["Filtered (in-house)",
                   "Additional %MT filtering",
                   "Kept"]

fig, ax = plt.subplots(1,1,figsize=(2,1))
# Draw on the axes created above rather than on the implicit current axes.
sns.boxplot(data=adata.obs, x="Condition", y="Dissociation stress",
            order=condition_order, ax=ax)
pretty_ax(ax)
ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha="right")
ax.set_xlabel("")
ax.set_ylabel("")
ax.set_title("Dissociation stress")
fig.savefig("/add/path/here/figures/pre_vs_post/sclc_disso_stress.svg", dpi=200,
            bbox_inches="tight")

# Prostate Song

In [None]:
# Load the prostate (Song) AnnData object; presumably the cells before in-house filtering — TODO confirm.
adata = sc.read_h5ad("/add/path/here/Prostate_Song_SegWellS3_counts.h5ad")

In [None]:
# Load the prostate object stored under filtered_data/; it is treated below as the set of kept cells.
filtered_adata = sc.read_h5ad("/add/path/here/filtered_data/Song_Prostate_SeqWell/filtered_adata.h5ad")

In [None]:
# Flag genes whose symbol starts with "MT-" and let scanpy add per-cell QC
# columns (including the mitochondrial ones) to adata in place.
is_mito = adata.var_names.str.startswith("MT-")
adata.var["mt"] = is_mito
sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], percent_top=None, log1p=True, inplace=True)

### Normalize w/o MT genes

In [None]:
# Drop genes whose symbol starts with "MT-" from both objects so that the
# normalization and every score below ignore mitochondrial transcripts.
adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()
filtered_adata = filtered_adata[:,filtered_adata.var_names[~filtered_adata.var_names.str.startswith("MT-")]].copy()

for dataset in (adata, filtered_adata):
    # Scale every cell to 10,000 total counts, then log1p-transform.
    sc.pp.normalize_total(dataset, target_sum=10000)
    sc.pp.log1p(dataset)

    # Per-cell variance of the normalized expression across genes.
    dataset.obs["Transcriptome variance"] = dataset.to_df().var(axis=1)

    # One score column per GO gene set. str.capitalize() lower-cases the
    # "GO:" part, hence column names such as 'Cytoplasm (go:0005737)'.
    for sig in go_sigs:
        sc.tl.score_genes(dataset, gene_list=go_sigs[sig], score_name=sig.capitalize())

    # The dissociation signature does not depend on the GO sets: score it
    # once instead of once per GO term.
    sc.tl.score_genes(dataset, gene_list=common_disso_genes, score_name="Dissociation stress")

In [None]:
# Cells present in adata but absent from filtered_adata; filtered names are
# compared by the part before their first "-".
filtered_cells = np.setdiff1d(adata.obs_names.to_numpy(), filtered_adata.obs_names.str.split("-").str[0].to_numpy())

In [None]:
# obs columns compared between removed and retained cells.
obs_columns = [
    "cell_type", 'log1p_n_genes_by_counts', 'Dissociation stress',
    'log1p_total_counts', "pct_counts_mt",
    "Transcriptome variance",
    'Mitochondrion (go:0005739)', 'Cytoplasm (go:0005737)',
]

# Cells missing from filtered_adata: annotations plus MALAT1 expression,
# index suffixed with "_pre".
removed_obs = adata.obs.loc[filtered_cells, obs_columns]
removed_malat1 = adata[filtered_cells, "MALAT1"].to_df()
filtered_df = pd.concat([removed_obs, removed_malat1], axis=1)
filtered_df.index = filtered_df.index + "_pre"
filtered_df["Condition"] = "Filtered"

# Cells in filtered_adata: same columns, index suffixed with "_post".
retained_obs = filtered_adata.obs[obs_columns]
retained_malat1 = filtered_adata[:, "MALAT1"].to_df()
kept_df = pd.concat([retained_obs, retained_malat1], axis=1)
kept_df.index = kept_df.index + "_post"
kept_df["Condition"] = "Kept"

In [None]:
# Stack removed and kept cells into one long table; the "Condition" column tells them apart.
df = pd.concat([filtered_df,kept_df])

In [None]:
# MALAT1 expression density, one curve per Condition; common_norm=False
# normalizes each curve on its own.
fig, ax = plt.subplots(1,1,figsize=(3,2))
sns.kdeplot(data=df, x="MALAT1", hue="Condition", ax=ax, common_norm=False, hue_order=["Kept","Filtered"])
# NOTE(review): this replaces seaborn's automatic legend with hand-written labels
# listed in the reverse of hue_order — confirm that labels and curve colors match.
plt.legend(["Filtered","Kept"], bbox_to_anchor=(1,1,0,0),frameon=False)
pretty_ax(ax)

In [None]:
# 2x3 grid: one box plot per metric comparing "Filtered" and "Kept" cells,
# each annotated with an uncorrected Mann-Whitney test.
fig, ax = plt.subplots(2,3,figsize=(5,3))
flatax = ax.flatten()

pairs = [("Filtered","Kept")]

# Column in df -> panel title.
panel_titles = {
    "pct_counts_mt": "% MT counts",
    'log1p_total_counts': 'log1p(Total counts)',
    "Dissociation stress": "Dissociation stress",
    "Transcriptome variance": "Transcriptome\nvariance",
    'Mitochondrion (go:0005739)': 'Mitochondrion\n(GO:0005739)',
    'Cytoplasm (go:0005737)': 'Cytoplasm\n(GO:0005737)',
}
features = list(panel_titles)
names = list(panel_titles.values())

for panel, feature, title in zip(flatax, features, names):
    sns.boxplot(data=df, x="Condition", y=feature, ax=panel)
    pretty_ax(panel)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.set_title(title)

    # Significance stars drawn inside the panel.
    annot = Annotator(panel, pairs=pairs, data=df, x="Condition", y=feature)
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,
        fontsize=10,
    )
    annot.apply_test()
    _, test_results = annot.annotate()

fig.tight_layout()
fig.savefig("/add/path/here/figures/pre_vs_post/prostate.svg",
            dpi=200, bbox_inches="tight")

In [None]:
# Cells of filtered_adata with more than 15 % mitochondrial counts, renamed
# to the part of their barcode before the first "-".
highmt = filtered_adata.obs[filtered_adata.obs.pct_counts_mt>15].index.str.split("-").str[0]

# Three-way label: default "Kept", then cells absent from filtered_adata,
# then the high-%MT cells.
adata.obs["Condition"] = "Kept"
adata.obs.loc[filtered_cells,"Condition"] = "Filtered (in-house)"
# Restrict to barcodes that exist in adata so an unmatched name cannot raise
# KeyError (same approach as the SCLC section).
adata.obs.loc[adata.obs_names.intersection(highmt),"Condition"] = "Additional %MT filtering"

condition_order = ["Filtered (in-house)",
                   "Additional %MT filtering",
                   "Kept"]

fig, ax = plt.subplots(1,1,figsize=(2,1))
# Draw on the axes created above rather than on the implicit current axes.
sns.boxplot(data=adata.obs, x="Condition", y="Dissociation stress",
            order=condition_order, ax=ax)
pretty_ax(ax)
ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha="right")
ax.set_xlabel("")
ax.set_ylabel("")
ax.set_title("Dissociation stress")
fig.savefig("/add/path/here/figures/pre_vs_post/prostate_disso_stress.svg", dpi=200,
            bbox_inches="tight")

# Pancreas Steele

In [None]:
# Load the pancreas (Steele) AnnData object; presumably the cells before in-house filtering — TODO confirm.
adata = sc.read_h5ad("/add/path/here/Pancreas_Steele_10X.h5ad")

In [None]:
# Load the pancreas object stored under filtered_data/; it is treated below as the set of kept cells.
filtered_adata = sc.read_h5ad("/add/path/here/filtered_data/Steele_Pancreas_10X/filtered_adata.h5ad")

In [None]:
# Flag genes whose symbol starts with "MT-" and let scanpy add per-cell QC
# columns (including the mitochondrial ones) to adata in place.
is_mito = adata.var_names.str.startswith("MT-")
adata.var["mt"] = is_mito
sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], percent_top=None, log1p=True, inplace=True)

In [None]:
# De-duplicate gene names in place so selecting genes by name (e.g. "MALAT1") is unambiguous.
adata.var_names_make_unique()

### Normalize w/o MT genes

In [None]:
# Drop genes whose symbol starts with "MT-" from both objects so that the
# normalization and every score below ignore mitochondrial transcripts.
adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()
filtered_adata = filtered_adata[:,filtered_adata.var_names[~filtered_adata.var_names.str.startswith("MT-")]].copy()

for dataset in (adata, filtered_adata):
    # Scale every cell to 10,000 total counts, then log1p-transform.
    sc.pp.normalize_total(dataset, target_sum=10000)
    sc.pp.log1p(dataset)

    # Per-cell variance of the normalized expression across genes.
    dataset.obs["Transcriptome variance"] = dataset.to_df().var(axis=1)

    # One score column per GO gene set. str.capitalize() lower-cases the
    # "GO:" part, hence column names such as 'Cytoplasm (go:0005737)'.
    for sig in go_sigs:
        sc.tl.score_genes(dataset, gene_list=go_sigs[sig], score_name=sig.capitalize())

    # The dissociation signature does not depend on the GO sets: score it
    # once instead of once per GO term.
    sc.tl.score_genes(dataset, gene_list=common_disso_genes, score_name="Dissociation stress")

In [None]:
# Cells present in adata but absent from filtered_adata; filtered names are
# rebuilt as "<part before first '-'>-1" before the comparison.
filtered_cells = np.setdiff1d(adata.obs_names.to_numpy(), filtered_adata.obs_names.str.split("-").str[0].to_numpy()+"-1")

In [None]:
# obs columns compared between removed and retained cells.
obs_columns = [
    "cell_type", 'log1p_n_genes_by_counts', 'Dissociation stress',
    'log1p_total_counts', "pct_counts_mt",
    "Transcriptome variance",
    'Mitochondrion (go:0005739)', 'Cytoplasm (go:0005737)',
]

# Cells missing from filtered_adata: annotations plus MALAT1 expression,
# index suffixed with "_pre".
removed_obs = adata.obs.loc[filtered_cells, obs_columns]
removed_malat1 = adata[filtered_cells, "MALAT1"].to_df()
filtered_df = pd.concat([removed_obs, removed_malat1], axis=1)
filtered_df.index = filtered_df.index + "_pre"
filtered_df["Condition"] = "Filtered"

# Cells in filtered_adata: same columns, index suffixed with "_post".
retained_obs = filtered_adata.obs[obs_columns]
retained_malat1 = filtered_adata[:, "MALAT1"].to_df()
kept_df = pd.concat([retained_obs, retained_malat1], axis=1)
kept_df.index = kept_df.index + "_post"
kept_df["Condition"] = "Kept"

In [None]:
# Stack removed and kept cells into one long table; the "Condition" column tells them apart.
df = pd.concat([filtered_df,kept_df])

In [None]:
# MALAT1 expression density, one curve per Condition; common_norm=False
# normalizes each curve on its own.
fig, ax = plt.subplots(1,1,figsize=(3,2))
sns.kdeplot(data=df, x="MALAT1", hue="Condition", ax=ax, common_norm=False, hue_order=["Kept","Filtered"])
# NOTE(review): this replaces seaborn's automatic legend with hand-written labels
# listed in the reverse of hue_order — confirm that labels and curve colors match.
plt.legend(["Filtered","Kept"], bbox_to_anchor=(1,1,0,0),frameon=False)
pretty_ax(ax)

In [None]:
# 2x3 grid: one box plot per metric comparing "Filtered" and "Kept" cells,
# each annotated with an uncorrected Mann-Whitney test.
fig, ax = plt.subplots(2,3,figsize=(5,3))
flatax = ax.flatten()

pairs = [("Filtered","Kept")]

# Column in df -> panel title.
panel_titles = {
    "pct_counts_mt": "% MT counts",
    'log1p_total_counts': 'log1p(Total counts)',
    "Dissociation stress": "Dissociation stress",
    "Transcriptome variance": "Transcriptome\nvariance",
    'Mitochondrion (go:0005739)': 'Mitochondrion\n(GO:0005739)',
    'Cytoplasm (go:0005737)': 'Cytoplasm\n(GO:0005737)',
}
features = list(panel_titles)
names = list(panel_titles.values())

for panel, feature, title in zip(flatax, features, names):
    sns.boxplot(data=df, x="Condition", y=feature, ax=panel)
    pretty_ax(panel)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.set_title(title)

    # Significance stars drawn inside the panel.
    annot = Annotator(panel, pairs=pairs, data=df, x="Condition", y=feature)
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,
        fontsize=10,
    )
    annot.apply_test()
    _, test_results = annot.annotate()

fig.tight_layout()
fig.savefig("/add/path/here/figures/pre_vs_post/pancreas.svg",
            dpi=200, bbox_inches="tight")

In [None]:
# Cells of filtered_adata with more than 15 % mitochondrial counts, renamed
# to "<part before first '-'>-1".
highmt = filtered_adata.obs[filtered_adata.obs.pct_counts_mt>15].index.str.split("-").str[0]+"-1"

# Three-way label: default "Kept", then cells absent from filtered_adata,
# then the high-%MT cells.
adata.obs["Condition"] = "Kept"
adata.obs.loc[filtered_cells,"Condition"] = "Filtered (in-house)"
# Restrict to barcodes that exist in adata so an unmatched name cannot raise
# KeyError (same approach as the SCLC section).
adata.obs.loc[adata.obs_names.intersection(highmt),"Condition"] = "Additional %MT filtering"

condition_order = ["Filtered (in-house)",
                   "Additional %MT filtering",
                   "Kept"]

fig, ax = plt.subplots(1,1,figsize=(2,1))
# Draw on the axes created above rather than on the implicit current axes.
sns.boxplot(data=adata.obs, x="Condition", y="Dissociation stress",
            order=condition_order, ax=ax)
pretty_ax(ax)
ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha="right")
ax.set_xlabel("")
ax.set_ylabel("")
ax.set_title("Dissociation stress")
fig.savefig("/add/path/here/figures/pre_vs_post/pancreas_disso_stress.svg", dpi=200,
            bbox_inches="tight")

# MetPancreas Raghavan

In [None]:
# Load the pancreas (Raghavan) AnnData object; presumably the cells before in-house filtering — TODO confirm.
adata = sc.read_h5ad("/add/path/here/Pancreas_Raghavan_10X.h5ad")

In [None]:
# Load the Raghavan object stored under filtered_data/; it is treated below as the set of kept cells.
filtered_adata = sc.read_h5ad("/add/path/here/filtered_data/Raghavan_Pancreas_10X/filtered_adata.h5ad")

In [None]:
# Flag genes whose symbol starts with "MT-" and let scanpy add per-cell QC
# columns (including the mitochondrial ones) to adata in place.
is_mito = adata.var_names.str.startswith("MT-")
adata.var["mt"] = is_mito
sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], percent_top=None, log1p=True, inplace=True)

### Normalize w/o MT genes

In [None]:
# Drop genes whose symbol starts with "MT-" from both objects so that the
# normalization and every score below ignore mitochondrial transcripts.
adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()
filtered_adata = filtered_adata[:,filtered_adata.var_names[~filtered_adata.var_names.str.startswith("MT-")]].copy()

for dataset in (adata, filtered_adata):
    # Scale every cell to 10,000 total counts, then log1p-transform.
    sc.pp.normalize_total(dataset, target_sum=10000)
    sc.pp.log1p(dataset)

    # Per-cell variance of the normalized expression across genes.
    dataset.obs["Transcriptome variance"] = dataset.to_df().var(axis=1)

    # One score column per GO gene set. str.capitalize() lower-cases the
    # "GO:" part, hence column names such as 'Cytoplasm (go:0005737)'.
    for sig in go_sigs:
        sc.tl.score_genes(dataset, gene_list=go_sigs[sig], score_name=sig.capitalize())

    # The dissociation signature does not depend on the GO sets: score it
    # once instead of once per GO term.
    sc.tl.score_genes(dataset, gene_list=common_disso_genes, score_name="Dissociation stress")

In [None]:
# Cells present in adata but absent from filtered_adata; filtered names are
# compared by the part before their first "-".
filtered_cells = np.setdiff1d(adata.obs_names.to_numpy(), filtered_adata.obs_names.str.split("-").str[0].to_numpy())

In [None]:
# obs columns compared between removed and retained cells.
obs_columns = [
    "cell_type", 'log1p_n_genes_by_counts', 'Dissociation stress',
    'log1p_total_counts', "pct_counts_mt",
    "Transcriptome variance",
    'Mitochondrion (go:0005739)', 'Cytoplasm (go:0005737)',
]

# Cells missing from filtered_adata: annotations plus MALAT1 expression,
# index suffixed with "_pre".
removed_obs = adata.obs.loc[filtered_cells, obs_columns]
removed_malat1 = adata[filtered_cells, "MALAT1"].to_df()
filtered_df = pd.concat([removed_obs, removed_malat1], axis=1)
filtered_df.index = filtered_df.index + "_pre"
filtered_df["Condition"] = "Filtered"

# Cells in filtered_adata: same columns, index suffixed with "_post".
retained_obs = filtered_adata.obs[obs_columns]
retained_malat1 = filtered_adata[:, "MALAT1"].to_df()
kept_df = pd.concat([retained_obs, retained_malat1], axis=1)
kept_df.index = kept_df.index + "_post"
kept_df["Condition"] = "Kept"

In [None]:
# Stack removed and kept cells into one long table; the "Condition" column tells them apart.
df = pd.concat([filtered_df,kept_df])

In [None]:
# MALAT1 expression density, one curve per Condition; common_norm=False
# normalizes each curve on its own.
fig, ax = plt.subplots(1,1,figsize=(3,2))
sns.kdeplot(data=df, x="MALAT1", hue="Condition", ax=ax, common_norm=False, hue_order=["Kept","Filtered"])
# NOTE(review): this replaces seaborn's automatic legend with hand-written labels
# listed in the reverse of hue_order — confirm that labels and curve colors match.
plt.legend(["Filtered","Kept"], bbox_to_anchor=(1,1,0,0),frameon=False)
pretty_ax(ax)

In [None]:
# 2x3 grid: one box plot per metric comparing "Filtered" and "Kept" cells,
# each annotated with an uncorrected Mann-Whitney test.
fig, ax = plt.subplots(2,3,figsize=(5,3))
flatax = ax.flatten()

pairs = [("Filtered","Kept")]

# Column in df -> panel title.
panel_titles = {
    "pct_counts_mt": "% MT counts",
    'log1p_total_counts': 'log1p(Total counts)',
    "Dissociation stress": "Dissociation stress",
    "Transcriptome variance": "Transcriptome\nvariance",
    'Mitochondrion (go:0005739)': 'Mitochondrion\n(GO:0005739)',
    'Cytoplasm (go:0005737)': 'Cytoplasm\n(GO:0005737)',
}
features = list(panel_titles)
names = list(panel_titles.values())

for panel, feature, title in zip(flatax, features, names):
    sns.boxplot(data=df, x="Condition", y=feature, ax=panel)
    pretty_ax(panel)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.set_title(title)

    # Significance stars drawn inside the panel.
    annot = Annotator(panel, pairs=pairs, data=df, x="Condition", y=feature)
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,
        fontsize=10,
    )
    annot.apply_test()
    _, test_results = annot.annotate()

fig.tight_layout()
fig.savefig("/add/path/here/figures/pre_vs_post/metpancreas.svg",
            dpi=200, bbox_inches="tight")

In [None]:
# Cells of filtered_adata with more than 15 % mitochondrial counts, renamed
# to the part of their barcode before the first "-".
highmt = filtered_adata.obs[filtered_adata.obs.pct_counts_mt>15].index.str.split("-").str[0]

# Three-way label: default "Kept", then cells absent from filtered_adata,
# then the high-%MT cells.
adata.obs["Condition"] = "Kept"
adata.obs.loc[filtered_cells,"Condition"] = "Filtered (in-house)"
# Restrict to barcodes that exist in adata so an unmatched name cannot raise
# KeyError (same approach as the SCLC section).
adata.obs.loc[adata.obs_names.intersection(highmt),"Condition"] = "Additional %MT filtering"

condition_order = ["Filtered (in-house)",
                   "Additional %MT filtering",
                   "Kept"]

fig, ax = plt.subplots(1,1,figsize=(2,1))
# Draw on the axes created above rather than on the implicit current axes.
sns.boxplot(data=adata.obs, x="Condition", y="Dissociation stress",
            order=condition_order, ax=ax)
pretty_ax(ax)
ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha="right")
ax.set_xlabel("")
ax.set_ylabel("")
ax.set_title("Dissociation stress")
fig.savefig("/add/path/here/figures/pre_vs_post/metpancreas_disso_stress.svg", dpi=200,
            bbox_inches="tight")

# Breast Wu

In [None]:
# Load the breast (Wu) AnnData object; presumably the cells before in-house filtering — TODO confirm.
adata = sc.read_h5ad("/add/path/here/Breast_Wu_10X.h5ad")

In [None]:
# Load the breast object stored under filtered_data/; it is treated below as the set of kept cells.
filtered_adata = sc.read_h5ad("/add/path/here/filtered_data/Breast_Wu_10X/filtered_adata.h5ad")

In [None]:
# Flag genes whose symbol starts with "MT-" and let scanpy add per-cell QC
# columns (including the mitochondrial ones) to adata in place.
is_mito = adata.var_names.str.startswith("MT-")
adata.var["mt"] = is_mito
sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], percent_top=None, log1p=True, inplace=True)

### Normalize w/o MT genes

In [None]:
# Drop genes whose symbol starts with "MT-" from both objects so that the
# normalization and every score below ignore mitochondrial transcripts.
adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()
filtered_adata = filtered_adata[:,filtered_adata.var_names[~filtered_adata.var_names.str.startswith("MT-")]].copy()

for dataset in (adata, filtered_adata):
    # Scale every cell to 10,000 total counts, then log1p-transform.
    sc.pp.normalize_total(dataset, target_sum=10000)
    sc.pp.log1p(dataset)

    # Per-cell variance of the normalized expression across genes.
    dataset.obs["Transcriptome variance"] = dataset.to_df().var(axis=1)

    # One score column per GO gene set. str.capitalize() lower-cases the
    # "GO:" part, hence column names such as 'Cytoplasm (go:0005737)'.
    for sig in go_sigs:
        sc.tl.score_genes(dataset, gene_list=go_sigs[sig], score_name=sig.capitalize())

    # The dissociation signature does not depend on the GO sets: score it
    # once instead of once per GO term.
    sc.tl.score_genes(dataset, gene_list=common_disso_genes, score_name="Dissociation stress")

In [None]:
# Cells present in adata but absent from filtered_adata; filtered names are
# compared by the part before their first "-".
filtered_cells = np.setdiff1d(adata.obs_names.to_numpy(), filtered_adata.obs_names.str.split("-").str[0].to_numpy())

In [None]:
# obs columns compared between removed and retained cells.
obs_columns = [
    "cell_type", 'log1p_n_genes_by_counts', 'Dissociation stress',
    'log1p_total_counts', "pct_counts_mt",
    "Transcriptome variance",
    'Mitochondrion (go:0005739)', 'Cytoplasm (go:0005737)',
]

# Cells missing from filtered_adata: annotations plus MALAT1 expression,
# index suffixed with "_pre".
removed_obs = adata.obs.loc[filtered_cells, obs_columns]
removed_malat1 = adata[filtered_cells, "MALAT1"].to_df()
filtered_df = pd.concat([removed_obs, removed_malat1], axis=1)
filtered_df.index = filtered_df.index + "_pre"
filtered_df["Condition"] = "Filtered"

# Cells in filtered_adata: same columns, index suffixed with "_post".
retained_obs = filtered_adata.obs[obs_columns]
retained_malat1 = filtered_adata[:, "MALAT1"].to_df()
kept_df = pd.concat([retained_obs, retained_malat1], axis=1)
kept_df.index = kept_df.index + "_post"
kept_df["Condition"] = "Kept"

In [None]:
# Stack removed and kept cells into one long table; the "Condition" column tells them apart.
df = pd.concat([filtered_df,kept_df])

In [None]:
# MALAT1 expression density, one curve per Condition; common_norm=False
# normalizes each curve on its own.
fig, ax = plt.subplots(1,1,figsize=(3,2))
sns.kdeplot(data=df, x="MALAT1", hue="Condition", ax=ax, common_norm=False, hue_order=["Kept","Filtered"])
# NOTE(review): this replaces seaborn's automatic legend with hand-written labels
# listed in the reverse of hue_order — confirm that labels and curve colors match.
plt.legend(["Filtered","Kept"], bbox_to_anchor=(1,1,0,0),frameon=False)
pretty_ax(ax)

In [None]:
# 2x3 grid: one box plot per metric comparing "Filtered" and "Kept" cells,
# each annotated with an uncorrected Mann-Whitney test.
fig, ax = plt.subplots(2,3,figsize=(5,3))
flatax = ax.flatten()

pairs = [("Filtered","Kept")]

# Column in df -> panel title.
panel_titles = {
    "pct_counts_mt": "% MT counts",
    'log1p_total_counts': 'log1p(Total counts)',
    "Dissociation stress": "Dissociation stress",
    "Transcriptome variance": "Transcriptome\nvariance",
    'Mitochondrion (go:0005739)': 'Mitochondrion\n(GO:0005739)',
    'Cytoplasm (go:0005737)': 'Cytoplasm\n(GO:0005737)',
}
features = list(panel_titles)
names = list(panel_titles.values())

for panel, feature, title in zip(flatax, features, names):
    sns.boxplot(data=df, x="Condition", y=feature, ax=panel)
    pretty_ax(panel)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.set_title(title)

    # Significance stars drawn inside the panel.
    annot = Annotator(panel, pairs=pairs, data=df, x="Condition", y=feature)
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,
        fontsize=10,
    )
    annot.apply_test()
    _, test_results = annot.annotate()

fig.tight_layout()
fig.savefig("/add/path/here/figures/pre_vs_post/breast.svg",
            dpi=200, bbox_inches="tight")

In [None]:
# Cells of filtered_adata with more than 15 % mitochondrial counts, renamed
# to the part of their barcode before the first "-".
highmt = filtered_adata.obs[filtered_adata.obs.pct_counts_mt>15].index.str.split("-").str[0]

# Three-way label: default "Kept", then cells absent from filtered_adata,
# then the high-%MT cells.
adata.obs["Condition"] = "Kept"
adata.obs.loc[filtered_cells,"Condition"] = "Filtered (in-house)"
# Restrict to barcodes that exist in adata so an unmatched name cannot raise
# KeyError (same approach as the SCLC section).
adata.obs.loc[adata.obs_names.intersection(highmt),"Condition"] = "Additional %MT filtering"

condition_order = ["Filtered (in-house)",
                   "Additional %MT filtering",
                   "Kept"]

fig, ax = plt.subplots(1,1,figsize=(2,1))
# Draw on the axes created above rather than on the implicit current axes.
sns.boxplot(data=adata.obs, x="Condition", y="Dissociation stress",
            order=condition_order, ax=ax)
pretty_ax(ax)
ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha="right")
ax.set_xlabel("")
ax.set_ylabel("")
ax.set_title("Dissociation stress")
fig.savefig("/add/path/here/figures/pre_vs_post/breast_disso_stress.svg", dpi=200,
            bbox_inches="tight")

# RCC Bi

In [None]:
# Load the RCC (Bi) AnnData object; presumably the cells before in-house filtering — TODO confirm.
adata = sc.read_h5ad("/add/path/here/RCC_Bi_10X.h5ad")

In [None]:
# Load the RCC object stored under filtered_data/; it is treated below as the set of kept cells.
filtered_adata = sc.read_h5ad("/add/path/here/filtered_data/Bi_RCC_10X/filtered_adata.h5ad")

In [None]:
# Flag genes whose symbol starts with "MT-" and let scanpy add per-cell QC
# columns (including the mitochondrial ones) to adata in place.
is_mito = adata.var_names.str.startswith("MT-")
adata.var["mt"] = is_mito
sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], percent_top=None, log1p=True, inplace=True)

### Normalize w/o MT genes

In [None]:
# Drop genes whose symbol starts with "MT-" from both objects so that the
# normalization and every score below ignore mitochondrial transcripts.
adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()
filtered_adata = filtered_adata[:,filtered_adata.var_names[~filtered_adata.var_names.str.startswith("MT-")]].copy()

for dataset in (adata, filtered_adata):
    # Scale every cell to 10,000 total counts, then log1p-transform.
    sc.pp.normalize_total(dataset, target_sum=10000)
    sc.pp.log1p(dataset)

    # Per-cell variance of the normalized expression across genes.
    dataset.obs["Transcriptome variance"] = dataset.to_df().var(axis=1)

    # One score column per GO gene set. str.capitalize() lower-cases the
    # "GO:" part, hence column names such as 'Cytoplasm (go:0005737)'.
    for sig in go_sigs:
        sc.tl.score_genes(dataset, gene_list=go_sigs[sig], score_name=sig.capitalize())

    # The dissociation signature does not depend on the GO sets: score it
    # once instead of once per GO term.
    sc.tl.score_genes(dataset, gene_list=common_disso_genes, score_name="Dissociation stress")

In [None]:
# Cells present in adata but absent from filtered_adata; filtered names are
# compared by the part before their first "-".
filtered_cells = np.setdiff1d(adata.obs_names.to_numpy(), filtered_adata.obs_names.str.split("-").str[0].to_numpy())

In [None]:
# Per-cell metrics compared between removed ("Filtered") and retained ("Kept") cells.
qc_columns = [
    'log1p_n_genes_by_counts',
    'Dissociation stress',
    'log1p_total_counts',
    "pct_counts_mt",
    "Transcriptome variance",
    'Mitochondrion (go:0005739)',
    'Cytoplasm (go:0005737)',
]

# Removed cells: metrics and MALAT1 expression come from the raw object.
filtered_df = pd.concat(
    [adata.obs.loc[filtered_cells, qc_columns], adata[filtered_cells, "MALAT1"].to_df()],
    axis=1,
)
filtered_df.index = filtered_df.index + "_pre"
filtered_df["Condition"] = "Filtered"

# Retained cells: metrics and MALAT1 expression come from the filtered object.
kept_df = pd.concat(
    [filtered_adata.obs[qc_columns], filtered_adata[:, "MALAT1"].to_df()],
    axis=1,
)
kept_df.index = kept_df.index + "_post"
kept_df["Condition"] = "Kept"

In [None]:
# Stack removed and retained cells into one long table; the "Condition" column
# tells the two groups apart, and "Filtered" rows come first.
df = pd.concat([filtered_df,kept_df])

In [None]:
# Density of MALAT1 expression per condition; each curve is normalised on its
# own (common_norm=False).
fig, ax = plt.subplots(figsize=(3, 2))
sns.kdeplot(
    data=df,
    x="MALAT1",
    hue="Condition",
    hue_order=["Kept", "Filtered"],
    common_norm=False,
    ax=ax,
)
# NOTE(review): the legend labels are given explicitly and in the reverse of
# `hue_order`; this presumably matches the order in which the curves were
# added to the axes -- confirm the legend colours against the plot.
ax.legend(["Filtered", "Kept"], bbox_to_anchor=(1, 1, 0, 0), frameon=False)
pretty_ax(ax)

In [None]:
# 2x3 grid of boxplots, one panel per metric, comparing removed vs. retained
# cells with a Mann-Whitney test annotated as significance stars.
fig, ax = plt.subplots(2, 3, figsize=(5, 3))
flatax = ax.flatten()

pairs = [("Filtered", "Kept")]

# Column in `df` -> title shown on the corresponding panel (same order).
features = [
    "pct_counts_mt",
    'log1p_total_counts',
    "Dissociation stress",
    "Transcriptome variance",
    'Mitochondrion (go:0005739)',
    'Cytoplasm (go:0005737)',
]
names = [
    "% MT counts",
    'log1p(Total counts)',
    "Dissociation stress",
    "Transcriptome\nvariance",
    'Mitochondrion\n(GO:0005739)',
    'Cytoplasm\n(GO:0005737)',
]

for panel, feature, title in zip(flatax, features, names):
    sns.boxplot(data=df, x="Condition", y=feature, ax=panel)
    pretty_ax(panel)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.set_title(title)

    # No multiple-comparison correction is applied (comparisons_correction=None).
    annot = Annotator(panel, pairs=pairs, data=df, x="Condition", y=feature)
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,
        fontsize=10,
    )
    annot.apply_test()
    _, test_results = annot.annotate()

fig.tight_layout()
fig.savefig(
    "/add/path/here/figures/pre_vs_post/rcc.svg",
    dpi=200,
    bbox_inches="tight",
)

In [None]:
# Retained cells whose mitochondrial fraction exceeds 15%; barcodes are
# truncated at the first "-" so they can be used to index `adata.obs`.
highmt = filtered_adata.obs[filtered_adata.obs.pct_counts_mt>15].index.str.split("-").str[0]

# Label every cell of the raw object. Assignment order matters: the default is
# "Kept", then removed cells and high-%MT cells overwrite it.
adata.obs["Condition"] = "Kept"
adata.obs.loc[filtered_cells,"Condition"] = "Filtered (in-house)"
adata.obs.loc[highmt,"Condition"] = "Additional %MT filtering"

condition_order = ["Filtered (in-house)", "Additional %MT filtering", "Kept"]

fig, ax = plt.subplots(1,1,figsize=(2,1))
# Draw on `ax` explicitly instead of relying on pyplot's implicit current axes,
# so the plot always lands on the figure that is styled and saved below.
sns.boxplot(data=adata.obs, x="Condition", y="Dissociation stress",
            order=condition_order, ax=ax)
pretty_ax(ax)
ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha="right")
ax.set_xlabel("")
ax.set_ylabel("")
ax.set_title("Dissociation stress")
fig.savefig("/add/path/here/figures/pre_vs_post/rcc_disso_stress.svg", dpi=200,
            bbox_inches="tight")

# NasoCarcinoma Chen

In [None]:
# Load the nasopharyngeal carcinoma (Chen) dataset; later cells compare it
# against `filtered_adata`.
adata = sc.read_h5ad("/add/path/here/NasoCarcinoma_Chen_10X.h5ad")

In [None]:
# Load the filtered counterpart of the dataset (cells labelled "Kept" below).
filtered_adata = sc.read_h5ad("/add/path/here/filtered_data/Chen_NasoCarcinoma_10X/filtered_adata.h5ad")

In [None]:
# Flag mitochondrial genes by their "MT-" name prefix and compute QC metrics
# in place. Later cells read `pct_counts_mt`, `log1p_total_counts` and
# `log1p_n_genes_by_counts` from `adata.obs`, which this call is expected to add.
adata.var["mt"] = adata.var_names.str.startswith("MT-")
sc.pp.calculate_qc_metrics(
    adata, qc_vars=["mt"], percent_top=None, log1p=True, inplace=True
)

In [None]:
# De-duplicate gene names in place. Later cells select genes by name
# (the "MT-" filter and the "MALAT1" lookup), which presumably needs
# unambiguous names -- only `adata` is treated here, not `filtered_adata`.
adata.var_names_make_unique()

### Normalize w/o MT genes

In [None]:
# Remove mitochondrial genes (names starting with "MT-") from both the raw and
# the filtered object, so they are excluded from the normalisation below.
adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()
filtered_adata = filtered_adata[:,filtered_adata.var_names[~filtered_adata.var_names.str.startswith("MT-")]].copy()

# Apply the same preprocessing and scoring to both objects so that their
# per-cell metrics are computed identically.
for ad in (adata, filtered_adata):
    sc.pp.normalize_total(ad, target_sum=10000)
    sc.pp.log1p(ad)

    # Per-cell variance across all remaining genes of the log-normalised matrix.
    ad.obs["Transcriptome variance"] = ad.to_df().var(axis=1)

    # One score column per GO signature, named e.g. "Mitochondrion (go:0005739)".
    for sig, genes in go_sigs.items():
        sc.tl.score_genes(ad, gene_list=genes, score_name=sig.capitalize())

    # Scored once per object: the gene list does not depend on the GO
    # signature, so repeating this call inside a loop over `go_sigs` only
    # recomputed the same column (and skipped it entirely if `go_sigs` was empty).
    sc.tl.score_genes(ad, gene_list=common_disso_genes, score_name="Dissociation stress")

In [None]:
# Barcodes of the filtered object rewritten as "<first '-'-separated field>-1",
# the form in which they are compared against `adata.obs_names`.
kept_barcodes = filtered_adata.obs_names.str.split("-").str[0].to_numpy() + "-1"

# Cells present in the raw object but absent from the filtered one.
filtered_cells = np.setdiff1d(adata.obs_names.to_numpy(), kept_barcodes)

In [None]:
# Per-cell annotations/metrics compared between removed ("Filtered") and
# retained ("Kept") cells.
qc_columns = [
    "cell_type",
    'log1p_n_genes_by_counts',
    'Dissociation stress',
    'log1p_total_counts',
    "pct_counts_mt",
    "Transcriptome variance",
    'Mitochondrion (go:0005739)',
    'Cytoplasm (go:0005737)',
]

# Removed cells: metrics and MALAT1 expression come from the raw object.
filtered_df = pd.concat(
    [adata.obs.loc[filtered_cells, qc_columns], adata[filtered_cells, "MALAT1"].to_df()],
    axis=1,
)
filtered_df.index = filtered_df.index + "_pre"
filtered_df["Condition"] = "Filtered"

# Retained cells: metrics and MALAT1 expression come from the filtered object.
kept_df = pd.concat(
    [filtered_adata.obs[qc_columns], filtered_adata[:, "MALAT1"].to_df()],
    axis=1,
)
kept_df.index = kept_df.index + "_post"
kept_df["Condition"] = "Kept"

In [None]:
# Stack removed and retained cells into one long table; the "Condition" column
# tells the two groups apart, and "Filtered" rows come first.
df = pd.concat([filtered_df,kept_df])

In [None]:
# Density of MALAT1 expression per condition; each curve is normalised on its
# own (common_norm=False).
fig, ax = plt.subplots(figsize=(3, 2))
sns.kdeplot(
    data=df,
    x="MALAT1",
    hue="Condition",
    hue_order=["Kept", "Filtered"],
    common_norm=False,
    ax=ax,
)
# NOTE(review): the legend labels are given explicitly and in the reverse of
# `hue_order`; this presumably matches the order in which the curves were
# added to the axes -- confirm the legend colours against the plot.
ax.legend(["Filtered", "Kept"], bbox_to_anchor=(1, 1, 0, 0), frameon=False)
pretty_ax(ax)

In [None]:
# 2x3 grid of boxplots, one panel per metric, comparing removed vs. retained
# cells with a Mann-Whitney test annotated as significance stars.
fig, ax = plt.subplots(2, 3, figsize=(5, 3))
flatax = ax.flatten()

pairs = [("Filtered", "Kept")]

# Column in `df` -> title shown on the corresponding panel (same order).
features = [
    "pct_counts_mt",
    'log1p_total_counts',
    "Dissociation stress",
    "Transcriptome variance",
    'Mitochondrion (go:0005739)',
    'Cytoplasm (go:0005737)',
]
names = [
    "% MT counts",
    'log1p(Total counts)',
    "Dissociation stress",
    "Transcriptome\nvariance",
    'Mitochondrion\n(GO:0005739)',
    'Cytoplasm\n(GO:0005737)',
]

for panel, feature, title in zip(flatax, features, names):
    sns.boxplot(data=df, x="Condition", y=feature, ax=panel)
    pretty_ax(panel)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.set_title(title)

    # No multiple-comparison correction is applied (comparisons_correction=None).
    annot = Annotator(panel, pairs=pairs, data=df, x="Condition", y=feature)
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,
        fontsize=10,
    )
    annot.apply_test()
    _, test_results = annot.annotate()

fig.tight_layout()
fig.savefig(
    "/add/path/here/figures/pre_vs_post/nasocarcinoma.svg",
    dpi=200,
    bbox_inches="tight",
)

In [None]:
# Retained cells whose mitochondrial fraction exceeds 15%; barcodes are
# rewritten as "<first '-'-separated field>-1" so they can index `adata.obs`.
highmt = filtered_adata.obs[filtered_adata.obs.pct_counts_mt>15].index.str.split("-").str[0]+"-1"

# Label every cell of the raw object. Assignment order matters: the default is
# "Kept", then removed cells and high-%MT cells overwrite it.
adata.obs["Condition"] = "Kept"
adata.obs.loc[filtered_cells,"Condition"] = "Filtered (in-house)"
adata.obs.loc[highmt,"Condition"] = "Additional %MT filtering"

condition_order = ["Filtered (in-house)", "Additional %MT filtering", "Kept"]

fig, ax = plt.subplots(1,1,figsize=(2,1))
# Draw on `ax` explicitly instead of relying on pyplot's implicit current axes,
# so the plot always lands on the figure that is styled and saved below.
sns.boxplot(data=adata.obs, x="Condition", y="Dissociation stress",
            order=condition_order, ax=ax)
pretty_ax(ax)
ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha="right")
ax.set_xlabel("")
ax.set_ylabel("")
ax.set_title("Dissociation stress")
fig.savefig("/add/path/here/figures/pre_vs_post/nasocarcinoma_disso_stress.svg", dpi=200,
            bbox_inches="tight")

# Uveal Melanoma

In [None]:
# Load the uveal melanoma (Durante) dataset; later cells compare it against
# `filtered_adata`. The path uses the same "/add/path/here/" placeholder as
# every other dataset in this notebook instead of a user-specific directory.
adata = sc.read_h5ad("/add/path/here/UvealMelanoma_Durante.h5ad")

In [None]:
# Load the filtered counterpart of the dataset (cells labelled "Kept" below).
filtered_adata = sc.read_h5ad("/add/path/here/filtered_data/UvealMelanoma_Durante_10X/filtered_adata.h5ad")

In [None]:
# Flag mitochondrial genes by their "MT-" name prefix and compute QC metrics
# in place. Later cells read `pct_counts_mt`, `log1p_total_counts` and
# `log1p_n_genes_by_counts` from `adata.obs`, which this call is expected to add.
adata.var["mt"] = adata.var_names.str.startswith("MT-")
sc.pp.calculate_qc_metrics(
    adata, qc_vars=["mt"], percent_top=None, log1p=True, inplace=True
)

In [None]:
# De-duplicate gene names in place. Later cells select genes by name
# (the "MT-" filter and the "MALAT1" lookup), which presumably needs
# unambiguous names -- only `adata` is treated here, not `filtered_adata`.
adata.var_names_make_unique()

### Normalize w/o MT genes

In [None]:
# Remove mitochondrial genes (names starting with "MT-") from both the raw and
# the filtered object, so they are excluded from the normalisation below.
adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()
filtered_adata = filtered_adata[:,filtered_adata.var_names[~filtered_adata.var_names.str.startswith("MT-")]].copy()

# Apply the same preprocessing and scoring to both objects so that their
# per-cell metrics are computed identically.
for ad in (adata, filtered_adata):
    sc.pp.normalize_total(ad, target_sum=10000)
    sc.pp.log1p(ad)

    # Per-cell variance across all remaining genes of the log-normalised matrix.
    ad.obs["Transcriptome variance"] = ad.to_df().var(axis=1)

    # One score column per GO signature, named e.g. "Mitochondrion (go:0005739)".
    for sig, genes in go_sigs.items():
        sc.tl.score_genes(ad, gene_list=genes, score_name=sig.capitalize())

    # Scored once per object: the gene list does not depend on the GO
    # signature, so repeating this call inside a loop over `go_sigs` only
    # recomputed the same column (and skipped it entirely if `go_sigs` was empty).
    sc.tl.score_genes(ad, gene_list=common_disso_genes, score_name="Dissociation stress")

In [None]:
# Barcodes of the filtered object reduced to their first three "-"-separated
# fields, the form in which they are compared against `adata.obs_names`.
kept_barcodes = [
    "-".join(parts)
    for parts in filtered_adata.obs_names.str.split("-").str[:3]
]

# Cells present in the raw object but absent from the filtered one.
filtered_cells = np.setdiff1d(adata.obs_names.to_numpy(), kept_barcodes)

In [None]:
# Per-cell annotations/metrics compared between removed ("Filtered") and
# retained ("Kept") cells.
qc_columns = [
    "cell_type",
    'log1p_n_genes_by_counts',
    'Dissociation stress',
    'log1p_total_counts',
    "pct_counts_mt",
    "Transcriptome variance",
    'Mitochondrion (go:0005739)',
    'Cytoplasm (go:0005737)',
]

# Removed cells: metrics and MALAT1 expression come from the raw object.
filtered_df = pd.concat(
    [adata.obs.loc[filtered_cells, qc_columns], adata[filtered_cells, "MALAT1"].to_df()],
    axis=1,
)
filtered_df.index = filtered_df.index + "_pre"
filtered_df["Condition"] = "Filtered"

# Retained cells: metrics and MALAT1 expression come from the filtered object.
kept_df = pd.concat(
    [filtered_adata.obs[qc_columns], filtered_adata[:, "MALAT1"].to_df()],
    axis=1,
)
kept_df.index = kept_df.index + "_post"
kept_df["Condition"] = "Kept"

In [None]:
# Stack removed and retained cells into one long table; the "Condition" column
# tells the two groups apart, and "Filtered" rows come first.
df = pd.concat([filtered_df,kept_df])

In [None]:
# Density of MALAT1 expression per condition; each curve is normalised on its
# own (common_norm=False).
fig, ax = plt.subplots(figsize=(3, 2))
sns.kdeplot(
    data=df,
    x="MALAT1",
    hue="Condition",
    hue_order=["Kept", "Filtered"],
    common_norm=False,
    ax=ax,
)
# NOTE(review): the legend labels are given explicitly and in the reverse of
# `hue_order`; this presumably matches the order in which the curves were
# added to the axes -- confirm the legend colours against the plot.
ax.legend(["Filtered", "Kept"], bbox_to_anchor=(1, 1, 0, 0), frameon=False)
pretty_ax(ax)

In [None]:
# 2x3 grid of boxplots, one panel per metric, comparing removed vs. retained
# cells with a Mann-Whitney test annotated as significance stars.
fig, ax = plt.subplots(2, 3, figsize=(5, 3))
flatax = ax.flatten()

pairs = [("Filtered", "Kept")]

# Column in `df` -> title shown on the corresponding panel (same order).
features = [
    "pct_counts_mt",
    'log1p_total_counts',
    "Dissociation stress",
    "Transcriptome variance",
    'Mitochondrion (go:0005739)',
    'Cytoplasm (go:0005737)',
]
names = [
    "% MT counts",
    'log1p(Total counts)',
    "Dissociation stress",
    "Transcriptome\nvariance",
    'Mitochondrion\n(GO:0005739)',
    'Cytoplasm\n(GO:0005737)',
]

for panel, feature, title in zip(flatax, features, names):
    sns.boxplot(data=df, x="Condition", y=feature, ax=panel)
    pretty_ax(panel)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.set_title(title)

    # No multiple-comparison correction is applied (comparisons_correction=None).
    annot = Annotator(panel, pairs=pairs, data=df, x="Condition", y=feature)
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,
        fontsize=10,
    )
    annot.apply_test()
    _, test_results = annot.annotate()

fig.tight_layout()
fig.savefig(
    "/add/path/here/figures/pre_vs_post/uvealmelanoma.svg",
    dpi=200,
    bbox_inches="tight",
)

In [None]:
# Retained cells whose mitochondrial fraction exceeds 15%; barcodes are
# reduced to their first three "-"-separated fields so they can index `adata.obs`.
highmt = filtered_adata.obs[filtered_adata.obs.pct_counts_mt>15].index.str.split("-").str[:3]
highmt = ["-".join(nm) for nm in highmt]

# Label every cell of the raw object. Assignment order matters: the default is
# "Kept", then removed cells and high-%MT cells overwrite it.
adata.obs["Condition"] = "Kept"
adata.obs.loc[filtered_cells,"Condition"] = "Filtered (in-house)"
adata.obs.loc[highmt,"Condition"] = "Additional %MT filtering"

condition_order = ["Filtered (in-house)", "Additional %MT filtering", "Kept"]

fig, ax = plt.subplots(1,1,figsize=(2,1))
# Draw on `ax` explicitly instead of relying on pyplot's implicit current axes,
# so the plot always lands on the figure that is styled and saved below.
sns.boxplot(data=adata.obs, x="Condition", y="Dissociation stress",
            order=condition_order, ax=ax)
pretty_ax(ax)
ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha="right")
ax.set_xlabel("")
ax.set_ylabel("")
ax.set_title("Dissociation stress")
fig.savefig("/add/path/here/figures/pre_vs_post/uvealmelanoma_disso_stress.svg", dpi=200,
            bbox_inches="tight")