In [None]:
import pandas as pd
import numpy as np
import scanpy as sc

import matplotlib.pyplot as plt
import seaborn as sns

from statannotations.Annotator import Annotator

from tqdm.notebook import tqdm

from scipy.stats import fisher_exact, mannwhitneyu

from statsmodels.stats.multitest import multipletests

import pathlib as pl
import os

import metacells as mc

In [None]:
def pretty_ax(ax):
    """Apply the notebook's minimal axis styling to ``ax`` in place.

    Hides the top and right spines, thickens the bottom and left spines to
    1.5, and configures ticks on both axes (major and minor): bottom ticks
    on, top and left ticks off, bottom and left tick labels shown.
    """
    # Remove the frame lines that do not carry an axis.
    for hidden_side in ("right", "top"):
        ax.spines[hidden_side].set_visible(False)

    tick_settings = dict(
        axis='both',
        which='both',
        bottom=True,
        top=False,
        left=False,
        labelbottom=True,
        labelleft=True,
    )
    ax.tick_params(**tick_settings)

    # Emphasise the two remaining spines.
    for kept_side in ("bottom", "left"):
        ax.spines[kept_side].set_linewidth(1.5)

In [None]:
def plot_boxplot(adata, y, ax=None, name=None, x="Malignant", savename=None):
    """Boxplot of ``adata.obs[y]`` grouped by ``x`` and split by "HighMT", with significance stars.

    Parameters
    ----------
    adata
        Object whose ``.obs`` table holds the ``x``, ``y`` and "HighMT" columns.
    y
        Name of the ``.obs`` column plotted on the y axis.
    ax
        Axes to draw on. When ``None`` a new 3x2 inch figure is created.
    name
        Optional panel title; when given, the y-axis label is cleared.
    x
        Name of the ``.obs`` grouping column. The annotated pairs below are
        hard-coded as (x, HighMT) combinations of 0/1, so this column is
        expected to be coded 0/1 as well.
    savename
        Optional path; when given, the figure that owns ``ax`` is saved there.
    """
    # (x value, HighMT value) groups compared with a Mann-Whitney test.
    pairs = [((0,0),(0,1)),((1,0),(1,1)),((0,0),(1,1)),((0,1),(1,0)),((0,1),(1,1))]

    if ax is None:
        _, ax = plt.subplots(1,1,figsize=(3,2))
    sns.boxplot(data=adata.obs, x=x, y=y,
                hue="HighMT", ax=ax)
    pretty_ax(ax)
    ax.legend(frameon=False, title="HighMT",bbox_to_anchor=(1,1,0,0))
    if name is not None:
        ax.set_ylabel("")
        ax.set_title(name)

    annot = Annotator(
        ax,
        pairs=pairs,
        data=adata.obs, x=x, y=y, hue="HighMT"
    )
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,
        fontsize=10,
    )
    annot.apply_test()
    annot.annotate()

    if savename is not None:
        # Save through the axes' own figure: a local `fig` only exists when
        # the axes were created here, so using it failed with a NameError
        # whenever the caller supplied `ax`.
        ax.figure.savefig(savename, dpi=200, bbox_inches="tight")

In [None]:
def create_pairs(celltypes) -> list:
    """Build the per-cell-type comparison pairs used for annotation.

    For every entry ``ct`` of ``celltypes`` the result contains
    ``((ct, 0), (ct, 1))``, in input order. Returns a plain list (empty when
    ``celltypes`` is empty).
    """
    return [((ct, 0), (ct, 1)) for ct in celltypes]

def plot_boxplot_celltype(adata, y, celltypes,
                          ax=None, name=None, x="Malignant", savename=None):
    """Boxplot of ``adata.obs[y]`` per category of ``x``, split by "HighMT".

    Within each category listed in ``celltypes`` the HighMT=0 and HighMT=1
    groups are compared with a Mann-Whitney test and annotated with stars.

    Parameters
    ----------
    adata
        Object whose ``.obs`` table holds the ``x``, ``y`` and "HighMT" columns.
    y
        Name of the ``.obs`` column plotted on the y axis.
    celltypes
        Categories of ``x`` to show, in plotting order.
    ax
        Axes to draw on. When ``None`` a new 3x2 inch figure is created.
    name
        Optional panel title; when given, the y-axis label is cleared.
    x
        Name of the ``.obs`` grouping column.
    savename
        Optional path; when given, the figure that owns ``ax`` is saved there.
    """
    pairs = create_pairs(celltypes)

    if ax is None:
        _, ax = plt.subplots(1,1,figsize=(3,2))
    sns.boxplot(data=adata.obs, x=x, y=y,
                hue="HighMT", order=celltypes,
                ax=ax)
    pretty_ax(ax)
    ax.legend(frameon=False, title="HighMT",bbox_to_anchor=(1,1,0,0))
    # Re-apply the current tick labels rotated so long category names fit.
    ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha="right")

    ax.set_xlabel("")

    if name is not None:
        ax.set_ylabel("")
        ax.set_title(name)

    annot = Annotator(
        ax,
        pairs=pairs,
        data=adata.obs, x=x, y=y,
        hue="HighMT", order=celltypes,
    )
    annot.configure(
        test="Mann-Whitney",
        loc="inside",
        text_format="star",
        show_test_name=False,
        verbose=2,
        comparisons_correction=None,
        fontsize=10,
    )
    annot.apply_test()
    annot.annotate()

    if savename is not None:
        # Save through the axes' own figure: a local `fig` only exists when
        # the axes were created here, so using it failed with a NameError
        # whenever the caller supplied `ax`.
        ax.figure.savefig(savename, dpi=200, bbox_inches="tight")

# Define metacells parameters

In [None]:
# Explicit gene names that get_metacells passes to mc.pl.mark_lateral_genes
# (as `lateral_gene_names`).
LATERAL_GENE_NAMES = [
    "ACSM3", "ANP32B", "APOE", "AURKA", "B2M", "BIRC5", "BTG2", "CALM1", "CD63", "CD69", "CDK4",
    "CENPF", "CENPU", "CENPW", "CH17-373J23.1", "CKS1B", "CKS2", "COX4I1", "CXCR4", "DNAJB1",
    "DONSON", "DUSP1", "DUT", "EEF1A1", "EEF1B2", "EIF3E", "EMP3", "FKBP4", "FOS", "FOSB", "FTH1",
    "G0S2", "GGH", "GLTSCR2", "GMNN", "GNB2L1", "GPR183", "H2AFZ", "H3F3B", "HBM", "HIST1H1C",
    "HIST1H2AC", "HIST1H2BG", "HIST1H4C", "HLA-A", "HLA-B", "HLA-C", "HLA-DMA", "HLA-DMB",
    "HLA-DPA1", "HLA-DPB1", "HLA-DQA1", "HLA-DQB1", "HLA-DRA", "HLA-DRB1", "HLA-E", "HLA-F", "HMGA1",
    "HMGB1", "HMGB2", "HMGB3", "HMGN2", "HNRNPAB", "HSP90AA1", "HSP90AB1", "HSPA1A", "HSPA1B",
    "HSPA6", "HSPD1", "HSPE1", "HSPH1", "ID2", "IER2", "IGHA1", "IGHA2", "IGHD", "IGHG1", "IGHG2",
    "IGHG3", "IGHG4", "IGHM", "IGKC", "IGKV1-12", "IGKV1-39", "IGKV1-5", "IGKV3-15", "IGKV4-1",
    "IGLC2", "IGLC3", "IGLC6", "IGLC7", "IGLL1", "IGLL5", "IGLV2-34", "JUN", "JUNB", "KIAA0101",
    "LEPROTL1", "LGALS1", "LINC01206", "LTB", "MCM3", "MCM4", "MCM7", "MKI67", "MT2A", "MYL12A",
    "MYL6", "NASP", "NFKBIA", "NUSAP1", "PA2G4", "PCNA", "PDLIM1", "PLK3", "PPP1R15A", "PTMA",
    "PTTG1", "RAN", "RANBP1", "RGCC", "RGS1", "RGS2", "RGS3", "RP11-1143G9.4", "RP11-160E2.6",
    "RP11-53B5.1", "RP11-620J15.3", "RP5-1025A1.3", "RP5-1171I10.5", "RPS10", "RPS10-NUDT3", "RPS11",
    "RPS12", "RPS13", "RPS14", "RPS15", "RPS15A", "RPS16", "RPS17", "RPS18", "RPS19", "RPS19BP1",
    "RPS2", "RPS20", "RPS21", "RPS23", "RPS24", "RPS25", "RPS26", "RPS27", "RPS27A", "RPS27L",
    "RPS28", "RPS29", "RPS3", "RPS3A", "RPS4X", "RPS4Y1", "RPS4Y2", "RPS5", "RPS6", "RPS6KA1",
    "RPS6KA2", "RPS6KA2-AS1", "RPS6KA3", "RPS6KA4", "RPS6KA5", "RPS6KA6", "RPS6KB1", "RPS6KB2",
    "RPS6KC1", "RPS6KL1", "RPS7", "RPS8", "RPS9", "RPSA", "RRM2", "SMC4", "SRGN", "SRSF7", "STMN1",
    "TK1", "TMSB4X", "TOP2A", "TPX2", "TSC22D3", "TUBA1A", "TUBA1B", "TUBB", "TUBB4B", "TXN", "TYMS",
    "UBA52", "UBC", "UBE2C", "UHRF1", "YBX1", "YPEL5", "ZFP36", "ZWINT"
]
# Name patterns passed alongside the explicit list (as `lateral_gene_patterns`).
LATERAL_GENE_PATTERNS = ["RP[LS].*"]  # Ribosomal


In [None]:
# Gene names that get_metacells passes to mc.pl.mark_noisy_genes
# (as `noisy_gene_names`).
NOISY_GENE_NAMES = [
    "CCL3", "CCL4", "CCL5", "CXCL8", "DUSP1", "FOS", "G0S2", "HBB", "HIST1H4C", "IER2", "IGKC",
    "IGLC2", "JUN", "JUNB", "KLRB1", "MT2A", "RPS26", "RPS4Y1", "TRBC1", "TUBA1B", "TUBB"
]

# Load stress dissociation signatures

In [None]:
# Core-gene table; its genes are scored further down as the
# "O-Flanagan_dissociation_*" signatures.
core_genes_stress = pd.read_csv("/add/path/here/auxiliary_data/coregene_df-FALSE-v3.csv")

# `Series.ravel()` is deprecated in recent pandas; `to_numpy()` returns the
# same 1-D array of values.
# Genes with a positive log fold change.
list_core_genes = core_genes_stress[core_genes_stress["logFC"]>0].gene_symbol.to_numpy()

# Reduced signature: the first 40 rows of the table, in file order.
red_core_genes = core_genes_stress.head(40).gene_symbol.to_numpy()

# Single-column gene lists without a header row; symbols are upper-cased.
dissociation_genes = pd.read_csv("/add/path/here/auxiliary_data/dissociation_genes-vanDenBrink2017.csv",header=None).astype(str)
dissociation_genes = dissociation_genes[0].str.upper().to_numpy()

dissociation_genes_machado = pd.read_csv("/add/path/here/auxiliary_data/dissociation_Machado2021.csv",header=None).astype(str)
dissociation_genes_machado = dissociation_genes_machado[0].str.upper().to_numpy()

dissociation_prostate_specific = ["JUN","FOS","EGR1","ATF3","JUNB","GADD45B","IER2","ZFP36",
"DNAJB1","RHOB","NR4A1","UBC","HES1"]

# Genes shared by all three file-based signatures (sorted, unique).
common_disso_genes = np.intersect1d(list_core_genes.astype(str),dissociation_genes.astype(str))
common_disso_genes = np.intersect1d(common_disso_genes,dissociation_genes_machado.astype(str))

# Load MitoCarta genes

In [None]:
# The 13 MT- gene symbols removed from every MitoCarta gene list below.
MT_ENCODED_GENES = ['MT-ATP6', 'MT-CO2', 'MT-CO1', 'MT-ND2', 'MT-ND4', 'MT-ND5',
       'MT-CYB', 'MT-ATP8', 'MT-CO3', 'MT-ND3', 'MT-ND1', 'MT-ND4L',
       'MT-ND6']

# Flattened MitoCarta gene table without the MT- genes (sorted, unique).
mitocarta = np.setdiff1d(
    pd.read_csv("/add/path/here/auxiliary_data/mitocarta_genes.csv").values.ravel(),
    MT_ENCODED_GENES,
)

mitocarta_pathways = pd.read_csv("/add/path/here/auxiliary_data/mitocarta_pathways.csv",index_col=0)

# Hierarchy strings without a ">" separator, i.e. made of a single level.
hierarchy_levels = mitocarta_pathways["MitoPathways Hierarchy"].str.split(">").values
high_order = [levels[0] for levels in hierarchy_levels if len(levels)==1]

# Map each single-level pathway (table index) to its gene list, minus the MT- genes.
single_level_rows = mitocarta_pathways[mitocarta_pathways["MitoPathways Hierarchy"].isin(high_order)]
high_order = {
    pathway: np.setdiff1d(genes, MT_ENCODED_GENES)
    for pathway, genes in single_level_rows["Genes"].str.split(", ").to_dict().items()
}

# GO Cellular Component (GOCC)

In [None]:
# Parse a tab-separated gene-set library: field 0 is the term name, field 1 is
# skipped, and the remaining fields are the member genes.
# NOTE(review): this is an absolute, user-specific path, unlike the
# "/add/path/here/..." placeholders used elsewhere in the notebook — point it
# at a shared data directory before re-running.
gocc = {}
with open("/Users/josephineyates/Downloads/GO_Cellular_Component_2013.txt", "r") as f:
    for line in f:
        # Strip the line ending first so the last gene is kept intact, then
        # drop empty fields (e.g. produced by a trailing tab). The previous
        # `vals[2:-1]` slice discarded the final field unconditionally, which
        # loses a real gene on any line that does not end with a tab.
        vals = line.rstrip("\r\n").split("\t")
        if len(vals) < 3:
            continue
        gocc[vals[0]] = [gene for gene in vals[2:] if gene]


pathways_gocc = ["mitochondrion (GO:0005739)",
                "cytoplasm (GO:0005737)"]

# Keep only the terms scored later in the notebook.
go_sigs = {path: gocc[path] for path in pathways_gocc}

# Signature of mitochondrial transfer

Taken from "Systematic investigation of mitochondrial transfer between cancer cells and T cells at single-cell resolution", Zhang et al., 2023, https://www.sciencedirect.com/science/article/pii/S1535610823003197?via%3Dihub

In [None]:
# Gene signature scored as "Mito transfer" in the dataset sections below.
# "ARHGEF5" was listed twice; the duplicate entry is removed.
# NOTE(review): confirm against the cited paper that the second occurrence
# was not a typo for a different gene.
mito_transfer = ["ABI1","ARF6","F11R","BAIAP2L1","MYADM","ACTR3","ARHGEF5","SRC","CAPZA2","PSEN1",
                 "GOLPH3","RAB10","ARF4","CAP1","YWHAG", "YWHAZ", "HSPA4"]

# Create metacells

In [None]:
def get_metacells(adata: sc.AnnData, celltype_col: str = "cleaned_celltype", 
                 sample_col: str = "sample") -> tuple:
    """Run the metacells divide-and-conquer pipeline and annotate the result.

    Parameters
    ----------
    adata
        Cell-level data. It is modified in place by the metacells calls and
        must contain the ``celltype_col``, ``sample_col``, "HighMT" and
        "Malignant" columns in ``.obs``.
    celltype_col
        ``.obs`` column holding the cell-type annotation.
    sample_col
        ``.obs`` column holding the sample identifier.

    Returns
    -------
    tuple
        ``(metacells, adata)``: the collected metacells and the input cells.
        The metacells carry per-group fractions for ``celltype_col``,
        ``sample_col``, "HighMT" and "Malignant", plus the most frequent
        value of ``celltype_col``, "Malignant" and "HighMT" stored under the
        same column names.
    """
    # The dataset labels below are kept exactly as they were; they are only names.
    mc.ut.set_name(adata, "hca_bm.one-pass.preliminary.cells")

    mc.pl.mark_lateral_genes(
        adata,
        lateral_gene_names=LATERAL_GENE_NAMES,
        lateral_gene_patterns=LATERAL_GENE_PATTERNS,
    )

    # This will mark as "noisy_gene" any genes that match the above, if they exist in the clean dataset.
    mc.pl.mark_noisy_genes(adata, noisy_gene_names=NOISY_GENE_NAMES)

    # Use the guesstimator to size the parallelism.
    max_parallel_piles = mc.pl.guess_max_parallel_piles(adata)
    mc.pl.set_max_parallel_piles(max_parallel_piles)

    with mc.ut.progress_bar():
        mc.pl.divide_and_conquer_pipeline(adata, random_seed=123456)

    metacells = \
        mc.pl.collect_metacells(adata, name="hca_bm.one-pass.preliminary.metacells", random_seed=123456)
    print(f"Preliminary: {metacells.n_obs} metacells, {metacells.n_vars} genes")

    # Per-metacell fraction of cells carrying each value of these columns.
    for fraction_col in (celltype_col, sample_col, "HighMT", "Malignant"):
        mc.tl.convey_obs_fractions_to_group(adata=adata, gdata=metacells, property_name=fraction_col)

    # Per-metacell majority label. The cell-type column now honours
    # `celltype_col`; it used to be hard-coded to "cleaned_celltype", which
    # silently ignored the argument.
    for label_col in (celltype_col, "Malignant", "HighMT"):
        mc.tl.convey_obs_to_group(
            adata=adata, gdata=metacells,
            property_name=label_col, to_property_name=label_col,
            method=mc.ut.most_frequent  # This is the default, for categorical data
        )
    return metacells, adata

# SCLC Chan

In [None]:
adata = sc.read_h5ad("/add/path/here/filtered_data/SCLC_Chan_10X/filtered_adata.h5ad")

In [None]:
adata = adata[adata.obs.source!="pleural_effusion"].copy()

In [None]:
all_samples = adata.obs["sample"].unique()

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

In [None]:
adata.obs["HighMT"] = (adata.obs.pct_counts_mt>15).astype(int)
adata.obs["Malignant"] = (adata.obs.cleaned_celltype=="Malignant").astype(int)

# Reset to raw counts and remove the MT genes

In [None]:
adata.X = adata.layers["counts"].copy()

adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()

# Create metacells

In [None]:
metacells, adata = get_metacells(adata, celltype_col="cleaned_celltype", sample_col="sample")

metacells.write("/add/path/here/metacell_data/SCLC_Chan_10X/metacells.h5ad")

adata.obs.to_csv("/add/path/here/metacell_data/SCLC_Chan_10X/adata_obs.csv")

# Analyze metacells

In [None]:
metacells = sc.read_h5ad("/add/path/here/metacell_data/SCLC_Chan_10X/metacells.h5ad")

In [None]:
metacells.obs["HighMT"] = (metacells.obs["HighMT_fraction_of_1"]>0.3).astype(int)

In [None]:
metacells.X = metacells.layers["total_umis"]

sc.pp.normalize_total(metacells, target_sum=10000)
sc.pp.log1p(metacells)

In [None]:
egfr_df = pd.concat([metacells[:,metacells.var_names[metacells.var_names.str.startswith(("ERB","EGFR"))]].to_df(),
                     metacells.obs[["Malignant","HighMT"]]],axis=1)

egfr_df.to_csv("/add/path/here/egfr_expression_metacells/SCLC.csv")

In [None]:
# Score four gene families per metacell and draw one boxplot panel for each.
# NOTE(review): `kegg` is not defined anywhere in the visible notebook, so on a
# fresh kernel this cell stops with a NameError — the KEGG gene-set dictionary
# needs to be loaded in an earlier cell.
fig, ax = plt.subplots(1,4, figsize=(11,2))
flatax = ax.flatten()

gene_names = metacells.var_names
# (obs column written by score_genes, panel title, genes to score)
detox_gene_sets = [
    ("CYP genes", "CYP genes (phase I)", gene_names[gene_names.str.startswith("CYP")]),
    ("UGT genes", "UGT genes (phase II)", gene_names[gene_names.str.startswith(("UGT","B3GAT"))]),
    ("GST genes", "GST genes (phase II)", gene_names[gene_names.str.startswith("GST")]),
    ("ABC transporters", "ABC transporters (phase III)", kegg['ABC transporters']),
]

for score_name, _, genes in detox_gene_sets:
    sc.tl.score_genes(metacells, gene_list=genes, score_name=score_name)

for panel, (score_name, title, _) in zip(flatax, detox_gene_sets):
    plot_boxplot(metacells, score_name, name=title, ax=panel)
for panel in flatax:
    pretty_ax(panel)

fig.tight_layout()

In [None]:
sc.tl.pca(metacells)
sc.pp.neighbors(metacells)
sc.tl.umap(metacells)

In [None]:
sc.pl.umap(metacells, color=["HighMT","Malignant","cleaned_celltype"])

In [None]:
sc.tl.score_genes(metacells, gene_list=common_disso_genes, score_name="Dissociation stress") 

In [None]:
stress_sigs = ["O-Flanagan_dissociation_stress","O-Flanagan_dissociation_red",
               "Van-den-Brink_dissociation_stress","Machado_dissociation_stress"]
sc.tl.score_genes(metacells, gene_list=list_core_genes, score_name="O-Flanagan_dissociation_stress")
sc.tl.score_genes(metacells, gene_list=red_core_genes, score_name="O-Flanagan_dissociation_red")
sc.tl.score_genes(metacells, gene_list=dissociation_genes, score_name="Van-den-Brink_dissociation_stress")
sc.tl.score_genes(metacells, gene_list=dissociation_genes_machado, score_name="Machado_dissociation_stress")

In [None]:
fig, ax = plt.subplots(1,4, figsize=(12,2))
flatax = ax.flatten()
for i,pathway in enumerate(stress_sigs):
    plot_boxplot(metacells, pathway, ax=flatax[i], name=pathway)
fig.tight_layout()

In [None]:
metacells.obs["Transcriptome variance"] = metacells.to_df().var(axis=1)

for sig in go_sigs:
    sc.tl.score_genes(metacells, gene_list=go_sigs[sig], score_name=sig)

fig, ax = plt.subplots(1,3, figsize=(9,2))
flatax = ax.flatten()
for i,sig in enumerate(go_sigs):
    plot_boxplot(metacells, 
                 sig, name=sig.split(" (GO")[0], ax = flatax[i])
plot_boxplot(metacells, 
                 "Transcriptome variance", name="Transcriptome variance", ax = flatax[-1])
fig.tight_layout()

In [None]:
# Compare every MitoCarta pathway score between LowMT and HighMT malignant
# metacells. `mannwhitneyu` and `tqdm` come from the import cell at the top.

# Group membership does not depend on the pathway, so compute it once.
malignant_mask = metacells.obs["Malignant"]==1
low_mt_mask = malignant_mask & (metacells.obs["HighMT"]==0)
high_mt_mask = malignant_mask & (metacells.obs["HighMT"]==1)

diffs = {}
for name in tqdm(mitocarta_pathways.index):
    sig_genes = mitocarta_pathways.loc[name].Genes.split(", ")
    # Skip pathways with no gene present in this dataset.
    if len(metacells.var_names.intersection(sig_genes))==0:
        continue
    # "MTCARTA_sig" is a scratch column overwritten on every iteration.
    sc.tl.score_genes(metacells, gene_list=sig_genes, score_name="MTCARTA_sig")

    val1 = metacells.obs.loc[low_mt_mask, "MTCARTA_sig"]
    val2 = metacells.obs.loc[high_mt_mask, "MTCARTA_sig"]
    if len(val1)==0 or len(val2)==0:
        # The test is undefined when a group is empty; record NaN explicitly.
        p = np.nan
    else:
        _, p = mannwhitneyu(val1.values,val2.values)
    diffs[name] = [val1.median(),val2.median(),p]

diffs = pd.DataFrame(diffs,index=["Median LowMT","Median HighMT","MWU p"]).T.sort_values("MWU p")

diffs["Diff. median"] = diffs["Median HighMT"] - diffs["Median LowMT"]

diffs.sort_values("Diff. median").to_csv("/add/path/here/metabolic_dysregulation/mitocarta_met_res/SCLC_Chan_10X.csv")

In [None]:
# The "Mito transfer" score is exported below but, unlike in the other dataset
# sections, it is never computed for this dataset; without it the column
# selection raises a KeyError. Score it here if it is missing.
if "Mito transfer" not in metacells.obs:
    sc.tl.score_genes(metacells, gene_list=mito_transfer, score_name="Mito transfer")

# Export the per-metacell annotations and signature scores.
metacells.obs[['cleaned_celltype', 'Malignant', 'HighMT',
       'CYP genes', 'UGT genes', 'GST genes', 'ABC transporters',
       'O-Flanagan_dissociation_stress', 'O-Flanagan_dissociation_red',
       'Van-den-Brink_dissociation_stress', 'Machado_dissociation_stress',
       'Dissociation stress', 'Mito transfer']].to_csv("/add/path/here/info-metacells/sclc.csv")

# Pancreas Steele 10X

In [None]:
adata = sc.read_h5ad("/add/path/here/filtered_data/Steele_Pancreas_10X/filtered_adata.h5ad")

In [None]:
all_samples = adata.obs["sample"].unique()

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

In [None]:
adata.obs["HighMT"] = (adata.obs.pct_counts_mt>15).astype(int)
adata.obs["Malignant"] = (adata.obs.cleaned_celltype=="Malignant").astype(int)

# Reset to raw counts and remove the MT genes

In [None]:
adata.X = adata.layers["counts"].copy()

adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()

# Create metacells

In [None]:
metacells, adata = get_metacells(adata, celltype_col="cleaned_celltype", sample_col="sample")

metacells.write("/add/path/here/metacell_data/Pancreas_Steele_10X/metacells.h5ad")

adata.obs.to_csv("/add/path/here/metacell_data/Pancreas_Steele_10X/adata_obs.csv")

# Analyze metacells

In [None]:
metacells = sc.read_h5ad("/add/path/here/metacell_data/Pancreas_Steele_10X/metacells.h5ad")

In [None]:
metacells.obs["HighMT"] = (metacells.obs["HighMT_fraction_of_1"]>0.3).astype(int)

In [None]:
metacells.X = metacells.layers["total_umis"]

sc.pp.normalize_total(metacells, target_sum=10000)
sc.pp.log1p(metacells)

In [None]:
egfr_df = pd.concat([metacells[:,metacells.var_names[metacells.var_names.str.startswith(("ERB","EGFR"))]].to_df(),
                     metacells.obs[["Malignant","HighMT"]]],axis=1)

egfr_df.to_csv("/add/path/here/egfr_expression_metacells/Pancreas.csv")

In [None]:
# Score four gene families per metacell and draw one boxplot panel for each.
# NOTE(review): `kegg` is not defined anywhere in the visible notebook, so on a
# fresh kernel this cell stops with a NameError — the KEGG gene-set dictionary
# needs to be loaded in an earlier cell.
fig, ax = plt.subplots(1,4, figsize=(11,2))
flatax = ax.flatten()

gene_names = metacells.var_names
# (obs column written by score_genes, panel title, genes to score)
detox_gene_sets = [
    ("CYP genes", "CYP genes (phase I)", gene_names[gene_names.str.startswith("CYP")]),
    ("UGT genes", "UGT genes (phase II)", gene_names[gene_names.str.startswith(("UGT","B3GAT"))]),
    ("GST genes", "GST genes (phase II)", gene_names[gene_names.str.startswith("GST")]),
    ("ABC transporters", "ABC transporters (phase III)", kegg['ABC transporters']),
]

for score_name, _, genes in detox_gene_sets:
    sc.tl.score_genes(metacells, gene_list=genes, score_name=score_name)

for panel, (score_name, title, _) in zip(flatax, detox_gene_sets):
    plot_boxplot(metacells, score_name, name=title, ax=panel)
for panel in flatax:
    pretty_ax(panel)

fig.tight_layout()

In [None]:
sc.tl.pca(metacells)
sc.pp.neighbors(metacells)
sc.tl.umap(metacells)

In [None]:
sc.pl.umap(metacells, color=["HighMT","Malignant","cleaned_celltype"])

In [None]:
sc.tl.score_genes(metacells, gene_list=common_disso_genes, score_name="Dissociation stress")

In [None]:
stress_sigs = ["O-Flanagan_dissociation_stress","O-Flanagan_dissociation_red",
               "Van-den-Brink_dissociation_stress","Machado_dissociation_stress"]
sc.tl.score_genes(metacells, gene_list=list_core_genes, score_name="O-Flanagan_dissociation_stress")
sc.tl.score_genes(metacells, gene_list=red_core_genes, score_name="O-Flanagan_dissociation_red")
sc.tl.score_genes(metacells, gene_list=dissociation_genes, score_name="Van-den-Brink_dissociation_stress")
sc.tl.score_genes(metacells, gene_list=dissociation_genes_machado, score_name="Machado_dissociation_stress")

In [None]:
fig, ax = plt.subplots(1,4, figsize=(12,2))
flatax = ax.flatten()
for i,pathway in enumerate(stress_sigs):
    plot_boxplot(metacells, pathway, ax=flatax[i], name=pathway)
fig.tight_layout()

In [None]:
metacells.obs["Transcriptome variance"] = metacells.to_df().var(axis=1)

for sig in go_sigs:
    sc.tl.score_genes(metacells, gene_list=go_sigs[sig], score_name=sig)

fig, ax = plt.subplots(1,3, figsize=(9,2))
flatax = ax.flatten()
for i,sig in enumerate(go_sigs):
    plot_boxplot(metacells, 
                 sig, name=sig.split(" (GO")[0], ax = flatax[i])
plot_boxplot(metacells, 
                 "Transcriptome variance", name="Transcriptome variance", ax = flatax[-1])
fig.tight_layout()

In [None]:
sc.tl.score_genes(metacells, gene_list=mito_transfer, score_name="Mito transfer")

plot_boxplot(metacells, 
                 "Mito transfer", name="Mito transfer")

In [None]:
# Compare every MitoCarta pathway score between LowMT and HighMT malignant
# metacells. `mannwhitneyu` and `tqdm` come from the import cell at the top.

# Group membership does not depend on the pathway, so compute it once.
malignant_mask = metacells.obs["Malignant"]==1
low_mt_mask = malignant_mask & (metacells.obs["HighMT"]==0)
high_mt_mask = malignant_mask & (metacells.obs["HighMT"]==1)

diffs = {}
for name in tqdm(mitocarta_pathways.index):
    sig_genes = mitocarta_pathways.loc[name].Genes.split(", ")
    # Skip pathways with no gene present in this dataset.
    if len(metacells.var_names.intersection(sig_genes))==0:
        continue
    # "MTCARTA_sig" is a scratch column overwritten on every iteration.
    sc.tl.score_genes(metacells, gene_list=sig_genes, score_name="MTCARTA_sig")

    val1 = metacells.obs.loc[low_mt_mask, "MTCARTA_sig"]
    val2 = metacells.obs.loc[high_mt_mask, "MTCARTA_sig"]
    if len(val1)==0 or len(val2)==0:
        # The test is undefined when a group is empty; record NaN explicitly.
        p = np.nan
    else:
        _, p = mannwhitneyu(val1.values,val2.values)
    diffs[name] = [val1.median(),val2.median(),p]

diffs = pd.DataFrame(diffs,index=["Median LowMT","Median HighMT","MWU p"]).T.sort_values("MWU p")

diffs["Diff. median"] = diffs["Median HighMT"] - diffs["Median LowMT"]

diffs.sort_values("Diff. median").to_csv("/add/path/here/metabolic_dysregulation/mitocarta_met_res/Pancreas_Steele_10X.csv")

In [None]:
metacells.obs[['cleaned_celltype', 'Malignant', 'HighMT',
       'CYP genes', 'UGT genes', 'GST genes', 'ABC transporters',
       'O-Flanagan_dissociation_stress', 'O-Flanagan_dissociation_red',
       'Van-den-Brink_dissociation_stress', 'Machado_dissociation_stress',
       'Dissociation stress', 'Mito transfer']].to_csv("/add/path/here/info-metacells/pancreas.csv")

# Metastatic pancreas Raghavan 10X

In [None]:
adata = sc.read_h5ad("/add/path/here/filtered_data/Raghavan_Pancreas_10X/filtered_adata.h5ad")

In [None]:
all_samples = adata.obs["sample"].unique()

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

In [None]:
adata.obs["HighMT"] = (adata.obs.pct_counts_mt>15).astype(int)
adata.obs["Malignant"] = (adata.obs.cleaned_celltype=="Malignant").astype(int)

# Reset to raw counts and remove the MT genes

In [None]:
adata.X = adata.layers["counts"].copy()

adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()

# Create metacells

In [None]:
metacells, adata = get_metacells(adata, celltype_col="cleaned_celltype", sample_col="sample")

metacells.write("/add/path/here/metacell_data/Pancreas_Raghavan_10X/metacells.h5ad")

adata.obs.to_csv("/add/path/here/metacell_data/Pancreas_Raghavan_10X/adata_obs.csv")

# Analyze metacells

In [None]:
metacells = sc.read_h5ad("/add/path/here/metacell_data/Pancreas_Raghavan_10X/metacells.h5ad")

In [None]:
metacells.obs["HighMT"] = (metacells.obs["HighMT_fraction_of_1"]>0.3).astype(int)

In [None]:
metacells.X = metacells.layers["total_umis"]

sc.pp.normalize_total(metacells, target_sum=10000)
sc.pp.log1p(metacells)

In [None]:
egfr_df = pd.concat([metacells[:,metacells.var_names[metacells.var_names.str.startswith(("ERB","EGFR"))]].to_df(),
                     metacells.obs[["Malignant","HighMT"]]],axis=1)

egfr_df.to_csv("/add/path/here/egfr_expression_metacells/MetPancreas.csv")

In [None]:
# Score four gene families per metacell and draw one boxplot panel for each.
# NOTE(review): `kegg` is not defined anywhere in the visible notebook, so on a
# fresh kernel this cell stops with a NameError — the KEGG gene-set dictionary
# needs to be loaded in an earlier cell.
fig, ax = plt.subplots(1,4, figsize=(11,2))
flatax = ax.flatten()

gene_names = metacells.var_names
# (obs column written by score_genes, panel title, genes to score)
detox_gene_sets = [
    ("CYP genes", "CYP genes (phase I)", gene_names[gene_names.str.startswith("CYP")]),
    ("UGT genes", "UGT genes (phase II)", gene_names[gene_names.str.startswith(("UGT","B3GAT"))]),
    ("GST genes", "GST genes (phase II)", gene_names[gene_names.str.startswith("GST")]),
    ("ABC transporters", "ABC transporters (phase III)", kegg['ABC transporters']),
]

for score_name, _, genes in detox_gene_sets:
    sc.tl.score_genes(metacells, gene_list=genes, score_name=score_name)

for panel, (score_name, title, _) in zip(flatax, detox_gene_sets):
    plot_boxplot(metacells, score_name, name=title, ax=panel)
for panel in flatax:
    pretty_ax(panel)

fig.tight_layout()

In [None]:
sc.tl.pca(metacells)
sc.pp.neighbors(metacells)
sc.tl.umap(metacells)

In [None]:
sc.pl.umap(metacells, color=["HighMT","Malignant","cleaned_celltype"])

In [None]:
sc.tl.score_genes(metacells, gene_list=common_disso_genes, score_name="Dissociation stress") 

In [None]:
stress_sigs = ["O-Flanagan_dissociation_stress","O-Flanagan_dissociation_red",
               "Van-den-Brink_dissociation_stress","Machado_dissociation_stress"]
sc.tl.score_genes(metacells, gene_list=list_core_genes, score_name="O-Flanagan_dissociation_stress")
sc.tl.score_genes(metacells, gene_list=red_core_genes, score_name="O-Flanagan_dissociation_red")
sc.tl.score_genes(metacells, gene_list=dissociation_genes, score_name="Van-den-Brink_dissociation_stress")
sc.tl.score_genes(metacells, gene_list=dissociation_genes_machado, score_name="Machado_dissociation_stress")

In [None]:
fig, ax = plt.subplots(1,4, figsize=(12,2))
flatax = ax.flatten()
for i,pathway in enumerate(stress_sigs):
    plot_boxplot(metacells, pathway, ax=flatax[i], name=pathway)
fig.tight_layout()

In [None]:
metacells.obs["Transcriptome variance"] = metacells.to_df().var(axis=1)

for sig in go_sigs:
    sc.tl.score_genes(metacells, gene_list=go_sigs[sig], score_name=sig)

fig, ax = plt.subplots(1,3, figsize=(9,2))
flatax = ax.flatten()
for i,sig in enumerate(go_sigs):
    plot_boxplot(metacells, 
                 sig, name=sig.split(" (GO")[0], ax = flatax[i])
plot_boxplot(metacells, 
                 "Transcriptome variance", name="Transcriptome variance", ax = flatax[-1])
fig.tight_layout()

In [None]:
sc.tl.score_genes(metacells, gene_list=mito_transfer, score_name="Mito transfer")

plot_boxplot(metacells, 
                 "Mito transfer", name="Mito transfer")

In [None]:
# Compare every MitoCarta pathway score between LowMT and HighMT malignant
# metacells. `mannwhitneyu` and `tqdm` come from the import cell at the top.

# Group membership does not depend on the pathway, so compute it once.
malignant_mask = metacells.obs["Malignant"]==1
low_mt_mask = malignant_mask & (metacells.obs["HighMT"]==0)
high_mt_mask = malignant_mask & (metacells.obs["HighMT"]==1)

diffs = {}
for name in tqdm(mitocarta_pathways.index):
    sig_genes = mitocarta_pathways.loc[name].Genes.split(", ")
    # Skip pathways with no gene present in this dataset.
    if len(metacells.var_names.intersection(sig_genes))==0:
        continue
    # "MTCARTA_sig" is a scratch column overwritten on every iteration.
    sc.tl.score_genes(metacells, gene_list=sig_genes, score_name="MTCARTA_sig")

    val1 = metacells.obs.loc[low_mt_mask, "MTCARTA_sig"]
    val2 = metacells.obs.loc[high_mt_mask, "MTCARTA_sig"]
    if len(val1)==0 or len(val2)==0:
        # The test is undefined when a group is empty; record NaN explicitly.
        p = np.nan
    else:
        _, p = mannwhitneyu(val1.values,val2.values)
    diffs[name] = [val1.median(),val2.median(),p]

diffs = pd.DataFrame(diffs,index=["Median LowMT","Median HighMT","MWU p"]).T.sort_values("MWU p")

diffs["Diff. median"] = diffs["Median HighMT"] - diffs["Median LowMT"]

diffs.sort_values("Diff. median").to_csv("/add/path/here/metabolic_dysregulation/mitocarta_met_res/Pancreas_Raghavan_10X.csv")

In [None]:
metacells.obs[['cleaned_celltype', 'Malignant', 'HighMT',
       'CYP genes', 'UGT genes', 'GST genes', 'ABC transporters',
       'O-Flanagan_dissociation_stress', 'O-Flanagan_dissociation_red',
       'Van-den-Brink_dissociation_stress', 'Machado_dissociation_stress',
       'Dissociation stress', 'Mito transfer']].to_csv("/add/path/here/info-metacells/metpancreas.csv")

# RCC Bi 10X

In [None]:
adata = sc.read_h5ad("/add/path/here/filtered_data/Bi_RCC_10X/filtered_adata.h5ad")

all_samples = adata.obs["biosample_id"].unique()

adata.obs["HighMT"] = (adata.obs.pct_counts_mt>15).astype(int)
adata.obs["Malignant"] = (adata.obs.cleaned_celltype.isin(["Tumor","TP1","TP2"])).astype(int)

In [None]:
sc.pp.filter_genes(adata, min_cells=int(0.01*adata.shape[0]))

# Reset to raw counts and remove the MT genes

In [None]:
adata.X = adata.layers["counts"].copy()

adata = adata[:,adata.var_names[~adata.var_names.str.startswith("MT-")]].copy()

# Create metacells

In [None]:
# This dataset identifies samples by "biosample_id" (see the loading cell
# above), so that column is passed as the sample column.
# NOTE(review): the call used sample_col="sample"; confirm that adata.obs has
# no separate "sample" column that was intended here.
metacells, adata = get_metacells(adata, celltype_col="cleaned_celltype", sample_col="biosample_id")

metacells.write("/add/path/here/metacell_data/RCC_Bi_10X/metacells.h5ad")

adata.obs.to_csv("/add/path/here/metacell_data/RCC_Bi_10X/adata_obs.csv")

# Analyze metacells

In [None]:
metacells = sc.read_h5ad("/add/path/here/metacell_data/RCC_Bi_10X/metacells.h5ad")

In [None]:
metacells.obs["HighMT"] = (metacells.obs["HighMT_fraction_of_1"]>0.3).astype(int)

In [None]:
metacells.X = metacells.layers["total_umis"]

sc.pp.normalize_total(metacells, target_sum=10000)
sc.pp.log1p(metacells)

In [None]:
egfr_df = pd.concat([metacells[:,metacells.var_names[metacells.var_names.str.startswith(("ERB","EGFR"))]].to_df(),
                     metacells.obs[["Malignant","HighMT"]]],axis=1)

egfr_df.to_csv("/add/path/here/egfr_expression_metacells/RCC.csv")

In [None]:
# Score four gene families per metacell and draw one boxplot panel for each.
# NOTE(review): `kegg` is not defined anywhere in the visible notebook, so on a
# fresh kernel this cell stops with a NameError — the KEGG gene-set dictionary
# needs to be loaded in an earlier cell.
fig, ax = plt.subplots(1,4, figsize=(11,2))
flatax = ax.flatten()

gene_names = metacells.var_names
# (obs column written by score_genes, panel title, genes to score)
detox_gene_sets = [
    ("CYP genes", "CYP genes (phase I)", gene_names[gene_names.str.startswith("CYP")]),
    ("UGT genes", "UGT genes (phase II)", gene_names[gene_names.str.startswith(("UGT","B3GAT"))]),
    ("GST genes", "GST genes (phase II)", gene_names[gene_names.str.startswith("GST")]),
    ("ABC transporters", "ABC transporters (phase III)", kegg['ABC transporters']),
]

for score_name, _, genes in detox_gene_sets:
    sc.tl.score_genes(metacells, gene_list=genes, score_name=score_name)

for panel, (score_name, title, _) in zip(flatax, detox_gene_sets):
    plot_boxplot(metacells, score_name, name=title, ax=panel)
for panel in flatax:
    pretty_ax(panel)

fig.tight_layout()

In [None]:
sc.tl.pca(metacells)
sc.pp.neighbors(metacells)
sc.tl.umap(metacells)

In [None]:
sc.pl.umap(metacells, color=["HighMT","Malignant","cleaned_celltype"])

In [None]:
sc.tl.score_genes(metacells, gene_list=common_disso_genes, score_name="Dissociation stress")

In [None]:
stress_sigs = ["O-Flanagan_dissociation_stress","O-Flanagan_dissociation_red",
               "Van-den-Brink_dissociation_stress","Machado_dissociation_stress"]
sc.tl.score_genes(metacells, gene_list=list_core_genes, score_name="O-Flanagan_dissociation_stress")
sc.tl.score_genes(metacells, gene_list=red_core_genes, score_name="O-Flanagan_dissociation_red")
sc.tl.score_genes(metacells, gene_list=dissociation_genes, score_name="Van-den-Brink_dissociation_stress")
sc.tl.score_genes(metacells, gene_list=dissociation_genes_machado, score_name="Machado_dissociation_stress")

In [None]:
fig, ax = plt.subplots(1,4, figsize=(12,2))
flatax = ax.flatten()
for i,pathway in enumerate(stress_sigs):
    plot_boxplot(metacells, pathway, ax=flatax[i], name=pathway)
fig.tight_layout()

In [None]:
metacells.obs["Transcriptome variance"] = metacells.to_df().var(axis=1)

for sig in go_sigs:
    sc.tl.score_genes(metacells, gene_list=go_sigs[sig], score_name=sig)

fig, ax = plt.subplots(1,3, figsize=(9,2))
flatax = ax.flatten()
for i,sig in enumerate(go_sigs):
    plot_boxplot(metacells, 
                 sig, name=sig.split(" (GO")[0], ax = flatax[i])
plot_boxplot(metacells, 
                 "Transcriptome variance", name="Transcriptome variance", ax = flatax[-1])
fig.tight_layout()

In [None]:
sc.tl.score_genes(metacells, gene_list=mito_transfer, score_name="Mito transfer")

plot_boxplot(metacells, 
                 "Mito transfer", name="Mito transfer")

In [None]:
# Compare every MitoCarta pathway score between LowMT and HighMT malignant
# metacells. `mannwhitneyu` and `tqdm` come from the import cell at the top.

# Group membership does not depend on the pathway, so compute it once.
malignant_mask = metacells.obs["Malignant"]==1
low_mt_mask = malignant_mask & (metacells.obs["HighMT"]==0)
high_mt_mask = malignant_mask & (metacells.obs["HighMT"]==1)

diffs = {}
for name in tqdm(mitocarta_pathways.index):
    sig_genes = mitocarta_pathways.loc[name].Genes.split(", ")
    # Skip pathways with no gene present in this dataset.
    if len(metacells.var_names.intersection(sig_genes))==0:
        continue
    # "MTCARTA_sig" is a scratch column overwritten on every iteration.
    sc.tl.score_genes(metacells, gene_list=sig_genes, score_name="MTCARTA_sig")

    val1 = metacells.obs.loc[low_mt_mask, "MTCARTA_sig"]
    val2 = metacells.obs.loc[high_mt_mask, "MTCARTA_sig"]
    if len(val1)==0 or len(val2)==0:
        # The test is undefined when a group is empty; record NaN explicitly.
        p = np.nan
    else:
        _, p = mannwhitneyu(val1.values,val2.values)
    diffs[name] = [val1.median(),val2.median(),p]

diffs = pd.DataFrame(diffs,index=["Median LowMT","Median HighMT","MWU p"]).T.sort_values("MWU p")

diffs["Diff. median"] = diffs["Median HighMT"] - diffs["Median LowMT"]

diffs.sort_values("Diff. median").to_csv("/add/path/here/metabolic_dysregulation/mitocarta_met_res/RCC_Bi_10X.csv")

In [None]:
# Per-metacell annotations and signature scores written out as one CSV.
exported_obs_columns = [
    'cleaned_celltype', 'Malignant', 'HighMT',
    'CYP genes', 'UGT genes', 'GST genes', 'ABC transporters',
    'O-Flanagan_dissociation_stress', 'O-Flanagan_dissociation_red',
    'Van-den-Brink_dissociation_stress', 'Machado_dissociation_stress',
    'Dissociation stress', 'Mito transfer',
]
metacells.obs[exported_obs_columns].to_csv("/add/path/here/info-metacells/rcc.csv")

# Breast Wu 10X

In [None]:
# Load the filtered single-cell object for this dataset.
adata = sc.read_h5ad(
    "/add/path/here/filtered_data/Breast_Wu_10X/filtered_adata.h5ad"
)

all_samples = adata.obs["Patient"].unique()

# Binary flags: pct_counts_mt above 15, and cell type labelled "Cancer Epithelial".
adata.obs["HighMT"] = adata.obs["pct_counts_mt"].gt(15).astype(int)
adata.obs["Malignant"] = adata.obs["cleaned_celltype"].isin(["Cancer Epithelial"]).astype(int)

In [None]:
# Keep only genes detected in at least 1% of the cells (threshold truncated to an int).
min_cells_detected = int(0.01 * adata.n_obs)
sc.pp.filter_genes(adata, min_cells=min_cells_detected)

# Normalize without the MT genes

In [None]:
# Restore X from the "counts" layer (copied, so the layer itself is not aliased).
adata.X = adata.layers["counts"].copy()

# Drop every gene whose symbol starts with "MT-".
is_mt_gene = adata.var_names.str.startswith("MT-")
adata = adata[:, adata.var_names[~is_mt_gene]].copy()

# Create metacells

In [None]:
# Build metacells; `adata` is rebound to the second object returned by get_metacells.
metacells, adata = get_metacells(
    adata,
    celltype_col="cleaned_celltype",
    sample_col="Patient",
)

# Persist the metacell object and the per-cell obs table.
metacells.write("/add/path/here/metacell_data/Breast_Wu_10X/metacells.h5ad")
adata.obs.to_csv("/add/path/here/metacell_data/Breast_Wu_10X/adata_obs.csv")

# Analyze metacells

In [None]:
# Reload the saved metacell object so the analysis can start from disk.
metacells = sc.read_h5ad(
    "/add/path/here/metacell_data/Breast_Wu_10X/metacells.h5ad"
)

In [None]:
# A metacell is flagged HighMT when its "HighMT_fraction_of_1" value exceeds 0.3.
highmt_fraction = metacells.obs["HighMT_fraction_of_1"]
metacells.obs["HighMT"] = highmt_fraction.gt(0.3).astype(int)

In [None]:
# Work on a copy of the UMI layer: the scanpy preprocessing calls below may modify X
# in place, and binding X to the layer object itself would let them overwrite
# layers["total_umis"] too. The counts reset earlier in this section copies as well.
metacells.X = metacells.layers["total_umis"].copy()

# Scale each metacell to a total of 10,000, then apply log1p.
sc.pp.normalize_total(metacells, target_sum=10000)
sc.pp.log1p(metacells)

In [None]:
# Expression of genes whose symbol starts with "ERB" or "EGFR", next to the
# Malignant / HighMT flags of each metacell.
erbb_mask = metacells.var_names.str.startswith(("ERB","EGFR"))
erbb_expression = metacells[:, metacells.var_names[erbb_mask]].to_df()
egfr_df = pd.concat(
    [erbb_expression, metacells.obs[["Malignant","HighMT"]]],
    axis=1,
)

egfr_df.to_csv("/add/path/here/egfr_expression_metacells/Breast.csv")

In [None]:
fig, ax = plt.subplots(1,4, figsize=(11,2))
flatax = ax.flatten()

# Prefix-defined signatures: score every gene whose symbol starts with the prefix(es).
prefix_signatures = {
    "CYP genes": "CYP",
    "UGT genes": ("UGT","B3GAT"),
    "GST genes": "GST",
}
for detox_score, gene_prefix in prefix_signatures.items():
    prefix_genes = metacells.var_names[metacells.var_names.str.startswith(gene_prefix)]
    sc.tl.score_genes(metacells, gene_list=prefix_genes, score_name=detox_score)
# The ABC transporter list is taken from `kegg` rather than from a symbol prefix.
sc.tl.score_genes(
    metacells,
    gene_list=kegg['ABC transporters'],
    score_name="ABC transporters",
)

# One panel per signature, left to right: obs column -> panel title.
panel_titles = {
    "CYP genes": "CYP genes (phase I)",
    "UGT genes": "UGT genes (phase II)",
    "GST genes": "GST genes (phase II)",
    "ABC transporters": "ABC transporters (phase III)",
}
for panel, (detox_score, panel_title) in zip(flatax, panel_titles.items()):
    plot_boxplot(metacells, detox_score, name=panel_title, ax=panel)
for panel in flatax:
    pretty_ax(panel)

fig.tight_layout()

In [None]:
# Build the metacell embedding with scanpy defaults: PCA -> neighbour graph -> UMAP.
for embedding_step in (sc.tl.pca, sc.pp.neighbors, sc.tl.umap):
    embedding_step(metacells)

In [None]:
# Colour the metacell UMAP by MT status, malignancy flag and cell-type label.
umap_color_keys = ["HighMT", "Malignant", "cleaned_celltype"]
sc.pl.umap(metacells, color=umap_color_keys)

In [None]:
# Score the `common_disso_genes` gene list on every metacell.
sc.tl.score_genes(
    metacells,
    gene_list=common_disso_genes,
    score_name="Dissociation stress",
)

In [None]:
# Dissociation-stress signatures, keyed by the obs column each score is written to.
stress_gene_sets = {
    "O-Flanagan_dissociation_stress": list_core_genes,
    "O-Flanagan_dissociation_red": red_core_genes,
    "Van-den-Brink_dissociation_stress": dissociation_genes,
    "Machado_dissociation_stress": dissociation_genes_machado,
}
# `stress_sigs` keeps the score names in plotting order for the next cell.
stress_sigs = list(stress_gene_sets)
for stress_score_name, stress_gene_list in stress_gene_sets.items():
    sc.tl.score_genes(metacells, gene_list=stress_gene_list, score_name=stress_score_name)

In [None]:
# One annotated boxplot panel per dissociation-stress signature.
fig, ax = plt.subplots(1,4, figsize=(12,2))
flatax = ax.flatten()
for panel, pathway in zip(flatax, stress_sigs):
    plot_boxplot(metacells, pathway, ax=panel, name=pathway)
fig.tight_layout()

In [None]:
# Per-metacell variance across the genes of the current expression matrix.
metacells.obs["Transcriptome variance"] = (
    metacells.to_df().var(axis=1)
)

# One score column per GO signature, named after the signature key.
for sig in go_sigs:
    sc.tl.score_genes(metacells, gene_list=go_sigs[sig], score_name=sig)

fig, ax = plt.subplots(1,3, figsize=(9,2))
flatax = ax.flatten()
# GO panels fill the axes from the left; the panel title drops the " (GO..." suffix.
for panel_idx, sig in enumerate(go_sigs):
    plot_boxplot(metacells, sig, name=sig.partition(" (GO")[0], ax=flatax[panel_idx])
# Transcriptome variance always goes in the right-most panel.
plot_boxplot(
    metacells,
    "Transcriptome variance",
    name="Transcriptome variance",
    ax=flatax[-1],
)
fig.tight_layout()

In [None]:
# Score the `mito_transfer` gene list per metacell, then plot it in its own figure
# (plot_boxplot creates the figure when no axis is passed).
sc.tl.score_genes(
    metacells,
    gene_list=mito_transfer,
    score_name="Mito transfer",
)
plot_boxplot(metacells, "Mito transfer", name="Mito transfer")

In [None]:
# Screen every MitoCarta pathway for a score shift between low-MT and high-MT
# malignant metacells (Mann-Whitney U test on the per-metacell pathway score).
# NOTE: both imports duplicate the notebook's top import cell; they are kept so
# this cell can be re-run on its own.
from scipy.stats import mannwhitneyu
from tqdm.notebook import tqdm

# The malignant low-/high-MT split does not depend on the pathway being scored,
# so build the boolean masks once instead of re-filtering `obs` for each pathway.
is_malignant = metacells.obs["Malignant"] == 1
low_mt_mask = is_malignant & (metacells.obs["HighMT"] == 0)
high_mt_mask = is_malignant & (metacells.obs["HighMT"] == 1)

diffs = {}
for name in tqdm(mitocarta_pathways.index):
    sig_genes = mitocarta_pathways.loc[name].Genes.split(", ")
    # Skip pathways with no gene present in the metacell matrix.
    if metacells.var_names.intersection(sig_genes).empty:
        continue
    # The "MTCARTA_sig" column is overwritten on every iteration.
    sc.tl.score_genes(metacells, gene_list=sig_genes, score_name="MTCARTA_sig")

    pathway_scores = metacells.obs["MTCARTA_sig"]
    val1 = pathway_scores[low_mt_mask]
    val2 = pathway_scores[high_mt_mask]
    if len(val1) and len(val2):
        _, p = mannwhitneyu(val1.values, val2.values)
    else:
        # mannwhitneyu rejects empty samples; record NaN instead of aborting the cell.
        p = float("nan")
    diffs[name] = [val1.median(), val2.median(), p]

# Rows = pathways, columns = the three statistics collected above.
diffs = pd.DataFrame(diffs,index=["Median LowMT","Median HighMT","MWU p"]).T.sort_values("MWU p")

diffs["Diff. median"] = diffs["Median HighMT"] - diffs["Median LowMT"]

diffs.sort_values("Diff. median").to_csv("/add/path/here/metabolic_dysregulation/mitocarta_met_res/Breast_Wu_10X.csv")

In [None]:
# Per-metacell annotations and signature scores written out as one CSV.
exported_obs_columns = [
    'cleaned_celltype', 'Malignant', 'HighMT',
    'CYP genes', 'UGT genes', 'GST genes', 'ABC transporters',
    'O-Flanagan_dissociation_stress', 'O-Flanagan_dissociation_red',
    'Van-den-Brink_dissociation_stress', 'Machado_dissociation_stress',
    'Dissociation stress', 'Mito transfer',
]
metacells.obs[exported_obs_columns].to_csv("/add/path/here/info-metacells/breast.csv")

# UvealMelanoma Durante 10X

In [None]:
# Load the filtered single-cell object for this dataset.
adata = sc.read_h5ad(
    "/add/path/here/filtered_data/UvealMelanoma_Durante_10X/filtered_adata.h5ad"
)

all_samples = adata.obs["patient"].unique()

# Binary flags: pct_counts_mt above 15, and cell type labelled "Malignant".
adata.obs["HighMT"] = adata.obs["pct_counts_mt"].gt(15).astype(int)
adata.obs["Malignant"] = adata.obs["cleaned_celltype"].isin(["Malignant"]).astype(int)

In [None]:
# Keep only genes detected in at least 1% of the cells (threshold truncated to an int).
min_cells_detected = int(0.01 * adata.n_obs)
sc.pp.filter_genes(adata, min_cells=min_cells_detected)

# Normalize without the MT genes

In [None]:
# Restore X from the "counts" layer (copied, so the layer itself is not aliased).
adata.X = adata.layers["counts"].copy()

# Drop every gene whose symbol starts with "MT-".
is_mt_gene = adata.var_names.str.startswith("MT-")
adata = adata[:, adata.var_names[~is_mt_gene]].copy()

# Create metacells

In [None]:
# Build metacells; `adata` is rebound to the second object returned by get_metacells.
metacells, adata = get_metacells(
    adata,
    celltype_col="cleaned_celltype",
    sample_col="patient",
)

# Persist the metacell object and the per-cell obs table.
metacells.write("/add/path/here/metacell_data/UvealMelanoma_Durante_10X/metacells.h5ad")
adata.obs.to_csv("/add/path/here/metacell_data/UvealMelanoma_Durante_10X/adata_obs.csv")

# Analyze metacells

In [None]:
# Reload the saved metacell object so the analysis can start from disk.
metacells = sc.read_h5ad(
    "/add/path/here/metacell_data/UvealMelanoma_Durante_10X/metacells.h5ad"
)

In [None]:
# A metacell is flagged HighMT when its "HighMT_fraction_of_1" value exceeds 0.3.
highmt_fraction = metacells.obs["HighMT_fraction_of_1"]
metacells.obs["HighMT"] = highmt_fraction.gt(0.3).astype(int)

In [None]:
# Work on a copy of the UMI layer: the scanpy preprocessing calls below may modify X
# in place, and binding X to the layer object itself would let them overwrite
# layers["total_umis"] too. The counts reset earlier in this section copies as well.
metacells.X = metacells.layers["total_umis"].copy()

# Scale each metacell to a total of 10,000, then apply log1p.
sc.pp.normalize_total(metacells, target_sum=10000)
sc.pp.log1p(metacells)

In [None]:
# Expression of genes whose symbol starts with "ERB" or "EGFR", next to the
# Malignant / HighMT flags of each metacell.
erbb_mask = metacells.var_names.str.startswith(("ERB","EGFR"))
erbb_expression = metacells[:, metacells.var_names[erbb_mask]].to_df()
egfr_df = pd.concat(
    [erbb_expression, metacells.obs[["Malignant","HighMT"]]],
    axis=1,
)

egfr_df.to_csv("/add/path/here/egfr_expression_metacells/UvealMelanoma.csv")

In [None]:
fig, ax = plt.subplots(1,4, figsize=(11,2))
flatax = ax.flatten()

# Prefix-defined signatures: score every gene whose symbol starts with the prefix(es).
prefix_signatures = {
    "CYP genes": "CYP",
    "UGT genes": ("UGT","B3GAT"),
    "GST genes": "GST",
}
for detox_score, gene_prefix in prefix_signatures.items():
    prefix_genes = metacells.var_names[metacells.var_names.str.startswith(gene_prefix)]
    sc.tl.score_genes(metacells, gene_list=prefix_genes, score_name=detox_score)
# The ABC transporter list is taken from `kegg` rather than from a symbol prefix.
sc.tl.score_genes(
    metacells,
    gene_list=kegg['ABC transporters'],
    score_name="ABC transporters",
)

# One panel per signature, left to right: obs column -> panel title.
panel_titles = {
    "CYP genes": "CYP genes (phase I)",
    "UGT genes": "UGT genes (phase II)",
    "GST genes": "GST genes (phase II)",
    "ABC transporters": "ABC transporters (phase III)",
}
for panel, (detox_score, panel_title) in zip(flatax, panel_titles.items()):
    plot_boxplot(metacells, detox_score, name=panel_title, ax=panel)
for panel in flatax:
    pretty_ax(panel)

fig.tight_layout()

In [None]:
# Build the metacell embedding with scanpy defaults: PCA -> neighbour graph -> UMAP.
for embedding_step in (sc.tl.pca, sc.pp.neighbors, sc.tl.umap):
    embedding_step(metacells)

In [None]:
# Colour the metacell UMAP by MT status, malignancy flag and cell-type label.
umap_color_keys = ["HighMT", "Malignant", "cleaned_celltype"]
sc.pl.umap(metacells, color=umap_color_keys)

In [None]:
# Score the `common_disso_genes` gene list on every metacell.
sc.tl.score_genes(
    metacells,
    gene_list=common_disso_genes,
    score_name="Dissociation stress",
)

In [None]:
# Dissociation-stress signatures, keyed by the obs column each score is written to.
stress_gene_sets = {
    "O-Flanagan_dissociation_stress": list_core_genes,
    "O-Flanagan_dissociation_red": red_core_genes,
    "Van-den-Brink_dissociation_stress": dissociation_genes,
    "Machado_dissociation_stress": dissociation_genes_machado,
}
# `stress_sigs` keeps the score names in plotting order for the next cell.
stress_sigs = list(stress_gene_sets)
for stress_score_name, stress_gene_list in stress_gene_sets.items():
    sc.tl.score_genes(metacells, gene_list=stress_gene_list, score_name=stress_score_name)

In [None]:
# One annotated boxplot panel per dissociation-stress signature.
fig, ax = plt.subplots(1,4, figsize=(12,2))
flatax = ax.flatten()
for panel, pathway in zip(flatax, stress_sigs):
    plot_boxplot(metacells, pathway, ax=panel, name=pathway)
fig.tight_layout()

In [None]:
# Per-metacell variance across the genes of the current expression matrix.
metacells.obs["Transcriptome variance"] = (
    metacells.to_df().var(axis=1)
)

# One score column per GO signature, named after the signature key.
for sig in go_sigs:
    sc.tl.score_genes(metacells, gene_list=go_sigs[sig], score_name=sig)

fig, ax = plt.subplots(1,3, figsize=(9,2))
flatax = ax.flatten()
# GO panels fill the axes from the left; the panel title drops the " (GO..." suffix.
for panel_idx, sig in enumerate(go_sigs):
    plot_boxplot(metacells, sig, name=sig.partition(" (GO")[0], ax=flatax[panel_idx])
# Transcriptome variance always goes in the right-most panel.
plot_boxplot(
    metacells,
    "Transcriptome variance",
    name="Transcriptome variance",
    ax=flatax[-1],
)
fig.tight_layout()

In [None]:
# Score the `mito_transfer` gene list per metacell, then plot it in its own figure
# (plot_boxplot creates the figure when no axis is passed).
sc.tl.score_genes(
    metacells,
    gene_list=mito_transfer,
    score_name="Mito transfer",
)
plot_boxplot(metacells, "Mito transfer", name="Mito transfer")

In [None]:
# Screen every MitoCarta pathway for a score shift between low-MT and high-MT
# malignant metacells (Mann-Whitney U test on the per-metacell pathway score).
# NOTE: both imports duplicate the notebook's top import cell; they are kept so
# this cell can be re-run on its own.
from scipy.stats import mannwhitneyu
from tqdm.notebook import tqdm

# The malignant low-/high-MT split does not depend on the pathway being scored,
# so build the boolean masks once instead of re-filtering `obs` for each pathway.
is_malignant = metacells.obs["Malignant"] == 1
low_mt_mask = is_malignant & (metacells.obs["HighMT"] == 0)
high_mt_mask = is_malignant & (metacells.obs["HighMT"] == 1)

diffs = {}
for name in tqdm(mitocarta_pathways.index):
    sig_genes = mitocarta_pathways.loc[name].Genes.split(", ")
    # Skip pathways with no gene present in the metacell matrix.
    if metacells.var_names.intersection(sig_genes).empty:
        continue
    # The "MTCARTA_sig" column is overwritten on every iteration.
    sc.tl.score_genes(metacells, gene_list=sig_genes, score_name="MTCARTA_sig")

    pathway_scores = metacells.obs["MTCARTA_sig"]
    val1 = pathway_scores[low_mt_mask]
    val2 = pathway_scores[high_mt_mask]
    if len(val1) and len(val2):
        _, p = mannwhitneyu(val1.values, val2.values)
    else:
        # mannwhitneyu rejects empty samples; record NaN instead of aborting the cell.
        p = float("nan")
    diffs[name] = [val1.median(), val2.median(), p]

# Rows = pathways, columns = the three statistics collected above.
diffs = pd.DataFrame(diffs,index=["Median LowMT","Median HighMT","MWU p"]).T.sort_values("MWU p")

diffs["Diff. median"] = diffs["Median HighMT"] - diffs["Median LowMT"]

diffs.sort_values("Diff. median").to_csv("/add/path/here/metabolic_dysregulation/mitocarta_met_res/UvealMelanoma_Durante_10X.csv")

In [None]:
# Per-metacell annotations and signature scores written out as one CSV.
exported_obs_columns = [
    'cleaned_celltype', 'Malignant', 'HighMT',
    'CYP genes', 'UGT genes', 'GST genes', 'ABC transporters',
    'O-Flanagan_dissociation_stress', 'O-Flanagan_dissociation_red',
    'Van-den-Brink_dissociation_stress', 'Machado_dissociation_stress',
    'Dissociation stress', 'Mito transfer',
]
metacells.obs[exported_obs_columns].to_csv("/add/path/here/info-metacells/uvealmelanoma.csv")

# LUAD Bischoff 10X

In [None]:
# Load the filtered single-cell object for this dataset.
adata = sc.read_h5ad(
    "/add/path/here/filtered_data/LUAD_Bischoff_10X/filtered_adata.h5ad"
)

all_samples = adata.obs["sample"].unique()

# Binary flags: pct_counts_mt above 15, and cell type labelled "Malignant".
adata.obs["HighMT"] = adata.obs["pct_counts_mt"].gt(15).astype(int)
adata.obs["Malignant"] = adata.obs["cleaned_celltype"].isin(["Malignant"]).astype(int)

In [None]:
# Keep only genes detected in at least 1% of the cells (threshold truncated to an int).
min_cells_detected = int(0.01 * adata.n_obs)
sc.pp.filter_genes(adata, min_cells=min_cells_detected)

# Normalize without the MT genes

In [None]:
# Restore X from the "counts" layer (copied, so the layer itself is not aliased).
adata.X = adata.layers["counts"].copy()

# Drop every gene whose symbol starts with "MT-".
is_mt_gene = adata.var_names.str.startswith("MT-")
adata = adata[:, adata.var_names[~is_mt_gene]].copy()

# Create metacells

In [None]:
# Build metacells; `adata` is rebound to the second object returned by get_metacells.
metacells, adata = get_metacells(
    adata,
    celltype_col="cleaned_celltype",
    sample_col="sample",
)

# Persist the metacell object and the per-cell obs table.
metacells.write("/add/path/here/metacell_data/LUAD_Bischoff_10X/metacells.h5ad")
adata.obs.to_csv("/add/path/here/metacell_data/LUAD_Bischoff_10X/adata_obs.csv")

# Analyze metacells

In [None]:
# Reload the saved metacell object so the analysis can start from disk.
metacells = sc.read_h5ad(
    "/add/path/here/metacell_data/LUAD_Bischoff_10X/metacells.h5ad"
)

In [None]:
# A metacell is flagged HighMT when its "HighMT_fraction_of_1" value exceeds 0.3.
highmt_fraction = metacells.obs["HighMT_fraction_of_1"]
metacells.obs["HighMT"] = highmt_fraction.gt(0.3).astype(int)

In [None]:
# Work on a copy of the UMI layer: the scanpy preprocessing calls below may modify X
# in place, and binding X to the layer object itself would let them overwrite
# layers["total_umis"] too. The counts reset earlier in this section copies as well.
metacells.X = metacells.layers["total_umis"].copy()

# Scale each metacell to a total of 10,000, then apply log1p.
sc.pp.normalize_total(metacells, target_sum=10000)
sc.pp.log1p(metacells)

In [None]:
# Expression of genes whose symbol starts with "ERB" or "EGFR", next to the
# Malignant / HighMT flags of each metacell.
erbb_mask = metacells.var_names.str.startswith(("ERB","EGFR"))
erbb_expression = metacells[:, metacells.var_names[erbb_mask]].to_df()
egfr_df = pd.concat(
    [erbb_expression, metacells.obs[["Malignant","HighMT"]]],
    axis=1,
)

egfr_df.to_csv("/add/path/here/egfr_expression_metacells/LUAD.csv")

In [None]:
fig, ax = plt.subplots(1,4, figsize=(11,2))
flatax = ax.flatten()

# Prefix-defined signatures: score every gene whose symbol starts with the prefix(es).
prefix_signatures = {
    "CYP genes": "CYP",
    "UGT genes": ("UGT","B3GAT"),
    "GST genes": "GST",
}
for detox_score, gene_prefix in prefix_signatures.items():
    prefix_genes = metacells.var_names[metacells.var_names.str.startswith(gene_prefix)]
    sc.tl.score_genes(metacells, gene_list=prefix_genes, score_name=detox_score)
# The ABC transporter list is taken from `kegg` rather than from a symbol prefix.
sc.tl.score_genes(
    metacells,
    gene_list=kegg['ABC transporters'],
    score_name="ABC transporters",
)

# One panel per signature, left to right: obs column -> panel title.
panel_titles = {
    "CYP genes": "CYP genes (phase I)",
    "UGT genes": "UGT genes (phase II)",
    "GST genes": "GST genes (phase II)",
    "ABC transporters": "ABC transporters (phase III)",
}
for panel, (detox_score, panel_title) in zip(flatax, panel_titles.items()):
    plot_boxplot(metacells, detox_score, name=panel_title, ax=panel)
for panel in flatax:
    pretty_ax(panel)

fig.tight_layout()

In [None]:
# Build the metacell embedding with scanpy defaults: PCA -> neighbour graph -> UMAP.
for embedding_step in (sc.tl.pca, sc.pp.neighbors, sc.tl.umap):
    embedding_step(metacells)

In [None]:
# Colour the metacell UMAP by MT status, malignancy flag and cell-type label.
umap_color_keys = ["HighMT", "Malignant", "cleaned_celltype"]
sc.pl.umap(metacells, color=umap_color_keys)

In [None]:
# Score the `common_disso_genes` gene list on every metacell.
sc.tl.score_genes(
    metacells,
    gene_list=common_disso_genes,
    score_name="Dissociation stress",
)

In [None]:
# Dissociation-stress signatures, keyed by the obs column each score is written to.
stress_gene_sets = {
    "O-Flanagan_dissociation_stress": list_core_genes,
    "O-Flanagan_dissociation_red": red_core_genes,
    "Van-den-Brink_dissociation_stress": dissociation_genes,
    "Machado_dissociation_stress": dissociation_genes_machado,
}
# `stress_sigs` keeps the score names in plotting order for the next cell.
stress_sigs = list(stress_gene_sets)
for stress_score_name, stress_gene_list in stress_gene_sets.items():
    sc.tl.score_genes(metacells, gene_list=stress_gene_list, score_name=stress_score_name)

In [None]:
# One annotated boxplot panel per dissociation-stress signature.
fig, ax = plt.subplots(1,4, figsize=(12,2))
flatax = ax.flatten()
for panel, pathway in zip(flatax, stress_sigs):
    plot_boxplot(metacells, pathway, ax=panel, name=pathway)
fig.tight_layout()

In [None]:
# Per-metacell variance across the genes of the current expression matrix.
metacells.obs["Transcriptome variance"] = (
    metacells.to_df().var(axis=1)
)

# One score column per GO signature, named after the signature key.
for sig in go_sigs:
    sc.tl.score_genes(metacells, gene_list=go_sigs[sig], score_name=sig)

fig, ax = plt.subplots(1,3, figsize=(9,2))
flatax = ax.flatten()
# GO panels fill the axes from the left; the panel title drops the " (GO..." suffix.
for panel_idx, sig in enumerate(go_sigs):
    plot_boxplot(metacells, sig, name=sig.partition(" (GO")[0], ax=flatax[panel_idx])
# Transcriptome variance always goes in the right-most panel.
plot_boxplot(
    metacells,
    "Transcriptome variance",
    name="Transcriptome variance",
    ax=flatax[-1],
)
fig.tight_layout()

In [None]:
# Score the `mito_transfer` gene list per metacell, then plot it in its own figure
# (plot_boxplot creates the figure when no axis is passed).
sc.tl.score_genes(
    metacells,
    gene_list=mito_transfer,
    score_name="Mito transfer",
)
plot_boxplot(metacells, "Mito transfer", name="Mito transfer")

In [None]:
# Screen every MitoCarta pathway for a score shift between low-MT and high-MT
# malignant metacells (Mann-Whitney U test on the per-metacell pathway score).
# NOTE: both imports duplicate the notebook's top import cell; they are kept so
# this cell can be re-run on its own.
from scipy.stats import mannwhitneyu
from tqdm.notebook import tqdm

# The malignant low-/high-MT split does not depend on the pathway being scored,
# so build the boolean masks once instead of re-filtering `obs` for each pathway.
is_malignant = metacells.obs["Malignant"] == 1
low_mt_mask = is_malignant & (metacells.obs["HighMT"] == 0)
high_mt_mask = is_malignant & (metacells.obs["HighMT"] == 1)

diffs = {}
for name in tqdm(mitocarta_pathways.index):
    sig_genes = mitocarta_pathways.loc[name].Genes.split(", ")
    # Skip pathways with no gene present in the metacell matrix.
    if metacells.var_names.intersection(sig_genes).empty:
        continue
    # The "MTCARTA_sig" column is overwritten on every iteration.
    sc.tl.score_genes(metacells, gene_list=sig_genes, score_name="MTCARTA_sig")

    pathway_scores = metacells.obs["MTCARTA_sig"]
    val1 = pathway_scores[low_mt_mask]
    val2 = pathway_scores[high_mt_mask]
    if len(val1) and len(val2):
        _, p = mannwhitneyu(val1.values, val2.values)
    else:
        # mannwhitneyu rejects empty samples; record NaN instead of aborting the cell.
        p = float("nan")
    diffs[name] = [val1.median(), val2.median(), p]

# Rows = pathways, columns = the three statistics collected above.
diffs = pd.DataFrame(diffs,index=["Median LowMT","Median HighMT","MWU p"]).T.sort_values("MWU p")

diffs["Diff. median"] = diffs["Median HighMT"] - diffs["Median LowMT"]

diffs.sort_values("Diff. median").to_csv("/add/path/here/metabolic_dysregulation/mitocarta_met_res/LUAD_Bischoff_10X.csv")

In [None]:
# Per-metacell annotations and signature scores written out as one CSV.
exported_obs_columns = [
    'cleaned_celltype', 'Malignant', 'HighMT',
    'CYP genes', 'UGT genes', 'GST genes', 'ABC transporters',
    'O-Flanagan_dissociation_stress', 'O-Flanagan_dissociation_red',
    'Van-den-Brink_dissociation_stress', 'Machado_dissociation_stress',
    'Dissociation stress', 'Mito transfer',
]
metacells.obs[exported_obs_columns].to_csv("/add/path/here/info-metacells/luad.csv")