# Analysis Part VIII - Metabolism

In [None]:
%load_ext autoreload
%matplotlib inline

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore')
import os
import scanpy as sc
import scirpy as ir
import anndata as ann
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mudata import MuData
import mudata

import tarfile
import warnings
from glob import glob

import anndata
import matplotlib.pyplot as plt
import muon as mu
import pandas as pd
import scanpy as sc
import scirpy as ir

%autoreload 2
import sys
sys.path.append('..')
import utility.annotation as utils_annotation
import utility.representation as utils_representation
import utility.visualisation as utils_vis

from sklearn.mixture import GaussianMixture

In [None]:
from matplotlib.colors import LinearSegmentedColormap

sc.settings.verbosity = 3
# Configure figures in ONE call. set_figure_params applies its own default for
# every argument that is not passed, so a second call without `dpi` would
# silently replace the dpi=150 requested by an earlier call.
sc.set_figure_params(dpi=150, vector_friendly=False, color_map='viridis', transparent=True)
sb.set_style('whitegrid')

# Blue -> yellow -> red colormap interpolated from seven named stops.
colors = ['darkblue', 'blue', 'lightblue', '#FFEA00', 'orange', 'red', 'darkred']
cust = LinearSegmentedColormap.from_list('custom_cmap', colors)
colormap = cust

In [None]:
# Seven colour stops (dark blue -> pale yellow -> dark red) interpolated into
# a continuous colormap.
colors_katha = [
    '#33378F',
    '#669AC7',
    '#BFE0EC',
    '#FDF8C0',
    '#FBBC6C',
    '#EB5638',
    '#A71D2B',
]
cust_katha = LinearSegmentedColormap.from_list(name='custom_cmap', colors=colors_katha)

In [None]:
# Load the annotated multimodal dataset; note that the path is absolute and
# specific to one machine.
mdata = mu.read('/Users/mimi/Sina/data_specificity_annotated_final_pseudotime_cite.h5mu')

## Pathway Analysis using DecoupleR

In [None]:
import decoupler as dc

In [None]:
# Display the installed decoupler version.
dc.__version__

### Prepare the pathways

In [None]:
# Fetch the MSigDB resource; the cells below rely on its 'collection',
# 'geneset' and 'genesymbol' columns.
msigdb = dc.get_resource('MSigDB')

In [None]:
# Split MSigDB into one frame per collection and keep only the first
# occurrence of every (geneset, genesymbol) pair within each of them.
msigdb_hallmark = (
    msigdb.loc[msigdb['collection'] == 'hallmark']
    .drop_duplicates(subset=['geneset', 'genesymbol'])
)
msigdb_kegg = (
    msigdb.loc[msigdb['collection'] == 'kegg_pathways']
    .drop_duplicates(subset=['geneset', 'genesymbol'])
)
msigdb_cell_type = (
    msigdb.loc[msigdb['collection'] == 'cell_type_signatures']
    .drop_duplicates(subset=['geneset', 'genesymbol'])
)
msigdb_gobp = (
    msigdb.loc[msigdb['collection'] == 'go_biological_process']
    .drop_duplicates(subset=['geneset', 'genesymbol'])
)
msigdb_reactome = (
    msigdb.loc[msigdb['collection'] == 'reactome_pathways']
    .drop_duplicates(subset=['geneset', 'genesymbol'])
)

In [None]:
# Header-less CSV; its first column is used below as the list of hallmark
# gene-set names.
hallmark_selection = pd.read_csv('hallmark_selection_inkl_category.csv', header=None)

In [None]:
# Header-less CSV; its first column is used below as the list of KEGG
# gene-set names.
kegg_selection = pd.read_csv('KEGG_selection_inkl_category.csv', header=None)

In [None]:
# Header-less CSV; its first column lists the gene sets that are later merged
# into the combined 'ANABOLISM' set.
anabolism = pd.read_csv('Anabolism.csv', header=None)

In [None]:
# Reduce the frame to a plain list of the names in its first column.
anabolism = anabolism[0].tolist()

In [None]:
# Custom gene sets (first row is the header). They are concatenated with the
# MSigDB selection below and used as a decoupler network - presumably with
# geneset / genesymbol / weight columns; verify against the file.
own_pathways = pd.read_csv('OWN_PATHWAYS.csv', header=0)

In [None]:
# Header-less CSV; its first column is used below as the list of GO gene-set
# names.
GO_terms = pd.read_csv('GO_Selection.csv', header=None)

In [None]:
# GO gene-set names as a plain list.
GO_select = GO_terms[0].tolist()

In [None]:
# Hallmark gene-set names as a plain list.
hallmark_select = hallmark_selection[0].tolist()

In [None]:
# KEGG gene-set names as a plain list.
kegg_select = kegg_selection[0].tolist()

In [None]:
# Hallmark rows whose gene set is on the selection list.
msigdb_hallmark_select = msigdb_hallmark.loc[msigdb_hallmark['geneset'].isin(hallmark_select)]

In [None]:
# KEGG rows whose gene set is on the selection list.
msigdb_kegg_select = msigdb_kegg.loc[msigdb_kegg['geneset'].isin(kegg_select)]

In [None]:
# Rows from any MSigDB collection whose gene set is on the GO list, reduced to
# unique (geneset, genesymbol) pairs.
msigdb_go_select = (
    msigdb.loc[msigdb['geneset'].isin(GO_select)]
    .drop_duplicates(subset=['geneset', 'genesymbol'])
)

### KEGG / OWN / GOCC / GOBP

In [None]:
# GO + KEGG names, and the same list extended by the hallmark names.
selection = [*GO_select, *kegg_select]
selection2 = [*selection, *hallmark_select]

In [None]:
# MSigDB rows belonging to either name list.
msigdb_selection = msigdb.loc[msigdb['geneset'].isin(selection)]
msigdb_selection2 = msigdb.loc[msigdb['geneset'].isin(selection2)]

In [None]:
# Keep the first occurrence of every (geneset, genesymbol) pair.
msigdb_selection = msigdb_selection.drop_duplicates(subset=['geneset', 'genesymbol'])
msigdb_selection2 = msigdb_selection2.drop_duplicates(subset=['geneset', 'genesymbol'])

In [None]:
# Every gene gets the same weight of 1 in its gene set.
msigdb_selection = msigdb_selection.assign(weight=1)
msigdb_selection2 = msigdb_selection2.assign(weight=1)

In [None]:
# Append the custom gene sets to each MSigDB selection. reset_index() is
# called without drop=True, so the previous row labels are kept as an extra
# 'index' column in the result.
selected_pathways = pd.concat([msigdb_selection, own_pathways]).reset_index()
selected_pathways2 = pd.concat([msigdb_selection2, own_pathways]).reset_index()

In [None]:
# Rows of the gene sets listed as anabolic.
ana = selected_pathways.loc[selected_pathways['geneset'].isin(anabolism)]

In [None]:
# One row per gene symbol (first occurrence wins).
ana_filtered = ana.drop_duplicates(subset=['genesymbol'])

In [None]:
# Display the gene-deduplicated anabolic rows.
ana_filtered

In [None]:
# Relabel all anabolic rows as one combined gene set called 'ANABOLISM'.
ana_filtered = ana_filtered.assign(geneset='ANABOLISM')
ana = ana.assign(geneset='ANABOLISM')

ana

In [None]:
# Add the combined ANABOLISM set to the selected pathways:
# selected_pathways3 uses the rows that still repeat genes shared between
# anabolic sets, selected_pathways4 uses the one-row-per-gene version.
# reset_index() again keeps the previous row labels as a column.
selected_pathways3 = pd.concat([selected_pathways, ana]).reset_index()
selected_pathways4 = pd.concat([selected_pathways, ana_filtered]).reset_index()

## Use mlm for metabolic pathways
As ORA only takes the top 5% of the genes to test for gene-set overrepresentation, highly expressed genes like the TCR or anything similar may skew the results for the individual pathways. Therefore, I want to test whether the result differs if I use the multivariate linear model that was also used for the PROGENy pathways.

In [None]:
# Fit decoupler's multivariate linear model on the 'gex' modality, using the
# network in which ANABOLISM holds each gene once (selected_pathways4).
# The return value is not captured; the next cell reads the results from
# obsm['mlm_estimate'] and obsm['mlm_pvals'].
dc.run_mlm(
    mat=mdata['gex'],
    net=selected_pathways4,
    source='geneset',
    target='genesymbol',
    weight='weight',
    use_raw = False,
    verbose=True
)

In [None]:
# Store copies of the MLM estimates and p-values under analysis-specific keys.
for _kind in ('estimate', 'pvals'):
    mdata['gex'].obsm[f'selected_pathways_mlm_{_kind}'] = mdata['gex'].obsm[f'mlm_{_kind}'].copy()

In [None]:
# Extract the stored pathway estimates into `acts`, which is used below for
# group rankings and for colouring UMAPs by pathway.
acts = dc.get_acts(mdata['gex'], obsm_key='selected_pathways_mlm_estimate')

#### General differences between specific and unspecific cells

In [None]:
# General differences between specific and unspecific cells, with the
# 'no_binding' group as reference.
df = dc.rank_sources_groups(
    acts,
    groupby='specific_new',
    reference='no_binding',
    method='t-test_overestim_var',
)
df.to_csv(
    '/Users/mimi/Sina/5_Visualisations/Figure2/Selected_Pathway_pvals_mlm_specific_vs_unspecific_inkl_seumois_ana_filtered.csv'
)

#### General differences between leiden clusters in specific cells

In [None]:
# Differences between leiden clusters within the NS4B214-specific cells,
# each cluster against the rest.
df = dc.rank_sources_groups(
    acts[acts.obs['specific_new'] == 'NS4B214'],
    groupby='leiden',
    reference='rest',
    method='t-test_overestim_var',
)
df.to_csv(
    '/Users/mimi/Sina/5_Visualisations/Figure2/Selected_Pathway_pvals_mlm_specific_over_leiden_inkl_seumois_ana_filtered.csv'
)

In [None]:
# Differences between leiden clusters within the NS4B214-specific cells, once
# against cluster 4 and once against cluster 10. After the loop `df` holds the
# cluster-10 comparison.
for _reference in ('4', '10'):
    df = dc.rank_sources_groups(
        acts[acts.obs['specific_new'] == 'NS4B214'],
        groupby='leiden',
        reference=_reference,
        method='t-test_overestim_var',
    )
    df.to_csv(
        '/Users/mimi/Sina/5_Visualisations/Figure2/'
        f'Selected_Pathway_pvals_mlm_specific_over_leiden_inkl_seumois_ana_filtered_vs_cluster{_reference}.csv'
    )

#### Generate plots for those per cluster

In [None]:
# Negative natural logarithm of the adjusted p-value (np.log is base e).
df['logpval'] = -np.log(df['pvals_adj'])

In [None]:
# Write the unique gene-set names to a CSV in the working directory.
pd.DataFrame(selected_pathways['geneset'].unique()).to_csv('selected_pathways.csv')

In [None]:
# Semicolon-separated, header-less table: column 0 is matched against pathway
# names and column 1 supplies the category (see the next cell).
selected_pathways_category_annotation = pd.read_csv('selected_pathways_inkl_category.csv', header=None, sep=';')

In [None]:
# Add categories to df.
# Build a pathway -> category lookup from the first two annotation columns and
# map it onto the pathway names; pathways missing from the annotation stay NaN.
# This replaces the chained assignment df['category'][mask] = ..., which is not
# guaranteed to write into df (it is a no-op under pandas Copy-on-Write), and
# the np.NaN alias, which no longer exists in NumPy 2.0.
_category_by_pathway = dict(
    zip(selected_pathways_category_annotation[0], selected_pathways_category_annotation[1])
)
df['category'] = df['names'].map(_category_by_pathway)

In [None]:
# One ranking table and one scatter plot per cluster that actually occurs in
# `df`, instead of a hard-coded range(13): clusters above 12 are no longer
# dropped and absent clusters no longer yield empty tables.
# Sorting by (length, text) puts numeric labels in numeric order.
for i in sorted(df['group'].unique(), key=lambda label: (len(str(label)), str(label))):
    table = df[df['group'] == i]
    table = table.sort_values(by=['logpval']).reset_index()
    table.to_csv(f'/Users/mimi/Sina/5_Visualisations/Figure2/Selected_Pathway_pvals_mlm_specific_over_leiden_Cluster{i}.csv')
    ax = sb.scatterplot(data=table, x=table.index, y='logpval', hue='category')
    # No legend is drawn when every category is missing; move_legend would raise.
    if ax.get_legend() is not None:
        sb.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
    plt.title('Cluster '+str(i))
    plt.savefig(f'/Users/mimi/Sina/5_Visualisations/Figure2/Selected_Pathway_pvals_mlm_specific_over_leiden_Cluster{i}.pdf', dpi=300, bbox_inches='tight')
    plt.show()

### Pvalue Plots and rankings as before

In [None]:
# General differences between specific and unspecific cells, with the
# 'no_binding' group as reference.
# NOTE(review): `acts` is whatever the most recent dc.get_acts call produced. In
# this file that is the run on selected_pathways4, which is built from the
# GO + KEGG selection without the hallmark sets, although the output file name
# says "inkl_hallmark" - confirm which network was intended.
df = dc.rank_sources_groups(
    acts,
    groupby='specific_new',
    reference='no_binding',
    method='t-test_overestim_var',
)
df.to_csv(
    '/Users/mimi/Sina/5_Visualisations/Figure2/Selected_Pathway_inkl_hallmark_pvals_mlm_specific_vs_unspecific.csv'
)

In [None]:
# Differences between leiden clusters within the NS4B214-specific cells of
# the time points d7, d11 and d14, each cluster against the rest.
df = dc.rank_sources_groups(
    acts[(acts.obs['specific_new'] == 'NS4B214') & (acts.obs['time'].isin(['d7', 'd11', 'd14']))],
    groupby='leiden',
    reference='rest',
    method='t-test_overestim_var',
)
df.to_csv(
    '/Users/mimi/Sina/5_Visualisations/Figure2/Selected_Pathway_inkl_hallmark_pvals_mlm_specific_over_leiden_early.csv'
)

In [None]:
# Negative natural logarithm of the adjusted p-value (np.log is base e).
df['logpval'] = -np.log(df['pvals_adj'])

In [None]:
# Write the unique gene-set names (hallmark sets included) to a CSV in the
# working directory.
pd.DataFrame(selected_pathways2['geneset'].unique()).to_csv('selected_pathways2.csv')

In [None]:
# Semicolon-separated, header-less table: column 0 holds pathway names and
# column 1 the category assigned to each.
selected_pathways_category_annotation = pd.read_csv('selected_pathways2_inkl_category.csv', header=None, sep=';')

# Add categories to df via a pathway -> category lookup; pathways missing
# from the annotation stay NaN. This replaces the chained assignment
# df['category'][mask] = ..., which is not guaranteed to write into df (no-op
# under pandas Copy-on-Write), and np.NaN, which is gone in NumPy 2.0.
_category_by_pathway = dict(
    zip(selected_pathways_category_annotation[0], selected_pathways_category_annotation[1])
)
df['category'] = df['names'].map(_category_by_pathway)

# One table and one scatter plot per cluster that actually occurs in `df`,
# instead of a hard-coded range(13). Sorting by (length, text) puts numeric
# labels in numeric order.
for i in sorted(df['group'].unique(), key=lambda label: (len(str(label)), str(label))):
    table = df[df['group'] == i]
    table = table.sort_values(by=['logpval']).reset_index()
    table.to_csv(f'/Users/mimi/Sina/5_Visualisations/Figure2/Selected_Pathway_inkl_Hallmark_pvals_mlm_specific_over_leiden_Cluster{i}.csv')
    ax = sb.scatterplot(data=table, x=table.index, y='logpval', hue='category')
    # No legend is drawn when every category is missing; move_legend would raise.
    if ax.get_legend() is not None:
        sb.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
    plt.title('Cluster '+str(i))
    plt.savefig(f'/Users/mimi/Sina/5_Visualisations/Figure2/Selected_Pathway_inkl_Hallmark_pvals_mlm_specific_over_leiden_Cluster{i}.pdf', dpi=300, bbox_inches='tight')
    plt.show()

##### UMAPs for all pathways

In [None]:
# Seven colour stops (dark blue -> pale yellow -> dark red) interpolated into
# a continuous colormap.
colors_katha = [
    '#33378F',
    '#669AC7',
    '#BFE0EC',
    '#FDF8C0',
    '#FBBC6C',
    '#EB5638',
    '#A71D2B',
]
cust_katha = LinearSegmentedColormap.from_list(name='custom_cmap', colors=colors_katha)

In [None]:
# Pathways to plot. Alternative covering every gene set:
#pw = list(selected_pathways3['geneset'].unique())
pw = [
    'MEVALONATE PATHWAY',
    'PI3K-AKT-MTOR-MYC SIGNALING',
    'METHIONINE CYCLE',
    'HIF PATHWAY',
    'ARGININE-PROLINE METABOLISM',
    'apoptosis',
    'quiescence',
    'ANABOLISM',
    'Type I and II IFN signaling genes',
    'Cytotoxic signature genes',
]

# Each panel draws all cells uncoloured as background and overlays one subset
# coloured by the pathway score.
# Entries: (cell mask, or None for all cells; file-name tag; dpi of the PDF).
_is_specific = acts.obs['specific_new'] == 'NS4B214'
_panels = [
    (_is_specific & acts.obs['time'].isin(['d7', 'd11', 'd14']), 'd7-14_specific_cells', 300),  # early
    (_is_specific & acts.obs['time'].isin(['d90', 'd365', 'dx']), 'd90-x_specific_cells', 600),  # late
    (None, 'all_cells', 300),  # all
    (_is_specific, 'all_specific_cells', 300),  # all specific
    (_is_specific & acts.obs['time'].isin(['d14']), 'd14_specific_cells', 300),  # d14
    (_is_specific & acts.obs['time'].isin(['d365']), 'd365_specific_cells', 300),  # d365
]

for pathway in pw:
    for _mask, _tag, _dpi in _panels:
        ax = sc.pl.umap(acts, cmap=cust, show=False, size=30)
        _foreground = acts if _mask is None else acts[_mask]
        sc.pl.umap(_foreground, color=pathway, cmap=cust_katha, show=False, size=30, ax=ax)
        plt.tight_layout()
        plt.savefig(
            f"/Users/mimi/Sina/5_Visualisations/Figure2/{pathway}_UMAP_{_tag}_kathas_color.pdf",
            dpi=_dpi,
            bbox_inches='tight',
        )
        plt.show()

##### Scores for all cells and pathways

In [None]:
# Per-cell score table for export (all cells, no filtering).
# Work on a copy: without it `table` is the obsm DataFrame itself, and the
# metadata columns added below would be written into
# mdata['gex'].obsm['selected_pathways_mlm_estimate'].
table = mdata['gex'].obsm['selected_pathways_mlm_estimate'].copy()
table['specific_new'] = mdata['gex'].obs['specific_new']
table['time'] = mdata['gex'].obs['time']

# Coarse time class; time points not listed below keep the default 'naive'.
# .loc is used because chained assignment (table['class'][mask] = ...) is not
# guaranteed to write into `table`.
table['class'] = 'naive'
for _label, _timepoints in (
    ('early', ['d7', 'd11', 'd14']),
    ('intermediate', ['d21', 'd28', 'd49']),
    ('late', ['d90', 'd365', 'dx']),
):
    table.loc[table['time'].isin(_timepoints), 'class'] = _label

table['cluster'] = mdata['gex'].obs['leiden']
table['FACS'] = mdata['gex'].obs['FACS_Phenotype_v3']

In [None]:
# Export the per-cell score table together with its metadata columns.
table.to_csv('/Users/mimi/Sina/5_Visualisations/Figure2/Score_table_inkl_seumois_ana_filtered_new_all_cells.csv')

In [None]:
# Mean GOBP_T_CELL_PROLIFERATION score for each cluster label '0' .. '13'.
for _cluster_id in map(str, range(14)):
    _in_cluster = table['cluster'] == _cluster_id
    print(table.loc[_in_cluster, ['GOBP_T_CELL_PROLIFERATION']].mean())

## Density Plots

In [None]:
# Pathways for which density plots are generated. Each name is used below as
# a column of obsm['selected_pathways_mlm_estimate'], so it must match a
# scored gene set exactly.
pw = ['GOCC_MITOCHONDRION',
 'GOBP_CELLULAR_RESPONSE_TO_RETINOIC_ACID',
 'KEGG_ABC_TRANSPORTERS',
 'KEGG_GAP_JUNCTION',
 'KEGG_CYTOKINE_CYTOKINE_RECEPTOR_INTERACTION',
 'KEGG_TGF_BETA_SIGNALING_PATHWAY',
 'KEGG_HEDGEHOG_SIGNALING_PATHWAY',
 'KEGG_APOPTOSIS',
 'GOBP_T_CELL_PROLIFERATION',
 'KEGG_NEUROTROPHIN_SIGNALING_PATHWAY',
 'KEGG_NOD_LIKE_RECEPTOR_SIGNALING_PATHWAY',
 'KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_LACTO_AND_NEOLACTO_SERIES',
 'KEGG_CHEMOKINE_SIGNALING_PATHWAY',
 'KEGG_NATURAL_KILLER_CELL_MEDIATED_CYTOTOXICITY',
 'KEGG_JAK_STAT_SIGNALING_PATHWAY',
 'KEGG_MAPK_SIGNALING_PATHWAY',
 'KEGG_TOLL_LIKE_RECEPTOR_SIGNALING_PATHWAY',
 'KEGG_T_CELL_RECEPTOR_SIGNALING_PATHWAY',
 'KEGG_NOTCH_SIGNALING_PATHWAY',
 'GOBP_RESPONSE_TO_ENDOPLASMIC_RETICULUM_STRESS',
 'KEGG_FOCAL_ADHESION',
 'KEGG_MTOR_SIGNALING_PATHWAY',
 'KEGG_VEGF_SIGNALING_PATHWAY',
 'KEGG_SPHINGOLIPID_METABOLISM',
 'KEGG_FC_GAMMA_R_MEDIATED_PHAGOCYTOSIS',
 'KEGG_CALCIUM_SIGNALING_PATHWAY',
 'KEGG_CELL_ADHESION_MOLECULES_CAMS',
 'KEGG_LEUKOCYTE_TRANSENDOTHELIAL_MIGRATION',
 'GOBP_RETINOIC_ACID_RECEPTOR_SIGNALING_PATHWAY',
 'KEGG_ADIPOCYTOKINE_SIGNALING_PATHWAY',
 'KEGG_CYTOSOLIC_DNA_SENSING_PATHWAY',
 'KEGG_RIG_I_LIKE_RECEPTOR_SIGNALING_PATHWAY',
 'KEGG_B_CELL_RECEPTOR_SIGNALING_PATHWAY',
 'KEGG_HEMATOPOIETIC_CELL_LINEAGE',
 'GOBP_TRANSLATIONAL_INITIATION',
 'KEGG_FC_EPSILON_RI_SIGNALING_PATHWAY',
 'KEGG_ETHER_LIPID_METABOLISM',
 'KEGG_GLYCEROLIPID_METABOLISM',
 'KEGG_GLYCEROPHOSPHOLIPID_METABOLISM',
 'KEGG_ECM_RECEPTOR_INTERACTION',
 'KEGG_CELL_CYCLE',
 'KEGG_WNT_SIGNALING_PATHWAY',
 'KEGG_ERBB_SIGNALING_PATHWAY',
 'KEGG_GNRH_SIGNALING_PATHWAY',
 'KEGG_P53_SIGNALING_PATHWAY',
 'KEGG_O_GLYCAN_BIOSYNTHESIS',
 'KEGG_INSULIN_SIGNALING_PATHWAY',
 'KEGG_UBIQUITIN_MEDIATED_PROTEOLYSIS',
 'KEGG_PPAR_SIGNALING_PATHWAY',
 'KEGG_FRUCTOSE_AND_MANNOSE_METABOLISM',
 'KEGG_ARGININE_AND_PROLINE_METABOLISM',
 'KEGG_FOLATE_BIOSYNTHESIS',
 'GOBP_RESPIRATORY_ELECTRON_TRANSPORT_CHAIN',
 'KEGG_PEROXISOME',
 'KEGG_N_GLYCAN_BIOSYNTHESIS',
 'KEGG_GALACTOSE_METABOLISM',
 'KEGG_GLYCOSAMINOGLYCAN_BIOSYNTHESIS_KERATAN_SULFATE',
 'GOBP_NAD_METABOLIC_PROCESS',
 'GOBP_NAD_BIOSYNTHESIS_VIA_NICOTINAMIDE_RIBOSIDE_SALVAGE_PATHWAY',
 'KEGG_NICOTINATE_AND_NICOTINAMIDE_METABOLISM',
 'KEGG_ANTIGEN_PROCESSING_AND_PRESENTATION',
 'KEGG_ENDOCYTOSIS',
 'KEGG_MELANOGENESIS',
 'KEGG_ARACHIDONIC_ACID_METABOLISM',
 'KEGG_NEUROACTIVE_LIGAND_RECEPTOR_INTERACTION',
 'KEGG_ALDOSTERONE_REGULATED_SODIUM_REABSORPTION',
 'KEGG_INTESTINAL_IMMUNE_NETWORK_FOR_IGA_PRODUCTION',
 'KEGG_ADHERENS_JUNCTION',
 'KEGG_PURINE_METABOLISM',
 'KEGG_TRYPTOPHAN_METABOLISM',
 'GOBP_FATTY_ACID_BETA_OXIDATION',
 'KEGG_ALANINE_ASPARTATE_AND_GLUTAMATE_METABOLISM',
 'KEGG_AMINO_SUGAR_AND_NUCLEOTIDE_SUGAR_METABOLISM',
 'KEGG_NITROGEN_METABOLISM',
 'KEGG_GLYCOLYSIS_GLUCONEOGENESIS',
 'KEGG_STARCH_AND_SUCROSE_METABOLISM',
 'KEGG_PENTOSE_PHOSPHATE_PATHWAY',
 'KEGG_GLYCOSAMINOGLYCAN_BIOSYNTHESIS_HEPARAN_SULFATE',
 'KEGG_RNA_DEGRADATION',
 'KEGG_REGULATION_OF_ACTIN_CYTOSKELETON',
 'KEGG_PORPHYRIN_AND_CHLOROPHYLL_METABOLISM',
 'KEGG_GLYCOSAMINOGLYCAN_BIOSYNTHESIS_CHONDROITIN_SULFATE',
 'KEGG_PROPANOATE_METABOLISM',
 'KEGG_CYSTEINE_AND_METHIONINE_METABOLISM',
 'KEGG_PYRUVATE_METABOLISM',
 'KEGG_PROTEIN_EXPORT',
 'GOBP_RIBOSOME_BIOGENESIS',
 'KEGG_LYSOSOME',
 'KEGG_GLYCOSAMINOGLYCAN_DEGRADATION',
 'KEGG_GLYOXYLATE_AND_DICARBOXYLATE_METABOLISM',
 'KEGG_TIGHT_JUNCTION',
 'KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_GANGLIO_SERIES',
 'KEGG_GLYCOSPHINGOLIPID_BIOSYNTHESIS_GLOBO_SERIES',
 'KEGG_OTHER_GLYCAN_DEGRADATION',
 'KEGG_STEROID_HORMONE_BIOSYNTHESIS',
 'KEGG_SULFUR_METABOLISM',
 'KEGG_CITRATE_CYCLE_TCA_CYCLE',
 'KEGG_PHENYLALANINE_METABOLISM',
 'KEGG_TYROSINE_METABOLISM',
 'KEGG_PENTOSE_AND_GLUCURONATE_INTERCONVERSIONS',
 'KEGG_PHOSPHATIDYLINOSITOL_SIGNALING_SYSTEM',
 'KEGG_INOSITOL_PHOSPHATE_METABOLISM',
 'KEGG_METABOLISM_OF_XENOBIOTICS_BY_CYTOCHROME_P450',
 'KEGG_DRUG_METABOLISM_CYTOCHROME_P450',
 'KEGG_GLUTATHIONE_METABOLISM',
 'KEGG_STEROID_BIOSYNTHESIS',
 'KEGG_RETINOL_METABOLISM',
 'KEGG_FATTY_ACID_METABOLISM',
 'KEGG_TERPENOID_BACKBONE_BIOSYNTHESIS',
 'GOBP_TRANSLATIONAL_ELONGATION',
 'KEGG_BIOSYNTHESIS_OF_UNSATURATED_FATTY_ACIDS',
 'KEGG_GLYCINE_SERINE_AND_THREONINE_METABOLISM',
 'KEGG_SNARE_INTERACTIONS_IN_VESICULAR_TRANSPORT',
 'KEGG_BUTANOATE_METABOLISM',
 'KEGG_VALINE_LEUCINE_AND_ISOLEUCINE_DEGRADATION',
 'KEGG_SELENOAMINO_ACID_METABOLISM',
 'KEGG_ALPHA_LINOLENIC_ACID_METABOLISM',
 'KEGG_LYSINE_DEGRADATION',
 'KEGG_DRUG_METABOLISM_OTHER_ENZYMES',
 'KEGG_REGULATION_OF_AUTOPHAGY',
 'GOBP_MITOCHONDRIAL_TRANSLATION',
 'KEGG_MISMATCH_REPAIR',
 'KEGG_NUCLEOTIDE_EXCISION_REPAIR',
 'KEGG_DNA_REPLICATION',
 'KEGG_HOMOLOGOUS_RECOMBINATION',
 'KEGG_LINOLEIC_ACID_METABOLISM',
 'KEGG_NON_HOMOLOGOUS_END_JOINING',
 'KEGG_OXIDATIVE_PHOSPHORYLATION',
 'KEGG_BASAL_TRANSCRIPTION_FACTORS',
 'KEGG_PYRIMIDINE_METABOLISM',
 'KEGG_BASE_EXCISION_REPAIR',
 'KEGG_RNA_POLYMERASE',
 'KEGG_ONE_CARBON_POOL_BY_FOLATE',
 'KEGG_SPLICEOSOME',
 'GOCC_RIBOSOMAL_SUBUNIT',
 'KEGG_PANTOTHENATE_AND_COA_BIOSYNTHESIS',
 'KEGG_BETA_ALANINE_METABOLISM',
 'KEGG_LIMONENE_AND_PINENE_DEGRADATION',
 'KEGG_ASCORBATE_AND_ALDARATE_METABOLISM',
 'KEGG_HISTIDINE_METABOLISM',
 'GOBP_NADH_METABOLIC_PROCESS',
 'KEGG_PRIMARY_BILE_ACID_BIOSYNTHESIS',
 'GOBP_FATTY_ACID_ELONGATION',
 'GOCC_CYTOSOLIC_RIBOSOME',
 'KEGG_PROTEASOME',
 'KEGG_AMINOACYL_TRNA_BIOSYNTHESIS',
 'KEGG_VALINE_LEUCINE_AND_ISOLEUCINE_BIOSYNTHESIS',
 'KEGG_RIBOSOME',
 'KEGG_RIBOFLAVIN_METABOLISM',
 'KEGG_TAURINE_AND_HYPOTAURINE_METABOLISM',
 'KEGG_GLYCOSYLPHOSPHATIDYLINOSITOL_GPI_ANCHOR_BIOSYNTHESIS',
 'FATTY ACID SYNTHESIS',
 'GLUTAMINE CATABOLISM',
 'POLYAMINE SYNTHESIS',
 'MEVALONATE PATHWAY',
 'PI3K-AKT-MTOR-MYC SIGNALING',
 'METHIONINE CYCLE',
 'HIF PATHWAY',
 'ARGININE-PROLINE METABOLISM',
 'apoptosis',
 'quiescence', 'ANABOLISM', 'Type I and II IFN signaling genes', 'Cytotoxic signature genes']

In [None]:
# Per pathway, add an obs column usable as a category: cells scoring above
# the pathway's mean carry the pathway name, every other cell carries 'no'.
for pathway in pw:
    scores = mdata['gex'].obsm['selected_pathways_mlm_estimate'][pathway]
    above_mean = scores > scores.mean()
    mdata['gex'].obs[pathway] = np.where(above_mean, pathway, 'no')

In [None]:
# Composite scores stored as extra columns of the estimate table:
# 'oxgly'  = oxidative phosphorylation + glycolysis/gluconeogenesis
# 'transl' = translational initiation + elongation + ribosomal subunit
_estimates = mdata['gex'].obsm['selected_pathways_mlm_estimate']
_estimates['oxgly'] = (
    _estimates['KEGG_OXIDATIVE_PHOSPHORYLATION']
    + _estimates['KEGG_GLYCOLYSIS_GLUCONEOGENESIS']
)
_estimates['transl'] = (
    _estimates['GOBP_TRANSLATIONAL_INITIATION']
    + _estimates['GOBP_TRANSLATIONAL_ELONGATION']
    + _estimates['GOCC_RIBOSOMAL_SUBUNIT']
)

In [None]:
# Same above-mean labelling for the two composite scores: the score name for
# cells above the mean, 'no' for every other cell.
for composite in ('oxgly', 'transl'):
    scores = mdata['gex'].obsm['selected_pathways_mlm_estimate'][composite]
    above_mean = scores > scores.mean()
    mdata['gex'].obs[composite] = np.where(above_mean, composite, 'no')

In [None]:
# Embedding density of the above-mean cells, one figure per pathway.
for i in pw:
    sc.tl.embedding_density(mdata['gex'], basis='umap', groupby=i)
    sc.pl.embedding_density(mdata['gex'], basis='umap', key='umap_density_'+str(i), show=False, color_map=cust_katha, group=i)
    plt.tight_layout()
    plt.savefig(f'/Users/mimi/Sina/5_Visualisations/Figure2/{i}_density_plot.pdf', dpi=300)
    # Show and release the figure inside the loop, as the other plotting loops
    # in this notebook do; otherwise one figure per pathway stays open until
    # the cell finishes.
    plt.show()
    plt.close()

In [None]:
# Embedding density of the above-mean cells for the two composite scores.
for i in ['oxgly', 'transl']:
    sc.tl.embedding_density(mdata['gex'], basis='umap', groupby=i)
    sc.pl.embedding_density(mdata['gex'], basis='umap', key='umap_density_'+str(i), show=False, color_map=cust_katha, group=i)
    plt.tight_layout()
    plt.savefig(f'/Users/mimi/Sina/5_Visualisations/Figure2/{i}_density_plot.pdf', dpi=300)
    # Show and release the figure inside the loop, as the other plotting loops
    # in this notebook do, so figures do not accumulate.
    plt.show()
    plt.close()

## Transcription Factor Networks

In [None]:
# Fetch the human CollecTRI network; it is passed to run_ulm below with
# 'source', 'target' and 'weight' as column names.
net = dc.get_collectri(organism='human', split_complexes=False)

In [None]:
# Fit decoupler's univariate linear model on the 'gex' modality with the
# CollecTRI network. The return value is not captured; the next cell reads
# the results from obsm['ulm_estimate'] and obsm['ulm_pvals'].
dc.run_ulm(
    mat=mdata['gex'],
    net=net,
    source='source',
    target='target',
    weight='weight',
    use_raw=False,
    verbose=True
)

In [None]:
# Store copies of the ULM estimates and p-values under CollecTRI-specific keys.
for _kind in ('estimate', 'pvals'):
    mdata['gex'].obsm[f'collectri_ulm_{_kind}'] = mdata['gex'].obsm[f'ulm_{_kind}'].copy()

In [None]:
# Read the transcription-factor activities from the preserved CollecTRI copy
# rather than from the generic 'ulm_estimate' key, which any later run_ulm
# call would overwrite. This mirrors how the pathway activities are read from
# 'selected_pathways_mlm_estimate'.
acts = dc.get_acts(mdata['gex'], obsm_key='collectri_ulm_estimate')

In [None]:
# Transcription factors whose activity is shown on the UMAP.
TFs = ['EOMES', 'MYC', 'TCF7', 'FOXO1', 'LEF1', 'KLF2']

# One UMAP per transcription factor, saved as PDF and then displayed.
for tf_name in TFs:
    sc.pl.umap(acts, color=[tf_name], cmap=cust_katha, show=False, size=30)
    plt.tight_layout()
    plt.savefig(
        f'/Users/mimi/Sina/5_Visualisations/Figure2/{tf_name}_TF_network_UMAP.pdf',
        dpi=300,
    )
    plt.show()