## Notebook for CCI-derived metabolism from scRNA-Seq using <span style="color:pink">MEBOCOST</span> in Healthy adult epithelial cells

- **Developed by**: Anna Maguza
- **Institute of Computational Biology - Computational Health Centre - Helmholtz Munich**
- 26th of June 2023

### Import packages

In [1]:
import os,sys
import numpy as np
import scanpy as sc
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

from mebocost import mebocost



In [None]:
# Global scanpy configuration for this notebook.
sc.settings.verbosity = 3
# Print the versions of the packages in use, for reproducibility.
sc.logging.print_versions()
# Figure defaults: on-screen and saved resolution, colour map, and SVG output.
sc.settings.set_figure_params(
    dpi=180,
    dpi_save=300,
    color_map='magma_r',
    format='svg',
    vector_friendly=True,
)

In [None]:
def X_is_raw(adata):
    """Return True when every entry of ``adata.X`` is a finite whole number.

    Used as a sanity check that ``adata.X`` still holds raw (integer) counts
    rather than normalised or log-transformed values.

    The check is done per entry, not on column sums: fractional values can
    add up to a whole number (e.g. 0.5 + 0.5), so a sum-based test can
    misreport normalised data as raw counts.

    Parameters
    ----------
    adata
        Object exposing the expression matrix as ``adata.X``. The matrix may
        be a dense array or a sparse matrix providing ``tocsr()``.

    Returns
    -------
    bool
        True if all entries are finite and integer-valued (an empty matrix
        counts as raw), False otherwise.
    """
    matrix = adata.X
    if hasattr(matrix, "tocsr"):
        # Sparse input: only explicitly stored entries need checking, the
        # implicit zeros are whole numbers by definition.
        values = np.asarray(matrix.tocsr().data)
    else:
        values = np.asarray(matrix)

    if values.size == 0:
        return True

    # NaN/inf can never be counts; test this first so the modulo below is
    # only evaluated on finite numbers.
    if not np.isfinite(values).all():
        return False
    return bool(np.equal(np.mod(values, 1), 0).all())

### Data upload

In [None]:
# Location of the integrated healthy epithelial dataset (.h5ad).
# Named `input_path` rather than `input` so the builtin `input()` is not
# shadowed for the rest of the notebook session.
input_path = '/Users/anna.maguza/Desktop/Data/Processed_datasets/Cancer_dataset_integration/Datasets integration/healthy_epithelial/Integrated_Healthy_epithelial_datasets_5000.h5ad'
adata = sc.read_h5ad(input_path)
# Cell output: does the loaded X matrix look like raw counts?
X_is_raw(adata)

In [None]:
# Extract the raw counts: replace `adata` with the object rebuilt from its
# `.raw` slot.
# NOTE(review): assumes `.raw` holds unnormalised counts; the X_is_raw check
# was run on the object before this swap, not after it — TODO confirm.
adata = adata.raw.to_adata()
# Display the resulting object as the cell output.
adata

In [None]:
# Replace the spaces in the 'Unified Cell States' column name of adata.obs
# with underscores; the column is renamed in place.
adata.obs.rename(
    columns={'Unified Cell States': 'Unified_Cell_States'},
    inplace=True,
)

#### Extract Highly Variable Genes

In [None]:
# Keep a copy of the current expression matrix in its own 'counts' layer.
adata.layers['counts'] = adata.X.copy()

# Select the 7000 most variable genes from the 'counts' layer, using the
# library preparation protocol as the batch key. `subset=True` asks scanpy to
# keep only the selected genes in `adata` (see the scanpy documentation).
sc.pp.highly_variable_genes(
    adata,
    layer='counts',
    flavor='seurat_v3',
    batch_key='Library_Preparation_Protocol',
    n_top_genes=7000,
    subset=True,
)

#### Infer metabolic communications

- The `mebocost.conf` file must be provided. It ships with the original MEBOCOST repository; after installation it contains the correct paths and should work out of the box. Either pass the full path to the file via `config_path`, or create a symlink to it.


In [None]:
# Build the MEBOCOST object from the AnnData object, grouping cells by their
# unified cell state.
mebo_obj = mebocost.create_obj(
    # --- input data ---
    adata=adata,
    group_col=['Unified_Cell_States'],
    exp_mat=None,
    cell_ann=None,
    # --- metabolite estimation ---
    met_est='mebocost',
    config_path='/Users/anna.maguza/MEBOCOST/mebocost.conf',
    species='human',
    # --- optional resources, all left as None here ---
    met_pred=None,
    met_enzyme=None,
    met_sensor=None,
    met_ann=None,
    scFEA_ann=None,
    compass_met_ann=None,
    compass_rxn_ann=None,
    gene_network=None,
    gmt_path=None,
    # --- cutoffs ---
    # Automated cutoff to exclude the lowest-ranked 25% of sensors across all cells.
    cutoff_exp='auto',
    # Automated cutoff to exclude the lowest-ranked 25% of metabolites across all cells.
    cutoff_met='auto',
    # At least 25% of the cells in a cell group (as defined by group_col) must
    # express the sensor or present the metabolite.
    cutoff_prop=0.25,
    # --- sensors and parallelism ---
    sensor_type=['Receptor', 'Transporter', 'Nuclear Receptor'],
    thread=4,
)

#### Estimate metabolite presence


- If you just want the table of metabolic activity, you can stop here and export it as a CSV file.

In [None]:
# Load the MEBOCOST configuration and data for the chosen species, then
# estimate the metabolite levels (the two steps reported in the log output
# of this cell). The config must be loaded before the estimator runs.
# NOTE(review): `_load_config_` looks like a private method (leading
# underscore) — confirm it is meant to be called directly.
mebo_obj._load_config_()
mebo_obj.estimator()

[April 27, 2023 12:12:11]: Load config and read data based on given species [human].
[April 27, 2023 12:12:13]: Estimtate metabolite level using mebocost


In [None]:
# Turn the estimated metabolite matrix into a labelled dense DataFrame,
# using the row and column labels stored on the MEBOCOST object.
met_mat = pd.DataFrame(
    data=mebo_obj.met_mat.toarray(),
    index=mebo_obj.met_mat_indexer,
    columns=mebo_obj.met_mat_columns,
)
# Preview the first rows as the cell output.
met_mat.head()

Unnamed: 0,711539-ST09_CSE_IAV-V1,712557-ST09_CSE_IAV-V1,548541-ST09_CSE_IAV-V1,235387-ST09_CSE_IAV-V1,843167-ST09_CSE_IAV-V1,429761-ST09_CSE_IAV-V1,180538-ST09_CSE_IAV-V1,877284-ST09_CSE_IAV-V1,719369-ST09_CSE_IAV-V1,808473-ST09_CSE_IAV-V1,...,7998788-CSE03_CTRL-V6,1633943-CSE03_CTRL-V6,13157829-CSE03_CTRL-V6,5759657-CSE03_CTRL-V6,2825919-CSE03_CTRL-V6,9600822-CSE03_CTRL-V6,12250460-CSE03_CTRL-V6,3863124-CSE03_CTRL-V6,4148054-CSE03_CTRL-V6,4291292-CSE03_CTRL-V6
HMDB0001188,2.0,1.0,8.0,3.0,8.0,2.0,0.0,1.0,6.0,0.0,...,0.0,0.0,4.0,0.0,3.0,4.0,0.0,0.0,0.0,0.0
HMDB0000699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HMDB0006875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HMDB0000972,2.0,5.0,5.0,0.0,11.0,0.0,4.0,8.0,0.0,7.0,...,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HMDB0004673,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### MEBOCOST cell-cell communication inference

In [None]:
# Infer metabolite-sensor communications between cell groups: 1000 shuffles
# with a fixed random seed, run on 4 threads. `Return=True` so the result is
# captured in `commu_res`.
commu_res = mebo_obj.infer_commu(
    n_shuffle=1000,
    seed=1712,
    thread=4,
    min_cell_number=3,
    # NOTE: keyword spelling kept exactly as in the original call; check the
    # MEBOCOST API before "correcting" it.
    save_permuation=False,
    Return=True,
)

[April 27, 2023 12:12:32]: Load config and read data based on given species [human].
[April 27, 2023 12:12:34]: Estimtate metabolite level using mebocost
[April 27, 2023 12:12:43]: Infer communications
[April 27, 2023 12:12:43]: Sensor type used ['Transporter', 'Receptor', 'Nuclear Receptor']
[April 27, 2023 12:12:43]: Parameters: {shuffling: 1000 times, random seed: 1712, thread: 4}
[April 27, 2023 12:12:59]: met_sensor: (67, 8)
[April 27, 2023 12:12:59]: avg_exp: (465, 19) for (gene, cell) of needed
[April 27, 2023 12:12:59]: avg_met: (311, 19) for (metabolite, cell) of needed
[April 27, 2023 12:12:59]: shuffling 1000 times for generating backgroud
[April 27, 2023 12:13:09]: take exp and met avg for shuffling
[April 27, 2023 12:13:45]: thread: 4
[April 27, 2023 12:13:46]: ABCA1 ~ HMDB0006247
[April 27, 2023 12:13:46]: Normalizing Cluster Mean and Calculating Communication Score
[April 27, 2023 12:13:47]: Calculating P-value
[April 27, 2023 12:13:47]: LDLR ~ HMDB0000067
[April 27, 202

In [None]:
# Report the cutoffs stored on the MEBOCOST object after inference.
# f-strings replace the dated %-formatting; the printed text is unchanged.
print(f'sensor_exp cutoff: {mebo_obj.cutoff_exp}')
print(f'metabolite_presence cutoff: {mebo_obj.cutoff_met}')

sensor_exp cutoff: 2.0
metabolite_presence cutoff: 1.0


In [None]:
# Save results: write the communication table returned by infer_commu
# (`commu_res`) to a CSV file.
# NOTE: the destination is an absolute, machine-specific path; adjust it
# when running on another machine.
commu_res.to_csv('/Users/anna.maguza/Desktop/GCA_healthy_epithelial.csv')

#### Visualise MEBOCOST prediction

- Showing the detailed communications (sender-receiver vs metabolite-sensor) in a dot map

In [None]:
# Dot map of the communications sent by stem cells; metabolites, sensors and
# receivers are left unrestricted (empty focus lists).
mebo_obj.commu_dotmap(
    # --- which communications to show ---
    sender_focus=['Stem Cells'],
    receiver_focus=[],
    metabolite_focus=[],
    sensor_focus=[],
    and_or='and',
    # --- significance and score filters ---
    pval_method='permutation_test_fdr',
    pval_cutoff=0.05,
    comm_score_col='Commu_Score',
    comm_score_cutoff=0,
    # --- appearance ---
    figsize='auto',
    cmap='plasma_r',
    node_size_norm=(10, 150),
    swap_axis=False,
    # --- output handling: show on screen, do not save or return the figure ---
    save=None,
    show_plot=True,
    return_fig=False,
)


In [None]:
# Flow plot of the communications sent by stem cells, restricted to the
# metabolite 'Dehydroascorbic acid'; sensors and receivers are unrestricted.
mebo_obj.FlowPlot(
    # --- which communications to show ---
    sender_focus=['Stem Cells'],
    receiver_focus=[],
    metabolite_focus=['Dehydroascorbic acid'],
    sensor_focus=[],
    and_or='and',
    remove_unrelevant=False,
    # --- significance and score filters ---
    pval_method='permutation_test_fdr',
    pval_cutoff=0.05,
    comm_score_col='Commu_Score',
    comm_score_cutoff=0,
    # --- node appearance ---
    node_label_size=8,
    node_alpha=0.6,
    node_cmap='tab20',
    node_size_norm=(20, 150),
    text_outline=False,
    # --- line appearance ---
    line_cmap='bwr',
    line_vmin=None,
    line_vmax=15.5,
    linewidth_norm=(0.5, 5),
    # --- figure and output handling ---
    figsize='auto',
    save=None,
    show_plot=True,
    return_fig=False,
)

In [None]:
# Work on a copy of the communication table held by the MEBOCOST object.
commu_df = mebo_obj.commu_res.copy()
# Unique, sorted names of the metabolites involved in at least one
# communication with a permutation-test FDR of at most 0.05.
good_met = (
    commu_df.loc[commu_df['permutation_test_fdr'] <= 0.05, 'Metabolite_Name']
    .sort_values()
    .unique()
)

# Violin plot of those metabolites in stem cells.
mebo_obj.violin_plot(
    # Only the first five names (in sorted order) are plotted, as an example.
    sensor_or_met=good_met[:5],
    cell_focus=['Stem Cells'],
    cmap='RdPu',
    vmin=None,
    vmax=None,
    cbar_title='',
    figsize='auto',
    save=None,
    show_plot=True,
)

In [None]:
# Unique, sorted sensors involved in at least one communication with a
# permutation-test FDR of at most 0.05 (taken from `commu_df`, the copy of
# the communication table created in the previous cell).
good_sensor = (
    commu_df.loc[commu_df['permutation_test_fdr'] <= 0.05, 'Sensor']
    .sort_values()
    .unique()
)

# Violin plot of those sensors in stem cells.
mebo_obj.violin_plot(
    # Only the first five sensors (in sorted order) are plotted, as an example.
    sensor_or_met=good_sensor[:5],
    cell_focus=['Stem Cells'],
    cmap='RdPu',
    vmin=None,
    vmax=None,
    cbar_title='',
    figsize='auto',
    save=None,
    show_plot=True,
)