# Estimate Metalinks

In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import muon as mu
import liana as li

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from itertools import product

In [3]:
import decoupler as dc

Test object

In [4]:
# Load scanpy's bundled `pbmc68k_reduced` dataset and rebuild the AnnData
# from its `.raw` slot, so the rest of the notebook works on what `.raw` holds.
adata = sc.datasets.pbmc68k_reduced().raw.to_adata()

## Metabolite Enrichment

In [5]:
# Metalinks resource table. The path is relative, so it is resolved against
# the current working directory of the kernel.
resource = pd.read_csv(filepath_or_buffer='liana/resource/metalinks_resource.csv')

Process net

In [6]:
# Load the metabolite sets (relative path, resolved against the working directory).
met_net = pd.read_csv("liana/resource/metabolite_sets.csv")
# Encode the edge direction as a signed weight: 'producing' -> 1, 'degrading' -> -1.
# The result is assigned back to the column on purpose: calling
# `.replace(..., inplace=True)` on `met_net['direction']` acts on a selected
# column object, which under pandas Copy-on-Write does not update `met_net`.
# `.map` yields a numeric column (needed for the mean taken later); any label
# other than the two listed becomes NaN.
met_net['direction'] = met_net['direction'].map({'degrading': -1, 'producing': 1})

In [7]:
# NOTE: the table contains duplicated rows; Elias should append Sources.
# Keep the first occurrence of every (HMDB, GENE, direction) triple.
met_net = met_net.drop_duplicates(subset=['HMDB', 'GENE', 'direction'], keep='first')

In [8]:
# NOTE: some (HMDB, GENE) pairs are listed with both signs.
# Average the direction per pair, then keep only pairs whose mean is exactly
# 1 or -1; a pair carrying conflicting signs averages to something else
# and is dropped.
met_net = (
    met_net
    .groupby(['HMDB', 'GENE'], as_index=False)
    .agg({'direction': 'mean'})
    .loc[lambda edges: edges['direction'].isin([-1, 1])]
)

Metabolite Estimate

In [9]:
# Fit decoupler's ULM with the signed gene sets: HMDB ids are the sources,
# genes the targets and `direction` the edge weight. `use_raw=False` and
# `min_n=3` are passed through unchanged; the result is read back from
# `adata.obsm['ulm_estimate']` in the next step.
dc.run_ulm(
    adata,
    net=met_net,
    source='HMDB',
    target='GENE',
    weight='direction',
    use_raw=False,
    min_n=3,
)

In [10]:
# Turn the `ulm_estimate` entry of `adata.obsm` into its own AnnData object
# so the estimates can be used as a separate modality below.
met_est = li.fun.obsm_to_adata(adata, 'ulm_estimate')

In [11]:
# Bundle both views into one MuData object: the metabolite estimates
# under 'metabolite' and the expression AnnData under 'rna'.
mdata = mu.MuData(
    {
        'metabolite': met_est,
        'rna': adata,
    }
)

In [12]:
# Run liana's rank aggregation across modalities, grouping cells by
# `bulk_labels`: the 'metabolite' modality is passed as `x_mod` and 'rna' as
# `y_mod`. `n_perms=None`, `use_raw=False` and `verbose=True` are forwarded as-is.
li.mt.rank_aggregate(
    mdata,
    groupby='bulk_labels',
    multi_kwargs={'x_mod': 'metabolite', 'y_mod': 'rna'},
    n_perms=None,
    use_raw=False,
    verbose=True,
)

`x_mod` will be transformed to zero-inflated min-max scale.
`y_mod` will be transformed to zero-inflated min-max scale.
Using `.X`!
Converting mat to CSR format
Using `.X`!
Using `.X`!
Using resource `consensus`.
0.94 of entities in the resource are missing from the data.
Generating ligand-receptor stats for 700 samples and 808 features
Assuming that counts were `natural` log-normalized!
Running CellPhoneDB
Running Connectome
Running log2FC
Running NATMI
Running SingleCellSignalR
Running CellChat


In [13]:
# Display the result table stored under `mdata.uns['liana_res']`
# (presumably written by the `rank_aggregate` call above — confirm).
mdata.uns['liana_res']

Unnamed: 0,source,target,ligand_complex,receptor_complex,lr_means,expr_prod,scaled_weight,lr_logfc,spec_weight,lrscore,lr_probs,magnitude_rank
1092,CD14+ Monocyte,CD56+ NK,TIMP1,CD63,0.572534,0.327786,0.966945,0.441389,0.060475,0.741422,0.441381,1.867635e-08
1106,CD56+ NK,CD56+ NK,SPON2,ITGB2,0.559032,0.310813,1.867714,0.536629,0.121522,0.736293,0.403199,1.492927e-07
898,CD56+ NK,CD14+ Monocyte,SPON2,ITGB2,0.539100,0.290173,1.789968,0.530844,0.113452,0.729568,0.387321,9.332889e-07
1088,CD14+ Monocyte,CD56+ NK,S100A9,ITGB2,0.526580,0.271852,0.835955,0.360086,0.097511,0.723086,0.383827,3.816054e-06
847,CD14+ Monocyte,CD14+ Monocyte,S100A9,ITGB2,0.506648,0.253799,0.758209,0.354301,0.091035,0.716154,0.368241,3.642492e-05
...,...,...,...,...,...,...,...,...,...,...,...,...
317,CD4+/CD25 T Reg,CD4+/CD45RO+ Memory,HLA-DQA2,CD4,0.146821,0.012269,-0.181963,-0.185860,0.006765,0.356802,0.000000,1.000000e+00
318,CD4+/CD25 T Reg,CD4+/CD45RO+ Memory,HLA-DQA1,CD4,0.150066,0.013847,-0.193747,-0.201353,0.007197,0.370807,0.000000,1.000000e+00
321,CD4+/CD25 T Reg,CD4+/CD45RO+ Memory,HLA-DPB1,CD4,0.163064,0.020170,-0.294756,-0.227968,0.007606,0.415635,0.000000,1.000000e+00
54,CD8+ Cytotoxic T,CD4+/CD25 T Reg,TIMP1,CD63,0.143827,0.020458,-0.534240,-0.267517,0.003774,0.417359,0.015612,1.000000e+00


## Check Transporters

Transporter estimate

(Not worth it for this part)

In [None]:
# Transporter sets table. The path is relative, so it is resolved against
# the current working directory of the kernel.
t_net = pd.read_csv(filepath_or_buffer='liana/resource/transporter_sets.csv')

In [None]:
# Convert `reversibility` into a boolean flag: True where the label is
# 'reversible', False otherwise.
# The dtype guard keeps the cell idempotent: without it, re-running the cell
# would compare the already-boolean column with 'reversible' and silently
# set every row to False.
if not pd.api.types.is_bool_dtype(t_net['reversibility']):
    t_net['reversibility'] = t_net['reversibility'] == 'reversible'

In [None]:
# Rows whose `transport_direction` is 'in', plus every row flagged as reversible.
in_net = t_net.loc[t_net['transport_direction'].eq('in') | t_net['reversibility']]
in_net

In [None]:

# Rows whose `transport_direction` is 'out', plus every row flagged as reversible.
out_net = t_net.loc[t_net['transport_direction'].eq('out') | t_net['reversibility']]
out_net

In [None]:
# Fit decoupler's ULM on the outward transporter sets (`out_net`), with the
# same settings as the metabolite run; `in_net` is not used here.
# NOTE(review): `weight='direction'` assumes `out_net` has a 'direction'
# column; only 'transport_direction' and 'reversibility' are referenced in
# this notebook — confirm the column exists.
# NOTE(review): this presumably overwrites the metabolite estimates stored
# by the earlier `run_ulm` call on the same `adata` — confirm.
dc.run_ulm(
    adata,
    net=out_net,
    source='HMDB',
    target='GENE',
    weight='direction',
    use_raw=False,
    min_n=3,
)