### Notebook for the inference of cell-cell communications on Healthy-CTRL data using `LIANA+`

- **Developed by**: Carlos Talavera-López
- **Würzburg Institute for Systems Immunology, Faculty of Medicine, Julius-Maximilian-Universität Würzburg**
- **Created**: 231109
- **Latest version**: 240508

### Import required modules

In [1]:
import anndata
import numpy as np
import liana as li
import pandas as pd
import scanpy as sc

from liana.mt import rank_aggregate
from liana.method import singlecellsignalr, connectome, cellphonedb, natmi, logfc, cellchat, geometric_mean

### Set up working environment

In [2]:
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.settings.set_figure_params(dpi = 180, color_map = 'magma_r', dpi_save = 300, vector_friendly = True, format = 'svg')

-----
anndata     0.10.3
scanpy      1.9.6
-----
PIL                 10.1.0
appnope             0.1.3
asttokens           NA
comm                0.2.0
cycler              0.12.1
cython_runtime      NA
dateutil            2.8.2
debugpy             1.8.0
decorator           5.1.1
exceptiongroup      1.1.3
executing           2.0.1
h5py                3.10.0
ipykernel           6.26.0
ipywidgets          8.1.1
jedi                0.19.1
joblib              1.3.2
kiwisolver          1.4.5
liana               1.0.2
llvmlite            0.41.1
matplotlib          3.8.1
mizani              0.9.3
mpl_toolkits        NA
mudata              0.2.3
natsort             8.4.0
numba               0.58.1
numpy               1.26.1
packaging           23.2
pandas              2.1.1
parso               0.8.3
patsy               0.5.3
pexpect             4.8.0
platformdirs        3.11.0
plotnine            0.12.4
prompt_toolkit      3.0.39
psutil              5.9.6
ptyprocess          0.7.0
pure_eval     

### Read in data

In [3]:
adata_all = sc.read_h5ad('../../../data/Marburg_cell_states_locked_ctl231212.raw.h5ad') 
adata_all

AnnData object with n_obs × n_vars = 97573 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', 'IAV_score', 'group', 'Viral_score', 'cell_type', 'cell_states', 'leiden', 'cell_compartment', '_scvi_batch', '_scvi_labels', 'C_scANVI', 'viral_counts', 'infected_status'
    var: 'mt', 'ribo'
    uns: 'cell_states_colors', 'disease_colors', 'group_colors', 'infected_status_colors', 'infection_colors'
    obsm: 'X_scANVI', 'X_scVI', 'X_umap'

### Subset conditions

In [4]:
adata = adata_all[adata_all.obs['group'].isin(['healthy_ctrl'])]
adata

View of AnnData object with n_obs × n_vars = 23667 × 27208
    obs: 'sex', 'age', 'ethnicity', 'PaCO2', 'donor', 'infection', 'disease', 'SMK', 'illumina_stimunr', 'bd_rhapsody', 'n_genes', 'doublet_scores', 'predicted_doublets', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'pct_counts_ribo', 'percent_mt2', 'n_counts', 'percent_chrY', 'XIST-counts', 'S_score', 'G2M_score', 'condition', 'sample_group', 'IAV_score', 'group', 'Viral_score', 'cell_type', 'cell_states', 'leiden', 'cell_compartment', '_scvi_batch', '_scvi_labels', 'C_scANVI', 'viral_counts', 'infected_status'
    var: 'mt', 'ribo'
    uns: 'cell_states_colors', 'disease_colors', 'group_colors', 'infected_status_colors', 'infection_colors'
    obsm: 'X_scANVI', 'X_scVI', 'X_umap'

### Check `LIANA+` available methods

In [5]:
li.mt.show_methods()

Unnamed: 0,Method Name,Magnitude Score,Specificity Score,Reference
0,CellPhoneDB,lr_means,cellphone_pvals,"Efremova, M., Vento-Tormo, M., Teichmann, S.A...."
0,Connectome,expr_prod,scaled_weight,"Raredon, M.S.B., Yang, J., Garritano, J., Wang..."
0,log2FC,,lr_logfc,"Dimitrov, D., Türei, D., Garrido-Rodriguez, M...."
0,NATMI,expr_prod,spec_weight,"Hou, R., Denisenko, E., Ong, H.T., Ramilowski,..."
0,SingleCellSignalR,lrscore,,"Cabello-Aguilar, S., Alame, M., Kon-Sun-Tack, ..."
0,CellChat,lr_probs,cellchat_pvals,"Jin, S., Guerrero-Juarez, C.F., Zhang, L., Cha..."
0,Rank_Aggregate,magnitude_rank,specificity_rank,"Dimitrov, D., Türei, D., Garrido-Rodriguez, M...."
0,Geometric Mean,lr_gmeans,gmean_pvals,CellPhoneDBv2's permutation approach applied t...


### Run aggregated estimate 

In [6]:
li.mt.rank_aggregate(adata, groupby = 'cell_states', expr_prop = 0.1, verbose = True, use_raw = False)
adata.uns['liana_res'].head()

Using `.X`!
120 features of mat are empty, they will be removed.
Make sure that normalized counts are passed!
The following cell identities were excluded: OASiav_Ciliated, SCGB1+KRT5-FOXA1+iav_Club, iav-lip_Club, ifn_Basal, ifn_Goblet
['NC_026431.1', 'NC_026432.1', 'NC_026433.1', 'NC_026434.1', 'NC_026435.1', 'NC_026436.1', 'NC_026437.1', 'NC_026438.1'] contain `_`. Consider replacing those!
Using resource `consensus`.
0.09 of entities in the resource are missing from the data.


Generating ligand-receptor stats for 23650 samples and 27088 features
... as `zero_center=True`, sparse input is densified and may lead to large memory consumption




Assuming that counts were `natural` log-normalized!




Running CellPhoneDB


100%|██████████| 1000/1000 [01:21<00:00, 12.29it/s]


Running Connectome
Running log2FC




Running NATMI
Running SingleCellSignalR
Running CellChat


100%|██████████| 1000/1000 [11:49<00:00,  1.41it/s]


Unnamed: 0,source,target,ligand_complex,receptor_complex,lr_means,cellphone_pvals,expr_prod,scaled_weight,lr_logfc,spec_weight,lrscore,lr_probs,cellchat_pvals,specificity_rank,magnitude_rank
269278,mixed_Goblet2,MHCII+Club,SLPI,PLSCR1,782.273499,0.0,28474.570312,2.449424,,0.013771,0.991049,0.000451,0.0,,1.408636e-16
103885,mixed_Goblet2,DHRS9+Club,SLPI,PLSCR1,782.110657,0.0,27970.90625,2.413791,,0.013528,0.99097,0.000424,0.0,,3.803305e-15
620876,mixed_Goblet2,p53_Ciliated,SLPI,PLSCR1,778.007385,0.0,15282.640625,1.516125,,0.007391,0.987822,0.000204,0.0,,1.408616e-13
371848,mixed_Goblet2,OMG+Ciliated,SLPI,PLSCR1,776.935425,0.0,11967.80957,1.281608,,0.005788,0.98626,0.000168,0.0,,5.769639e-13
21043,mixed_Goblet2,APOD+Ciliated,SLPI,PLSCR1,777.012634,0.0,12206.550781,1.298499,,0.005903,0.986393,0.000158,0.0,,8.214949e-13


In [7]:
my_plot = li.pl.dotplot(adata = adata,
                        colour='magnitude_rank',
                        inverse_colour = True,
                        size = 'magnitude_rank',
                        inverse_size=False,
                        source_labels=['SERPINE2+Basal'],
                        target_labels = ['RARRES1+lip_Goblet','MHCII+Club', 'Ionocyte', 'mixed_Goblet1', 'mixed_Goblet2'],
                        filter_fun=lambda x: x['magnitude_rank'] <= 0.01,
                        figure_size = (10, 10),
                        cmap = 'magma'
                       )
my_plot

TypeError: dotplot() got an unexpected keyword argument 'filter_fun'

In [None]:
rank_aggregate.describe()

In [None]:
adata.obs['cell_states'].cat.categories

In [None]:
li.pl.dotplot(adata = adata,
              colour = 'magnitude_rank',
              size = 'magnitude_rank',
              inverse_size = True,
              inverse_colour = True,
              source_labels = ['SERPINE2+Basal'],
              target_labels = ['KRT17+Goblet', 'MHCII+Club'],
              top_n = 10,
              orderby = 'magnitude_rank',
              orderby_ascending = True,
              figure_size = (10, 10)
             )

In [None]:
methods = [logfc, geometric_mean]
new_rank_aggregate = li.mt.AggregateClass(li.mt.aggregate_meta, methods=methods)

In [None]:
new_rank_aggregate(adata,
                   groupby='cell_states',
                   expr_prop=0.1,
                   verbose=True,
                   # Note that with this option, we don't perform permutations
                   # and hence we exclude the p-value for geometric_mean, as well as specificity_rank
                   n_perms=None,
                   use_raw=False,
                   )

In [None]:
adata.uns['liana_res'].head()