In [1]:
import scanpy as sc
import pandas as pd
import numpy as np

Load anndata

In [2]:
# Read the test AnnData object from an .h5ad file in the current working directory.
adata = sc.read_h5ad("test.h5ad")

In [3]:
# Make the 'logcounts' layer the main expression matrix: log-normalised counts
# are what is expected in adata.X by default.
adata.X = adata.layers['logcounts']

Parameters

In [4]:
# --- Run configuration -------------------------------------------------------
# Column of adata.obs used to group cells.
groupby = "label"
# Differential-expression method name handed to the pipeline.
de_method = "wilcoxon"
# Name of the ligand-receptor resource to use.
resource_name = "consensus"
# Policy name for summarising complex subunits.
complex_policy = "min"
# Columns that identify one interaction row.
key_cols = [
    "source",
    "target",
    "ligand_complex",
    "receptor_complex",
]
verbose = False

In [5]:
# Permutation settings; they only matter for methods that run permutations.
n_perms, seed = 1000, 69

Run LIANA PIPE

In [6]:
from liana import liana_pipe

In [7]:
# Run the shared LIANA pipeline with the settings from the parameter cells.
# No custom resource is supplied (resource=None); resource_name is passed instead.
lr_res = liana_pipe(
    adata=adata,
    groupby=groupby,
    resource_name=resource_name,
    de_method=de_method,
    n_perms=n_perms,
    seed=seed,
    verbose=verbose,
    _key_cols=key_cols,
    resource=None,
)

  self.data[key] = value
  next(self.gen)


In [8]:
# Show the pipeline output ordered from lowest to highest ligand mean.
lr_res.sort_values(by='ligand_means', ascending=True)

Unnamed: 0,ligand,ligand_complex,ligand_logfc,ligand_means,ligand_zscores,mat_mean,receptor,receptor_complex,receptor_logfc,receptor_means,receptor_zscores,source,target,ligand_means_sums,receptor_means_sums
2288,CSF1,CSF1,-0.044315,0.000000,-0.105409,0.199056,CSF2RA,CSF2RA,-0.055999,0.000000,-0.105409,CD8 T,CD8 T,0.035162,0.040544
2096,BTLA,BTLA,-0.048226,0.000000,-0.105409,0.199056,TNFRSF14,TNFRSF14,-0.520295,0.294117,-0.250196,CD8 T,CD8 T,0.037057,1.424242
2087,IL18BP,IL18BP,-0.132456,0.000000,-0.145695,0.199056,IL6R,IL6R,-0.068199,0.000000,-0.105409,CD8 T,CD8 T,0.087242,0.045404
2086,IL6,IL6,-0.315346,0.000000,-0.149471,0.199056,IL6ST,IL6R_IL6ST,-0.121447,0.000000,-0.146079,CD8 T,CD8 T,0.140459,0.083360
2085,IL6,IL6,-0.315346,0.000000,-0.149471,0.199056,IL6R,IL6R_IL6ST,-0.068199,0.000000,-0.105409,CD8 T,CD8 T,0.140459,0.045404
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1345,B2M,B2M,0.595277,5.810702,0.728883,0.199056,CD247,CD247,-2.154735,0.269538,-0.764050,NK,B,16.490938,3.277351
4178,B2M,B2M,0.595277,5.810702,0.728883,0.199056,CD3G,CD3G,-0.160309,0.196046,-0.106673,NK,NK,16.490938,0.797412
4332,B2M,B2M,0.595277,5.810702,0.728883,0.199056,KLRC1,KLRC1,1.285116,0.642471,0.494169,NK,NK,16.490938,0.893647
2647,B2M,B2M,0.595277,5.810702,0.728883,0.199056,CD3D,CD3D,2.097606,2.190063,0.951525,NK,CD8 T,16.490938,3.070996


Consensus

In [9]:
from liana.steady.scores.consensus import consensus

In [10]:
# Display the `complex_cols` attribute of the consensus method object.
consensus.complex_cols

['receptor_means',
 'receptor_zscores',
 'ligand_logfc',
 'receptor_logfc',
 'ligand_zscores',
 'ligand_means']

In [11]:
# Run the consensus method on cells grouped by the 'label' column; the returned
# object replaces `adata`.
adata = consensus(adata, groupby='label')

  self.data[key] = value
  next(self.gen)


CellPhoneDB
Index(['ligand', 'ligand_complex', 'ligand_means', 'receptor',
       'receptor_complex', 'receptor_means', 'source', 'target'],
      dtype='object')
Connectome
Index(['ligand', 'ligand_complex', 'ligand_means', 'ligand_zscores',
       'receptor', 'receptor_complex', 'receptor_means', 'receptor_zscores',
       'source', 'target'],
      dtype='object')
log2FC
Index(['ligand', 'ligand_complex', 'ligand_logfc', 'ligand_means', 'receptor',
       'receptor_complex', 'receptor_logfc', 'receptor_means', 'source',
       'target'],
      dtype='object')
NATMI
Index(['ligand', 'ligand_complex', 'ligand_means', 'ligand_means_sums',
       'receptor', 'receptor_complex', 'receptor_means', 'receptor_means_sums',
       'source', 'target'],
      dtype='object')
SingleCellSignalR
Index(['ligand', 'ligand_complex', 'ligand_means', 'mat_mean', 'receptor',
       'receptor_complex', 'receptor_means', 'source', 'target'],
      dtype='object')


Robust rank aggregate

## Each Method

CellPhoneDB

In [None]:
from liana import cellphonedb

In [None]:
%%time
adata = cellphonedb(adata, groupby='label', n_perms=1000)

In [None]:
# Fetch the table stored under adata.uns['liana_res'].
lr_res = adata.uns['liana_res']

In [None]:
# Keep only the rows whose receptor complex is CD8A_CD8B.
is_cd8_complex = lr_res['receptor_complex'] == 'CD8A_CD8B'
lr_res[is_cd8_complex]

NATMI

In [None]:
from liana import natmi

In [None]:
# Run NATMI on cells grouped by 'label'; the returned object replaces `adata`.
adata = natmi(adata, groupby='label')

In [None]:
# Display the table stored under adata.uns['liana_res'] (presumably the NATMI
# result from the previous cell — verify).
adata.uns['liana_res']

SCA

In [None]:
from liana import singlecellsignalr as sca

In [None]:
# Run SingleCellSignalR (imported as `sca`) on cells grouped by 'label'.
adata = sca(adata, groupby='label')

In [None]:
# Show the stored result table ordered by increasing 'lr_means'.
sca_table = adata.uns['liana_res']
sca_table.sort_values(by='lr_means', ascending=True)

Connectome

In [None]:
from liana import connectome

In [None]:
# Run Connectome on cells grouped by 'label'; the returned object replaces `adata`.
adata = connectome(adata, groupby='label')

In [None]:
# Show the stored result table with the largest 'scaled_weight' first.
connectome_table = adata.uns['liana_res']
connectome_table.sort_values(by='scaled_weight', ascending=False)

logFC

In [None]:
from liana import logfc

In [None]:
# Run the logFC method on cells grouped by 'label'; the returned object replaces `adata`.
adata = logfc(adata, groupby='label')

In [None]:
# Show the stored result table with the largest 'lr_logfc' first.
logfc_table = adata.uns['liana_res']
logfc_table.sort_values(by='lr_logfc', ascending=False)

In [None]:
# Scratch check: displays e (the exponential of 1). Nothing below uses this value.
np.exp(1)

Try with real data

In [None]:
# Load the processed PBMC3k dataset only to borrow its cell identifiers and
# louvain labels; `adata` itself is replaced in the following cell.
adata = sc.datasets.pbmc3k_processed()
cells = adata.obs_names
labels = adata.obs['louvain']

In [None]:
# Replace `adata` with the unprocessed PBMC3k dataset.
adata = sc.datasets.pbmc3k()

In [None]:
# Keep only the cells whose names appear in `cells` (taken from the processed
# dataset). `isin` builds the boolean mask in one vectorised call, and `.copy()`
# turns the resulting view into a standalone object so that later assignments
# to `adata.obs` do not operate on a view.
adata = adata[adata.obs_names.isin(cells)].copy()

In [None]:
# Attach the louvain labels captured earlier as the 'label' column of adata.obs.
adata.obs['label'] = labels

In [None]:
# Log-transform the expression matrix in place.
# NOTE(review): no normalisation step is visible between loading pbmc3k() and
# this call — confirm whether sc.pp.normalize_total should run first.
sc.pp.log1p(adata)

In [None]:
%%time
adata = cellphonedb(adata, groupby='label', n_perms=1000)

## LIANA PIPE LINE BY LINE

In [None]:
# I get this from the score object
_complex_cols = ['ligand_means', 'receptor_means']
# change to full list and move to _var
_add_cols = ['ligand', 'receptor',
             'ligand_means_sums', 'receptor_means_sums',
             'ligand_zscores', 'receptor_zscores',
             'ligand_logfoldchanges', 'receptor_logfoldchanges',
             'ligand_logfc', 'receptor_logfc'
             'mat_mean',
             ]

_key_cols = ['source', 'target', 'ligand_complex', 'receptor_complex']

In [None]:
from liana.utils.pre import check_adata, check_if_covered, format_vars, filter_resource
from liana.resource import select_resource
from liana.resource.select_resource import explode_complexes
from liana.steady.liana_pipe import _get_lr
from liana.utils.reassemble_complexes import reassemble_complexes
from scipy.sparse import csr_matrix
from liana.steady._permutations import get_means_perms

I need to double-check that every step makes sense as I build the unit tests.

For example, are there any duplicated rows (by source, target, ligand_complex, receptor_complex)?

Double-check again that the correct subunit is kept, etc.

In [None]:
# Validate/prepare the AnnData object with the liana helper; the result replaces
# `adata`. NOTE(review): the meaning of the second positional argument (True) is
# not visible here — presumably a verbosity flag; confirm against check_adata.
adata = check_adata(adata, True)

In [None]:
# Define idents col name
adata.obs.label = adata.obs[groupby]

Load resource

In [None]:
# Load the ligand-receptor resource named 'consensus'.
resource = select_resource(resource_name='consensus')

In [None]:
# Decomplexify: pass the resource through explode_complexes (presumably splitting
# complexes into their subunits — verify against the helper).
resource = explode_complexes(resource)

In [None]:
# Filter the resource against the variable (gene) names present in adata.
resource = filter_resource(resource, adata.var_names)

Create entities

In [None]:
# Collect every identifier that occurs as a ligand or as a receptor in the
# resource, as one array without duplicates.
ligand_ids = np.unique(resource["ligand"])
receptor_ids = np.unique(resource["receptor"])
entities = np.union1d(ligand_ids, receptor_ids)

Check overlap between resource and adata

In [None]:
# Check how well the resource entities are covered by the genes in adata.
# The comparison must use `adata.var_names` (the gene identifiers);
# `adata.var_keys` is a method and was previously passed without being called.
check_if_covered(entities, adata.var_names)

Get global mean for SCA before filtering

In [None]:
# Store the mean over the whole expression matrix in adata.uns before the genes
# are filtered below; only done when 'mat_mean' is among the requested columns.
if 'mat_mean' in _add_cols: # SHOULD BE METHOD NAME!
    adata.uns['mat_mean'] = np.mean(adata.X)

In [None]:
# Restrict adata to the genes that are both resource entities and present in
# adata, then display the subsetted object.
relevant_genes = np.intersect1d(entities, adata.var.index)
adata = adata[:, relevant_genes]
adata

Get Stats

In [None]:
# Compute the ligand-receptor statistics table for the requested columns.
# Uses `_key_cols` (defined in this line-by-line section) so the section does
# not depend on the separate `key_cols` from the parameter cell.
lr_res = _get_lr(adata, resource, _key_cols + _complex_cols + _add_cols, de_method)

In [None]:
# Inspect the table returned by _get_lr.
lr_res

Recomplexify

In [None]:
# If I want to implement anything else but the min as complex policy, I would need to change this:
# temp = temp[[x=="INHBA_INHBB" for x in temp['ligand_complex']]].sort_values('receptor_complex')
# temp = temp[[x=="ACVR1B_ACVR2A" for x in temp['receptor_complex']]]

In [None]:
# Recomplexify: reassemble complexes using the key columns and the columns the
# complex policy applies to.
lr_res = reassemble_complexes(lr_res, _key_cols, _complex_cols)

In [None]:
# Inspect the table after complexes were reassembled.
lr_res

### SCORES

In [None]:
from liana.steady.Method import Method, MethodMeta
from liana.steady.scores.cellphonedb import _cpdb_score

CELLPHONEDB

In [None]:
# Metadata describing the CellPhoneDB scoring method: which columns it needs,
# its scoring function, the names of its magnitude/specificity outputs, and
# that it relies on permutations.
_cellphonedb = MethodMeta(
    method_name="cellphonedb",
    complex_cols=['ligand_means', 'receptor_means'],
    add_cols=['ligand', 'receptor'],
    fun=_cpdb_score,
    magnitude='lr_means',
    specificity='pvals',
    permute=True,
    reference='Efremova et al., 2020',
)

In [None]:
# Build a callable Method from the metadata above. This rebinds the name
# `cellphonedb`, shadowing the function imported from liana earlier.
cellphonedb = Method(_SCORE=_cellphonedb)

In [None]:
# Run the locally built CellPhoneDB method with an explicit seed (note: this
# differs from the `seed` value in the parameter cell).
adata = cellphonedb(adata, groupby, seed=666)

In [None]:
# Display the table stored under adata.uns['liana_res'].
adata.uns['liana_res']

logFC re-implement

Magnitude as None

In [None]:
# logFC score: row-wise mean of the ligand and receptor log fold changes.
logfc_cols = ['ligand_logfoldchanges', 'receptor_logfoldchanges']
lr_res['logfc'] = lr_res[logfc_cols].mean(axis=1)

In [None]:
# Order interactions by the absolute value of 'logfc', largest first.
lr_res.sort_values(
    by='logfc',
    key=abs,
    ascending=False,
)

Re-implement Connectome

Both Specificity and Magnitude

In [None]:
# Connectome-style edge weight: row-wise mean of the ligand and receptor z-scores.
zscore_cols = ['ligand_zscores', 'receptor_zscores']
lr_res['edge_weight'] = lr_res[zscore_cols].mean(axis=1)