### Notebook for the cell-cell interaction research of cancer epithelial cells with liana package

- **Developed by:** Anna Maguza
- **Institute of Computational Biology - Computational Health Department - Helmholtz Munich**
- 30th June 2023

### Import required modules

In [2]:
import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import liana as li
import anndata as ad

### Set up working environment

In [3]:
%matplotlib inline

In [4]:
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.settings.set_figure_params(dpi = 180, color_map = 'magma_r', dpi_save = 300, vector_friendly = True, format = 'svg')

-----
anndata     0.8.0
scanpy      1.9.3
-----
PIL                         9.4.0
appnope                     0.1.2
asttokens                   NA
backcall                    0.2.0
cffi                        1.15.1
comm                        0.1.2
cycler                      0.10.0
cython_runtime              NA
dateutil                    2.8.2
debugpy                     1.5.1
decorator                   5.1.1
defusedxml                  0.7.1
entrypoints                 0.4
executing                   0.8.3
fontTools                   4.39.3
h5py                        3.8.0
importlib_resources         NA
ipykernel                   6.19.2
ipython_genutils            0.2.0
jedi                        0.18.1
joblib                      1.2.0
jupyter_server              1.23.6
kiwisolver                  1.4.4
liana                       0.1.7
llvmlite                    0.39.1
matplotlib                  3.7.1
matplotlib_inline           0.1.6
mizani                      0.8.1
mpl_

In [5]:
def X_is_raw(adata):
    return np.array_equal(adata.X.sum(axis=0).astype(int), adata.X.sum(axis=0))

### Data Upload

In [6]:
input = '/Users/anna.maguza/Desktop/Data/Processed_datasets/Cancer_dataset_integration/Labels_transfer/scBalance/Joanito_predicted_labels_with_scBalance_7000.h5ad'
adata = sc.read_h5ad(input)
X_is_raw(adata)

False

In [7]:
# Extract the raw counts
adata = adata.raw.to_adata()
adata

AnnData object with n_obs × n_vars = 35714 × 33287
    obs: 'nFeature_RNA', 'pct_counts_mito', 'Sample_ID', 'Donor_ID', 'sample.origin', 'dataset_x', 'Cell Type', 'iCMS', 'msi', 'dataset_y', 'Sex', 'Tumor Stage', 'MSS/MSI', 'Location', 'Side', 'Group Stage', 'Stage TNM', 'iCMS.transcriptomic', 'iCMS.inferCNV', 'KRAS', 'BRAF', 'TP53', 'APC', 'PIK3CA', 'LymphNode', 'Normal', 'Tumor', 'CMS', 'Age_group', 'Study_name', 'Diagnosis', 'n_genes_by_counts', 'total_counts', 'Library_Preparation_Protocol', 'doublet_scores', 'predicted_doublets', 'doublet_info', 'Predicted Label'
    var: 'feature_types', 'genome'
    uns: 'log1p'

In [8]:
X_is_raw(adata)

True

### Liana Run

In [9]:
# import all individual methods
import liana 
from liana.method import singlecellsignalr, connectome, cellphonedb, natmi, logfc, cellchat, geometric_mean

### Run CellChat

In [10]:
# Save raw counts in raw
adata.raw = adata

In [11]:
adata.obs['Predicted Label'].value_counts()

TA                        22094
Paneth cells               7622
Colonocyte                 2136
Enterocyte                 1661
Tuft cells                 1400
Stem cells                  555
Epithelial cells            103
Goblet cells                 94
Enteroendocrine cells        31
Microfold cell                9
Enterochromaffin cells        6
L cells                       3
Name: Predicted Label, dtype: int64

In [12]:
#run cellchat
cellchat(adata, groupby='Predicted Label', expr_prop=0.1, verbose=True)

Using `.raw`!
6092 features of mat are empty, they will be removed.
Make sure that normalized counts are passed!




The following cell identities were excluded: L cells
0.07 of entities in the resource are missing from the data.
Generating ligand-receptor stats for 35711 samples and 27195 features


100%|██████████| 1000/1000 [28:53<00:00,  1.73s/it]


In [13]:
#Write anndata object to file
adata.write('/Users/anna.maguza/Desktop/Data/Processed_datasets/Cell_cell_interaction/Cancer_epithelial/Liana/Epithelial_cancer_post-liana_cellChat_all_counts.h5ad')

In [None]:
li.pl.dotplot(adata = adata, 
              colour='lr_probs',
              size='cellchat_pvals',
              inverse_size=True, # we inverse sign since we want small p-values to have large sizes
              source_labels=['Stem cells'],
              target_labels=['Enterocyte', 'TA', 'Goblet cells', 'Colonocyte', 'Paneth cells', 'Tuft cells', 'Enterochromaffin cells', 'Enteroendocrine cells', 'Microfold cell', 'Epithelial cells'],
              figure_size=(8, 80),
              # finally, since cpdbv2 suggests using a filter to FPs
              # we filter the pvals column to <= 0.05
              filterby='cellchat_pvals',
              filter_lambda=lambda x: x <= 0.05
             )

In [None]:
li.pl.dotplot(adata = adata, 
              colour='lr_probs',
              size='cellchat_pvals',
              inverse_size=True, # we inverse sign since we want small p-values to have large sizes
              source_labels=['Stem cells'],
              target_labels=['Epithelial cells'],
              figure_size=(5, 30),
              # finally, since cpdbv2 suggests using a filter to FPs
              # we filter the pvals column to <= 0.05
              filterby='cellchat_pvals',
              filter_lambda=lambda x: x <= 0.05
             )