# LIANA tumor vs normal core atlas v0

## Libraries

In [1]:
import numpy as  np
import pandas as pd
import scanpy as sc
import decoupler as dc
import liana as li
from liana.method import singlecellsignalr, connectome, cellphonedb, natmi, logfc, cellchat, geometric_mean
import sc_atlas_helpers as ah
from tqdm.auto import tqdm
import contextlib
import os
import statsmodels.stats.multitest
import numpy as np
from anndata import AnnData
import scipy.sparse

  from .autonotebook import tqdm as notebook_tqdm


## Define variables, paths and comparison tumor vs normal

In [2]:
resDir ="/data/projects/2022/CRCA/results/v1/final/liana_cell2cell/h5ads/updated/"

In [3]:
resDir

'/data/projects/2022/CRCA/results/v1/final/liana_cell2cell/h5ads/updated/'

In [4]:
adata = sc.read_h5ad("/data/projects/2022/CRCA/results/v1/downstream_analyses/Prepare_de_analysis/artifacts/paired_tumor_normal-adata.h5ad")

In [5]:
adata.obs.sample_type.value_counts()

sample_type
tumor     1557400
normal     775115
Name: count, dtype: int64

In [6]:
adata.obs.sample_type.value_counts()

sample_type
tumor     1557400
normal     775115
Name: count, dtype: int64

In [7]:
adata.obs.cell_type_coarse.value_counts()

cell_type_coarse
Plasma_cell             362084
T_cell_CD8              355772
Cancer_non_stem_like    323487
T_cell_CD4              264011
Cancer_stem_like        251557
B_cell                  171676
Fibroblast              133488
Macrophage               86680
Endothelial_cell         73950
Granulocyte              72035
Treg                     62290
Pericyte                 44792
Monocyte                 44221
Dendritic_cell           27209
gamma_delta              19471
NK                       14523
Schwann_cell             13397
Tuft                      7040
ILC                       2541
Enteroendocrine           1909
NKT                        382
Name: count, dtype: int64

In [8]:
adata.obs.cell_type_middle.value_counts()

cell_type_middle
Cancer cell              386113
Plasma cell              362084
CD8                      355772
CD4                      264011
B cell                   171676
Fibroblast               133488
Epithelial cell          114527
Macrophage                86680
Endothelial cell          73950
Treg                      62290
Pericyte                  44792
Monocyte                  44221
Goblet                    38764
Epithelial progenitor     35640
Neutrophil                34652
Mast cell                 29639
Dendritic cell            27209
gamma-delta               19471
NK                        14523
Schwann cell              13397
Eosinophil                 7744
Tuft                       7040
ILC                        2541
Enteroendocrine            1909
NKT                         382
Name: count, dtype: int64

In [9]:
adata.obs.cell_type_fine.value_counts()

cell_type_fine
CD8                       349274
Plasma IgA                289016
CD4                       259607
Cancer Colonocyte-like    137590
Cancer TA-like            119040
B cell activated          101749
Colonocyte                 99415
Cancer Crypt-like          96877
Macrophage                 82153
Fibroblast S3              75590
Treg                       62290
Plasma IgG                 60684
Pericyte                   44792
Goblet                     38764
Neutrophil                 34274
B cell naive               34015
Endothelial arterial       33982
Endothelial venous         33713
Monocyte classical         32405
Cancer Goblet-like         30802
Mast cell                  29639
Fibroblast S1              29469
Fibroblast S2              28429
TA progenitor              26512
GC B cell                  23381
gamma-delta                19471
Colonocyte BEST4           15112
NK                         14523
Schwann cell               13397
B cell memory              1

In [10]:
set(adata.obs.cell_type_fine)

{'B cell activated',
 'B cell memory',
 'B cell naive',
 'CD4',
 'CD4 cycling',
 'CD8',
 'CD8 cycling',
 'Cancer BEST4',
 'Cancer Colonocyte-like',
 'Cancer Crypt-like',
 'Cancer Goblet-like',
 'Cancer TA-like',
 'Colonocyte',
 'Colonocyte BEST4',
 'Crypt cell',
 'DC mature',
 'DC3',
 'Endothelial arterial',
 'Endothelial lymphatic',
 'Endothelial venous',
 'Enteroendocrine',
 'Eosinophil',
 'Fibroblast S1',
 'Fibroblast S2',
 'Fibroblast S3',
 'GC B cell',
 'Goblet',
 'Granulocyte progenitor',
 'ILC',
 'Macrophage',
 'Macrophage cycling',
 'Mast cell',
 'Monocyte classical',
 'Monocyte non-classical',
 'Myeloid progenitor',
 'NK',
 'NKT',
 'Neutrophil',
 'Pericyte',
 'Plasma IgA',
 'Plasma IgG',
 'Plasma IgM',
 'Plasmablast',
 'Schwann cell',
 'TA progenitor',
 'Treg',
 'Tuft',
 'cDC1',
 'cDC2',
 'gamma-delta',
 'pDC'}

In [12]:
adata = adata[(adata.obs["sample_type"]=="tumor") & ~(adata.obs["sample_type"].isin(["Enteroendocrine","NKT"]))].copy()

In [13]:
# %%
# Make epithelial-tumor labels the same for comparisson
cluster_annot = {
    "Monocyte classical": "Monocyte classical",
    "Monocyte non-classical": "Monocyte non-classical",
    "Macrophage": "Macrophage",
    "Macrophage cycling": "Macrophage cycling",
    "Myeloid progenitor": "Myeloid progenitor",
    "cDC1": "cDC1",
    "cDC2": "cDC2",
    "DC3": "DC3",
    "pDC": "pDC",
    "DC mature": "DC mature",
    "Granulocyte progenitor": "Neutrophil",
    "Neutrophil": "Neutrophil",
    "Eosinophil": "Eosinophil",
    "Mast cell": "Mast cell",
    "Platelet": "Platelet",
    "CD4": "T cell CD4",
    "Treg": "T cell regulatory",
    "CD8": "T cell CD8",
    "NK": "NK",
    "ILC": "ILC",
    "gamma-delta": "gamma-delta",
    "NKT": "NKT",
    "CD4 naive": "T cell CD4 naive",
    "CD8 naive": "T cell CD8 naive",
    "CD4 stem-like": "T cell CD4 stem-like",
    "CD8 stem-like": "T cell CD8 stem-like",
    "CD4 cycling": "T cell CD4 cycling",
    "CD8 cycling": "T cell CD8 cycling",
    "GC B cell": "GC B cell",
    "B cell naive": "B cell naive",
    "B cell activated naive": "B cell activated",
    "B cell activated": "B cell activated",
    "B cell memory": "B cell memory",
    "Plasma IgA": "Plasma IgA",
    "Plasma IgG": "Plasma IgG",
    "Plasma IgM": "Plasma IgM",
    "Plasmablast": "Plasmablast",
    "Crypt cell": "Cancer Crypt-like",
    "TA progenitor": "Cancer TA-like",
    "Colonocyte": "Cancer Colonocyte-like",
    "Colonocyte BEST4": "Cancer BEST4",
    "Goblet": "Cancer Goblet-like",
    "Tuft": "Tuft",
    "Enteroendocrine": "Enteroendocrine",
    "Cancer Colonocyte-like": "Cancer Colonocyte-like",
    "Cancer BEST4": "Cancer BEST4",
    "Cancer Goblet-like": "Cancer Goblet-like",
    "Cancer Crypt-like": "Cancer Crypt-like",
    "Cancer TA-like": "Cancer TA-like",
    "Cancer cell circulating": "Cancer cell circulating",
    "Endothelial venous": "Endothelial venous",
    "Endothelial arterial": "Endothelial arterial",
    "Endothelial lymphatic": "Endothelial lymphatic",
    "Fibroblast S1": "Fibroblast S1",
    "Fibroblast S2": "Fibroblast S2",
    "Fibroblast S3": "Fibroblast S3",
    "Pericyte": "Pericyte",
    "Schwann cell": "Schwann cell",
    "Hepatocyte": "Hepatocyte",
    "Fibroblastic reticular cell": "Fibroblastic reticular cell",
    "Epithelial reticular cell": "Epithelial reticular cell",
}
adata.obs["cell_type"] = (
    adata.obs["cell_type_fine"].map(cluster_annot)
)

# %%
cluster_annot = {
    "Monocyte classical": "Monocyte",
    "Monocyte non-classical": "Monocyte",
    "Macrophage": "Macrophage",
    "Macrophage cycling": "Macrophage",
    "Myeloid progenitor": "Dendritic cell",
    "cDC1": "Dendritic cell",
    "cDC2": "Dendritic cell",
    "DC3": "Dendritic cell",
    "pDC": "Dendritic cell",
    "DC mature": "Dendritic cell",
    "Granulocyte progenitor": "Neutrophil",
    "Neutrophil": "Neutrophil",
    "Eosinophil": "Eosinophil",
    "Mast cell": "Mast cell",
    "Platelet": "Platelet",
    "CD4": "T cell CD4",
    "Treg": "T cell CD4",
    "CD8": "T cell CD8",
    "NK": "NK",
    "ILC": "ILC",
    "gamma-delta": "gamma-delta",
    "NKT": "NKT",
    "CD4 naive": "T cell CD4",
    "CD8 naive": "T cell CD8",
    "CD4 stem-like": "T cell CD4",
    "CD8 stem-like": "T cell CD8",
    "CD4 cycling": "T cell CD4",
    "CD8 cycling": "T cell CD8",
    "GC B cell": "B cell",
    "B cell naive": "B cell",
    "B cell activated naive": "B cell",
    "B cell activated": "B cell",
    "B cell memory": "B cell",
    "Plasma IgA": "Plasma cell",
    "Plasma IgG": "Plasma cell",
    "Plasma IgM": "Plasma cell",
    "Plasmablast": "Plasma cell",
    "Crypt cell": "Cancer stem-like",
    "TA progenitor": "Cancer stem-like",
    "Colonocyte": "Cancer non-stem-like",
    "Colonocyte BEST4": "Cancer non-stem-like",
    "Goblet": "Cancer non-stem-like",
    "Tuft": "Tuft",
    "Enteroendocrine": "Enteroendocrine",
    "Cancer Colonocyte-like": "Cancer cell",
    "Cancer BEST4": "Cancer cell",
    "Cancer Goblet-like": "Cancer cell",
    "Cancer Crypt-like": "Cancer cell",
    "Cancer TA-like": "Cancer cell",
    "Cancer cell circulating": "Cancer cell circulating",
    "Endothelial venous": "Endothelial cell",
    "Endothelial arterial": "Endothelial cell",
    "Endothelial lymphatic": "Endothelial cell",
    "Fibroblast S1": "Fibroblast",
    "Fibroblast S2": "Fibroblast",
    "Fibroblast S3": "Fibroblast",
    "Pericyte": "Pericyte",
    "Schwann cell": "Schwann cell",
    "Hepatocyte": "Hepatocyte",
    "Fibroblastic reticular cell": "Fibroblastic reticular cell",
    "Epithelial reticular cell": "Epithelial reticular cell",
}
adata.obs["cell_type_coarse"] = (
    adata.obs["cell_type_fine"].map(cluster_annot)
)

# %%
adata.obs["cell_type_lineage"] = adata.obs["cell_type_coarse"].astype(str).replace(
    {
        "Cancer non-stem-like": "Cancer cell",
        "Cancer stem-like": "Cancer cell",
        "Cancer cell circulating": "Cancer cell",
        "T cell CD8": "T cell",
        "T cell CD4": "T cell",
        "gamma-delta": "T cell",
        "NKT": "T cell",
        "Plasma cell": "B cell",
        "Macrophage": "Myeloid cell",
        "Monocyte": "Myeloid cell",
        "Dendritic cell": "Myeloid cell",
        "Granulocyte": "Myeloid cell",
        "Fibroblast": "Stromal cell",
        "Endothelial cell": "Stromal cell",
        "Pericyte": "Stromal cell",
        "Schwann cell": "Stromal cell",
    }
)

In [14]:
set(adata.obs.cell_type)

{'B cell activated',
 'B cell memory',
 'B cell naive',
 'Cancer BEST4',
 'Cancer Colonocyte-like',
 'Cancer Crypt-like',
 'Cancer Goblet-like',
 'Cancer TA-like',
 'DC mature',
 'DC3',
 'Endothelial arterial',
 'Endothelial lymphatic',
 'Endothelial venous',
 'Enteroendocrine',
 'Eosinophil',
 'Fibroblast S1',
 'Fibroblast S2',
 'Fibroblast S3',
 'GC B cell',
 'ILC',
 'Macrophage',
 'Macrophage cycling',
 'Mast cell',
 'Monocyte classical',
 'Monocyte non-classical',
 'Myeloid progenitor',
 'NK',
 'NKT',
 'Neutrophil',
 'Pericyte',
 'Plasma IgA',
 'Plasma IgG',
 'Plasma IgM',
 'Plasmablast',
 'Schwann cell',
 'T cell CD4',
 'T cell CD4 cycling',
 'T cell CD8',
 'T cell CD8 cycling',
 'T cell regulatory',
 'Tuft',
 'cDC1',
 'cDC2',
 'gamma-delta',
 'pDC'}

In [15]:
set(adata.obs.cell_type_coarse)

{'B cell',
 'Cancer cell',
 'Dendritic cell',
 'Endothelial cell',
 'Enteroendocrine',
 'Eosinophil',
 'Fibroblast',
 'ILC',
 'Macrophage',
 'Mast cell',
 'Monocyte',
 'NK',
 'NKT',
 'Neutrophil',
 'Pericyte',
 'Plasma cell',
 'Schwann cell',
 'T cell CD4',
 'T cell CD8',
 'Tuft',
 'gamma-delta'}

In [16]:
adata[adata.obs["sample_type"]=="tumor"].obs.cell_type_coarse.value_counts()

cell_type_coarse
Cancer cell         386113
T cell CD8          246021
T cell CD4          229632
Plasma cell         192909
B cell               98174
Fibroblast           86281
Macrophage           72972
Endothelial cell     59723
Monocyte             39388
Pericyte             33936
Neutrophil           33055
Dendritic cell       21755
Mast cell            18083
NK                   11491
gamma-delta          11191
Schwann cell          6551
Eosinophil            5562
Tuft                  1916
ILC                   1538
Enteroendocrine        817
NKT                    292
Name: count, dtype: int64

In [17]:
set(adata.obs.cell_type_fine)

{'B cell activated',
 'B cell memory',
 'B cell naive',
 'CD4',
 'CD4 cycling',
 'CD8',
 'CD8 cycling',
 'Cancer BEST4',
 'Cancer Colonocyte-like',
 'Cancer Crypt-like',
 'Cancer Goblet-like',
 'Cancer TA-like',
 'DC mature',
 'DC3',
 'Endothelial arterial',
 'Endothelial lymphatic',
 'Endothelial venous',
 'Enteroendocrine',
 'Eosinophil',
 'Fibroblast S1',
 'Fibroblast S2',
 'Fibroblast S3',
 'GC B cell',
 'Granulocyte progenitor',
 'ILC',
 'Macrophage',
 'Macrophage cycling',
 'Mast cell',
 'Monocyte classical',
 'Monocyte non-classical',
 'Myeloid progenitor',
 'NK',
 'NKT',
 'Neutrophil',
 'Pericyte',
 'Plasma IgA',
 'Plasma IgG',
 'Plasma IgM',
 'Plasmablast',
 'Schwann cell',
 'Treg',
 'Tuft',
 'cDC1',
 'cDC2',
 'gamma-delta',
 'pDC'}

## LIANA- rank agregate

In [None]:
adata.layers["log1p_norm"] = adata.X.copy()
sc.pp.normalize_total(adata, target_sum=1e6, layer="log1p_norm")
sc.pp.log1p(adata, base=6, layer="log1p_norm")

In [None]:
# Run rank_aggregate 
li.mt.rank_aggregate(adata, groupby='cell_type_coarse', expr_prop=0.1,resource_name='consensus',  verbose=True,key_added='rank_aggregate', n_jobs=6, layer = "log1p_norm", use_raw = False)

In [None]:
#Save adata with new ranked information
adata.write_h5ad(f"{resDir}/adata_rank_agregate.h5ad")