# LIANA tumor vs normal core atlas v0

## Libraries

In [1]:
import numpy as  np
import pandas as pd
import scanpy as sc
import decoupler as dc
import liana as li
from liana.method import singlecellsignalr, connectome, cellphonedb, natmi, logfc, cellchat, geometric_mean
import sc_atlas_helpers as ah
from tqdm.auto import tqdm
import contextlib
import os
import statsmodels.stats.multitest
import numpy as np
from anndata import AnnData
import scipy.sparse

  from .autonotebook import tqdm as notebook_tqdm


## Define variables, paths and comparison tumor vs normal

In [2]:
# Analysis configuration.
# Number of top DE ligands; interpolated into the plot title and file name.
n_top_ligands = 30
# Cell type of interest; interpolated into the plot title and file name.
cell_type_oi = "Cancer cell"
# Atlas subset label.
subset = "core_atlas"
# Comparison encoded as "<perturbation>_<baseline>".
comparison = "tumor_normal"

In [3]:
# Derive the upper-cased condition labels from "<perturbation>_<baseline>".
comparison_parts = comparison.split("_")
perturbation = comparison_parts[0].upper()
baseline = comparison_parts[1].upper()

# The title uses the human-readable cell type, so build it BEFORE the spaces
# are stripped from `cell_type_oi` below.
title_plot = f"{perturbation} vs {baseline}: {cell_type_oi}, top {n_top_ligands} DE ligands"

# The file name uses the cell type without spaces.
cell_type_oi = cell_type_oi.replace(" ", "")
save_name_plot = f"{perturbation}_vs_{baseline}_{cell_type_oi}_top_{n_top_ligands}_DE_ligands"

In [4]:
# Input directory: artifacts of the Prepare_de_analysis step (per its path).
dataDir = "/data/projects/2022/CRCA/results/v1/downstream_analyses/Prepare_de_analysis/artifacts/"
# Output directory for the h5ad file(s) written by this notebook.
# Note the trailing slash on both paths.
resDir ="/data/projects/2022/CRCA/results/v1/final/liana_cell2cell/h5ads/updated/"

In [5]:
# Echo the output directory as a sanity check.
resDir

'/data/projects/2022/CRCA/results/v1/final/liana_cell2cell/h5ads/updated/'

In [None]:
# Load the tumor/normal AnnData from the input directory.
# The path is built from `dataDir` instead of repeating the directory literal,
# so the input location is configured in one place only.
adata = sc.read_h5ad(os.path.join(dataDir, "paired_tumor_normal-adata.h5ad"))

In [None]:
# Display the AnnData summary.
adata

In [None]:
#adata_original = adata.copy()

In [None]:
# Number of observations (rows of adata.obs) per sample_type value.
adata.obs.sample_type.value_counts()

In [None]:
# Number of observations per coarse cell-type label.
adata.obs.cell_type_coarse.value_counts()

In [None]:
# Number of observations per middle-resolution cell-type label.
adata.obs.cell_type_middle.value_counts()

In [None]:
# Number of observations per fine cell-type label.
adata.obs.cell_type_fine.value_counts()

In [None]:
# Distinct fine cell-type labels before the cancer subtypes are merged.
set(adata.obs.cell_type_fine)


In [None]:
# Collapse all cancer epithelial subtypes into a single "Cancer cell" label.
cancer_subtype_to_label = {
    'Cancer BEST4': 'Cancer cell',
    'Cancer Colonocyte-like': 'Cancer cell',
    'Cancer Crypt-like': 'Cancer cell',
    'Cancer Goblet-like': 'Cancer cell',
    'Cancer TA-like': 'Cancer cell',
}
# Assign the result back explicitly: `adata.obs[col].replace(..., inplace=True)`
# mutates a temporary Series, which warns on recent pandas and leaves
# `adata.obs` unchanged once Copy-on-Write is enabled.
# Going through `object` avoids the deprecated dict-`replace` on a categorical
# column; missing values are preserved. The column is then stored as
# categorical, with categories rebuilt from the merged labels.
merged_fine_labels = adata.obs['cell_type_fine'].astype(object).replace(cancer_subtype_to_label)
adata.obs['cell_type_fine'] = merged_fine_labels.astype('category')

In [None]:
# Re-list the distinct fine labels to check that the cancer subtypes were collapsed.
set(adata.obs.cell_type_fine)


## Pseudobulk

In [None]:
## Keep only observations whose sample_type is "tumor" or "normal".
# Subsetting an AnnData returns a view. This object is written to in later
# cells (new obs column, new layer), which would otherwise trigger an implicit
# copy with a warning, so materialise the subset explicitly here.
adata = adata[adata.obs.sample_type.isin(["tumor", "normal"])].copy()

In [None]:
# Add a constant obs column: every observation gets the same "epithelial_cancer" value.
# NOTE(review): this column is not read anywhere in the visible part of the notebook - confirm it is still needed.
adata.obs["cell_type_new"] =  "epithelial_cancer"

In [None]:
## Filter adata to have paired samples only 
#filtered_indices = adata.obs.groupby('patient_id').filter(lambda x: len(set(x['sample_type'])) >= 2).index
#adata = adata[filtered_indices] #paired data

In [None]:
# Pseudobulk: one decoupler pseudobulk object per sample_type (tumor / normal).
groups_col = "sample_type"  # tumor vs normal
sample_col = "sample_id"
layer = "counts"  # layer passed to decoupler as the expression source


def _pseudobulk_for(group):
    """Return the decoupler pseudobulk of the observations whose `groups_col` equals `group`."""
    cells_of_group = adata[adata.obs[groups_col] == group]
    # NOTE(review): `min_prop` / `min_smpls` may be deprecated in newer
    # decoupler releases - confirm against the installed version.
    return dc.get_pseudobulk(
        cells_of_group,
        sample_col=sample_col,
        groups_col=[groups_col, "cell_type_fine", "patient_id", "dataset"],
        layer=layer,
        mode="sum",
        min_prop=0.05,
        min_cells=10,
        min_counts=500,
        min_smpls=10,
    )


# List of (group name, pseudobulk) pairs, in order of first appearance in obs.
pseudobulk = [(group, _pseudobulk_for(group)) for group in adata.obs[groups_col].unique()]

In [None]:
## Create count matrix and samplesheet for each sample_type: tumor & normal 
#for group, pdata in pseudobulk:
#    group = group.replace(" ","_")
#    if pdata.obs["sample_id"].nunique() <= 5:
#        print(f"Cell type {group} does not have samples in all groups")
#        break
#    else:
#        pdata.var_names.name = "gene_id"
#
#        colData = pdata.obs
#        colData.index.name = "sample_col"
#
#        colData.to_csv(f"{resDir}/02_pseudobulk/{group}_colData.csv")
#        rowData = pdata.var[["Geneid", "GeneSymbol", "Chromosome", "Class", "Length"]]
#        rowData.to_csv(f"{resDir}/02_pseudobulk/{group}_rowData.csv")
#        count_mat = pdata.to_df().T
#        count_mat.index.name = "gene_id"
#        count_mat.to_csv(f"{resDir}/02_pseudobulk/{group}_count_mat.csv")

## LIANA - rank aggregate

In [None]:
# Build the expression layer consumed by LIANA: copy X, normalise each
# observation to a total of 1e6, then log1p-transform, all within the layer
# so that X itself is left untouched.
norm_layer = "log1p_norm"
adata.layers[norm_layer] = adata.X.copy()
# NOTE(review): this assumes adata.X holds values suitable for total-count
# normalisation; the pseudobulk step reads a separate "counts" layer - confirm.
sc.pp.normalize_total(adata, target_sum=1e6, layer=norm_layer)
# NOTE(review): base=6 is an unusual logarithm base - confirm it is intended.
sc.pp.log1p(adata, base=6, layer=norm_layer)

In [None]:
# Run LIANA's rank_aggregate on the fine cell-type labels, reading expression
# from the "log1p_norm" layer (use_raw=False) and the 'consensus' resource.
# The result is keyed by `key_added` (presumably in adata.uns - confirm in the
# LIANA docs).
li.mt.rank_aggregate(
    adata,
    groupby='cell_type_fine',
    resource_name='consensus',
    expr_prop=0.1,
    layer="log1p_norm",
    use_raw=False,
    key_added='rank_aggregate',
    verbose=True,
)

In [None]:
# Save adata together with the rank_aggregate results.
# Create the output directory first so the expensive computation above is not
# lost to a missing folder.
os.makedirs(resDir, exist_ok=True)
# os.path.join avoids the doubled "/" that f"{resDir}/..." produced, because
# resDir already ends with a slash. The file name (including its "agregate"
# spelling) is kept unchanged so existing readers of this file still find it.
adata.write_h5ad(os.path.join(resDir, "adata_rank_agregate.h5ad"))