In [None]:
'''
Goal:Compare this dataset to 
A single-cell atlas of the myometrium in human parturition

Author:Carsten Knutsen
Date:231114
conda_env:uterus_sc
Notes: compare bulk DEGs in single cell
'''

In [None]:
import scanpy as sc

# Only needed for processing
import numpy as np
import pandas as pd
import os 
# Let pandas display up to 500 rows before truncating a DataFrame.
pd.set_option('display.max_rows', 500)
import liana as li

# Folder for this notebook's output; it is created if it does not exist yet.
# NOTE(review): absolute, user-specific path - adjust before running on another machine.
output = '/home/carsten/alvira_bioinformatics/uterus/data/pilot/231212_liana_test'
os.makedirs(output, exist_ok=True)
# Point scanpy's figure directory at the same folder.
sc.settings.figdir = output

In [None]:
# Load the AnnData object (a processed, cell-typed dataset judging by the file name).
adata = sc.read('/home/carsten/alvira_bioinformatics/uterus/data/single_cell_files/scanpy_files/uterus_processed_celltyped.gz.h5ad')
# Store the object as its own .raw slot.
# NOTE(review): presumably because liana reads expression from .raw by default -
# confirm that adata.X holds the values the ligand-receptor methods should score.
adata.raw = adata


In [None]:
# List the ligand-receptor scoring methods that liana exposes.
li.mt.show_methods()


In [None]:
from liana.mt import rank_aggregate

In [None]:
# Print the signature and docstring of rank_aggregate's call method.
# help() is plain Python, so this cell also runs outside IPython, unlike the `?` prefix.
help(rank_aggregate.__call__)


In [None]:
# Print liana's own description of the rank_aggregate method.
rank_aggregate.describe()


In [None]:
# List the ligand-receptor resources bundled with liana
# (presumably the values accepted as `resource_name` below - confirm).
li.resource.show_resources()


In [None]:
from liana.method import singlecellsignalr, connectome, cellphonedb, natmi, logfc, cellchat, geometric_mean


In [None]:
# Score ligand-receptor interactions between 'Cell Subtype' groups with the
# CellPhoneDB method on the 'consensus' resource. The result table is written
# to adata.uns['cpdb_res'].
cellphonedb(
    adata,
    groupby='Cell Subtype',
    resource_name='consensus',
    expr_prop=0.1,
    key_added='cpdb_res',
    verbose=True,
)


In [None]:
# Work on a copy so that later filtering cannot modify the table stored in adata.uns.
df = adata.uns['cpdb_res'].copy()


In [None]:
# Interactions in which uterine smooth muscle is both the source and the target.
usm_is_target = df['target'] == 'Uterine smooth muscle'
usm_is_source = df['source'] == 'Uterine smooth muscle'
df.loc[usm_is_target & usm_is_source]

In [None]:
# Custom pathway gene table with one column per smooth-muscle type ('USM', 'VSM').
markers = pd.read_csv(
    '/home/carsten/alvira_bioinformatics/uterus/data/pilot/240925_atlas_figure_pilot/usm_vsm_custom_pathway_genes.csv'
)
# For each column: drop empty cells, de-duplicate, and keep a plain Python list.
usm, vsm = (
    markers[column].dropna().unique().tolist()
    for column in ('USM', 'VSM')
)

In [None]:
# Interactions in which uterine smooth muscle is either the sender or the receiver.
usm_comm = df.loc[(df['target']=='Uterine smooth muscle')|(df['source']=='Uterine smooth muscle')]
# Keep only interactions whose ligand or receptor is in the USM gene list.
usm_comm = usm_comm.loc[(usm_comm['ligand'].isin(usm))|(usm_comm['receptor'].isin(usm))]

# Plot the USM genes that take part in those interactions. The gene filter has
# to come from usm_comm (this cell's table); reading the VSM table here would
# fail on a fresh kernel and select genes from the wrong interaction set.
usm_lr_genes = set(usm_comm['ligand']) | set(usm_comm['receptor'])
sc.pl.dotplot(adata,
              [x for x in usm if x in usm_lr_genes],
              groupby='Cell Subtype')

In [None]:
# Interactions in which vascular smooth muscle is the sender or the receiver.
involves_vsm = (df['source'] == 'Vascular smooth muscle') | (df['target'] == 'Vascular smooth muscle')
vsm_comm = df.loc[involves_vsm]

# Restrict to interactions whose ligand or receptor is in the VSM gene list,
# then order the rows by sender and receiver.
uses_vsm_gene = vsm_comm['receptor'].isin(vsm) | vsm_comm['ligand'].isin(vsm)
vsm_comm = vsm_comm.loc[uses_vsm_gene].sort_values(['source', 'target'])

# Plot the VSM genes that appear as a ligand or receptor in those interactions.
vsm_lr_genes = vsm_comm['ligand'].unique().tolist() + vsm_comm['receptor'].unique().tolist()
sc.pl.dotplot(
    adata,
    [gene for gene in vsm if gene in vsm_lr_genes],
    groupby='Cell Subtype',
)

In [None]:
# Show the VSM interactions ordered by the cellphone_pvals column, smallest first.
vsm_comm.sort_values('cellphone_pvals')

In [None]:
def _is_notch_receptor(receptor):
    """Return True when the receptor name begins with 'NOTCH'."""
    return receptor.startswith('NOTCH')


# Dot colour encodes lr_means; dot size encodes cellphone_pvals, inverted so
# that small p-values are drawn as large dots. The only row filter is on the
# receptor name (prefix 'NOTCH'); no p-value cutoff is applied in this call.
li.pl.dotplot(
    adata=adata,
    uns_key='cpdb_res',  # table to plot; the default key is 'liana_res'
    source_labels=['Macrovascular', 'Capillary'],
    target_labels=['Uterine smooth muscle','Vascular smooth muscle'],
    colour='lr_means',
    size='cellphone_pvals',
    inverse_size=True,
    filterby='receptor',
    filter_lambda=_is_notch_receptor,
    figure_size=(8, 7),
)

In [None]:
def _is_cal_receptor(receptor):
    """Return True when the receptor name begins with 'CAL'."""
    return receptor.startswith('CAL')


# Every cell subtype as sender, uterine smooth muscle as the only receiver.
# Dot colour encodes lr_means; dot size encodes cellphone_pvals, inverted so
# that small p-values are drawn as large dots. The only row filter is on the
# receptor name (prefix 'CAL'); no p-value cutoff is applied in this call.
li.pl.dotplot(
    adata=adata,
    uns_key='cpdb_res',  # table to plot; the default key is 'liana_res'
    source_labels=adata.obs['Cell Subtype'].cat.categories,
    target_labels=['Uterine smooth muscle'],
    colour='lr_means',
    size='cellphone_pvals',
    inverse_size=True,
    filterby='receptor',
    filter_lambda=_is_cal_receptor,
    figure_size=(8, 7),
)

In [None]:
# Display the AnnData summary to check what has been stored on the object so far.
adata

In [None]:
df1 = cellchat(adata, groupby='Cell Subtype', expr_prop=0.1, resource_name='cellchatdb', verbose=True, key_added='cpdb_res')


In [None]:
adata.uns['cpdb_res']

In [None]:
adata

In [None]:
sc.pl.umap(adata,color='COL18A1')

In [None]:
df = adata.uns['cpdb_res'].copy()
df.loc[((df['source']=='Vascular smooth muscle')&
      (df['target']=='Macrovascular')&
       (df['cellchat_pvals']<0.05))].sort_values('lr_probs')

In [None]:
def _lr_mean_above_two(lr_mean):
    """Return True when the lr_means value is greater than 2."""
    return lr_mean > 2


# Vascular smooth muscle as the only sender, every cell subtype as receiver.
# Reads the lr_means and cellphone_pvals columns of adata.uns['cpdb_res'], so
# that key must hold CellPhoneDB output when this cell runs. Dot size is
# inverted so that small p-values are drawn as large dots. Rows are kept when
# lr_means > 2; no p-value cutoff is applied in this call.
li.pl.dotplot(
    adata=adata,
    uns_key='cpdb_res',  # table to plot; the default key is 'liana_res'
    source_labels=['Vascular smooth muscle'],
    target_labels=adata.obs['Cell Subtype'].cat.categories,
    colour='lr_means',
    size='cellphone_pvals',
    inverse_size=True,
    filterby='lr_means',
    filter_lambda=_lr_mean_above_two,
    figure_size=(8, 7),
)