In [None]:
# fine annotate lineage 
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import os,sys
import anndata
import warnings 
warnings.filterwarnings('ignore')

In [None]:
# Report package versions, then configure scanpy / matplotlib figure defaults.
sc.logging.print_header()
import matplotlib.pyplot as plt

figure_style = dict(
    dpi=200,
    dpi_save=300,
    format='pdf',
    fontsize=5,
    color_map='RdYlBu_r',
)
sc.settings.set_figure_params(**figure_style)
# Set after set_figure_params so this figure size is the one left in rcParams.
plt.rcParams["figure.figsize"] = [6, 6]
# sns.set_palette('colorblind')

# integrate spatial object 

In [None]:
# Spatial AnnData object; later cells read uns['mod'] and
# obsm['q05_cell_abundance_w_sf'] from it.
# NOTE(review): the path literal is empty in this copy of the notebook -- set it before running.
spatial_h5ad_path = ''
adata_paed = sc.read_h5ad(spatial_h5ad_path)

In [None]:
# Copy the 5% quantile of the cell abundance ("at least this amount is present",
# i.e. the confident estimate) into adata.obs, one column per factor name,
# so the values can be plotted under readable names.
factor_names = adata_paed.uns['mod']['factor_names']
q05_abundance = adata_paed.obsm['q05_cell_abundance_w_sf']
adata_paed.obs[factor_names] = q05_abundance

In [None]:
# Per-spot summaries pulled from the posterior stored under uns['mod'].
model_posterior = adata_paed.uns["mod"]
# Sum of the posterior-mean w_sf values along axis 1, flattened to one value per spot.
summed_abundance = model_posterior["post_sample_means"]["w_sf"].sum(1).flatten()
# 5% posterior quantile of detection_y_s.
detection_q05 = model_posterior["post_sample_q05"]["detection_y_s"]

adata_paed.obs['tot_cell_abundance'] = summed_abundance
adata_paed.obs['detection_sensit'] = detection_q05

In [None]:
# Spot-level QC: keep spots whose total cell abundance exceeds 30 AND which
# have more than 1000 detected genes (both comparisons are strict).
spot_qc = adata_paed.obs
enough_cells = spot_qc['tot_cell_abundance'] > 30
enough_genes = spot_qc['n_genes_by_counts'] > 1000
adata_paed_filt = adata_paed[enough_cells & enough_genes, :].copy()

In [None]:
## Remove medulla-less lobules with high confidence.
# A lobule (within one sample) is kept only when it has at least
# MIN_SPOTS_PER_REGION spots annotated 'Medulla' AND at least that many
# annotated 'Cortex'. Spots without a sample or lobule label are never kept.
MIN_SPOTS_PER_REGION = 5

spot_obs = adata_paed_filt.obs
is_medulla = (spot_obs['annotations_level_0'] == 'Medulla').to_numpy()
is_cortex = (spot_obs['annotations_level_0'] == 'Cortex').to_numpy()

# Build the flag as a plain boolean mask and assign it to obs once at the end.
# The previous chained assignment (obs['full_lobule'][idx] = 1) writes into a
# temporary Series and is silently lost under pandas Copy-on-Write.
in_full_lobule = np.zeros(adata_paed_filt.n_obs, dtype=bool)

for sample_id in spot_obs['SampleID'].cat.categories:
    in_sample = (spot_obs['SampleID'] == sample_id).to_numpy()
    # Lobule labels are only meaningful within a sample, so enumerate the
    # labels that actually occur in this sample.
    sample_lobule_ids = spot_obs.loc[in_sample, 'annotations_lobules_0'].dropna().unique()
    for lobule_id in sample_lobule_ids:
        in_lobule = in_sample & (spot_obs['annotations_lobules_0'] == lobule_id).to_numpy()
        n_medulla = int(is_medulla[in_lobule].sum())
        n_cortex = int(is_cortex[in_lobule].sum())
        if n_cortex >= MIN_SPOTS_PER_REGION and n_medulla >= MIN_SPOTS_PER_REGION:
            in_full_lobule[in_lobule] = True

adata_paed_filt.obs['full_lobule'] = pd.Series(
    in_full_lobule.astype(int), index=spot_obs.index
).astype('category')
# Show how many spots are kept (1) vs. dropped (0) before filtering.
print(adata_paed_filt.obs['full_lobule'].value_counts())
adata_paed_filt = adata_paed_filt[adata_paed_filt.obs['full_lobule'] == 1].copy()


In [None]:
# Build an AnnData whose "genes" are the abundance columns named in
# uns['mod']['factor_names'] (copied from obs); the remaining obs columns stay
# as spot metadata and obsm is carried over.
celltypes = adata_paed_filt.uns['mod']['factor_names']
abundance_table = adata_paed_filt.obs[celltypes]
spot_metadata = adata_paed_filt.obs.drop(columns=celltypes)
cellab_paed = sc.AnnData(
    abundance_table,
    obs=spot_metadata,
    obsm=adata_paed_filt.obsm,
)
# Rescale every spot so its abundances sum to 100.
sc.pp.normalize_total(cellab_paed, target_sum=100)
cellab_paed.var_names

In [None]:
# version 3 hybrid
# For every column (cell type) of cellab_paed, compute the abundance-weighted
# mean of the per-spot 'cma_v2' value over the spots whose abundance of that
# cell type is strictly above its 99th percentile.
#   cma_cluster[c]     -> weighted mean of cma_v2
#   cma_cluster_exp[c] -> 2 ** (cma_cluster[c] - 0.16) - 1
import scipy

TOP_ABUNDANCE_QUANTILE = 0.99
CMA_OBS_COLUMN = 'cma_v2'
# NOTE(review): the 0.16 shift is a hard-coded constant from the original
# analysis; its derivation is not visible in this notebook.
CMA_EXP_OFFSET = 0.16

# Fail loudly if the column is absent: positional lookup of a missing column
# yields empty arrays instead of an error.
if CMA_OBS_COLUMN not in cellab_paed.obs.columns:
    raise KeyError(f"cellab_paed.obs has no '{CMA_OBS_COLUMN}' column")
# Loop-invariant: fetch the per-spot values once.
cma_per_spot = cellab_paed.obs[CMA_OBS_COLUMN].to_numpy()

cma_cluster_exp = {}
cma_cluster = {}
# Columns are taken by position, i.e. this assumes var_names are unique.
for col_idx, cell_type in enumerate(cellab_paed.var_names):
    abundance = cellab_paed.X[:, col_idx]
    # Spots with the highest abundance of this cell type.
    threshold = np.quantile(abundance, TOP_ABUNDANCE_QUANTILE)
    top_spots = abundance > threshold
    weights = abundance[top_spots]
    total_weight = np.sum(weights)
    if total_weight > 0:
        cma_cluster[cell_type] = np.dot(weights, cma_per_spot[top_spots]) / total_weight
    else:
        # No spot lies strictly above the quantile (e.g. all values tied):
        # the weighted mean is undefined.
        cma_cluster[cell_type] = np.nan
    cma_cluster_exp[cell_type] = np.exp2(cma_cluster[cell_type] - CMA_EXP_OFFSET) - 1
  

In [None]:
# Histogram of the per-cluster values, one figure per dictionary, both on the
# same fixed x-range.
HIST_XLIM = (-0.65, 0.65)
for value_label, per_cluster in (
    ('mean_hyper_cma', cma_cluster),
    ('mean_hyper_cma_exp', cma_cluster_exp),
):
    # Materialise the dict view as a float array; drop non-finite entries,
    # which would otherwise make the histogram range undefined and raise.
    values = np.asarray(list(per_cluster.values()), dtype=float)
    values = values[np.isfinite(values)]
    fig, ax = plt.subplots()
    ax.hist(values, bins=100)
    ax.set_xlim(HIST_XLIM)
    ax.set_xlabel(value_label)
    ax.set_ylabel('number of clusters')
    plt.show()

In [None]:
# Map clusters back to cells: load the single-cell object, whose
# obs['hyper_leiden'] column is used as the mapping key in the next cell.
# NOTE(review): the path literal is empty in this copy of the notebook -- set it before running.
cells_h5ad_path = ''
adata_paed_cells = sc.read_h5ad(cells_h5ad_path)
# adata_paed_cells

In [None]:
# Attach the per-cluster values to each cell through its 'hyper_leiden' label,
# stored as float32. Column order matches the original: exp first, then raw.
hyper_labels = adata_paed_cells.obs['hyper_leiden']
for obs_column, per_cluster_value in (
    ('mean_hyper_cma_exp', cma_cluster_exp),
    ('mean_hyper_cma', cma_cluster),
):
    adata_paed_cells.obs[obs_column] = hyper_labels.map(per_cluster_value).astype('float32')


In [None]:
# Export the per-cell metadata table (including the mapped columns) to CSV.
# NOTE(review): hard-coded absolute path; adjust for your environment.
hyper_mapping_csv = '/nfs/team205/ny1/ThymusSpatialAtlas/new_Figure6/cite_paed_hyper_mapping.csv'
cell_metadata = adata_paed_cells.obs
cell_metadata.to_csv(hyper_mapping_csv)