<!--  -->
# RNA Velocity & Pseudotime
Adapted from Michael Sterr

2024-07-10


# Setup


In [None]:
# General
import scipy as sci
import numpy as np
import pandas as pd
import logging
import time
import pickle
from itertools import chain
import h5py
import scipy.sparse as sparse
import anndata as ad
import gc
import scipy.stats as stats
import torch

# Plotting
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.colors as mcolors
from matplotlib import rcParams
from matplotlib.pyplot import rc_context
from matplotlib import cm
import seaborn as sb

# Analysis
import scanpy as sc
import scanpy.external as sce
import scvi

import scvelo as scv
import cellrank as cr

In [None]:
# Warnings
import warnings
warnings.filterwarnings('ignore') #(action='once')

## setup matplotlib

In [None]:
# Settings
# Configures scanpy output/cache directories and logging verbosity, then
# prints version information for reproducibility.

## Directory
#base_dir = '/mnt/md0/Projects/Miscellaneous/Helene/'
# base_dir must end with a path separator: the sub-directories below are
# appended by plain string concatenation.
base_dir = '/mnt/hdd/Notebooks/Gut_project/'
sc.settings.figdir = base_dir + 'Figures'
sc.settings.cachedir = base_dir + 'Cache'

## Scanpy settings
sc.settings.verbosity = 3
sc.logging.print_header()
sc.logging.print_versions()

# Suppress all warnings (this repeats the filter already installed in the
# "Warnings" cell, so it only matters if that cell was not run).
import warnings
warnings.filterwarnings('ignore')

In [None]:
%run utils.ipynb

In [None]:
mymap = load_RdOrYl_cmap_settings(transparent=False)

# Functions

In [None]:
def plot_composition(adata,
                     x_key=None,
                     y_key=None,
                     x_labels=None,
                     y_labels=None,
                     y_colors=None,
                     width=0.85,       # the width of the bars: can also be len(x) sequence
                     x_rotation=0,
                     y_lim_offset=2.5,
                     x_lim_offset=0.45,
                     figsize=(6, 4),
                     save=None):
    """Plot the percentage composition of ``y_key`` within each ``x_key`` group.

    For every label in ``x_labels`` the cells with ``adata.obs[x_key] == label``
    are selected and the share (in percent) of each ``y_labels`` entry among
    them is drawn as one stacked bar.

    Parameters
    ----------
    adata
        Object exposing ``.obs`` (a pandas DataFrame) and ``.uns`` (a mapping).
    x_key, y_key
        Column names in ``adata.obs`` used for grouping (x) and composition (y).
    x_labels, y_labels
        Labels to plot, in order. Default to the categories of the respective
        ``obs`` column (which must then be categorical).
    y_colors
        One colour per entry of ``y_labels``. Defaults to
        ``adata.uns[y_key + '_colors']``.
    width
        Bar width passed to ``DataFrame.plot``.
    x_rotation
        Rotation of the x tick labels.
    y_lim_offset, x_lim_offset
        Padding applied to the y range ``[0, 100]`` and to the x range.
    figsize
        Figure size used while plotting.
    save
        If not ``None``, path the figure is written to.

    Returns
    -------
    pandas.DataFrame
        Column ``'x_labels'`` plus one percentage column per ``y_labels`` entry.
        A ``y`` label absent from a group contributes 0; a group without any
        cells yields NaN for all its percentages.
    """
    with rc_context({'figure.figsize': figsize}):
        obs = adata.obs

        # `is None` instead of `== None`: the latter is evaluated element-wise
        # (and is ambiguous in a boolean context) for array-like arguments.
        if x_labels is None:
            x_labels = list(obs[x_key].cat.categories)
        if y_labels is None:
            y_labels = list(obs[y_key].cat.categories)
        if y_colors is None:
            y_colors = list(adata.uns[y_key + '_colors'])

        # Count each group once and derive all percentages from those counts.
        percentages = {y_label: [] for y_label in y_labels}
        for x_label in x_labels:
            counts = obs.loc[obs[x_key] == x_label, y_key].value_counts()
            total = counts.sum()
            for y_label in percentages:
                if total > 0:
                    share = counts.get(y_label, 0) / total * 100
                else:
                    share = float('nan')  # empty group: composition undefined
                percentages[y_label].append(share)

        df = pd.DataFrame({'x_labels': x_labels, **percentages})

        ax = df.plot(x='x_labels', kind='bar', stacked=True, width=width,
                     edgecolor='0', linewidth=0.5, color=y_colors)

        ax.set_ylabel('%')
        ax.set_xlabel('')
        ax.set_title(y_key + ' by ' + x_key)
        ax.set_xticklabels(labels=x_labels, rotation=x_rotation)
        ax.legend(bbox_to_anchor=(1, .5), loc='center left', edgecolor='1')

        ax.set_ylim([-y_lim_offset, 100 + y_lim_offset])
        ax.set_xlim([-1 + x_lim_offset, len(x_labels) - x_lim_offset])

        # Save BEFORE showing: once plt.show() has displayed the figure, a
        # later plt.savefig() typically writes an empty canvas. Saving via the
        # axes' own figure also avoids depending on the "current figure".
        # bbox_inches='tight' keeps the legend, which sits outside the axes.
        if save is not None:
            ax.get_figure().savefig(save, bbox_inches='tight')

        plt.show()

    return df

# Read AnnData

In [None]:
adata = sc.read_h5ad('adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated.h5ad')

In [None]:
adata

In [None]:
adata.uns['cell_type_annotation_lv1' + '_colors'] = [    
    '#d0d0d0',  # ISC 
    '#243dae',  # D-cell (Sst+) 
    '#aa9dce',  # EC    
    '#594495',  # EC   
    '#725dae',  # EC 2   
    '#46a8d9',  # EEC (peptide/immature)
    '#d0d1e6',  # EC prog.    
    '#85c6e6',  # EEC prog. (late/Peptide) 
    '#d9edf7',  # EEC prog
    '#bb4353',  # Enterocyte 
    '#c67a84',  # early Enterocyte  
    '#dd894e',  # Goblet 
    '#fec44f',  # Goblet prog.  
    '#e1f3bf',  # Goblet/EEC prog.  
    '#368cbf',  # K-cell (Gip+) 
    '#5a72dd',  # L/I-cell (Glp1+/Cck+)  
    '#238b45',  # Paneth 
    '#7BB98F',  # Paneth prog.  
    '#e7298a',  # Tuft 
    '#eca4d0',  # Tuft prog. 
    '#df65b0',  # Tuft prog. 2 
    '#339a98',  # X-cell (Ghrl+)
    '#eebcbc',  # TA  
    '#fee0d2',  # TA prox  
    '#ac9470',  # unknown0 
    ]


In [None]:
adata.uns['cell_type_annotation_lv1' + '_colors'] =['#d0d0d0',  # ISC
 '#eebcbc',  # TA
 '#fee0d2',  # TA prox
 '#c67a84',  # early Enterocyte
 '#bb4353',  # Enterocyte
 '#eca4d0',  # Tuft prog.
 '#df65b0',  # Tuft prog. 2
 '#e7298a',  # Tuft
 '#e1f3bf',  # Goblet/EEC prog.
 '#d9edf7',  # EEC prog
 '#85c6e6',  # EEC prog. (late/Peptide)
 '#46a8d9',  # EEC (peptide/immature)
 '#339a98',  # X-cell (Ghrl+)
 '#368cbf',  # K-cell (Gip+)
 '#5a72dd',  # L/I-cell (Glp1+/Cck+)
 '#243dae',  # D-cell (Sst+)
 '#d0d1e6',  # EC prog.
 '#aa9dce',  # EC (imm.)
 '#594495',  # EC (mature)
 '#725dae',  # EC 2
 '#fec44f',  # Goblet prog.
 '#dd894e',  # Goblet
 '#7BB98F',  # Paneth prog.
 '#238b45',  # Paneth
 '#ac9470'   # unknown0
]

In [None]:
annotation_key = 'cell_type_annotation_lv1'

In [None]:
adata.obs[annotation_key] = adata.obs[annotation_key].cat.reorder_categories(['ISC', 'TA', 'TA (prox.))', 'early Enterocyte', 'Enterocyte', 
'Tuft prog.', 'Tuft prog. 2', 'Tuft', 
'Goblet/EEC prog. (early)', 'EEC prog. (mid)', 'EEC prog. (late/Peptide)', 'EEC (Peptide/immature)', 'X-cell (Ghrl+)',  'K-cell (Gip+)', 'L/I-cell (Glp1+/Cck+)', 'D-cell (Sst+)',
'EC prog. (late)', 'EC (immature)', 'EC (mature)','EC 2', 
 'Goblet prog. (late)', 'Goblet',  'Paneth prog.', 'Paneth', 'unknown0' ])

In [None]:
sc.pl.umap(adata, color=['sample','cell_type_annotation_lv1'], size=7, add_outline=True, alpha=0.7, outline_width=(0.3, 0.0), ncols=4, wspace=0.85,color_map=mymap)

In [None]:
adata.X = adata.layers['log_dca_counts'].copy()

### read corrected anndata with metadata update and finer anno

In [None]:
adata= sc.read_h5ad('adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_updated.h5ad')

In [None]:
adata

In [None]:
adata.uns['cell_type_annotation_lv1' + '_colors'] =[
    '#d0d0d0',  # ISC
 '#eebcbc',  # TA
 '#fee0d2',  # TA prox
 '#c67a84',  # early Enterocyte
 '#bb4353',  # Enterocyte
 '#eca4d0',  # Tuft prog.
 '#df65b0',  # Tuft prog. 2
 '#e7298a',  # Tuft
 '#f9e1f4',  # Goblet/EEC prog.
 '#d9edf7',  # EEC prog
 '#85c6e6',  # EEC prog. (late/Peptide)
 '#46a8d9',  # EEC (peptide/immature)
 '#339a98',  # X-cell (Ghrl+)
 '#368cbf',  # K-cell (Gip+)
 '#5a72dd',  # L/I-cell (Glp1+/Cck+)
 '#243dae',  # D-cell (Sst+)
 '#d0d1e6',  # EC prog.
 '#aa9dce',  # EC (imm.)
 '#594495',  # EC (mature)
 '#725dae',  # EC 2
 '#fec44f',  # Goblet prog.
 '#dd894e',  # Goblet
 '#cedf76',   #Goblet-Paneth-like 
 '#7BB98F',   #Goblet-Paneth-like (cycling) 
 '#d5f4c5',  # Paneth prog.
 '#238b45',  # Paneth
]

In [None]:
annotation_key = 'cell_type_annotation_lv1'

In [None]:
adata.obs[annotation_key] = adata.obs[annotation_key].astype('category')

In [None]:
adata.obs[annotation_key] = adata.obs[annotation_key].cat.reorder_categories(['ISC', 'TA', 'TA (prox.))', 'early Enterocyte', 'Enterocyte', 
'Tuft prog.', 'Tuft prog. 2', 'Tuft', 
'Goblet/EEC prog. (early)', 'EEC prog. (mid)', 'EEC prog. (late/Peptide)', 'EEC (Peptide/immature)', 'X-cell (Ghrl+)',  'K-cell (Gip+)', 'L/I-cell (Glp1+/Cck+)', 'D-cell (Sst+)',
'EC prog. (late)', 'EC (immature)', 'EC (mature)','EC 2', 
 'Goblet prog. (late)', 'Goblet', 'Goblet-Paneth-like', 'Goblet-Paneth-like(cycling)', 'Paneth prog.', 'Paneth'])#, 'unknown0' ])

In [None]:
sc.pl.umap(adata, color=['pretty name','cell_type_annotation_lv1'], size=7, add_outline=True, alpha=0.7, outline_width=(0.3, 0.0), ncols=4, wspace=1,color_map=mymap)

In [None]:
adata.X = adata.layers['log_dca_counts'].copy()

# ISC Score

In [None]:
sc.tl.score_genes(adata, gene_list=['Lgr5','Olfm4','Slc12a2','Clca3b','Cps1'], score_name='ISC_score', use_raw=False)

In [None]:
sc.pl.umap(adata, color=['ISC_score'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo', title='ISC score', save = 'umap_isc_score.png')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata, use_raw=False, keys=['ISC_score'], groupby='cell_type_annotation_lv1',rotation=90,save = 'violin_isc_score.png')

# CellRank - Gene Expression

## Pseudotime

### Diffusion Pseudotime

In [None]:
sc.tl.diffmap(adata, n_comps=20)

In [None]:
sc.pl.diffmap(adata, components=['0,1','1,2','3,4','5,6','7,8','9,10','11,12','13,14','15,16','17,18','19,20'], color='ISC_score', color_map='turbo')

In [None]:
sc.pl.diffmap(adata, components=['0,1','1,2','3,4','5,6','7,8','9,10','11,12','13,14','15,16','17,18','19,20'], color='cell_type_annotation_lv1', wspace=1.8)

In [None]:
sc.pl.diffmap(adata, components=['2,12'], color=['phase','ISC_score','cell_type_annotation_lv1'])

In [None]:
# Root cell for the first DPT run: candidates are cells annotated 'ISC' with a
# positive value in diffusion-map column 1; among them pick the one with the
# smallest value in column 11.
diffmap_coords = adata.obsm['X_diffmap']
isc_cells = np.isin(adata.obs['cell_type_annotation_lv1'], 'ISC')
stem_mask = isc_cells & (diffmap_coords[:, 1] > 0)
# Despite its name, max_stem_id holds the argmin position within the masked subset.
max_stem_id = diffmap_coords[stem_mask, 11].argmin()
# Translate the position inside the subset back to a row index of adata.
root_id = np.flatnonzero(stem_mask)[max_stem_id]
adata.uns['iroot'] = root_id

In [None]:
scv.pl.scatter(
    adata,
    basis='diffmap',
    c=[root_id, 'phase', 'cell_type_annotation_lv1'],
    legend_loc='right',
    components=['1,11'], wspace=0.75
)

scv.pl.scatter(
    adata,
    basis='umap',
    c=[root_id, 'phase', 'cell_type_annotation_lv1'],
    legend_loc='right',
    components=['1, 0'], wspace=0.75
)


In [None]:
adata.obs_names[root_id]

In [None]:
sc.tl.dpt(adata, n_dcs=20)

In [None]:
adata.obs['dpt_pseudotime_g1'] = adata.obs['dpt_pseudotime'].copy()

In [None]:
sc.pl.umap(adata, color=['dpt_pseudotime_g1'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata, use_raw=False, keys=['dpt_pseudotime_g1'], groupby='cell_type_annotation_lv1', rotation=90)

In [None]:
sb.displot(adata.obsm['X_diffmap'][:,11])

In [None]:
# Root cell for the second DPT run: same candidate set as before ('ISC' cells
# with a positive value in diffusion-map column 1), but now pick the candidate
# with the largest value in column 11.
diffmap_coords = adata.obsm['X_diffmap']
isc_cells = np.isin(adata.obs['cell_type_annotation_lv1'], 'ISC')
stem_mask = isc_cells & (diffmap_coords[:, 1] > 0)
max_stem_id = diffmap_coords[stem_mask, 11].argmax()  # position within the masked subset
# Translate the position inside the subset back to a row index of adata.
root_id = np.flatnonzero(stem_mask)[max_stem_id]
adata.uns['iroot'] = root_id

In [None]:
scv.pl.scatter(
    adata,
    basis='diffmap',
    c=[root_id, 'phase', 'cell_type_annotation_lv1'],
    legend_loc='right',
    components=['1,11'], wspace=0.75
)

scv.pl.scatter(
    adata,
    basis='umap',
    c=[root_id, 'phase', 'cell_type_annotation_lv1'],
    legend_loc='right',
    components=['1, 0'], wspace=0.75
)


In [None]:
adata.obs_names[root_id]

In [None]:
sc.tl.dpt(adata, n_dcs=20)

In [None]:
adata.obs['dpt_pseudotime_g2m'] = adata.obs['dpt_pseudotime'].copy()

In [None]:
sc.pl.umap(adata, color=['dpt_pseudotime_g2m'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata, use_raw=False, keys=['dpt_pseudotime_g2m'], groupby='cell_type_annotation_lv1', rotation=90)

In [None]:
# Consensus diffusion pseudotime: average the two runs per cell, then min-max
# scale to [0, 1].
# Series.min()/.max() are used instead of the builtin min()/max(): they are
# vectorised and skip NaN, whereas the builtins iterate element by element and
# return an order-dependent result when NaN is present.
dpt_mean = adata.obs[['dpt_pseudotime_g1', 'dpt_pseudotime_g2m']].mean(axis=1)
dpt_mean = dpt_mean - dpt_mean.min()
adata.obs['dpt_pseudotime'] = dpt_mean / dpt_mean.max()

In [None]:
sc.pl.umap(adata, color=['dpt_pseudotime','dpt_pseudotime_g1','dpt_pseudotime_g2m'], title=['diffusion pseudotime','diffusion pseudotime g1','diffusion pseudotime g2m'],size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo', save = 'umaps_dpt.png')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata, use_raw=False, keys=['dpt_pseudotime','dpt_pseudotime_g1','dpt_pseudotime_g2m'], groupby='cell_type_annotation_lv1', rotation=90, save = 'violins_dpt.png')

In [None]:
dptk = cr.kernels.PseudotimeKernel(adata, time_key='dpt_pseudotime')
dptk.compute_transition_matrix(n_jobs=-1)
dptk.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none',save = 'transition_dpt.png')
#dptk.write('adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_dpt_pseudotime.plk', write_adata=False)

In [None]:
dptk_g1 = cr.kernels.PseudotimeKernel(adata, time_key='dpt_pseudotime_g1')
dptk_g1.compute_transition_matrix(n_jobs=-1)
dptk_g1.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none',)
#dptk_g1.write(fname='adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_dpt_pseudotime_g1.plk', write_adata=False)

In [None]:
dptk_g2m = cr.kernels.PseudotimeKernel(adata, time_key='dpt_pseudotime_g2m')
dptk_g2m.compute_transition_matrix(n_jobs=-1)
dptk_g2m.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none')
#dptk_g2m.write(fname='adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_dpt_pseudotime_g2m.plk', write_adata=False)

In [None]:
gc.collect()

### Palantir

In [None]:
sce.tl.palantir(adata, n_components=20, use_adjacency_matrix=True, distances_key='distances')

In [None]:
sc.set_figure_params(scanpy=True, frameon=False, vector_friendly=False ,transparent=True, dpi=150, dpi_save=300)

In [None]:
sc.pl.embedding(adata, basis='X_palantir_diff_comp', components=['0,1','1,2','3,4','5,6','7,8','9,10','11,12','13,14','15,16','17,18','19,20'], color='ISC_score')

In [None]:
sc.pl.embedding(adata, basis='X_palantir_diff_comp', components=['0,1','1,2','3,4','5,6','7,8','9,10','11,12','13,14','15,16','17,18','19,20'],wspace=0.5, ncols=4,color='initial_cell_type')

In [None]:
sc.pl.embedding(adata, basis='X_palantir_diff_comp', components=['10,16'], color=['initial_cell_type','ISC_score','phase'])

In [None]:
# Root cell for the first Palantir run: candidates are cells annotated 'ISC'
# whose value in diffusion-component column 15 exceeds -1; among them pick the
# one with the largest value in column 9.
palantir_comps = adata.obsm['X_palantir_diff_comp']
isc_cells = np.isin(adata.obs['cell_type_annotation_lv1'], 'ISC')
stem_mask = (palantir_comps[:, 15] > -1) & isc_cells
max_stem_id = np.argmax(palantir_comps[stem_mask, 9])  # position within the masked subset
# Translate the position inside the subset back to a row index of adata.
root_id = np.flatnonzero(stem_mask)[max_stem_id]
adata.uns['iroot'] = root_id

In [None]:
scv.pl.scatter(
    adata,
    basis="palantir_diff_comp",
    c=["cell_type_annotation_lv1", root_id],
    legend_loc="right",
    components=["10, 16"], wspace=1.5
)

scv.pl.scatter(
    adata,
    basis='umap',
    c=['cell_type_annotation_lv1', root_id],
    legend_loc='right',
    components=['1, 0'], wspace=1.5
)

In [None]:
adata.obs_names[root_id]

In [None]:
palantier = sce.tl.palantir_results(adata, early_cell=adata.obs_names[root_id])

In [None]:
adata.obs['palantir_pseudotime_g1'] = palantier.pseudotime

In [None]:
sc.pl.umap(adata, color=['palantir_pseudotime_g1'], size=7, add_outline=True, alpha=0.7, outline_width=(0.3, 0.0), ncols=4, color_map='turbo')

In [None]:
# Root cell for the second Palantir run: candidates are 'ISC' cells with a
# positive value in diffusion-component column 9; the candidate with the
# largest value in column 15 becomes the root.
# NOTE(review): this mask reads 'initial_cell_type', whereas the first Palantir
# root selection reads 'cell_type_annotation_lv1' - confirm this is intended.
stem_mask = (adata.obsm['X_palantir_diff_comp'][:,9] > 0) & np.isin(adata.obs['initial_cell_type'], 'ISC')
max_stem_id = np.argmax(adata.obsm['X_palantir_diff_comp'][stem_mask,15])
# Map the position inside the masked subset back to a row index of adata.
root_id = np.arange(len(stem_mask))[stem_mask][max_stem_id]
adata.uns['iroot'] = root_id

In [None]:
scv.pl.scatter(
    adata,
    basis="palantir_diff_comp",
    c=["initial_cell_type", root_id],
    legend_loc="right",
    components=["10, 16"], wspace=0.75
)

scv.pl.scatter(
    adata,
    basis='umap',
    c=['cell_type_annotation_lv1', root_id],
    legend_loc='right',
    components=['1, 0'], wspace=1.2
)

In [None]:
adata.obs_names[root_id]

In [None]:
palantier = sce.tl.palantir_results(adata, early_cell=adata.obs_names[root_id])

In [None]:
adata.obs['palantir_pseudotime_g2m'] = palantier.pseudotime

In [None]:
sc.pl.umap(adata, color=['palantir_pseudotime_g2m'], size=7, add_outline=True, alpha=0.7, outline_width=(0.3, 0.0), ncols=1, color_map='turbo')

In [None]:
# Consensus Palantir pseudotime: average the two runs per cell, then min-max
# scale to [0, 1].
# Series.min()/.max() are used instead of the builtin min()/max(): they are
# vectorised and skip NaN, whereas the builtins iterate element by element and
# return an order-dependent result when NaN is present.
palantir_mean = adata.obs[['palantir_pseudotime_g1', 'palantir_pseudotime_g2m']].mean(axis=1)
palantir_mean = palantir_mean - palantir_mean.min()
adata.obs['palantir_pseudotime'] = palantir_mean / palantir_mean.max()

In [None]:
sc.pl.umap(adata, color=['palantir_pseudotime','palantir_pseudotime_g1','palantir_pseudotime_g2m'], title=['palantir pseudotime','palantir pseudotime g1','palantir pseudotime g2m'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, color_map='turbo', save = 'umaps_palantir.png')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata, use_raw=False, keys=['palantir_pseudotime','palantir_pseudotime_g1','palantir_pseudotime_g2m'], groupby='cell_type_annotation_lv1', rotation=90)

In [None]:
pk_g1 = cr.kernels.PseudotimeKernel(adata, time_key='palantir_pseudotime_g1')
pk_g1.compute_transition_matrix(n_jobs=-1)
pk_g1.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none')
pk_g1.write('adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_palantir_pseudotime_g1.plk', write_adata=False)

In [None]:
pk_g2m = cr.kernels.PseudotimeKernel(adata, time_key='palantir_pseudotime_g2m')
pk_g2m.compute_transition_matrix(n_jobs=-1)
pk_g2m.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none')
pk_g2m.write(fname='adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_palantir_pseudotime_g2m.plk', write_adata=False)

In [None]:
pk = cr.kernels.PseudotimeKernel(adata, time_key='palantir_pseudotime')
pk.compute_transition_matrix(n_jobs=-1)
pk.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none', save= 'umap_transoition_palantir.png')
pk.write(fname='adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_palantir_pseudotime.plk', write_adata=False)

### Combine Kernels

In [None]:
pseudotime_kernel = 1/4 * dptk_g1 + 1/4 * dptk_g2m + 1/4 * pk_g1 + 1/4 * pk_g2m

In [None]:
pseudotime_kernel.write(fname='adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_pseudotime_pseudotime.plk', write_adata=True)

In [None]:
# Load the kernel from the file
# NOTE: pickle.load can execute arbitrary code embedded in the file - only
# load kernel files that were written by this notebook / a trusted source.
with open('adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_pseudotime_pseudotime.plk', 'rb') as file:
    pseudotime_kernel = pickle.load(file)

In [None]:
pseudotime_kernel

In [None]:
adata = pseudotime_kernel.adata.copy()

In [None]:
with rc_context({'figure.figsize':(10,7)}):
    sc.pl.violin(
        adata,
        keys=["dpt_pseudotime", "palantir_pseudotime"],
        groupby="cell_type_annotation_lv1",
        rotation=90,
    )

In [None]:
with rc_context({'figure.figsize':(8,8)}):
    sc.pl.umap(adata, color=['palantir_pseudotime','dpt_pseudotime'], size=7, add_outline=True, alpha=0.7, outline_width=(0.3, 0.0), ncols=2, color_map='turbo', show = False, title=['Palantir pseudotime', 'Diffusion pseudotime'])
    #sc.pl.umap(adata, color=['dpt_pseudotime'], size=7, add_outline=True, alpha=0.7, outline_width=(0.3, 0.0), ncols=2, color_map='turbo', show = False)
    plt.show()


In [None]:
# Combined pseudotime: per-cell mean of the Palantir and diffusion consensus
# pseudotimes, min-max scaled to [0, 1].
# (Other columns noted as candidates in the original cell: 'sct_pseudotime',
# 'ct_pseudotime', 'sctour_pseudotime'.)
# Series.min()/.max() are used instead of the builtin min()/max(): they are
# vectorised and skip NaN, whereas the builtins iterate element by element and
# return an order-dependent result when NaN is present.
combined = adata.obs[['palantir_pseudotime', 'dpt_pseudotime']].mean(axis=1)
combined = combined - combined.min()
adata.obs['pseudotime'] = combined / combined.max()

In [None]:
with rc_context({'figure.figsize':(8,8)}):
    sc.pl.umap(adata, color=['pseudotime'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata, use_raw=False, keys=['pseudotime'], groupby='cell_type_annotation_lv1', rotation=90)

In [None]:
pk = cr.kernels.PseudotimeKernel(adata, time_key='pseudotime')
pk.compute_transition_matrix(n_jobs=-1)
pk.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none', save= 'umap_transoition_pseudotime_combined.png')

## Transcriptional Diversity

### scTour

In [None]:
adata = sc.read_h5ad('adata_NB6_CR_save_before_scTour.h5ad')

In [None]:
adata

In [None]:
import sctour as sct

In [None]:
adata_multi= adata[adata.obs['sequencing']=='single nucleus'].copy()
adata_sc= adata[adata.obs['sequencing']!='single nucleus'].copy()

In [None]:
adata_multi

In [None]:
adata_sc

In [None]:
#adata_sct = adata.copy()
del adata
gc.collect()

In [None]:
gc.collect()

#### atac sctour

In [None]:
adata_multi.X = adata_multi.layers['raw_counts'] #.todense() #sc.pp.calc_qc_metrics does not work with dense matrix?!

In [None]:
adata_multi.X

In [None]:
sc.pp.calculate_qc_metrics(adata_multi, percent_top=None, log1p=False, inplace=True)

In [None]:
#adata_multi.X = adata_multi.layers['log_dca_counts'].copy() #for loss_mode mse use log counts

In [None]:
tnode = sct.train.Trainer(adata_multi, loss_mode='nb', percent=0.5, alpha_recon_lec=0.8, alpha_recon_lode=0.2, n_latent=20, n_ode_hidden=50, n_vae_hidden=256) #loss_mode= 'mse'
tnode.train()

In [None]:
adata_multi.obs['sctour_pseudotime'] = tnode.get_time()

In [None]:
mix_zs, zs, pred_zs = tnode.get_latentsp(alpha_z=0.5, alpha_predz=0.5)
adata_multi.obsm['X_scTour'] = mix_zs

In [None]:
adata_multi.obsm['X_scTourVF'] = tnode.get_vector_field(adata_multi.obs['sctour_pseudotime'].values, adata_multi.obsm['X_scTour'])

In [None]:
sc.pl.umap(adata_multi, color=['sctour_pseudotime', 'sequencing'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo')

In [None]:
sct.vf.plot_vector_field(adata_multi, zs_key='X_scTour', vf_key='X_scTourVF', use_rep_neigh='X_scANVI', color='cell_type_annotation_lv1', show=False, legend_loc='none', frameon=False, size=50, alpha=0.2)

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata_multi, use_raw=False, keys=['sctour_pseudotime'], groupby='cell_type_annotation_lv1', rotation=90)

#### sc rna sctour

In [None]:
adata_sc.X = adata_sc.layers['raw_counts']#.todense()

In [None]:
sc.pp.calculate_qc_metrics(adata_sc, percent_top=None, log1p=False, inplace=True)

In [None]:
#adata_sc.X = adata_sc.layers['log_dca_counts'].copy() #for loss_mode mse use log counts, nb for raw counts

In [None]:
tnode = sct.train.Trainer(adata_sc, loss_mode='nb', percent=0.5, alpha_recon_lec=0.8, alpha_recon_lode=0.2, n_latent=20, n_ode_hidden=50, n_vae_hidden=256) #loss_mode= 'mse'
tnode.train()

In [None]:
adata_sc.obs['sctour_pseudotime'] = tnode.get_time()

In [None]:
mix_zs, zs, pred_zs = tnode.get_latentsp(alpha_z=0.5, alpha_predz=0.5)
adata_sc.obsm['X_scTour'] = mix_zs

In [None]:
adata_sc.obsm['X_scTourVF'] = tnode.get_vector_field(adata_sc.obs['sctour_pseudotime'].values, adata_sc.obsm['X_scTour'])

In [None]:
sc.pl.umap(adata_sc, color=['sctour_pseudotime', 'sequencing'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo')

In [None]:
sct.vf.plot_vector_field(adata_sc, zs_key='X_scTour', vf_key='X_scTourVF', use_rep_neigh='X_scANVI', color='cell_type_annotation_lv1', show=False, legend_loc='none', frameon=False, size=50, alpha=0.2)

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata_sc, use_raw=False, keys=['sctour_pseudotime'], groupby='cell_type_annotation_lv1', rotation=90)

#### concat results - necessary?!

In [None]:
del adata_sc
del adata_multi
gc.collect()

In [None]:
adata_sc= sc.read_h5ad('sctour_adata_sc.h5ad')

In [None]:
adata_multi= sc.read_h5ad('sctour_adata_sn.h5ad')

In [None]:
adata_multi

In [None]:
adata_sc

#### add back to adata

In [None]:
adata = sc.read_h5ad('adata_NB6_CR_save_before_scTour.h5ad')

In [None]:
adata

In [None]:
# Create NaN-filled buffers with one row per cell of the full adata; they are
# filled per sequencing modality in the following cells.
X_scTour_full = np.full((adata.shape[0], 20), np.nan)  # scTour latent space, 20 columns (presumably n_latent=20 of the Trainer - confirm)
X_scTourVF_full = np.full((adata.shape[0], 20), np.nan)  # scTour vector field, same width as the latent space
sc_tour_pseudotime = np.full((adata.shape[0],), np.nan)  # scTour pseudotime, 1-D: one value per cell

In [None]:
# Write the scTour results of the single-cell subset into the full-size buffers.
# NOTE(review): the assignment is positional - it assumes adata_sc has exactly
# as many rows as adata has 'single cell' entries, in the same order. Confirm
# against how adata_sc was subset/saved.
X_scTour_full[adata.obs['sequencing'] == 'single cell'] = adata_sc.obsm['X_scTour']
X_scTourVF_full[adata.obs['sequencing'] == 'single cell'] = adata_sc.obsm['X_scTourVF']
sc_tour_pseudotime[adata.obs['sequencing'] == 'single cell'] = adata_sc.obs['sctour_pseudotime']

In [None]:
del adata_sc
gc.collect()

In [None]:
# multiome
# Write the scTour results of the single-nucleus subset into the full-size buffers.
# NOTE(review): the assignment is positional - it assumes adata_multi has
# exactly as many rows as adata has 'single nucleus' entries, in the same order.
X_scTour_full[adata.obs['sequencing'] == 'single nucleus'] = adata_multi.obsm['X_scTour']
X_scTourVF_full[adata.obs['sequencing'] == 'single nucleus'] = adata_multi.obsm['X_scTourVF']
sc_tour_pseudotime[adata.obs['sequencing'] == 'single nucleus'] = adata_multi.obs['sctour_pseudotime']

In [None]:
# Store the merged scTour results on the full AnnData object.
adata.obsm['X_scTour'] = X_scTour_full  # latent representation
# The vector field gets its own key; it was previously written to 'X_scTour',
# which overwrote the latent representation stored on the line above.
adata.obsm['X_scTourVF'] = X_scTourVF_full
adata.obs['sctour_pseudotime'] = sc_tour_pseudotime

In [None]:
adata.obs['sctour_pseudotime']

In [None]:
del adata_multi
gc.collect()

In [None]:
sctk = cr.kernels.PseudotimeKernel(adata, time_key='sctour_pseudotime')
sctk.compute_transition_matrix(n_jobs=-1)
sctk.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none')
sctk.write(fname='adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_sctour_pseudotime.plk', write_adata=False)

### CytoTRACE

### CytoTRACE with standard imputation

#### save state

In [None]:
adata.write('adata_save_NB6_CR_Cytotrace.h5ad')

In [None]:
adata = sc.read_h5ad('adata_save_NB6_CR_Cytotrace.h5ad')

In [None]:
adata_multi= adata[adata.obs['sequencing']=='single nucleus'].copy()
adata_sc= adata[adata.obs['sequencing']!='single nucleus'].copy()

In [None]:
del adata
gc.collect()

In [None]:
# CytoTRACE by default uses imputed data - a simple way to compute
# k-NN imputed data is to use scVelo's moments function.
# However, note that this function expects `spliced` counts because
# it's designed for RNA velocity, so we're using a simple hack here:
# both modality subsets are processed identically, one after the other.
for adatas in [adata_multi, adata_sc]:
    # Placeholder layers: both are set to whatever X currently holds, not to
    # real spliced/unspliced counts. Existing layers are left untouched.
    if 'spliced' not in adatas.layers or 'unspliced' not in adatas.layers:
        adatas.layers['spliced'] = adatas.X
        adatas.layers['unspliced'] = adatas.X
    # From here on X is the 'raw_counts' layer.
    adatas.X = adatas.layers['raw_counts']

    # NOTE(review): PCA and the neighbour graph are computed on X as set above
    # (the 'raw_counts' layer) with no normalisation step in this cell - confirm intended.
    sc.pp.pca(adatas)
    sc.pp.neighbors(adatas, n_pcs=30, n_neighbors=30)
    # n_pcs=None / n_neighbors=None: see the linked scVelo issue for why these are passed.
    scv.pp.moments(adatas,n_pcs=None, n_neighbors=None) #https://github.com/theislab/scvelo/issues/1212

In [None]:
ctk = cr.kernels.CytoTRACEKernel(adata_sc)
ckt = ctk.compute_cytotrace(aggregation='mean', n_genes=200) #also tried wo aggregation (dflt = CytoTRACEKernel.MEAN) and ngenes (dflt = all)

In [None]:
ctk2 = cr.kernels.CytoTRACEKernel(adata_multi)
ckt2 = ctk2.compute_cytotrace(aggregation='mean', n_genes=200) #also tried wo aggregation (dflt = CytoTRACEKernel.MEAN) and ngenes (dflt = all)

In [None]:
gc.collect()

In [None]:
ctk.compute_transition_matrix(n_jobs=-1, threshold_scheme='hard')
ctk.plot_projection(color='cell_type_annotation_lv1', legend_loc='none')

In [None]:
ctk2.compute_transition_matrix(n_jobs=-1, threshold_scheme='hard')
ctk2.plot_projection(color='cell_type_annotation_lv1', legend_loc='none')

In [None]:
gc.collect()

In [None]:
adata = sc.read_h5ad('adata_save_NB6_CR_Cytotrace.h5ad')

In [None]:
# Create a NaN-filled buffer to collect the CytoTRACE pseudotime of both modalities
ct_pseudotime = np.full((adata.shape[0],), np.nan)  # 1-D: one value per cell of the full adata


In [None]:
# Write the CytoTRACE pseudotime of the single-cell subset into the full-size buffer.
# NOTE(review): positional assignment - assumes adata_sc rows match the
# 'single cell' rows of adata in number and order.
ct_pseudotime[adata.obs['sequencing'] == 'single cell'] = adata_sc.obs['ct_pseudotime']

In [None]:
del adata_sc
gc.collect()

In [None]:
# multiome
ct_pseudotime[adata.obs['sequencing'] == 'single nucleus'] = adata_multi.obs['ct_pseudotime']

In [None]:
# Set the full array back to adata
adata.obs['ct_pseudotime'] = ct_pseudotime

In [None]:
adata.obs['ct_pseudotime']

In [None]:
del adata_multi
gc.collect()

In [None]:
sc.pl.umap(adata, color=['ct_pseudotime'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata, use_raw=False, keys=['ct_pseudotime'], groupby='cell_type_annotation_lv1', rotation=90)

In [None]:
adata

In [None]:

if 'spliced' not in adata.layers or 'unspliced' not in adata.layers:
    adata.layers['spliced'] = adata.X
    adata.layers['unspliced'] = adata.X
adata.X = adata.layers['raw_counts']

sc.pp.pca(adata)
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=30)
scv.pp.moments(adata,n_pcs=None, n_neighbors=None)

In [None]:
ctk = cr.kernels.CytoTRACEKernel(adata)
ckt = ctk.compute_cytotrace(aggregation='mean', n_genes=200) #also tried wo aggregation (dflt = CytoTRACEKernel.MEAN) and ngenes (dflt = all)

In [None]:
ctk.write(fname='adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_cytotrace_pseudotime.plk', write_adata=False)

### Combine Kernels

In [None]:
# Equal-weight combination of the scTour pseudotime kernel and the CytoTRACE kernel.
# NOTE(review): the `ctk` created on the full adata above only has
# compute_cytotrace() called in this notebook - confirm its transition matrix
# has been computed before combining. Also confirm `sctk` and `ctk` are bound
# to the same AnnData object: adata is re-read from disk between their creation.
diversity_kernel = 1/2 * sctk + 1/2 * ctk

In [None]:
diversity_kernel.write(fname='adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_diversity_pseudotime.plk', write_adata=False)

In [None]:
gc.collect()

In [None]:
adata.write('adata_NB6_CR_Cytotrace_save.h5ad')

## RNA Velocity

### scVelo

In [None]:
adata = sc.read_h5ad('/home/idr/helene.reich/adata_scVelo_save.h5ad')

In [None]:
adata_layers= sc.read_h5ad('/mnt/hdd/data/Healthy/adata_markedDoublets_normalized_initialAnno_rmDoublets_integrated_all.h5ad')

In [None]:
adata_layers

In [None]:
# Plain list of the variable (gene) names of adata, used to subset adata_layers.
vars_names = list(adata.var_names)

In [None]:
adata_layers = adata_layers[:,vars_names]

In [None]:
# Copy the splicing-related count layers from adata_layers onto adata.
# NOTE(review): only the variables were aligned (via vars_names) before this
# cell; this assumes both objects hold the same cells in the same order - confirm.
adata.layers['ambiguous'] = adata_layers.layers['ambiguous']
adata.layers['spliced'] = adata_layers.layers['spliced']
adata.layers['unspliced'] = adata_layers.layers['unspliced']

In [None]:
del adata_layers
gc.collect()

In [None]:
adata

In [None]:
del adata.layers['dca_counts']
#del adata.layers['log_dca_counts']
del adata.layers['sct_counts']
del adata.layers['palantir_imp']
gc.collect()

In [None]:
gc.collect()

In [None]:
adata.X =  adata.layers['raw_counts'].copy()

In [None]:
scv.pl.proportions( adata, groupby='cell_type_annotation_lv1',fontsize=5, figsize=(9, 3),dpi = 300, save= 'scvelo_splicing_proportions_celltype.png')

In [None]:
scv.pl.proportions( adata, groupby='sequencing',fontsize=6, figsize=(6, 2),dpi = 300, save= 'scvelo_splicing_proportions_modality.png')

In [None]:
scv.pl.proportions( adata, groupby='pretty name',fontsize=4.5, figsize=(8, 2),dpi = 300, save= 'scvelo_splicing_proportions_sample.png')

In [None]:
scv.pp.filter_and_normalize( adata, min_shared_counts=20, n_top_genes=2000, subset_highly_variable=False)
gc.collect()

In [None]:
sc.pp.neighbors( adata, use_rep="X_scANVI", n_pcs=50, n_neighbors=20)
gc.collect()

In [None]:
scv.pp.moments( adata, n_pcs=None, n_neighbors=None)

In [None]:
gc.collect()

Note: to reduce memory usage, the object should only contain the imputed data, the connectivities and potentially the fitted parameters specified in `adata.var`. You also only need to keep the (potential) velocity genes, for example those estimated by the steady-state model. Source: https://github.com/theislab/scvelo/issues/405

In [None]:
adata

In [None]:
del adata.layers['ambiguous']
del adata.layers[ 'spliced']
del adata.layers[ 'unspliced']
gc.collect()


In [None]:
# Remove per-cell metadata columns that are not needed for the velocity
# computation, to reduce memory usage.
# DataFrame.drop returns a new frame; the result must be assigned back,
# otherwise (as before) nothing is actually removed from adata.obs.
# 'cell_type_annotation_lv1' is intentionally not in this list.
obs_columns_to_drop = [
    'leiden', 'dca_split', 'size_factors', 'Project', 'pretty name',
    'sequencing', 'condition', 'kit', 'line', 'strain', 'enriched',
    'enrichment proportion', 'treatment', 'diet', 'tissue', 'structure',
    'target cell number', 'Index Type', 'sequencing machine',
    'leiden_1', 'leiden_sub1', 'leiden_2', 'leiden_3', 'leiden_4',
    'leiden_5', 'leiden_6', 'leiden_7', 'leiden_8', 'leiden_9',
    'leiden_10', 'leiden_11', 'leiden_12', 'leiden_13', 'leiden_13_save',
    'dpt_pseudotime', 'dpt_pseudotime_g2m', 'dpt_pseudotime_g1',
    'palantir_pseudotime_g1', 'palantir_pseudotime_g2m', 'palantir_pseudotime',
]
adata.obs = adata.obs.drop(columns=obs_columns_to_drop)
gc.collect()

In [None]:
del adata.obsm['T_fwd_umap']
del adata.obsm[ 'X_diffmap']
del adata.obsm['X_palantir_diff_comp']
del adata.obsm['X_palantir_multiscale']

del adata.obsp['palantir_diff_op']

gc.collect()

#### steady-state model

In [None]:
scv.tl.velocity(adata, mode="deterministic")

In [None]:
scv.tl.velocity_graph(adata, n_jobs=8)

In [None]:
scv.pl.velocity_embedding_stream( adata, basis='umap', color='cell_type_annotation_lv1', legend_loc='none', save = 'cr_scvelo_velocity_graph_steady_state_model.png')

#### EM model

In [None]:
adata

### continue after recovering dynamics

In [None]:
adata_layers = sc.read_h5ad('adata_scVelo_save.h5ad')

In [None]:
adata = sc.read_h5ad('adata_scvelo_save_meta_corr_anno_updated.h5ad')

In [None]:
gc.collect()

In [None]:
# Read only the scVelo dynamics-fit results from the h5ad file (the
# 'recover_dynamics' entry in uns, the 'fit_t' layer and the fit_* columns of
# var) instead of loading the whole object.
from anndata._io.specs import read_elem
with h5py.File('/mnt/hdd/data/Healthy/pseudotime/adata_scvelo_minimal_fit_pars.h5ad', 'r') as f:
    # Read specific columns from `obs`
    #sample_column = f['obs/sample'][:]
    #n_counts_column = f['obs/n_counts'][:]
    #https://github.com/scverse/anndata/issues/436:
    #cell_types = read_elem(f["obs/celltype"])
    #umap = read_elem(f["obsm/X_umap"])
    recover_dyn = read_elem(f["uns/recover_dynamics"])
    fit_t = read_elem(f['layers/fit_t'])
    fit_t_ = read_elem(f['var/fit_t_'])
    fit_likelihood = read_elem(f['var/fit_likelihood'])
    fit_alpha = read_elem(f['var/fit_alpha'])
    fit_beta = read_elem(f['var/fit_beta'])
    fit_gamma = read_elem(f['var/fit_gamma'])
    fit_scaling = read_elem(f['var/fit_scaling'])
    fit_std_u = read_elem(f['var/fit_std_u'])
    fit_std_s = read_elem(f['var/fit_std_s'])


In [None]:
adata_fit_pars = sc.read_h5ad('/mnt/hdd/data/Healthy/pseudotime/adata_scvelo_minimal_fit_pars.h5ad')

In [None]:
adata_fit_pars

In [None]:
adata

In [None]:
# Read only the cell_type_annotation_lv1 obs column and its colour list from
# the saved file, without loading the whole AnnData object.
from anndata._io.specs import read_elem
with h5py.File('adata_scvelo_save_meta_corr_anno_updated.h5ad', 'r') as f:
    # Element-wise reading with read_elem follows
    # https://github.com/scverse/anndata/issues/436, e.g.:
    #   cell_types = read_elem(f["obs/celltype"])
    #   umap = read_elem(f["obsm/X_umap"])
    # Raw h5py access would look like f['obs/sample'][:].
    ctanno = read_elem(f["obs/cell_type_annotation_lv1"])
    ctanno_colors = read_elem(f['uns/cell_type_annotation_lv1_colors'])


In [None]:
adata_fit_pars.obs['cell_type_annotation_lv1']=ctanno
adata_fit_pars.uns['cell_type_annotation_lv1_colors']=ctanno_colors

In [None]:
adata.uns['recover_dynamics']=recover_dyn
adata.layers['fit_t'] = fit_t
adata.var['fit_t_'] = fit_t_
adata.var['fit_likelihood']=fit_likelihood
adata.var['fit_alpha']=fit_alpha 
adata.var['fit_beta']=fit_beta
adata.var['fit_gamma']=fit_gamma
adata.var['fit_scaling']=fit_scaling
adata.var['fit_std_u']=fit_std_u
adata.var['fit_std_s']=fit_std_s


In [None]:
gc.collect()

In [None]:
#del adata_fit_pars
# Drop the module-level temporaries that were read from the fit-parameter file
# once they have been attached to adata. fit_t_ is included here as well; it
# was read together with the others but previously never released.
del (
    recover_dyn,
    fit_t,
    fit_t_,
    fit_likelihood,
    fit_alpha,
    fit_beta,
    fit_gamma,
    fit_scaling,
    fit_std_s,
    fit_std_u,
)
gc.collect()

In [None]:
# Plain list copy of the gene names kept in adata.
vars_names = list(adata.var_names)

In [None]:
adata_layers = adata_layers[:,vars_names]

In [None]:
adata_layers

In [None]:
adata.layers['ambiguous'] = adata_layers.layers['ambiguous']
adata.layers['spliced'] = adata_layers.layers['spliced']
adata.layers['unspliced'] = adata_layers.layers['unspliced']

In [None]:
del adata_layers
gc.collect()

In [None]:
# NOTE(review): adata is deleted here, but the "deterministic steady state
# model" cells below call scv.tl.velocity(adata, ...) and adata.write(...)
# without reloading it. Running the notebook top to bottom would raise a
# NameError there -- confirm the intended execution order.
del adata
gc.collect()

In [None]:
scv.tl.velocity( adata_fit_pars, mode='dynamical')
scv.tl.velocity_graph( adata_fit_pars, n_jobs=1)

In [None]:
scv.pl.velocity_embedding_stream( adata_fit_pars, basis='umap', color='cell_type_annotation_lv1', legend_loc='none', save = 'cr_scvelo_velocity_graph_EM_model.png')

In [None]:
scv.tl.latent_time( adata_fit_pars)#, root_key='iroot'

In [None]:
sc.pl.umap( adata_fit_pars, color=['latent_time'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo', title= 'scVelo latent time',save ='scvelo_latent_time_dyn_model.png')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin( adata_fit_pars, use_raw=False, keys=['latent_time'], groupby='cell_type_annotation_lv1', rotation=90,save ='violin_scvelo_latent_time_dyn_model.png')

### deterministic steady state model

In [None]:
scv.tl.velocity(adata, mode="deterministic")
scv.tl.velocity_graph( adata, n_jobs=1)

In [None]:
scv.pl.velocity_embedding_stream( adata, basis='umap', color='cell_type_annotation_lv1', legend_loc='none', save = 'cr_scvelo_velocity_graph_det_model.png')

In [None]:
# Open the HDF5 file and read only the diffusion-map embedding (obsm/X_diffmap)
# and its eigenvalues (uns/diffmap_evals), without loading the whole object.
from anndata._io.specs import read_elem
with h5py.File('adata_NB6_CR_save_before_scTour.h5ad', 'r') as f:
    # Element-wise reading with read_elem follows
    # https://github.com/scverse/anndata/issues/436, e.g.:
    #   cell_types = read_elem(f["obs/celltype"])
    #   umap = read_elem(f["obsm/X_umap"])
    # Raw h5py access would look like f['obs/sample'][:].
    diffmap = read_elem(f["obsm/X_diffmap"])
    diffmap_evals = read_elem(f["uns/diffmap_evals"])

In [None]:
adata.obsm['X_diffmap'] = diffmap
adata.uns['diffmap_evals'] = diffmap_evals

In [None]:
del diffmap
del diffmap_evals
gc.collect()

In [None]:
adata.write('adata_scvelo_with_layers_and_diffmap.h5ad')
gc.collect()

### Continue after all calculations

In [None]:
adata = sc.read_h5ad('adata_scvelo_with_layers_and_diffmap.h5ad')

In [None]:
adata

In [None]:
# Compute latent time on the reloaded adata: the following cells read
# 'latent_time' from adata, and adata_fit_pars is not recreated in this
# "continue" section.
scv.tl.latent_time(adata)#, root_key='iroot'

In [None]:
sc.pl.umap( adata, color=['latent_time'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo', title= 'scVelo latent time',save ='scvelo_latent_time_dyn_model.png')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin( adata, use_raw=False, keys=['latent_time'], groupby='cell_type_annotation_lv1', rotation=90,save ='violin_scvelo_latent_time_dyn_model.png')

In [None]:
scvk = cr.kernels.VelocityKernel(adata)
scvk.compute_transition_matrix(n_jobs=-1)
scvk.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none')
scvk.write(fname='adata_markedDoublets_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_scvelo_wo_root.plk', write_adata=True)

### VeloVI

In [None]:
adata_vvi = sc.read_h5ad('adata_scVelo_save.h5ad')

In [None]:
adata_vvi.X = adata_vvi.layers['raw_counts'].copy()

In [None]:
# Read only the spliced and unspliced layers from the saved file, without
# loading the whole AnnData object.
# NOTE(review): this path is hard-coded under /mnt/md0 while the notebook's
# base_dir points to /mnt/hdd -- confirm the file location.
from anndata._io.specs import read_elem
with h5py.File('/mnt/md0/Projects/Miscellaneous/Helene/adata_markedDoublets_normalized_initialAnno_rmDoublets_integrated_all_imputed.h5ad', 'r') as f:
    # Element-wise reading with read_elem follows
    # https://github.com/scverse/anndata/issues/436, e.g.:
    #   cell_types = read_elem(f["obs/celltype"])
    #   umap = read_elem(f["obsm/X_umap"])
    # Raw h5py access would look like f['obs/sample'][:].
    spliced = read_elem(f["layers/spliced"])
    unspliced = read_elem(f["layers/unspliced"])


In [None]:
spliced.shape

In [None]:
adata_vvi

In [None]:
adata_vvi.layers['spliced']=spliced
adata_vvi.layers['unspliced']=unspliced

In [None]:
scv.pp.filter_and_normalize(adata_vvi, min_shared_counts=20, n_top_genes=3000, subset_highly_variable=False)
sc.pp.neighbors(adata_vvi, use_rep="X_scANVI", n_pcs=50, n_neighbors=20)
scv.pp.moments(adata_vvi, n_pcs=None, n_neighbors=None)

In [None]:
from velovi import preprocess_data, VELOVI

In [None]:
VELOVI.setup_anndata(adata_vvi, spliced_layer="Ms", unspliced_layer="Mu")
vae = VELOVI(adata_vvi)
vae.train()

In [None]:
fig, ax = plt.subplots()
vae.history["elbo_train"].iloc[20:].plot(ax=ax, label="train")
vae.history["elbo_validation"].iloc[20:].plot(ax=ax, label="validation")
plt.legend()


In [None]:
def add_velovi_outputs_to_adata(adata, vae):
    """Store veloVI estimates from ``vae`` in ``adata`` under scVelo-style keys.

    Writes the layers ``velocity``, ``latent_time_velovi`` and ``fit_t`` and
    the var columns ``fit_alpha``, ``fit_beta``, ``fit_gamma``, ``fit_t_`` and
    ``fit_scaling``. Latent time is rescaled per gene so that its maximum is
    20; velocities and rates are divided by the same factor.

    Args:
        adata: Object exposing a ``layers`` mapping and a ``var`` table; it is
            modified in place.
        vae: Trained model providing ``get_latent_time``, ``get_velocity``,
            ``get_rates`` and ``module.switch_time_unconstr``.
            Assumes the latent time is a cells x genes DataFrame (``.max(0)``
            and ``.values`` are used on it) -- TODO confirm.

    Returns:
        The same ``adata`` object that was passed in.
    """
    latent_time = vae.get_latent_time(n_samples=25)
    velocities = vae.get_velocity(n_samples=25, velo_statistic="mean")

    # Per-gene factor mapping each gene's maximum latent time to 20.
    scaling = 20 / latent_time.max(0)

    adata.layers["velocity"] = velocities / scaling
    adata.layers["latent_time_velovi"] = latent_time

    # Query the rates once and reuse them for all three parameters.
    rates = vae.get_rates()
    for rate_name in ("alpha", "beta", "gamma"):
        adata.var[f"fit_{rate_name}"] = rates[rate_name] / scaling

    # Softplus of the model's unconstrained switch-time parameter, moved to
    # NumPy and expressed on the rescaled time axis.
    switch_time = (
        torch.nn.functional.softplus(vae.module.switch_time_unconstr)
        .detach()
        .cpu()
        .numpy()
    )
    adata.var["fit_t_"] = switch_time * scaling

    # Plain array so the per-gene factors broadcast across the rows.
    scaling_values = np.array(scaling)
    adata.layers["fit_t"] = latent_time.values * scaling_values[None, :]
    adata.var["fit_scaling"] = 1.0

    return adata

adata_vvi = add_velovi_outputs_to_adata(adata_vvi, vae)

In [None]:
scv.tl.velocity_graph(adata_vvi)

In [None]:
scv.pl.velocity_embedding_stream(adata_vvi, basis='umap', color='cell_type_annotation_lv1', legend_loc='none')

In [None]:
vvik = cr.kernels.VelocityKernel(adata_vvi)
vvik.compute_transition_matrix(n_jobs=-1)
vvik.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none')


In [None]:
vvik.write(fname='adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_veloVI.plk', write_adata=False)

In [None]:
adata_vvi

In [None]:
adata

### Combine Kernels

In [None]:
velocity_kernel = 1/2 * scvk + 1/2 * vvik

### recover scvelo and velovi kernels

In [None]:
adata_vvi = sc.read_h5ad('adata_scVelo_save.h5ad')

In [None]:
# Load the kernel from the file
with open('/mnt/hdd/data/Healthy/pseudotime/adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_veloVI.plk', 'rb') as file:
    vvik = pickle.load(file)

In [None]:
#scvk = cr.kernels.VelocityKernel(adata_scvelo)
# Load the kernel from the file
with open('adata_markedDoublets_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_scvelo.plk', 'rb') as file:
    scvk = pickle.load(file)

In [None]:
scvk._adata

In [None]:
scvk._adata.write('adata_after_scvelo.h5ad')

In [None]:
vvik._adata = adata_vvi

In [None]:
vvik._adata

### Combine Kernels

In [None]:
velocity_kernel = 1/2 * scvk + 1/2 * vvik

In [None]:
del scvk
del vvik
gc.collect()

In [None]:
del adata_vvi
gc.collect()

In [None]:
velocity_kernel.write(fname='adata_markedDoublets_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_velocity_kernel.plk', write_adata=False)

## Combine Kernels & Pseudotime

In [None]:
# Combined pseudotime: row-wise mean of the individual pseudotimes, min-max
# scaled to [0, 1]. Other candidates considered for the mean: 'ct_pseudotime',
# 'sctour_pseudotime', 'latent_time', 'sct_pseudotime'.
# Series.min()/.max() are used instead of the builtins, which iterate over
# every cell in Python and give order-dependent results when NaN is present.
pseudotime = adata.obs.loc[:, ['palantir_pseudotime', 'dpt_pseudotime']].mean(axis=1)
pseudotime = pseudotime - pseudotime.min()
adata.obs['pseudotime'] = pseudotime / pseudotime.max()

In [None]:
sc.pl.umap(adata, color=['pseudotime'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata, use_raw=False, keys=['pseudotime'], groupby='cell_type_annotation_lv1', rotation=90)

## Lineage Inference

In [None]:
pseudotime_kernel.adata

In [None]:
pseudotime_kernel.adata.X

In [None]:
g = cr.estimators.GPCCA(pseudotime_kernel)

In [None]:
del adata
gc.collect()

In [None]:
g.fit(cluster_key="cell_type_annotation_lv1", n_states=11) #10
g.plot_macrostates(which="all", discrete=True, legend_loc="right", s=20)

In [None]:
g.predict_initial_states(allow_overlap=True)
g.plot_macrostates(which="initial", legend_loc="right", s=20)

In [None]:
g.macrostates.cat.categories

In [None]:
g.set_initial_states(states=['ISC'])

In [None]:
g.set_terminal_states(states=['D-cell (Sst+)',
       'L/I-cell (Glp1+/Cck+)', 'X-cell (Ghrl+)', 'EC (mature)', 'Tuft',
       'Enterocyte', 'Paneth'])
g.plot_macrostates(which="terminal", legend_loc="right", s=20)

In [None]:
g.plot_macrostates(which="terminal", discrete=False, legend_loc='none')

In [None]:
g.plot_coarse_T()

## Fate Probabilities

In [None]:
g.compute_fate_probabilities()
g.plot_fate_probabilities(same_plot=False, ncols=4)

In [None]:
g.plot_fate_probabilities(same_plot=False, ncols=4)

In [None]:
g.plot_fate_probabilities(same_plot=True, legend_loc='none')

In [None]:
g.terminal_states.cat.categories

In [None]:
adata = pseudotime_kernel.adata

In [None]:
adata.obs['fp_EEC_Ghrl'] = g.fate_probabilities['X-cell (Ghrl+)'].X.flatten()
adata.obs['fp_EEC_Glp1_Cck'] = g.fate_probabilities['L/I-cell (Glp1+/Cck+)'].X.flatten()
#adata.obs['fp_EEC_Gip'] = g.fate_probabilities['K-cell (Gip+)'].X.flatten()
adata.obs['fp_EEC_Sst'] = g.fate_probabilities['D-cell (Sst+)'].X.flatten()
adata.obs['fp_EC'] = g.fate_probabilities['EC (mature)'].X.flatten()
#adata.obs['fp_Goblet1'] = g.fate_probabilities['Goblet_1'].X.flatten()
adata.obs['fp_Paneth'] = g.fate_probabilities['Paneth'].X.flatten()
adata.obs['fp_Tuft'] = g.fate_probabilities['Tuft'].X.flatten()
adata.obs['fp_Enterocyte'] = g.fate_probabilities['Enterocyte'].X.flatten()

In [None]:
# Total EEC fate probability = sum over the individual EEC/EC fate columns.
# The aggregate 'fp_EEC' itself and the derived '*_bin' / '*_states' columns
# share the same prefixes, so they are excluded explicitly; otherwise re-running
# this cell would add the previous total (or non-numeric columns) to the sum.
fp_EEC = [
    column
    for column in adata.obs.columns
    if column.startswith(('fp_EE', 'fp_EC'))
    and column != 'fp_EEC'
    and not column.endswith(('_bin', '_states'))
]
adata.obs['fp_EEC'] = adata.obs.loc[:, fp_EEC].sum(axis=1)

In [None]:
fp_EEC = [column for column in adata.obs.columns if (column.startswith('fp_EE')) | (column.startswith('fp_EC'))]
sc.pl.umap(adata, color=fp_EEC + ['cell_type_annotation_lv1'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, color_map='turbo', save = 'fp_EECs.png')

In [None]:
fp_GobPan= [column for column in adata.obs.columns if (column.startswith('fp_Pan')) | (column.startswith('fp_Go'))]
sc.pl.umap(adata, color=fp_GobPan + ['cell_type_annotation_lv1'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo', save = 'fp_paneth.png')

In [None]:
cr.pl.circular_projection(adata, keys=['cell_type_annotation_lv1'], legend_loc='right', lineage_order='optimal',legend_fontsize=10,label_distance= 1.1, title = '', figsize=(22,22) ,save = 'circ_proj_fates_opt_order.png')

In [None]:
cr.pl.circular_projection(adata, keys=['cell_type_annotation_lv1'], legend_loc='right', lineage_order='default',legend_fontsize=10,label_distance= 1.1, title = '', figsize=(22,22) ,save = 'circ_proj_fates_default_order.png')

In [None]:
cr.pl.circular_projection(adata, keys=['kl_divergence','phase','fp_EEC'], legend_loc='right', lineage_order='default', ncols=3, cmap='turbo',legend_fontsize=10, label_distance= 1.1, fontsize =15, figsize=(20,20) ,save = 'circ_proj_fates_phase_EEC_dive4rgence.png')

In [None]:
sc.pl.umap(adata, color=['init_states_fwd_probs', 'term_states_fwd_probs', 'lineages_fwd_kl_divergence'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo', save = 'umaps_fwd_probs.png')

In [None]:
fate_probability_keys = ['init_states_fwd_probs', 'term_states_fwd_probs', 'lineages_fwd_kl_divergence'] + [column for column in adata.obs.columns if column.startswith('fp_')] #'lineages_fwd_entropy']
fate_probability_keys

# Annotation Refinement

## CellRank-Based Annotation with Fate Probabilities

In [None]:
sc.pl.umap(adata, color=['phase','init_states_fwd_probs', 'term_states_fwd_probs', 'lineages_fwd_kl_divergence', "fp_EEC",'fp_Tuft','fp_Paneth','fp_Enterocyte','cell_type_annotation_lv1'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), cmap='turbo')

### Lineages

In [None]:
adata.obs['term_states_rev_probs'] = 1-adata.obs['term_states_fwd_probs']
sc.pl.umap(adata, color=['term_states_rev_probs','term_states_fwd_probs'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), cmap='turbo')

In [None]:
def _flag_categories(mask):
    # Turn a boolean mask into a categorical of the strings 'True' / 'False'.
    return pd.Categorical(mask.astype(str).tolist())


# Each lineage flag is the conjunction of fate-probability ratios against the
# competing lineages; the trailing comments keep earlier threshold values.

# EEC (disabled term: fp_EEC / fp_Goblet2 > 1)
adata.obs['fp_EEC_bin'] = _flag_categories(
    (adata.obs['fp_EEC'] / adata.obs['fp_Paneth']).gt(0.6)
    & (adata.obs['fp_EEC'] / adata.obs['fp_Tuft']).gt(1)
    & (adata.obs['fp_EEC'] / adata.obs['fp_Enterocyte']).gt(0.9)
)

# Tuft (disabled term: fp_Tuft / fp_Goblet2 > 0.5)
adata.obs['fp_Tuft_bin'] = _flag_categories(
    (adata.obs['fp_Tuft'] / adata.obs['fp_Paneth']).gt(0.2)
    & (adata.obs['fp_Tuft'] / adata.obs['fp_EEC']).ge(1)
    & (adata.obs['fp_Tuft'] / adata.obs['fp_Enterocyte']).gt(0.15)
)

# Goblet flag is disabled:
#   fp_Goblet2 / fp_Tuft >= 1/0.5, fp_Goblet2 / fp_Paneth2 > 0.5,
#   fp_Goblet2 / fp_EEC >= 1, fp_Goblet2 / fp_Enterocyte > 1

# Paneth (disabled term: fp_Paneth2 / fp_Goblet2 >= 1/0.5)
adata.obs['fp_Paneth_bin'] = _flag_categories(
    (adata.obs['fp_Paneth'] / adata.obs['fp_Tuft']).ge(10)  # 5
    & (adata.obs['fp_Paneth'] / adata.obs['fp_EEC']).ge(6)  # 10
    & (adata.obs['fp_Paneth'] / adata.obs['fp_Enterocyte']).gt(4)  # 0.15
)

# Enterocyte (disabled term: fp_Enterocyte / fp_Goblet2 >= 1/1)
adata.obs['fp_Enterocyte_bin'] = _flag_categories(
    (adata.obs['fp_Enterocyte'] / adata.obs['fp_Paneth']).ge(1/0.15)
    & (adata.obs['fp_Enterocyte'] / adata.obs['fp_EEC']).ge(1/0.15)
    & (adata.obs['fp_Enterocyte'] / adata.obs['fp_Tuft']).ge(1/0.15)
)

# ISC: high reverse terminal-state probability and high initial-state probability.
adata.obs['fp_ISC_bin'] = _flag_categories(
    adata.obs['term_states_rev_probs'].ge(0.785)  # 0.76
    & adata.obs['init_states_fwd_probs'].ge(0.8)
)

In [None]:
cr.pl.circular_projection(adata, keys=['fp_EEC_bin','fp_Tuft_bin','fp_Paneth_bin','fp_Enterocyte_bin','fp_ISC_bin'], legend_loc="right", lineage_order='default', 
                          lineages=['X-cell (Ghrl+)', 'L/I-cell (Glp1+/Cck+)', 'D-cell (Sst+)', 'EC (mature)', 'Enterocyte', 'Tuft', 'Paneth'], ncols =5, legend_fontsize=10, label_distance= 1.1, fontsize =15, figsize=(20,20) ,save = 'circ_proj_fate_probabilities.png')
sc.pl.umap(adata, color=['fp_EEC_bin','fp_Tuft_bin','fp_Paneth_bin','fp_Enterocyte_bin','fp_ISC_bin'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols =5,save = 'umaps_fate_probabilities.png')

In [None]:
adata.obs['cr_lineage'] = 'NA'
for lineage in ['Enterocyte','Paneth','Tuft','EEC','ISC']:
    adata.obs.loc[adata.obs['fp_' + lineage + '_bin'] == 'True','cr_lineage'] = lineage

In [None]:
adata.obs['cr_lineage'].value_counts()

In [None]:
sc.pl.umap(adata, color=['cr_lineage'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols = 3, save = 'CR_lineage_only_pseudoitime.png')

In [None]:
plot_composition(adata, y_key='cell_type_annotation_lv1', x_key='cr_lineage', x_rotation=90)

In [None]:
adata.obs['cr_lineage'].cat.categories

In [None]:
adata.obs['cr_lineage']= adata.obs['cr_lineage'].cat.add_categories(['TA','Goblet'])

In [None]:
adata.obs['cr_lineage'].cat.categories

In [None]:
# Cells still labelled 'NA' after the fate-probability rules fall back to a
# lineage derived from their cluster annotation.
# NOTE(review): the original literal 'TA (prox.))' has an unbalanced
# parenthesis and presumably never matched; both spellings are accepted here --
# confirm against the actual categories of cell_type_annotation_lv1.
fallback_lineages = {
    'Enterocyte': ['early Enterocyte', 'Enterocyte'],
    'Goblet': ['Goblet prog. (early)', 'Goblet prog. (late)', 'Goblet', 'Goblet/EEC prog. (early)'],
    'ISC': ['ISC'],
    'TA': ['TA', 'TA (prox.)', 'TA (prox.))'],
}
for fallback, annotations in fallback_lineages.items():
    unassigned = adata.obs['cr_lineage'] == 'NA'
    in_annotations = adata.obs['cell_type_annotation_lv1'].isin(annotations)
    adata.obs.loc[unassigned & in_annotations, 'cr_lineage'] = fallback

In [None]:
adata.obs.loc[(adata.obs['cr_lineage']=='NA')  & (adata.obs['cell_type_annotation_lv1'].isin(['EEC prog. (late/Peptide)', 'EEC prog. (late/EC)', 'D-cell (Sst+)'])),'cr_lineage'] = 'EEC'
adata.obs.loc[(adata.obs['cr_lineage']=='NA')  & (adata.obs['cell_type_annotation_lv1'].isin(['Goblet-Paneth-like', 'Goblet-Paneth-like(cycling)'])),'cr_lineage'] = 'Goblet'

In [None]:
adata.obs['cr_lineage'] = adata.obs['cr_lineage'].cat.remove_unused_categories()

In [None]:
adata.obs['cr_lineage'].cat.categories

In [None]:
adata.obs['cr_lineage'].value_counts()

In [None]:
sc.pl.umap(adata, color=['cr_lineage'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0))

### Cell Types

In [None]:
# Derive a CellRank-based cell-type label ('cr_cell_type') per lineage by
# binning a per-cell score with pd.cut. Cells whose cr_lineage matches none of
# the lineages handled below keep the initial 'NA' label.
annotation_key = 'cr_cell_type'
adata.obs[annotation_key] = 'NA'

lineage = 'EEC'

# EEC score: weighted mean of the EEC fate probability (weight 2) and the
# terminal-state probability (weight 1); binned into early / mid / late
# progenitors and EEC at 0.4, 0.8 and 0.9.
adata.obs['fp_' + lineage + '_states'] = (2*adata.obs['fp_' + lineage] + adata.obs['term_states_fwd_probs'])/3
adata.obs.loc[adata.obs.cr_lineage.isin([lineage]),annotation_key] = pd.cut(adata.obs.loc[adata.obs.cr_lineage.isin([lineage]),'fp_' + lineage + '_states'], bins=[-10, 0.4, 0.8, 0.9, 10], labels=[lineage+' prog. (early)',lineage+' prog. (mid)',lineage+' prog. (late)',lineage])
#sc.pl.umap(adata, color=['Neurog3','Sox4',annotation_key], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), layer='log_dca_counts', ncols=2)

# The Goblet binning below is disabled (kept as a bare string literal).
'''lineage = 'Goblet'
adata.obs['fp_' + lineage + '_states'] = (3*adata.obs['fp_' + lineage] + adata.obs['term_states_fwd_probs'])/4
adata.obs.loc[adata.obs.cr_lineage.isin([lineage]),annotation_key] = pd.cut(adata.obs.loc[adata.obs.cr_lineage.isin([lineage]),'fp_' + lineage + '_states'], bins=[-10, 0.55, 0.6, 10], labels=[lineage+' prog. (early)',lineage+' prog. (late)',lineage])'''

# Tuft: binned directly on the Tuft fate probability at 0.81 and 0.85.
lineage = 'Tuft'
adata.obs.loc[adata.obs.cr_lineage.isin([lineage]),annotation_key] = pd.cut(adata.obs.loc[adata.obs.cr_lineage.isin([lineage]),'fp_' + lineage], bins=[-10, 0.81, 0.85, 10], labels=[lineage+' prog. (early)',lineage+' prog. (late)',lineage])

# Enterocyte: same weighted score as for EEC, binned at 0.835 and 0.92.
lineage = 'Enterocyte'
adata.obs['fp_' + lineage + '_states'] = (2*adata.obs['fp_' + lineage] + adata.obs['term_states_fwd_probs'])/3
adata.obs.loc[adata.obs.cr_lineage.isin([lineage]),annotation_key] = pd.cut(adata.obs.loc[adata.obs.cr_lineage.isin([lineage]),'fp_' + lineage + '_states'], bins=[-10, 0.835, 0.92, 10], labels=[lineage+' prog. (early)',lineage+' prog. (late)',lineage])

# Paneth: single cut at 0, so every cell with a positive Paneth fate
# probability is labelled 'Paneth'.
lineage = 'Paneth'
adata.obs.loc[adata.obs.cr_lineage.isin([lineage]),annotation_key] = pd.cut(adata.obs.loc[adata.obs.cr_lineage.isin([lineage]),'fp_' + lineage], bins=[-10, 0, 10], labels=[lineage+' prog. (late)',lineage])

# ISC: single cut at 0 on the reverse terminal-state probability.
lineage = 'ISC'
adata.obs.loc[adata.obs.cr_lineage.isin([lineage]),annotation_key] = pd.cut(adata.obs.loc[adata.obs.cr_lineage.isin([lineage]),'term_states_rev_probs'], bins=[-10, 0, 10], labels=[lineage+' prog. (late)',lineage])



In [None]:
sc.pl.umap(adata, color=[annotation_key], size=8, add_outline=True, alpha=1, outline_width=(0.3, 0.0), save = 'umap_cellrank_cell_types.png')
cr.pl.circular_projection(adata, keys=[annotation_key], legend_loc="right", lineage_order='default', 
                          lineages=['X-cell (Ghrl+)', 'L/I-cell (Glp1+/Cck+)','D-cell (Sst+)', 'EC (mature)', 'Enterocyte', 'Tuft', 'Paneth'], legend_fontsize=10, label_distance= 1.1, fontsize =15, figsize=(20,20) ,save = 'circ_proj_cellrank_cell_types.png')

In [None]:
adata 

In [None]:
# Combined pseudotime: row-wise mean of the individual pseudotimes, min-max
# scaled to [0, 1]. Other candidates considered for the mean: 'ct_pseudotime',
# 'sctour_pseudotime', 'latent_time', 'sct_pseudotime'.
# Series.min()/.max() are used instead of the builtins, which iterate over
# every cell in Python and give order-dependent results when NaN is present.
pseudotime = adata.obs.loc[:, ['palantir_pseudotime', 'dpt_pseudotime']].mean(axis=1)
pseudotime = pseudotime - pseudotime.min()
adata.obs['pseudotime'] = pseudotime / pseudotime.max()

In [None]:
adata.obs['cr_cell_type'].cat.categories

In [None]:
# Step 2: Define the new category order
new_order = [
    'ISC', 'NA', 
    'Enterocyte prog. (early)', 'Enterocyte prog. (late)', 'Enterocyte', 
    'Tuft prog. (early)', 'Tuft prog. (late)', 'Tuft', 
    'EEC prog. (early)', 'EEC prog. (mid)', 'EEC prog. (late)', 'EEC',
    'Paneth'
]

# Step 3: Reorder the categories
adata.obs['cr_cell_type'] = adata.obs['cr_cell_type'].cat.reorder_categories(new_order, ordered=True)


In [None]:
with rc_context({'figure.figsize':(6,4)}):
    sc.pl.violin(adata, use_raw=False, keys=['pseudotime'], groupby='cr_cell_type', rotation=90, save = 'pseudotime_by_cr_cell_annop.png')

In [None]:
with rc_context({'figure.figsize':(5,3)}):
    # Plot the fate-probability keys four per figure. Stepping over the whole
    # list replaces the hard-coded slice bounds (0-3, 4-7, 8-10), which ignored
    # any key beyond the eleventh.
    keys_per_figure = 4
    for start in range(0, len(fate_probability_keys), keys_per_figure):
        sc.pl.violin(adata, use_raw=False, keys=fate_probability_keys[start:start + keys_per_figure], groupby='cr_cell_type', rotation=90)

In [None]:
comp = plot_composition(adata, x_key='cell_type_annotation_lv1', y_key='cr_cell_type', x_rotation=90)

In [None]:
comp = plot_composition(adata, y_key='cell_type_annotation_lv1', x_key='cr_cell_type', x_rotation=90)

## Combine Clustering and CellRank-Based Annotations

In [None]:
sc.pl.umap(adata, color=['cell_type_annotation_lv1','cr_cell_type'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), wspace=1, layer='log_dca_counts')

### Cell Type Fine with EEC Subtypes

In [None]:
adata.obs['cell_type_fine_eec'] = list(adata.obs['cell_type_annotation_lv1'])

In [None]:
# For the selected cluster annotations, take over the CellRank-based label,
# except where CellRank calls the cell an early enterocyte progenitor.
refine_mask = (
    adata.obs['cell_type_fine_eec'].isin(['EEC prog. (mid)','Goblet/EEC prog. (early)','ISC','Tuft prog. (early)'])
    & ~adata.obs['cr_cell_type'].isin(['Enterocyte prog. (early)'])
)
adata.obs.loc[refine_mask, 'cell_type_fine_eec'] = adata.obs.loc[refine_mask, 'cr_cell_type'].tolist()

In [None]:
adata.obs.loc[(adata.obs['cell_type_fine_eec'].isin(['Enterocyte'])) & 
              (adata.obs['cr_cell_type'].isin(['Enterocyte prog. (early)','ISC'])),'cell_type_fine_eec'] = 'Enterocyte prog. (TA)'

In [None]:
sc.pl.umap(adata, color=['cell_type_annotation_lv1','cr_cell_type','cell_type_fine_eec'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), layer='log_dca_counts', wspace=1)

In [None]:
adata.obs['cell_type_fine_eec'] = adata.obs['cell_type_fine_eec'].cat.reorder_categories(['ISC', 'EEC prog. (early)', 'EEC prog. (mid)', 'EEC prog. (late/EC)','EC (immature)','EC (mature)', 'EEC prog. (late/Peptide)'
                                                                              ,'X-cell (Ghrl+)','L/I-cell (Glp1+/Cck+)','L-cell (Glp1+)', 'K-cell (Gip+)', 'D-cell (Sst+)',
                                                                         'Goblet prog. (early)', 'Goblet prog. (late)', 'Goblet', 'Paneth', 'Tuft prog. (early)', 'Tuft prog. (late)', 'Tuft', 'Enterocyte prog. (TA)', 'Enterocyte' ])

In [None]:
adata.uns['cell_type_fine_eec_colors'] = [
'#d0d0d0', #ISC
'#b0b8d4', #EEC prog. (early)
'#8ca1d7', #EEC prog. (mid)
'#a08ab8',# 'EEC prog. (late/EC)',
'#ac7199',# 'EC (immature)',
'#b2577c',# 'EC (mature)',
'#75afbd',#'#6c99ab',# 'EEC prog. (late/Peptide)',
'#52c471',#'#488f81',# 'X-cell (Ghrl+)',
'#00b297',# 'L/I-cell (Glp1+/Cck+)',
'#2994ba',# 'L-cell (Glp1+)',
'#0076dc',# 'K-cell (Gip+)',
'#5e6ac9',# 'D-cell (Sst+)',
'#eab694', #Goblet prog. (early)
'#f89b58', #Goblet prog. (late)
'#ff7f00', #Goblet
'#984ea3', # Paneth
'#e4a091', #Tuft prog. (early)
'#e96b56', #Tuft prog. (late)
'#e41a1c', #Tuft
'#86bd77', #Enterocyte prog. (TA) 
'#00a704' #Enterocyte
]

In [None]:
sc.pl.umap(adata, color=['cell_type_fine_eec'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), layer='log_dca_counts', wspace=1)

### Cell Type Fine

In [None]:
# Identity mapping of the current categories, displayed as a template for the
# hand-edited mapping in the next cell.
cluster_anno = {str(cat): cat for cat in adata.obs['cell_type_fine_eec'].cat.categories}
cluster_anno

In [None]:
cluster_anno = {'ISC': 'ISC',
 'EEC prog. (early)': 'EEC prog. (early)',
 'EEC prog. (mid)': 'EEC prog. (mid)',
 'EEC prog. (late/EC)': 'EEC prog. (late)',
 'EC (immature)': 'EEC',
 'EC (mature)': 'EEC',
 'EEC prog. (late/Peptide)': 'EEC prog. (late)',
 'X-cell (Ghrl+)': 'EEC',
 'L/I-cell (Glp1+/Cck+)': 'EEC',
 'L-cell (Glp1+)': 'EEC',
 'K-cell (Gip+)': 'EEC',
 'D-cell (Sst+)': 'EEC',
 'Goblet prog. (early)': 'Goblet prog. (early)',
 'Goblet prog. (late)': 'Goblet prog. (late)',
 'Goblet': 'Goblet',
 'Paneth': 'Paneth',
 'Tuft prog. (early)': 'Tuft prog. (early)',
 'Tuft prog. (late)': 'Tuft prog. (late)',
 'Tuft': 'Tuft',
 'Enterocyte prog. (TA)': 'Enterocyte prog. (TA)',
 'Enterocyte': 'Enterocyte'}

In [None]:
# Collapse the EEC subtypes via cluster_anno and store the result as a categorical.
adata.obs['cell_type_fine'] = (
    adata.obs['cell_type_fine_eec']
    .astype("str")
    .map(cluster_anno)
    .astype("category")
)

In [None]:
adata.obs['cell_type_fine'] = adata.obs['cell_type_fine'].cat.reorder_categories(['ISC', 'EEC prog. (early)', 'EEC prog. (mid)', 'EEC prog. (late)','EEC',
                                                                         'Goblet prog. (early)', 'Goblet prog. (late)', 'Goblet', 'Paneth', 'Tuft prog. (early)', 'Tuft prog. (late)', 'Tuft', 'Enterocyte prog. (TA)', 'Enterocyte' ])

In [None]:
adata.uns['cell_type_fine_colors'] = [
    '#d0d0d0', #ISC
    '#b0b8d4', #EEC prog. (early)
    '#8ca1d7', #EEC prog. (mid)
    '#618bda', #EEC prog. (late)
    '#0076dc', #EEC
    '#eab694', #Goblet prog. (early)
    '#f89b58', #Goblet prog. (late)
    '#ff7f00', #Goblet
    '#984ea3', # Paneth
    '#e4a091', #Tuft prog. (early)
    '#e96b56', #Tuft prog. (late)
    '#e41a1c', #Tuft
    '#86bd77', #Enterocyte prog. (TA) 
    '#00a704' #Enterocyte
    
]

In [None]:
sc.pl.umap(adata, color=['cell_type_fine'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), layer='log_dca_counts', wspace=1)

### Cell Type

In [None]:
# Identity mapping of the current categories, displayed as a template for the
# hand-edited mapping in the next cell.
cluster_anno = {str(cat): cat for cat in adata.obs['cell_type_fine'].cat.categories}
cluster_anno

In [None]:
cluster_anno = {'ISC': 'ISC',
 'EEC prog. (early)': 'EEC prog.',
 'EEC prog. (mid)': 'EEC prog.',
 'EEC prog. (late)': 'EEC prog.',
 'EEC': 'EEC',
 'Goblet prog. (early)': 'Goblet prog.',
 'Goblet prog. (late)': 'Goblet prog.',
 'Goblet': 'Goblet',
 'Paneth': 'Paneth',
 'Tuft prog. (early)': 'Tuft prog.',
 'Tuft prog. (late)': 'Tuft prog.',
 'Tuft': 'Tuft',
 'Enterocyte prog. (TA)': 'Enterocyte prog. (TA)',
 'Enterocyte': 'Enterocyte'}

In [None]:
# Merge the progenitor stages via cluster_anno and store the result as a categorical.
adata.obs['cell_type'] = (
    adata.obs['cell_type_fine']
    .astype("str")
    .map(cluster_anno)
    .astype("category")
)

In [None]:
adata.obs['cell_type'] = adata.obs['cell_type'].cat.reorder_categories(['ISC', 'EEC prog.','EEC',
                                                                         'Goblet prog.', 'Goblet', 'Paneth', 'Tuft prog.', 'Tuft', 'Enterocyte prog. (TA)', 'Enterocyte' ])

In [None]:
adata.uns['cell_type_colors'] = [
    '#d0d0d0', #ISC
    '#8ca1d7', #EEC prog.
    '#0076dc', #EEC
    '#f2a976', #Goblet prog.
    '#ff7f00', #Goblet
    '#984ea3', # Paneth
    '#e88673', #Tuft prog.
    '#e41a1c', #Tuft
    '#86bd77', #Enterocyte prog. (TA) 
    '#00a704' #Enterocyte
    
]

In [None]:
sc.pl.umap(adata, color=['cell_type'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), layer='log_dca_counts', wspace=1)

### Cell Type with Early Prog.

In [None]:
# Identity mapping of the current categories, displayed as a template for the
# hand-edited mapping in the next cell.
cluster_anno = {str(cat): cat for cat in adata.obs['cell_type_fine'].cat.categories}
cluster_anno

In [None]:
# Fine label -> coarse label, keeping the earliest EEC and Goblet progenitors
# together in one shared 'Early EEC/Goblet prog.' group. Remaining staged
# progenitors are merged per lineage; every other label maps to itself.
cluster_anno = {
    'ISC': 'ISC',
    'EEC prog. (early)': 'Early EEC/Goblet prog.',
    **dict.fromkeys(['EEC prog. (mid)', 'EEC prog. (late)'], 'EEC prog.'),
    'EEC': 'EEC',
    'Goblet prog. (early)': 'Early EEC/Goblet prog.',
    'Goblet prog. (late)': 'Goblet prog.',
    'Goblet': 'Goblet',
    'Paneth': 'Paneth',
    **dict.fromkeys(['Tuft prog. (early)', 'Tuft prog. (late)'], 'Tuft prog.'),
    'Tuft': 'Tuft',
    'Enterocyte prog. (TA)': 'Enterocyte prog. (TA)',
    'Enterocyte': 'Enterocyte',
}

In [None]:
# Derive `cell_type_prog` from `cell_type_fine` via `cluster_anno` and store
# it as a categorical column.
# NOTE: Series.map with a dict turns any label that is not a key of
# `cluster_anno` into NaN without raising.
adata.obs['cell_type_prog'] = (
    adata.obs['cell_type_fine']
    .astype("str")
    .map(cluster_anno)
    .astype("category")
)

In [None]:
# Fix the category order (used for legends and for the positional colour
# list in adata.uns['cell_type_prog_colors']).
adata.obs['cell_type_prog'] = adata.obs['cell_type_prog'].cat.reorder_categories([
    'ISC',
    'Early EEC/Goblet prog.',
    'EEC prog.',
    'EEC',
    'Goblet prog.',
    'Goblet',
    'Paneth',
    'Tuft prog.',
    'Tuft',
    'Enterocyte prog. (TA)',
    'Enterocyte',
])

In [None]:
# One colour per `cell_type_prog` category. Only the values are stored, in
# the order written here, so the entries must stay in category order.
adata.uns['cell_type_prog_colors'] = list({
    'ISC': '#d0d0d0',
    'Early EEC/Goblet prog.': '#d0b7b4',
    'EEC prog.': '#8ca1d7',
    'EEC': '#0076dc',
    'Goblet prog.': '#f2a976',
    'Goblet': '#ff7f00',
    'Paneth': '#984ea3',
    'Tuft prog.': '#e88673',
    'Tuft': '#e41a1c',
    'Enterocyte prog. (TA)': '#86bd77',
    'Enterocyte': '#00a704',
}.values())

In [None]:
# UMAP coloured by the `cell_type_prog` annotation, with outlined points.
# NOTE(review): `layer` presumably has no effect when colouring by an obs
# column rather than a gene - confirm.
sc.pl.umap(
    adata,
    color=['cell_type_prog'],
    size=7,
    add_outline=True,
    alpha=1,
    outline_width=(0.3, 0.0),
    layer='log_dca_counts',
    wspace=1,
)

### Cell Type Broad

In [None]:
# Identity mapping {label: label} over the `cell_type` categories.
# Displaying it gives a copy-paste template; the following cell overwrites
# `cluster_anno` with the hand-edited mapping.
cluster_anno = {
    str(label): label
    for label in adata.obs['cell_type'].cat.categories
}
cluster_anno

In [None]:
# `cell_type` label -> broad lineage label: each progenitor label is folded
# into the label of its mature cell type.
cluster_anno = {
    'ISC': 'ISC',
    **dict.fromkeys(['EEC prog.', 'EEC'], 'EEC'),
    **dict.fromkeys(['Goblet prog.', 'Goblet'], 'Goblet'),
    'Paneth': 'Paneth',
    **dict.fromkeys(['Tuft prog.', 'Tuft'], 'Tuft'),
    **dict.fromkeys(['Enterocyte prog. (TA)', 'Enterocyte'], 'Enterocyte'),
}

In [None]:
# Derive `cell_type_broad` from `cell_type` via `cluster_anno` and store it
# as a categorical column.
# NOTE: Series.map with a dict turns any label that is not a key of
# `cluster_anno` into NaN without raising.
adata.obs['cell_type_broad'] = (
    adata.obs['cell_type']
    .astype("str")
    .map(cluster_anno)
    .astype("category")
)

In [None]:
# Fix the category order (used for legends and for the positional colour
# list in adata.uns['cell_type_broad_colors']).
adata.obs['cell_type_broad'] = adata.obs['cell_type_broad'].cat.reorder_categories([
    'ISC',
    'EEC',
    'Goblet',
    'Paneth',
    'Tuft',
    'Enterocyte',
])

In [None]:
# One colour per `cell_type_broad` category. Only the values are stored, in
# the order written here, so the entries must stay in category order.
adata.uns['cell_type_broad_colors'] = list({
    'ISC': '#d0d0d0',
    'EEC': '#0076dc',
    'Goblet': '#ff7f00',
    'Paneth': '#984ea3',
    'Tuft': '#e41a1c',
    'Enterocyte': '#00a704',
}.values())

In [None]:
# UMAP coloured by the `cell_type_broad` annotation, with outlined points.
# NOTE(review): `layer` presumably has no effect when colouring by an obs
# column rather than a gene - confirm.
sc.pl.umap(
    adata,
    color=['cell_type_broad'],
    size=7,
    add_outline=True,
    alpha=1,
    outline_width=(0.3, 0.0),
    layer='log_dca_counts',
    wspace=1,
)

# Final Embedding

In [None]:
# Compute the PAGA graph with the 'cr_cell_type' groups as nodes, then draw
# it with the figure size temporarily set to (8, 9).
sc.tl.paga(adata, groups='cr_cell_type')

paga_figure_rc = {'figure.figsize': (8, 9)}
with rc_context(paga_figure_rc):
    sc.pl.paga(adata)

In [None]:
# Recompute the UMAP, initialised from the PAGA layout (init_pos='paga'),
# then show it coloured by 'cr_cell_type'.
sc.tl.umap(
    adata,
    min_dist=0.3,
    spread=1,
    negative_sample_rate=1,
    gamma=0.25,
    init_pos='paga',
)
sc.pl.umap(
    adata,
    color=['cr_cell_type'],
    size=7,
    add_outline=True,
    alpha=0.7,
    outline_width=(0.3, 0.0),
    ncols=4,
    color_map=mymap,
)

In [None]:
# Independent copy of the current UMAP coordinates; it is the read-only
# source while adata.obsm['X_umap'] is overwritten in the following cell.
X_umap = adata.obsm['X_umap'].copy()

In [None]:
# Rotate the embedding by 90 degrees counter-clockwise, (x, y) -> (-y, x),
# writing into the existing array. `X_umap` is the untouched copy taken
# beforehand, so both columns are read from the original coordinates.
adata.obsm['X_umap'][:] = np.column_stack((-X_umap[:, 1], X_umap[:, 0]))

In [None]:
# Show the current embedding coloured by 'cell_type_fine_eec'.
sc.pl.umap(
    adata,
    color=['cell_type_fine_eec'],
    size=7,
    add_outline=True,
    alpha=0.7,
    outline_width=(0.3, 0.0),
    ncols=4,
    color_map=mymap,
)

In [None]:
# Keep a snapshot of the current embedding under a separate obsm key so it
# is not affected if 'X_umap' is recomputed or modified later.
adata.obsm['X_umap_1'] = adata.obsm['X_umap'].copy()

In [None]:
# Hand-set 2D coordinates (21 points); they are combined into `init_pos` and
# used as PAGA node positions further down.
x = [
    -1.8967831515, -0.6309303456, 0.3827609341,
    0.9099140789, 2.6775622968, 3.7106613777,
    1.8314349218, 2.3410250773, 3.5740502873,
    2.9918378132, 2.8622651778, 3.8019963112,
    -0.9893851645, -0.1098106305, 0.2936851902,
    0.8256345188, 2.4014849551, 4.0493468353,
    5.3015194184, -2.8193176011, -4.7982385463,
]
# The y values are listed with the opposite sign and flipped on assignment.
y = [
    -value
    for value in (
        2.9076160547, 2.1498607138, 0.2270826627,
        -1.2992813705, 0.0251587383, 0.0742913744,
        -1.4369549157, -2.7514030451, -3.4287473711,
        -2.4896041397, -1.7163397366, -2.1320511791,
        3.1825299293, 4.0235521491, 4.9837549759,
        6.3674694719, 1.5360984981, 1.2070121067,
        0.9830056655, 4.4088735203, 6.7755531783,
    )
]

In [None]:
# Quick visual check of the hand-set coordinates before they are used.
sb.scatterplot(x=x, y=y)

In [None]:
# Stack the coordinate lists into an (n_points, 2) array: one row per point,
# columns are x and y.
init_pos = np.transpose([x, y])

In [None]:
# Replace the stored PAGA node positions with the hand-set coordinates.
# NOTE(review): assumes init_pos has one row per PAGA node, in node
# (category) order - confirm against the grouping used for sc.tl.paga.
adata.uns['paga']['pos'] = init_pos

# Save adata

In [None]:
# NOTE(review): presumably stores the combined kernel's transition matrix in
# adata so it is saved together with it (CellRank kernel API) - confirm.
combined_kernel.write_to_adata()

In [None]:
# Write the combined kernel (including its adata) to
# "<file_path>/<file_base_name>_adata_..._crKernel_combined.plk".
# NOTE(review): the extension is '.plk', possibly a typo of '.pkl'; left
# unchanged because other code may load this exact file name.
combined_kernel.write(
    fname='/'.join([
        file_path,
        '_'.join([
            file_base_name,
            'adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_combined.plk',
        ]),
    ]),
    write_adata=True,
)

In [None]:
# Save the annotated object as .h5ad.
# NOTE(review): the path is relative, so the file lands in the current
# working directory rather than under file_path like the kernel - confirm
# this is intended.
adata.write('adata_markedDoublets_normalized_initialAnno_rmDoublets_integrated_imputed_annotatedRefined.h5ad')

In [None]:
# Reload the object saved above (same relative file name), replacing the
# in-memory `adata`.
adata = sc.read('adata_markedDoublets_normalized_initialAnno_rmDoublets_integrated_imputed_annotatedRefined.h5ad')