In [None]:
import scvelo as scv
import scanpy as sc
import cellrank as cr
import numpy as np
import pandas as pd
import anndata as ad


In [None]:
# Global scVelo settings for this notebook.
scv.settings.verbosity = 3  # show errors(0), warnings(1), info(2), hints(3)
scv.settings.presenter_view = True  # set max width size for presenter view
# Configure the figure style exactly once. A bare set_figure_params('scvelo')
# issued after this call re-applies the style defaults and discards overrides
# such as figsize, so the customised call has to be the last one.
scv.settings.set_figure_params('scvelo', facecolor='white', dpi=100, frameon=False,
                              figsize=[5, 5])

In [None]:
# Directory with the shared inputs read below (AnnData, metadata, loom pickle).
input_path_raw = '/project/gca/yuzhao1/work/final_RC2rna/velocity/epithelial/'
# Directory of the pouch subset; the dynamical-model result is written to and read from here.
input_path = '/project/gca/yuzhao1/work/final_RC2rna/velocity/epithelial/subset_pouch/'

In [None]:
######################## part 1: process ##########################

In [None]:
# Read the full AnnData object from the raw-input directory.
adata = sc.read_h5ad(input_path_raw + 'adata_input.h5ad')

# Attach the 'anno1' annotation from the metadata table. Values are assigned by
# position, so metadata.csv is assumed to list cells in the same order as adata
# -- TODO confirm.
cell_meta = pd.read_csv(input_path_raw + "metadata.csv")
adata.obs['anno1'] = cell_meta['anno1'].values

# Keep only the cells whose biopsy location is 'POU'.
is_pouch = adata.obs['biopsy_location'].isin(['POU'])
adata = adata[is_pouch]

In [None]:
import pickle

# Load the pickled loom data object that is merged into `adata` in the next cell.
# NOTE(review): pickle.load can execute arbitrary code -- only load files from a
# trusted source.
filename = input_path_raw+'loomdata_union.pkl'
# The context manager guarantees the file handle is closed after loading.
with open(filename, 'rb') as loom_file:
    loomdata_union = pickle.load(loom_file)

In [None]:
# Merge the loom data into the subset AnnData object; `adata` is rebound to the result.
adata = scv.utils.merge(adata, loomdata_union)

In [None]:
# Display the merged object as the cell output.
adata

In [None]:
# Sanity check: UMAP coloured by 'anno1', with the legend drawn on the data.
sc.pl.umap(
    adata,
    color='anno1',
    frameon=False,
    legend_loc='on data',
    title='',
    save=False,
)

In [None]:
# Plot scVelo's proportions summary, grouped by the 'anno1' annotation.
scv.pl.proportions(adata, groupby='anno1')

In [None]:
# Pre-processing. The steps run in this exact order and each modifies `adata`.
# Pre-filter low-count noise: gene filter with min_shared_counts=10.
scv.pp.filter_genes(adata, min_shared_counts=10)

# Normalize X, spliced and unspliced per cell.
scv.pp.normalize_per_cell(adata, enforce=True) 

# Select highly variable genes (top 2000 by dispersion).
scv.pp.filter_genes_dispersion(adata, n_top_genes=2000)

# Log-transform X only.
scv.pp.log1p(adata)

# Display the processed object as the cell output.
adata

In [None]:
# Display the 'X_pca' embedding currently stored on the object.
adata.obsm['X_pca']

In [None]:
# Compute a Harmony-corrected PCA for this subset on a copy of the data.
# NOTE(review): the original note read "using subset because otherwise the X will
# be subset by variable genes"; presumably the copy keeps the regress_out/scale
# steps below from touching adata.X -- confirm.
tempann = adata.copy()
# Disabled alternative: highly-variable-gene selection with scanpy.
#sc.pp.highly_variable_genes(tempann, min_mean=0.0125, max_mean=3, min_disp=0.5)
#sc.pl.highly_variable_genes(tempann)
#tempann = tempann[:, tempann.var.highly_variable]
# Regress out the listed covariates, scale (clipped at 10), then run a 50-component PCA.
sc.pp.regress_out(tempann, ['nCount_RNA', 'percent.mt', 'CC.Difference'])
sc.pp.scale(tempann, max_value=10)
sc.tl.pca(tempann, svd_solver='arpack', n_comps=50)
import scanpy.external as sce
# Harmony integration keyed on 'Patient_ID'; its result is read from 'X_pca_harmony' below.
sce.pp.harmony_integrate(tempann, 'Patient_ID')
# Store the Harmony result as adata's 'X_pca' so later steps that use
# use_rep='X_pca' pick it up.
adata.obsm['X_pca'] = tempann.obsm['X_pca_harmony']
# Display the replaced embedding as the cell output.
adata.obsm['X_pca']

In [None]:
# Needed because the object was transferred from Seurat.
sc.pp.neighbors(
    adata,
    n_neighbors=20,
    use_rep='X_pca',
)

# Moments on the same representation and neighbourhood size.
scv.pp.moments(
    adata,
    use_rep='X_pca',
    n_pcs=50,
    n_neighbors=20,
    use_highly_variable=True,
)

In [None]:
######################## part 2.1: Run dynamical model ##########################

In [None]:
# The dynamical model needs scv.tl.recover_dynamics(adata, **params) to be run first.
scv.tl.recover_dynamics(
    adata,
    n_jobs=4,
    n_top_genes=2000,
    fit_basal_transcription=True,
)

In [None]:
# compute velocity
scv.tl.velocity(adata, mode='dynamical', filter_genes = False)


!jupyter nbextension enable --py widgetsnbextension

scv.tl.velocity_graph(adata, n_neighbors=20, n_jobs=4)

scv.settings.set_figure_params('scvelo', facecolor='white', dpi=100, frameon=False,
                              figsize = [5,5])

In [None]:
# Dynamical model: velocity embedding (grid view) on the UMAP basis, coloured by 'anno1'.
scv.pl.velocity_embedding_grid(
    adata,
    basis='umap',
    color='anno1',
    save=False,
    title='',
    scale=0.2,
)

In [None]:
# Dynamical model: velocity embedding (stream view) on the UMAP basis, coloured by 'anno1'.
scv.pl.velocity_embedding_stream(
    adata,
    basis='umap',
    color=['anno1'],
    save=False,
    title='',
)

In [None]:
# Save the dataset in AnnData (.h5ad) format; this file is reloaded in part 3.
adata.write(input_path+'adata_dynamical_output.h5ad')

In [None]:
######################## part 2.2: Run stochastic model ##########################

In [None]:
# Recompute velocities with the stochastic model on the same object.
scv.tl.velocity(adata, mode='stochastic')

# Velocity graph with default arguments.
scv.tl.velocity_graph(adata)

# Figure settings for the plots that follow.
scv.settings.set_figure_params(
    'scvelo',
    facecolor='white',
    dpi=100,
    frameon=False,
    figsize=[5, 5],
)

In [None]:
# Stochastic model: velocity embedding (grid view) on the UMAP basis, coloured by 'anno1'.
scv.pl.velocity_embedding_grid(
    adata,
    basis='umap',
    color='anno1',
    save=False,
    title='',
    scale=0.2,
)

In [None]:
# Stochastic model: velocity embedding (stream view) on the UMAP basis, coloured by 'anno1'.
scv.pl.velocity_embedding_stream(
    adata,
    basis='umap',
    color=['anno1'],
    save=False,
    title='',
)

In [None]:
######################## part 3: visualize ##########################

In [None]:
# Reload the saved dynamical-model result, replacing the in-memory object
# (which by this point holds the stochastic-model velocities).
adata = sc.read_h5ad(input_path+'adata_dynamical_output.h5ad')

In [None]:
# Figure settings for the visualisation section.
scv.settings.set_figure_params(
    'scvelo',
    facecolor='white',
    dpi=100,
    frameon=False,
    figsize=[5, 5],
)

In [None]:
# Reloaded dynamical result: velocity embedding (grid view) on the UMAP basis.
scv.pl.velocity_embedding_grid(
    adata,
    basis='umap',
    color='anno1',
    save=False,
    title='',
    scale=0.2,
)

In [None]:
# Reloaded dynamical result: velocity embedding (stream view) on the UMAP basis.
scv.pl.velocity_embedding_stream(
    adata,
    basis='umap',
    color=['anno1'],
    save=False,
    title='',
)

In [None]:
############### part 4: downstream analysis ###############

In [None]:
# Histograms of the fitted kinetic rates for well-fitted velocity genes.
df = adata.var
# Build each condition separately. `&` binds tighter than `==`, so the
# unparenthesised form `a & b == True` compares the combined mask with True
# rather than testing the 'velocity_genes' column itself.
is_well_fit = df['fit_likelihood'] > .1
is_velocity_gene = df['velocity_genes'] == True  # noqa: E712 (elementwise comparison)
df = df[is_well_fit & is_velocity_gene]

# Shared histogram options: logarithmic x axis and font size.
kwargs = dict(xscale='log', fontsize=16)
with scv.GridSpec(ncols=3) as pl:
    pl.hist(df['fit_alpha'], xlabel='transcription rate', **kwargs)
    # The splicing-rate panel plots fit_beta multiplied by fit_scaling.
    pl.hist(df['fit_beta'] * df['fit_scaling'], xlabel='splicing rate', xticks=[.1, .4, 1], **kwargs)
    pl.hist(df['fit_gamma'], xlabel='degradation rate', xticks=[.1, .4, 1], **kwargs)

# Show the first rows of the fit* columns (rows with missing values dropped).
scv.get_df(adata, 'fit*', dropna=True).head()

In [None]:
# Rank velocity genes per 'anno1' group (min_corr=0.3); the result is read from
# adata.uns['rank_velocity_genes'] in the next cell.
scv.tl.rank_velocity_genes(adata, groupby='anno1', min_corr=.3)

In [None]:
# Tabulate the ranked gene names and display the first 20 rows.
# Note: this rebinds `df`, which previously held the filtered adata.var table.
df = scv.DataFrame(adata.uns['rank_velocity_genes']['names'])
df.head(20)

In [None]:
# Compute latent time, then draw a scatter plot coloured by it.
scv.tl.latent_time(adata)
scv.pl.scatter(
    adata,
    color='latent_time',
    color_map='gnuplot',
    size=80,
)

In [None]:
# The 300 genes with the highest fit likelihood, drawn as a heatmap sorted by latent time.
top_genes = (
    adata.var['fit_likelihood']
    .sort_values(ascending=False)
    .index[:300]
)
scv.pl.heatmap(
    adata,
    var_names=top_genes,
    sortby='latent_time',
    col_color='anno1',
    n_convolve=100,
)

In [None]:
# All genes ordered by decreasing fit likelihood; scatter plots for the first 15.
top_genes = (
    adata.var['fit_likelihood']
    .sort_values(ascending=False)
    .index
)
scv.pl.scatter(
    adata,
    color='anno1',
    basis=top_genes[:15],
    ncols=5,
    frameon=False,
)

In [None]:
# Velocity plots for four selected genes, arranged in two columns.
scv.pl.velocity(adata, ['LGR5',  'SATB2', 'APOA4', 'CA2'], ncols=2)

In [None]:
############### part 5: differential kinetics ###############

In [None]:
# Recompute velocities with the stochastic model and differential kinetics disabled.
# The commented-out calls are kept as alternative configurations
# (dynamical mode; differential kinetics restricted to the Goblet1/Goblet2 groups).
# scv.tl.velocity(adata, diff_kinetics=False, mode='dynamical')
scv.tl.velocity(adata, diff_kinetics=False, mode='stochastic')
# scv.tl.velocity(adata, diff_kinetics=True, mode='stochastic', groupby='anno1',
#                groups = ['Goblet1', 'Goblet2'])

In [None]:
# Rebuild the velocity graph from the velocities computed in the previous cell.
scv.tl.velocity_graph(adata)

In [None]:
# Part 5 velocities: velocity embedding (stream view) on the UMAP basis, coloured by 'anno1'.
scv.pl.velocity_embedding_stream(
    adata,
    basis='umap',
    color=['anno1'],
    save=False,
    title='',
)

In [None]:
# Re-rank velocity genes per 'anno1' group (min_corr=0.3) for the recomputed velocities.
scv.tl.rank_velocity_genes(adata, groupby='anno1', min_corr=.3)

In [None]:
# Tabulate the ranked gene names and display the first 5 rows.
df = scv.DataFrame(adata.uns['rank_velocity_genes']['names'])
df.head(5)

In [None]:
# Recompute latent time, then draw a scatter plot coloured by it.
scv.tl.latent_time(adata)
scv.pl.scatter(
    adata,
    color='latent_time',
    color_map='gnuplot',
    size=80,
)

In [None]:
# Velocity plots for the same four selected genes, arranged in two columns.
scv.pl.velocity(adata, ['LGR5',  'SATB2', 'APOA4', 'CA2'], ncols=2)

In [None]:
############### part 6: cellrank ###############
# Not suitable for our dataset; these tools lack general adaptability.

In [None]:
import cellrank as cr

In [None]:
# Compute CellRank terminal states, using 'anno1' as the cluster key and a
# connectivity weight of 0.2.
cr.tl.terminal_states(adata, cluster_key="anno1", weight_connectivities=0.2)

In [None]:
# Plot the terminal states computed in the previous cell.
cr.pl.terminal_states(adata)

In [None]:
# Compute CellRank initial states, using 'anno1' as the cluster key.
cr.tl.initial_states(adata, cluster_key="anno1")

In [None]:
# Plot the initial states with discrete=True.
cr.pl.initial_states(adata, discrete=True)

In [None]:
# Compute CellRank lineage drivers with default arguments.
cr.tl.lineage_drivers(adata)