# RNA velocity

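RNA velocity analysis using scVelo, first on the full dataset and then separately for the DMSO and N6 conditions.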

# 0. Loading the libraries

In [1]:
import scanpy as sc
import matplotlib.pyplot as plt
import scvelo as scv

import multiprocessing
multiprocessing.cpu_count()

import cellrank as cr
import warnings

warnings.simplefilter("ignore", category=UserWarning)
warnings.simplefilter("ignore", category=FutureWarning)
warnings.simplefilter("ignore", category=DeprecationWarning)

import pickle

def save_object(obj, filename):
    """Pickle ``obj`` to ``filename`` using the highest available protocol.

    The target is opened in binary write mode, so an existing file at
    ``filename`` is overwritten.

    Parameters
    ----------
    obj : any picklable object
        The object to serialise.
    filename : str or path-like
        Destination path of the pickle file.
    """
    with open(filename, mode='wb') as sink:
        pickle.Pickler(sink, pickle.HIGHEST_PROTOCOL).dump(obj)

In [None]:
# Set up constants
save_folder = "figures/"
objects_folder = "saved_objects/"

# All figures in this notebook are saved through scvelo (`scv.pl.*`), which
# keeps its own `settings.figdir` separate from scanpy's. Set both so that
# `save_folder` actually controls where the plots are written.
sc.settings.figdir = './'+save_folder
scv.settings.figdir = './'+save_folder

plt.rcParams['figure.figsize']=(8,8) #rescale figures
sc.settings.verbosity = 3

-----
anndata     0.8.0
scanpy      1.9.1
-----
PIL                                         9.1.0
aa8f2297d25b4dc6fd3d98411eb3ba53823c4f42    NA
absl                                        NA
adjustText                                  NA
anndata2ri                                  1.0.6
appdirs                                     1.4.4
asttokens                                   NA
astunparse                                  1.6.3
attr                                        21.4.0
backcall                                    0.2.0
backports                                   NA
batchglm                                    v0.7.4
beta_ufunc                                  NA
binom_ufunc                                 NA
bioinfokit                                  2.0.8
bioservices                                 1.8.4
boto3                                       1.23.1
botocore                                    1.26.1
brotli                                      1.0.9
bs4                

# 1. Both conditions

In [None]:
# Load the annotated object from the shared objects folder (same path as
# before, now built from the `objects_folder` constant).
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that were produced by this project.
with open(objects_folder + 'adata_annotated.pkl', 'rb') as inp:
    adata = pickle.load(inp)

In [None]:
# Proportions plot grouped by the `sample2` annotation, saved as splice_props.png.
scv.pl.proportions(
    adata,
    groupby="sample2",
    save="splice_props.png",
)

# 2. DMSO

In [None]:
# Condition analysed in this section, and the one that gets filtered out.
COND, OTHER_COND = 'DMSO', 'N6'

In [None]:
# Reload the annotated object so this section starts from a clean state.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that were produced by this project.
with open(objects_folder + 'adata_annotated.pkl', 'rb') as inp:
    adata = pickle.load(inp)

# Drop the cells of the other condition. Subsetting an AnnData returns a view;
# `.copy()` turns it into a standalone object so the in-place steps that follow
# (PCA, neighbors, moments, ...) do not have to modify a view.
adata = adata[adata.obs.cond != OTHER_COND].copy()

In [None]:
# PCA, neighbor graph and moments on the subset. The neighbor graph and the
# moments share the same number of PCs and neighbors.
knn_settings = {"n_pcs": 30, "n_neighbors": 30}
sc.tl.pca(adata, random_state=0)
sc.pp.neighbors(adata, random_state=0, **knn_settings)
scv.pp.moments(adata, **knn_settings)

computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:12)
computing neighbors
    using 'X_pca' with n_pcs = 30
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:39)
Normalized count data: spliced, unspliced.
computing moments based on connectivities
    finished (0:00:29) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)


In [None]:
# Fit the splicing dynamics (long-running step; parallelised over 40 jobs).
scv.tl.recover_dynamics(
    adata,
    n_jobs=40,
    n_top_genes=4000,
)

recovering dynamics (using 40/40 cores)


  0%|          | 0/2143 [00:00<?, ?gene/s]

    finished (0:09:46) --> added 
    'fit_pars', fitted parameters for splicing dynamics (adata.var)


In [None]:
# Rank genes by fit likelihood (best first) and test the 1000 top-ranked ones
# for differential kinetics across the `tp` groups.
likelihood_ranked = adata.var['fit_likelihood'].sort_values(ascending=False)
top_genes = likelihood_ranked.index[:1000]
scv.tl.differential_kinetic_test(
    adata,
    var_names=top_genes,
    groupby='tp',
)

testing for differential kinetics
    finished (0:13:12) --> added 
    'fit_diff_kinetics', clusters displaying differential kinetics (adata.var)
    'fit_pvals_kinetics', p-values of differential kinetics (adata.var)


In [None]:
# Dynamical-mode velocities, grouped by `tp` with differential kinetics
# enabled, followed by the velocity graph.
velocity_kwargs = dict(mode="dynamical", groupby="tp", diff_kinetics=True)
scv.tl.velocity(adata, **velocity_kwargs)
scv.tl.velocity_graph(adata, n_jobs=40)

computing velocities
    finished (0:02:01) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph (using 40/40 cores)


  0%|          | 0/18064 [00:00<?, ?cells/s]

    finished (0:01:59) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)


In [None]:
# Checkpoint the object after the expensive velocity steps. The path is built
# from `objects_folder` rather than repeating the folder name as a literal.
save_object(adata, objects_folder + "velocity" + "_" + COND + ".pkl")

In [None]:
# Rescale figures.
plt.rcParams.update({'figure.figsize': (8, 8)})

In [None]:
# Velocity stream plot on the UMAP embedding, one panel per colour key.
stream_png = "_".join(["embed_umap", COND]) + ".png"
scv.pl.velocity_embedding_stream(
    adata,
    basis='umap',
    color=["tp", "Il10_positive"],
    legend_loc="right margin",
    min_mass=3,
    smooth=0.5,
    show=False,
    save=stream_png,
)

computing velocity embedding
    finished (0:00:13) --> added
    'velocity_umap', embedded velocity vectors (adata.obsm)
saving figure to file ./figures/scvelo_embed_umap_DMSO.png


[<AxesSubplot:title={'center':'tp'}>,
 <AxesSubplot:title={'center':'Il10 positive'}>]

In [None]:
# Per-gene velocity plots for a few genes of interest.
# `adata` was already restricted to the current condition when it was loaded,
# so it is passed directly instead of being subset a second time.
genes = ["Il10", "Mki67", "Myc"]
scv.pl.velocity(adata, genes, ncols=1, size=100, figsize=(15, 15), show=False, save="velo_plot"+"_"+COND+".png")

saving figure to file ./figures/scvelo_velo_plot_DMSO.png


<AxesSubplot:title={'center':'expression'}>

In [None]:
# Velocity length and confidence, plotted and then averaged per `tp` group.
scv.tl.velocity_confidence(adata)
# Use a list, not a tuple: selecting several groupby columns with a tuple
# raises in pandas >= 2.0.
keys = ['velocity_length', 'velocity_confidence']
scv.pl.scatter(adata, c=keys, cmap='coolwarm', perc=[5, 95], show=False, save="velo_conf"+"_"+COND+".png")

# Mean of both metrics per time point, transposed so metrics are rows.
df = adata.obs.groupby("tp")[keys].mean().T
df.style.background_gradient(cmap='coolwarm', axis=1)

--> added 'velocity_length' (adata.obs)
--> added 'velocity_confidence' (adata.obs)
--> added 'velocity_confidence_transition' (adata.obs)
saving figure to file ./figures/scvelo_velo_conf_DMSO.png


tp,0h,2h,24h,48h,d7
velocity_length,6.761059,6.401182,7.294352,6.913974,7.405513
velocity_confidence,0.772643,0.766919,0.839478,0.800936,0.790394


In [None]:
# Velocity pseudotime between two explicitly chosen cells: the first "0h" cell
# is passed as root_key and the first "d7" Il10+ cell as end_key.
is_root = adata.obs["tp"] == "0h"
is_end = (adata.obs["tp"] == "d7") & (adata.obs["Il10_positive"] == "Il10+")
root_cell = adata[is_root].obs_names.tolist()[0]
end_cell = adata[is_end].obs_names.tolist()[0]
scv.tl.velocity_pseudotime(adata, root_key=root_cell, end_key=end_cell)

pseudotime_png = "_".join(["pseudo_time", COND]) + ".png"
scv.pl.scatter(
    adata,
    color='velocity_pseudotime',
    cmap='gnuplot',
    size=80,
    show=False,
    save=pseudotime_png,
)

saving figure to file ./figures/scvelo_pseudo_time_DMSO.png


<AxesSubplot:title={'center':'velocity pseudotime'}>

In [None]:
# Save the fully processed object. The path is built from `objects_folder`
# rather than repeating the folder name as a literal.
save_object(adata, objects_folder + "adata_cr_prepro" + "_" + COND + ".pkl")

# 3. N6

In [None]:
# Condition analysed in this section, and the one that gets filtered out.
COND, OTHER_COND = 'N6', 'DMSO'

In [None]:
# Reload the annotated object so this section starts from a clean state.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that were produced by this project.
with open(objects_folder + 'adata_annotated.pkl', 'rb') as inp:
    adata = pickle.load(inp)

# Drop the cells of the other condition. Subsetting an AnnData returns a view;
# `.copy()` turns it into a standalone object so the in-place steps that follow
# (PCA, neighbors, moments, ...) do not have to modify a view.
adata = adata[adata.obs.cond != OTHER_COND].copy()

In [None]:
# PCA, neighbor graph and moments on the subset. The neighbor graph and the
# moments share the same number of PCs and neighbors.
knn_settings = {"n_pcs": 30, "n_neighbors": 30}
sc.tl.pca(adata, random_state=0)
sc.pp.neighbors(adata, random_state=0, **knn_settings)
scv.pp.moments(adata, **knn_settings)

computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:12)
computing neighbors
    using 'X_pca' with n_pcs = 30
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:39)
Normalized count data: spliced, unspliced.
computing moments based on connectivities
    finished (0:00:29) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)


In [None]:
# Fit the splicing dynamics (long-running step; parallelised over 40 jobs).
scv.tl.recover_dynamics(
    adata,
    n_jobs=40,
    n_top_genes=4000,
)

recovering dynamics (using 40/40 cores)


  0%|          | 0/2143 [00:00<?, ?gene/s]

    finished (0:09:46) --> added 
    'fit_pars', fitted parameters for splicing dynamics (adata.var)


In [None]:
# Rank genes by fit likelihood (best first) and test the 1000 top-ranked ones
# for differential kinetics across the `tp` groups.
likelihood_ranked = adata.var['fit_likelihood'].sort_values(ascending=False)
top_genes = likelihood_ranked.index[:1000]
scv.tl.differential_kinetic_test(
    adata,
    var_names=top_genes,
    groupby='tp',
)

testing for differential kinetics
    finished (0:13:12) --> added 
    'fit_diff_kinetics', clusters displaying differential kinetics (adata.var)
    'fit_pvals_kinetics', p-values of differential kinetics (adata.var)


In [None]:
# Dynamical-mode velocities, grouped by `tp` with differential kinetics
# enabled, followed by the velocity graph.
velocity_kwargs = dict(mode="dynamical", groupby="tp", diff_kinetics=True)
scv.tl.velocity(adata, **velocity_kwargs)
scv.tl.velocity_graph(adata, n_jobs=40)

computing velocities
    finished (0:02:01) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph (using 40/40 cores)


  0%|          | 0/18064 [00:00<?, ?cells/s]

    finished (0:01:59) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)


In [None]:
# Checkpoint the object after the expensive velocity steps. The path is built
# from `objects_folder` rather than repeating the folder name as a literal.
save_object(adata, objects_folder + "velocity" + "_" + COND + ".pkl")

In [None]:
# Rescale figures.
plt.rcParams.update({'figure.figsize': (8, 8)})

In [None]:
# Velocity stream plot on the UMAP embedding, one panel per colour key.
stream_png = "_".join(["embed_umap", COND]) + ".png"
scv.pl.velocity_embedding_stream(
    adata,
    basis='umap',
    color=["tp", "Il10_positive"],
    legend_loc="right margin",
    min_mass=3,
    smooth=0.5,
    show=False,
    save=stream_png,
)

computing velocity embedding
    finished (0:00:13) --> added
    'velocity_umap', embedded velocity vectors (adata.obsm)
saving figure to file ./figures/scvelo_embed_umap_DMSO.png


[<AxesSubplot:title={'center':'tp'}>,
 <AxesSubplot:title={'center':'Il10 positive'}>]

In [None]:
# Per-gene velocity plots for a few genes of interest.
# `adata` was already restricted to the current condition when it was loaded,
# so it is passed directly instead of being subset a second time.
genes = ["Il10", "Mki67", "Myc"]
scv.pl.velocity(adata, genes, ncols=1, size=100, figsize=(15, 15), show=False, save="velo_plot"+"_"+COND+".png")

saving figure to file ./figures/scvelo_velo_plot_DMSO.png


<AxesSubplot:title={'center':'expression'}>

In [None]:
# Velocity length and confidence, plotted and then averaged per `tp` group.
scv.tl.velocity_confidence(adata)
# Use a list, not a tuple: selecting several groupby columns with a tuple
# raises in pandas >= 2.0.
keys = ['velocity_length', 'velocity_confidence']
scv.pl.scatter(adata, c=keys, cmap='coolwarm', perc=[5, 95], show=False, save="velo_conf"+"_"+COND+".png")

# Mean of both metrics per time point, transposed so metrics are rows.
df = adata.obs.groupby("tp")[keys].mean().T
df.style.background_gradient(cmap='coolwarm', axis=1)

--> added 'velocity_length' (adata.obs)
--> added 'velocity_confidence' (adata.obs)
--> added 'velocity_confidence_transition' (adata.obs)
saving figure to file ./figures/scvelo_velo_conf_DMSO.png


tp,0h,2h,24h,48h,d7
velocity_length,6.761059,6.401182,7.294352,6.913974,7.405513
velocity_confidence,0.772643,0.766919,0.839478,0.800936,0.790394


In [None]:
# Velocity pseudotime between two explicitly chosen cells: the first "0h" cell
# is passed as root_key and the first "d7" Il10+ cell as end_key.
is_root = adata.obs["tp"] == "0h"
is_end = (adata.obs["tp"] == "d7") & (adata.obs["Il10_positive"] == "Il10+")
root_cell = adata[is_root].obs_names.tolist()[0]
end_cell = adata[is_end].obs_names.tolist()[0]
scv.tl.velocity_pseudotime(adata, root_key=root_cell, end_key=end_cell)

pseudotime_png = "_".join(["pseudo_time", COND]) + ".png"
scv.pl.scatter(
    adata,
    color='velocity_pseudotime',
    cmap='gnuplot',
    size=80,
    show=False,
    save=pseudotime_png,
)

saving figure to file ./figures/scvelo_pseudo_time_DMSO.png


<AxesSubplot:title={'center':'velocity pseudotime'}>

In [None]:
# Save the fully processed object. The path is built from `objects_folder`
# rather than repeating the folder name as a literal.
save_object(adata, objects_folder + "adata_cr_prepro" + "_" + COND + ".pkl")