# scVelo

In [None]:
#load packages
import scvelo as scv
import pandas as pd
import scanpy as sc
import seaborn as sns
import cellrank as cr
import scvelo as scv
import numpy as np
import matplotlib.pyplot as plt

# Logging verbosity for scVelo is set to level 3.
# NOTE(review): see the scVelo settings documentation for what each level prints.
scv.settings.verbosity = 3
# Apply the figure-parameter preset named "scvelo".
scv.settings.set_figure_params("scvelo")
# Logging verbosity for CellRank is set to level 2.
cr.settings.verbosity = 2

In [None]:
# Load the splicing data for the 3 samples, one object per sample.
# NOTE: the files read here are .h5ad files; per the original note they hold
# the velocyto output combined with the cellranger output.
EW1, EW2, EW3 = [
    scv.read(h5ad_path, cache=True)
    for h5ad_path in ('EW1-spliced.h5ad', 'EW2-spliced.h5ad', 'EW3-spliced.h5ad')
]

The barcodes saved in these files differ from the cell names used in the Seurat object.

To make them match, append '-1' to each barcode, followed by a sample suffix: '_1' for sample 1, '_2' for sample 2, and '_3' for sample 3. The splicing information then lines up with the correct cell in the Seurat object, so the clustering and cluster identities stay consistent.

In [None]:
#check what the current barcodes look like
# Copy the obs index of sample 1 (the barcodes) into a plain Python list.
barcodes = EW1.obs.index.tolist()
# Bare expression: shown as the notebook cell's output.
barcodes

In [None]:
# Append '-1_1' to every barcode in the sample 1 list, then display the result.
barcodes = [barcode + '-1_1' for barcode in barcodes]
barcodes

In [None]:
#make this new list of barcodes the index for sample 1
# (replaces the obs index of EW1 with the suffixed barcodes built above)
EW1.obs.index = barcodes

In [None]:
# Same renaming for sample 2: every barcode gets the suffix '-1_2'.
barcodes = [barcode + '-1_2' for barcode in EW2.obs.index.tolist()]
EW2.obs.index = barcodes

# Same renaming for sample 3: every barcode gets the suffix '-1_3'.
barcodes = [barcode + '-1_3' for barcode in EW3.obs.index.tolist()]
EW3.obs.index = barcodes

In [None]:
#check that the index list has been correctly updated
# (displays the obs index of sample 3; entries should now end in '-1_3')
EW3.obs.index

In [None]:
# Make the variable names unique in each of the three samples.
for _sample in (EW1, EW2, EW3):
    _sample.var_names_make_unique()

In [None]:
# Concatenate the three samples into one object (EW1 first, then EW2 and EW3).
ldata = EW1.concatenate(EW2, EW3)

In [None]:
# Display the concatenated object as the cell output.
ldata

In [None]:
# Display the barcodes (obs index) of the concatenated object.
ldata.obs.index

In [None]:
# Merging the three samples appended a batch code to the end of each barcode.
# Remove it so that the barcodes still match the Seurat info.
# Splitting on the last '-' (rather than dropping a fixed 2 characters) stays
# correct even if a batch label is longer than one character.
# NOTE(review): assumes the appended code has the form '-<batch>', which is
# the AnnData.concatenate default (index_unique='-') — confirm if that
# argument is ever changed.
barcodes = [barcode.rsplit('-', 1)[0] for barcode in ldata.obs.index.tolist()]
ldata.obs.index = barcodes

In [None]:
# Display the obs index again to check the barcodes after the suffix removal.
ldata.obs.index

In [None]:
# Display the 'batch' column of obs for the concatenated object.
ldata.obs.batch

In [None]:
#load the seurat data that has been converted into .h5ad
# (read from 'harmony_merged.h5ad' with cache=True)
seurat_combined = scv.read('harmony_merged.h5ad', cache=True)

In [None]:
#check all of the metadata info has been copied
# (bare expression: displays a summary of the loaded object as the cell output)
seurat_combined

In [None]:
# merge matrices into the original adata object
# NOTE(review): presumably cells are matched on the obs index (the barcodes
# renamed earlier) — confirm against the scv.utils.merge documentation.
adata = scv.utils.merge(seurat_combined, ldata)

In [None]:
# Display the merged object as the cell output.
adata

In [None]:
# Convert the 'clusters' column to a categorical dtype.
# Bracket indexing is used instead of attribute access: assigning through
# `obs.clusters = ...` only updates a column when it already exists and can
# be shadowed by DataFrame attributes, whereas `obs['clusters']` is explicit.
adata.obs['clusters'] = adata.obs['clusters'].astype('category')
# plot umap to check
sc.pl.umap(
    adata,
    color='clusters',
    frameon=False,
    legend_loc='on data',
    title='',
)

In [None]:
# Plot the proportions for adata, grouped by the 'clusters' column.
scv.pl.proportions(adata, groupby='clusters')

# Pre-process the data

Filter out genes which don’t have enough spliced/unspliced counts, normalize and log transform the data and restrict to the top highly variable genes. Further, compute principal components and moments for velocity estimation.

In [None]:
#filter and normalise; min_shared_counts=20 and n_top_genes=2000 are set explicitly here
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
#run PCA
sc.tl.pca(adata)
#compute the nearest-neighbour graph (30 PCs, 30 neighbours)
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=30)
#calculate moments
# NOTE(review): n_pcs/n_neighbors are passed as None, presumably so that the
# neighbour graph computed above is reused — confirm against the scVelo docs.
scv.pp.moments(adata, n_pcs=None, n_neighbors=None)

# Run scVelo

Use the dynamical model from scVelo to estimate the velocities.

In [None]:
# Recover the dynamics on adata with n_jobs=8.
# NOTE(review): n_jobs is presumably the number of parallel workers — adjust
# it to the cores available on the machine.
scv.tl.recover_dynamics(adata, n_jobs=8)

In [None]:
# Estimate the velocities using the dynamical mode.
scv.tl.velocity(adata, mode="dynamical")

In [None]:
# Compute the velocity graph for adata (run after the velocity step above).
scv.tl.velocity_graph(adata)

# Plot Graphs

In [None]:
# Colour palette chosen to match the colours used in Seurat.
# It is passed as `palette` to the velocity plots below.
ident_colours = [
    "#ED68ED",
    "#ABA300",
    "#00BFC4",
    "#FF61CC",
    "#0CB702",
    "#00A9FF",
    "#00C19A",
    "#00B8E7",
    "#E68613",
    "#F8766D",
    "#7CAE00",
    "#C77CFF",
]

In [None]:
import matplotlib.pyplot as plt

# Default matplotlib figure size for the plots that follow.
plt.rcParams['figure.figsize'] = [10, 8]

# min_mass : float (default: 1). Minimum threshold for mass to be shown.
# It can range between 0 (all velocities) and 5 (large velocities only).
#
# The figure is saved under a "stream" filename: the previous name ended in
# "_grid.pdf", which mislabelled this stream plot as the grid plot.
scv.pl.velocity_embedding_stream(
    adata,
    basis="X_umap",
    title='Velocity Embedding Stream',
    smooth=0.5,
    min_mass=0,
    color='clusters',
    palette=ident_colours,
    legend_loc='right margin',
    save='harmony_velocity_embedding_stream.pdf',
)

In [None]:
# Velocity embedding drawn as a grid on the UMAP basis, coloured by cluster
# with the Seurat-matched palette. This figure is displayed only (no `save`).
scv.pl.velocity_embedding_grid(
    adata,
    basis='umap',
    color='clusters',
    title='Velocity Embedding Grid',
    scale=0.4,
    palette=ident_colours,
    legend_loc='right margin',
    arrow_size=1.5,
)