# Analysis Part 6 - RNA velocity, Slingshot, SCPA, Pathway Analysis

In [None]:
%load_ext autoreload
%matplotlib inline

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore')
import os
import scanpy as sc
import anndata as ann
import scirpy as ir
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
from matplotlib import rcParams
from mudata import MuData
import mudata

import tarfile
import warnings
from glob import glob

import anndata
import muon as mu

%autoreload 2
import sys
sys.path.append('..')
import utility.annotation as utils_annotation
import utility.representation as utils_representation
import utility.visualisation as utils_vis

from pyslingshot import Slingshot
import scvelo as scv

In [None]:
# Configure scanpy/matplotlib in ONE call. The original issued two
# set_figure_params calls; the second did not pass `dpi`, so scanpy's default
# figure dpi silently replaced the dpi=150 requested by the first call.
sc.set_figure_params(dpi=150, vector_friendly=True, color_map='viridis', transparent=True)
sc.settings.verbosity = 3
sb.set_style('whitegrid')

# Custom blue -> yellow -> red colormap, passed as `cmap=cust` to the UMAP plots below.
from matplotlib.colors import LinearSegmentedColormap
colors = ['darkblue', 'blue', 'lightblue', '#FFEA00', 'orange', 'red', 'darkred']
cust = LinearSegmentedColormap.from_list('custom_cmap', colors)
colormap = cust  # second name for the same colormap object

## Get input data

In [None]:
# Read the annotated .h5mu file from local disk with muon.
h5mu_path = '/Users/mimi/Sina/data_specificity_annotated_final_pseudotime_cite.h5mu'
mdata = mu.read(h5mu_path)

## Slingshot

In [None]:
# Fit Slingshot on the UMAP of the 'gex' modality, starting in the very naive cluster.
# The unused axis-limit variables (only referenced by a commented-out plt.setp
# call) were removed.
START_CLUSTER = 2  # value passed as start_node: the cluster considered most naive
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))  # 2x2 panels handed to fit() as debug_axes
ad = mdata['gex']
slingshot = Slingshot(ad, celltype_key="leiden", obsm_key="X_umap", start_node=START_CLUSTER, debug_level='verbose')
slingshot.fit(num_epochs=5, debug_axes=axes)

In [None]:
%%bash
# sample 1
# This is a shell command: without the %%bash cell magic the Python kernel
# raises a SyntaxError on this cell. Paths are specific to the processing machine.
#   -b  barcodes file of the filtered feature-barcode matrix
#   -o  output folder
#   positional arguments: BAM alignments, then the GTF annotation
velocyto run \
    -b /media/agschober/HDD12/3_scRNA-Seq_Sina/CS1-multi_new2/outs/per_sample_outs/CS1-multi_new2/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz \
    -o /media/agschober/HDD12/3_scRNA-Seq_Sina/velocyto_output_sample1 \
    /media/agschober/HDD12/3_scRNA-Seq_Sina/CS1-multi_new2/outs/per_sample_outs/CS1-multi_new2/count/sample_alignments.bam \
    /media/agschober/HDD01/refdata-gex-GRCh38-2020-A/genes/genes.gtf

In [None]:
%%bash
# sample 2
# This is a shell command: without the %%bash cell magic the Python kernel
# raises a SyntaxError on this cell. Paths are specific to the processing machine.
#   -b  barcodes file of the filtered feature-barcode matrix
#   -o  output folder
#   positional arguments: BAM alignments, then the GTF annotation
velocyto run \
    -b /media/agschober/HDD12/3_scRNA-Seq_Sina/CS2-multi_new2/outs/per_sample_outs/CS2-multi_new2/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz \
    -o /media/agschober/HDD12/3_scRNA-Seq_Sina/velocyto_output_sample2 \
    /media/agschober/HDD12/3_scRNA-Seq_Sina/CS2-multi_new2/outs/per_sample_outs/CS2-multi_new2/count/sample_alignments.bam \
    /media/agschober/HDD01/refdata-gex-GRCh38-2020-A/genes/genes.gtf

## Run velocyto in bash environment

In [None]:
%%bash
# sample 3
# This is a shell command: without the %%bash cell magic the Python kernel
# raises a SyntaxError on this cell. Paths are specific to the processing machine.
#   -b  barcodes file of the filtered feature-barcode matrix
#   -o  output folder
#   positional arguments: BAM alignments, then the GTF annotation
velocyto run \
    -b /media/agschober/HDD12/3_scRNA-Seq_Sina/CS3-multi_new2/outs/per_sample_outs/CS3-multi_new2/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz \
    -o /media/agschober/HDD12/3_scRNA-Seq_Sina/velocyto_output_sample3 \
    /media/agschober/HDD12/3_scRNA-Seq_Sina/CS3-multi_new2/outs/per_sample_outs/CS3-multi_new2/count/sample_alignments.bam \
    /media/agschober/HDD01/refdata-gex-GRCh38-2020-A/genes/genes.gtf

In [None]:
%%bash
# sample 4
# This is a shell command: without the %%bash cell magic the Python kernel
# raises a SyntaxError on this cell. Paths are specific to the processing machine.
#   -b  barcodes file of the filtered feature-barcode matrix
#   -o  output folder
#   positional arguments: BAM alignments, then the GTF annotation
velocyto run \
    -b /media/agschober/HDD12/3_scRNA-Seq_Sina/CS4-multi_new2/outs/per_sample_outs/CS4-multi_new2/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz \
    -o /media/agschober/HDD12/3_scRNA-Seq_Sina/velocyto_output_sample4 \
    /media/agschober/HDD12/3_scRNA-Seq_Sina/CS4-multi_new2/outs/per_sample_outs/CS4-multi_new2/count/sample_alignments.bam \
    /media/agschober/HDD01/refdata-gex-GRCh38-2020-A/genes/genes.gtf

In [None]:
%%bash
# sample 5
# This is a shell command: without the %%bash cell magic the Python kernel
# raises a SyntaxError on this cell. Paths are specific to the processing machine.
# velocyto is called through its absolute path here.
#   -b  barcodes file of the filtered feature-barcode matrix
#   -o  output folder
#   positional arguments: BAM alignments, then the GTF annotation
/home/agschober/.local/bin/velocyto run \
    -b /media/agschober/HDD12/3_scRNA-Seq_Sina/Cellranger_output/2nd_Experiment/run1/outs/per_sample_outs/run1/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz \
    -o /media/agschober/HDD12/3_scRNA-Seq_Sina/velocyto_output_sample5 \
    /media/agschober/HDD12/3_scRNA-Seq_Sina/Cellranger_output/2nd_Experiment/run1/outs/per_sample_outs/run1/count/sample_alignments.bam \
    /media/agschober/HDD01/refdata-gex-GRCh38-2020-A/genes/genes.gtf

In [None]:
# samtools is not found somehow, so its bin folder is prepended to PATH.
# A shell `export` is not valid Python and, inside a shell cell, would only
# affect that one subshell. Updating os.environ changes the kernel's own
# environment, which shell commands started from the notebook inherit.
samtools_bin = '/media/agschober/HDD12/samtools/bin'
path_entries = [entry for entry in os.environ.get('PATH', '').split(os.pathsep) if entry]
if samtools_bin not in path_entries:  # keeps the cell idempotent when re-run
    os.environ['PATH'] = os.pathsep.join([samtools_bin] + path_entries)

In [None]:
%%bash
# sample 6
# This is a shell command: without the %%bash cell magic the Python kernel
# raises a SyntaxError on this cell. Paths are specific to the processing machine.
# velocyto is called through its absolute path here.
#   -b  barcodes file of the filtered feature-barcode matrix
#   -o  output folder
#   positional arguments: BAM alignments, then the GTF annotation
/home/agschober/.local/bin/velocyto run \
    -b /media/agschober/HDD12/3_scRNA-Seq_Sina/Cellranger_output/2nd_Experiment/run2/outs/per_sample_outs/run2/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz \
    -o /media/agschober/HDD12/3_scRNA-Seq_Sina/velocyto_output_sample6 \
    /media/agschober/HDD12/3_scRNA-Seq_Sina/Cellranger_output/2nd_Experiment/run2/outs/per_sample_outs/run2/count/sample_alignments.bam \
    /media/agschober/HDD01/refdata-gex-GRCh38-2020-A/genes/genes.gtf

In [None]:
%%bash
# sample 7
# This is a shell command: without the %%bash cell magic the Python kernel
# raises a SyntaxError on this cell. Paths are specific to the processing machine.
# velocyto is called through its absolute path here.
#   -b  barcodes file of the filtered feature-barcode matrix
#   -o  output folder
#   positional arguments: BAM alignments, then the GTF annotation
/home/agschober/.local/bin/velocyto run \
    -b /media/agschober/HDD12/3_scRNA-Seq_Sina/Cellranger_output/2nd_Experiment/run3/outs/per_sample_outs/run3/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz \
    -o /media/agschober/HDD12/3_scRNA-Seq_Sina/velocyto_output_sample7 \
    /media/agschober/HDD12/3_scRNA-Seq_Sina/Cellranger_output/2nd_Experiment/run3/outs/per_sample_outs/run3/count/sample_alignments.bam \
    /media/agschober/HDD01/refdata-gex-GRCh38-2020-A/genes/genes.gtf

In [None]:
%%bash
# sample 8
# This is a shell command: without the %%bash cell magic the Python kernel
# raises a SyntaxError on this cell. Paths are specific to the processing machine.
# velocyto is called through its absolute path here.
#   -b  barcodes file of the filtered feature-barcode matrix
#   -o  output folder
#   positional arguments: BAM alignments, then the GTF annotation
/home/agschober/.local/bin/velocyto run \
    -b /media/agschober/HDD12/3_scRNA-Seq_Sina/Cellranger_output/2nd_Experiment/run4/outs/per_sample_outs/run4/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz \
    -o /media/agschober/HDD12/3_scRNA-Seq_Sina/velocyto_output_sample8 \
    /media/agschober/HDD12/3_scRNA-Seq_Sina/Cellranger_output/2nd_Experiment/run4/outs/per_sample_outs/run4/count/sample_alignments.bam \
    /media/agschober/HDD01/refdata-gex-GRCh38-2020-A/genes/genes.gtf

In [None]:
%%bash
# sample 9
# This is a shell command: without the %%bash cell magic the Python kernel
# raises a SyntaxError on this cell. Paths are specific to the processing machine.
# velocyto is called through its absolute path here.
#   -b  barcodes file of the filtered feature-barcode matrix
#   -o  output folder
#   positional arguments: BAM alignments, then the GTF annotation
/home/agschober/.local/bin/velocyto run \
    -b /media/agschober/HDD12/3_scRNA-Seq_Sina/Cellranger_output/3rd_Experiment/Sina_3/outs/per_sample_outs/Sina_3/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz \
    -o /media/agschober/HDD12/3_scRNA-Seq_Sina/velocyto_output_sample9 \
    /media/agschober/HDD12/3_scRNA-Seq_Sina/Cellranger_output/3rd_Experiment/Sina_3/outs/per_sample_outs/Sina_3/count/sample_alignments.bam \
    /media/agschober/HDD01/refdata-gex-GRCh38-2020-A/genes/genes.gtf

## Use scvelo for further analysis

In [None]:
# Apply scvelo's figure parameters with all arguments left at their defaults.
# NOTE(review): this presumably overrides matplotlib settings configured through
# scanpy earlier in the notebook (e.g. figure dpi) — confirm that is intended.
scv.set_figure_params()

#### load data

In [None]:
ldata1 = scv.read('/Users/mimi/Sina/6_RNA-velocity/velocyto_output/sample_alignments_GA9IA.loom', cache=True)
ldata2 = scv.read('/Users/mimi/Sina/6_RNA-velocity/velocyto_output/sample_alignments_Z6GT7.loom', cache=True)
ldata3 = scv.read('/Users/mimi/Sina/6_RNA-velocity/velocyto_output/sample_alignments_LTQUZ.loom', cache=True)
ldata4 = scv.read('/Users/mimi/Sina/6_RNA-velocity/velocyto_output/sample_alignments_KQG6T.loom', cache=True)
ldata5 = scv.read('/Users/mimi/Sina/6_RNA-velocity/velocyto_output/sample_alignments_J6WQS.loom', cache=True)
ldata6 = scv.read('/Users/mimi/Sina/6_RNA-velocity/velocyto_output/sample_alignments_B0F3R.loom', cache=True)
ldata7 = scv.read('/Users/mimi/Sina/6_RNA-velocity/velocyto_output/sample_alignments_E7S0J.loom', cache=True)
ldata8 = scv.read('/Users/mimi/Sina/6_RNA-velocity/velocyto_output/sample_alignments_DMOYN.loom', cache=True)
ldata9 = scv.read('/Users/mimi/Sina/6_RNA-velocity/velocyto_output/sample_alignments_8AKKB.loom', cache=True)

In [None]:
ldata1.var_names_make_unique()
ldata2.var_names_make_unique()
ldata3.var_names_make_unique()
ldata4.var_names_make_unique()
ldata5.var_names_make_unique()
ldata6.var_names_make_unique()
ldata7.var_names_make_unique()
ldata8.var_names_make_unique()
ldata9.var_names_make_unique()

In [None]:
ldata = ldata1.concatenate([ldata2, ldata3, ldata4, ldata5, ldata6, ldata7, ldata8, ldata9])

In [None]:
ldata

In [None]:
adata_gex = mdata['gex']

In [None]:
adata = scv.utils.merge(adata_gex, ldata)

In [None]:
adata = adata[adata.obs['leiden'].isin(['4', '10']) & adata.obs['specific_new'].isin(['NS4B214'])]

### Stochastic method of velocity calculation

In [None]:
# PCA on the filtered cells, then a neighbour graph built with 30 PCs and 30 neighbours.
sc.pp.pca(adata)
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=30)

In [None]:
# n_pcs=None / n_neighbors=None: no new neighbourhood parameters are requested here.
# NOTE(review): presumably this makes scvelo reuse the graph computed in the previous cell — confirm.
scv.pp.moments(adata, n_pcs=None, n_neighbors=None)

In [None]:
# Estimate velocities with scvelo's 'stochastic' mode.
scv.tl.velocity(adata, mode='stochastic')

In [None]:
# Compute the velocity graph from the velocities estimated above; the plotting
# and pseudotime cells below are run after it.
scv.tl.velocity_graph(adata)

In [None]:
scv.pl.velocity_embedding(adata, basis='umap')
scv.pl.velocity_embedding_grid(adata, basis='umap')
scv.pl.velocity_embedding_stream(adata, basis='umap')

In [None]:
scv.pl.velocity_embedding_stream(adata, basis='umap', color=['leiden'])

In [None]:
scv.tl.velocity_pseudotime(adata)
sc.pl.umap(adata, color=["velocity_pseudotime"])

In [None]:
# Own measure for the 'amount' of RNA velocity: per-cell sum of the absolute
# values in the 'velocity' layer (stored under the obs key 'acceleration').
# np.nansum is used so that NaN entries in the layer are skipped instead of
# turning a cell's whole sum into NaN; this matches the dynamical-mode cell.
row_sums = np.nansum(np.abs(adata.layers['velocity']), axis=1)
adata.obs['acceleration'] = row_sums
sc.pl.umap(adata, color=['acceleration'])

In [None]:
#calculate paga
scv.tl.paga(adata, groups='leiden')

In [None]:
scv.pl.paga(adata)
scv.pl.paga(adata, color='leiden', threshold=0.1, node_size_scale=0.5, node_size_power=1)

In [None]:
scv.pl.paga(adata, basis='umap', size=50, alpha=.2, min_edge_width=1, node_size_scale=1.5)

In [None]:
#cell cycle score
scv.tl.score_genes_cell_cycle(adata)
scv.pl.scatter(adata, color_gradients=['S_score', 'G2M_score'], smooth=True, perc=[5, 95])

In [None]:
#confidence and length as estimation of differentiation velocity
scv.tl.velocity_confidence(adata)
keys = 'velocity_length', 'velocity_confidence'
sc.pl.umap(adata, color=keys, cmap=cust)

In [None]:
l1_norm = np.abs(adata.obs['velocity_length']).sum()

# Perform L1 normalization
adata.obs['velocity_length_norm'] = adata.obs['velocity_length'] / l1_norm

In [None]:
sc.pl.umap(adata, color=['velocity_length_norm'])

### Deterministic method of calculating the velocity

In [None]:
scv.tl.velocity(adata, mode='deterministic')

In [None]:
scv.tl.velocity_graph(adata)
scv.pl.velocity_embedding(adata, basis='umap')
scv.pl.velocity_embedding_grid(adata, basis='umap')
scv.pl.velocity_embedding_stream(adata, basis='umap')

In [None]:
scv.pl.velocity_embedding_stream(adata, basis='umap', color=['leiden'])

In [None]:
scv.tl.velocity_pseudotime(adata)
sc.pl.umap(adata, color=["velocity_pseudotime"])

In [None]:
# Own measure for the 'amount' of RNA velocity: per-cell sum of the absolute
# values in the 'velocity' layer (stored under the obs key 'acceleration').
# np.nansum is used so that NaN entries in the layer are skipped instead of
# turning a cell's whole sum into NaN; this matches the dynamical-mode cell.
row_sums = np.nansum(np.abs(adata.layers['velocity']), axis=1)
adata.obs['acceleration'] = row_sums
sc.pl.umap(adata, color=['acceleration'])

In [None]:
#calculate paga
scv.tl.paga(adata, groups='leiden')

In [None]:
scv.pl.paga(adata)
scv.pl.paga(adata, color='leiden', threshold=0.1, node_size_scale=0.5, node_size_power=1)

In [None]:
scv.pl.paga(adata, basis='umap', size=50, alpha=.2, min_edge_width=1, node_size_scale=1.5)

In [None]:
#cell cycle score
scv.tl.score_genes_cell_cycle(adata)
scv.pl.scatter(adata, color_gradients=['S_score', 'G2M_score'], smooth=True, perc=[5, 95])

In [None]:
#confidence and length as estimation of differentiation velocity
scv.tl.velocity_confidence(adata)
keys = 'velocity_length', 'velocity_confidence'
sc.pl.umap(adata, color=keys, cmap='coolwarm')

### Dynamical method of calculating the velocity

In [None]:
scv.tl.recover_dynamics(adata)

In [None]:
scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)

In [None]:
scv.pl.velocity_embedding(adata, basis='umap')
scv.pl.velocity_embedding_grid(adata, basis='umap')
scv.pl.velocity_embedding_stream(adata, basis='umap')

In [None]:
scv.pl.velocity_embedding_stream(adata, basis='umap', color=['leiden'])

In [None]:
scv.tl.velocity_pseudotime(adata)
sc.pl.umap(adata, color='velocity_pseudotime', cmap=cust)

In [None]:
# Own measure for the 'amount' of RNA velocity: per-cell sum of the absolute
# values in the 'velocity' layer (stored under the obs key 'acceleration').
# In 'dynamical' mode the velocity layer holds NaN for genes without a fitted
# model, so a plain np.sum would give NaN for every cell. np.nansum skips
# those entries and sums over the fitted genes only.
row_sums = np.nansum(np.abs(adata.layers['velocity']), axis=1)
adata.obs['acceleration'] = row_sums
sc.pl.umap(adata, color=['acceleration'])

In [None]:
#calculate paga
scv.tl.paga(adata, groups='leiden')

In [None]:
scv.pl.paga(adata)
scv.pl.paga(adata, color='leiden', threshold=0.1, node_size_scale=0.5, node_size_power=1)

In [None]:
scv.pl.paga(adata, basis='umap', size=50, alpha=.2, min_edge_width=1, node_size_scale=1.5)

In [None]:
#cell cycle score
scv.tl.score_genes_cell_cycle(adata)
scv.pl.scatter(adata, color_gradients=['S_score', 'G2M_score'], smooth=True, perc=[5, 95])

In [None]:
#confidence and length as estimation of differentiation velocity
scv.tl.velocity_confidence(adata)
keys = 'velocity_length', 'velocity_confidence'
sc.pl.umap(adata, color=keys, cmap=cust)

In [None]:
# MKI67 expression on the filtered cells. The original cell issued this
# identical plot call twice; one call produces the same figure once.
sc.pl.umap(adata, color=['MKI67'], cmap=cust)

In [None]:
sc.pl.umap(mdata["gex"], color='MKI67', cmap=cust, size=50)

In [None]:
ax = sc.pl.umap(mdata["gex"], show=False, size=50)
ax = sc.pl.umap(mdata["gex"][mdata["gex"].obs['specific_new']=="NS4B214"], color='MKI67', cmap=cust, size=50, ax=ax)

#sc.pl.umap(mdata["gex"][mdata["gex"].obs['specific']!="no_binding"], color='MKI67', cmap=colormap, size=30)
#sc.pl.umap(mdata["gex"][mdata["gex"].obs['FACS_Phenotype']=="CM"], color='MKI67', cmap=colormap, size=30)
#sc.pl.umap(mdata["gex"], color='MKI67', cmap=colormap, size=30)