# Endo CellRank
# Xiaonan Wang
## 17 July 2024

In [None]:
import scvelo as scv
import scanpy as sc
import cellrank as cr
import numpy as np

# Session-wide configuration: scVelo verbosity 3, scVelo's "scvelo" figure
# preset, and CellRank verbosity 2.
scv.settings.verbosity = 3
scv.settings.set_figure_params("scvelo")
cr.settings.verbosity = 2

In [None]:
import warnings

# Silence the three noisiest warning families for the rest of the session.
# The filters are installed in the same order as before: User, Future,
# Deprecation.
for _category in (UserWarning, FutureWarning, DeprecationWarning):
    warnings.simplefilter("ignore", category=_category)

In [None]:
# Read the annotated object from the write/ directory into `adata_scanpy`.
# `results_file` is assigned the same path again further down, before the
# file is re-read for pathway scoring.
results_file = '../write/EndoCells_v2_BC.h5ad'
adata_scanpy = sc.read(results_file)

In [None]:
# Rebuild the obs names as "<sample>:<barcode>x" so they can be used to index
# the loom-derived object in the next cell. The original names are kept in
# obs['obs_name'].
adata_scanpy.obs['batch_name'] = adata_scanpy.obs['batch']
adata_scanpy.obs['batch_name'] = adata_scanpy.obs['batch_name'].astype('category')
# A list passed to rename_categories is applied by position, so this assumes
# the existing category order matches the three sample names.
# NOTE(review): confirm the batch -> sample mapping.
adata_scanpy.obs['batch_name'] = adata_scanpy.obs['batch_name'].cat.rename_categories(['possorted_genome_bam_3ZJQV', 'possorted_genome_bam_VV1XV','possorted_genome_bam_XTS2D'])
# Pair each batch label with its obs name via zip instead of `series[x]` with
# an integer x: the obs index holds strings, so integer keys only worked through
# pandas' deprecated positional fallback. Behaviour is otherwise the same: the
# last four characters of the name are dropped and 'x' is appended.
adata_scanpy.obs['temp_name'] = [
    sample + ':' + barcode[:-4] + 'x'
    for sample, barcode in zip(adata_scanpy.obs['batch_name'], adata_scanpy.obs_names)
]
adata_scanpy.obs['obs_name'] = adata_scanpy.obs_names
adata_scanpy.obs_names = adata_scanpy.obs['temp_name']

In [None]:
# Load the combined loom file (cache=True) and make gene names unique.
adata = scv.read('../combined_45_46_47_orig.loom', cache=True)
adata.var_names_make_unique()
# Keep only the cells present in `adata_scanpy`, in the same order.
# Take an explicit copy: the following cells write into .obs/.obsm/.uns, and
# writing into an AnnData view forces an implicit copy with a warning.
adata = adata[adata_scanpy.obs_names, :].copy()

In [None]:
# Copy the 'louvain_BC' labels (as 'clusters') and the UMAP/PCA coordinates
# from the scanpy object. The obsm arrays are assigned as-is, so this relies
# on `adata` having been subset with `adata_scanpy.obs_names` (same cells,
# same order).
adata.obs['clusters'] = adata_scanpy.obs['louvain_BC']
adata.obsm['X_umap'] = adata_scanpy.obsm['X_umap']
adata.obsm['X_pca'] = adata_scanpy.obsm['X_pca']

In [None]:
# Carry over the 'Study' annotation; it is used to colour a stream plot below.
adata.obs['Study'] = adata_scanpy.obs['Study']

In [None]:
# Carry over the 'louvain_rd10' labels and make them categorical.
adata.obs['louvain_rd10'] = adata_scanpy.obs['louvain_rd10']
adata.obs['louvain_rd10'] = adata.obs['louvain_rd10'].astype('category')

# Master colour table: position k holds the colour for category label "k".
louvain_rd10_colors = [
    '#0B5345', '#15AB0F', '#63D95E', '#1D8348', '#4BB80C', '#52BE80', '#1EF861',
    '#D4E95E', '#8CA11C', '#7B8003', '#444604',
    '#5175FB',
    '#58DDF5', '#2190A4',
    '#A42DE9', '#7015EB', '#ABB7FF',
    '#F22BA2', '#EC7DBF',
    '#CC3D6E', '#FCC176', '#FFADAB', '#A93226', '#FD1901',
    '#FF5733', '#ff9900',
    '#C0C0C0',
]

# Pick one colour per category that is actually present, by converting the
# category labels to integer positions in the table.
rd10_categories = list(adata.obs['louvain_rd10'].cat.categories)
palette = np.array(louvain_rd10_colors)[np.array(rd10_categories).astype(int)]
adata.uns['louvain_rd10_colors'] = palette

In [None]:
# Print scVelo's proportions summary for `adata`, then show the AnnData repr
# (last expression of the cell).
scv.utils.show_proportions(adata)
adata

In [None]:
# scVelo gene filtering and normalisation with min_shared_counts=10 and
# n_top_genes=2000.
scv.pp.filter_and_normalize(adata, min_shared_counts=10, n_top_genes=2000)

In [None]:
# scVelo moments step using 30 principal components and 30 neighbours.
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)

In [None]:
# Velocity estimation in three steps: recover the dynamics, compute velocities
# in 'dynamical' mode, then build the velocity graph.
# The order is presumably required: mode='dynamical' is expected to use the
# output of recover_dynamics, and the graph to use the velocities.
scv.tl.recover_dynamics(adata)
scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)

In [None]:
# Velocity streamlines on the UMAP, coloured by study, legend in the right margin.
scv.pl.velocity_embedding_stream(
    adata,
    basis="umap",
    color='Study',
    title="",
    legend_loc='right margin',
    legend_fontsize=12,
)

In [None]:
# Velocity streamlines on the UMAP, coloured by cluster, labels drawn on the data.
scv.pl.velocity_embedding_stream(
    adata,
    basis="umap",
    color='clusters',
    title="",
    legend_loc='on data',
    legend_fontsize=12,
)

In [None]:
from cellrank.tl.kernels import VelocityKernel

# Velocity kernel built on `adata`.
# NOTE(review): the import above uses the `cellrank.tl.kernels` path, which is
# presumably the CellRank 1.x layout. Confirm against the installed version.
vk = VelocityKernel(adata)

In [None]:
# Compute the velocity kernel's transition matrix with all defaults.
vk.compute_transition_matrix()

In [None]:
# Project the kernel onto the UMAP under the key 'T_fwd', then draw it as
# streamlines. The `vkey` passed to the plot must match `key_added` here.
vk.compute_projection(basis="umap", key_added='T_fwd')
scv.pl.velocity_embedding_stream(
    adata, color="clusters", vkey="T_fwd", basis="umap", legend_loc="right"
)

In [None]:
from cellrank.tl.estimators import GPCCA
# GPCCA estimator on the velocity kernel: Schur decomposition with 20
# components, then plot the spectrum with real_only=True.
# `g` is reused by all later estimator cells.
g = GPCCA(vk)
g.compute_schur(n_components=20)
g.plot_spectrum(real_only=True)

In [None]:
# First latent-time pass, without root/end priors. The later
# get_terminal_states call reads adata.obs['latent_time'] as its pseudotime.
scv.tl.recover_latent_time(adata)

In [None]:
import pandas as pd
def get_terminal_states(adata, clusters, cell_no=50, clusters_key='leiden',
                        pseudotime_key='dpt_pseudotime'):
    """Label the latest cells of each chosen cluster as a terminal state.

    For every label in ``clusters``, the ``cell_no`` cells with the highest
    ``adata.obs[pseudotime_key]`` among the cells whose
    ``adata.obs[clusters_key]`` equals that label are tagged with the label.
    All remaining cells stay missing (NaN).

    Parameters
    ----------
    adata
        Object exposing ``obs`` (a DataFrame indexed by cell name) and
        ``obs_names``; in this notebook an AnnData.
    clusters
        Iterable of cluster labels to draw terminal cells from.
    cell_no
        Maximum number of cells tagged per cluster. A cluster with fewer
        cells contributes all of them.
    clusters_key
        Column of ``adata.obs`` holding the cluster labels.
    pseudotime_key
        Column of ``adata.obs`` holding the pseudotime used for ranking.

    Returns
    -------
    pandas.Series
        Categorical series indexed by ``adata.obs_names``; untagged cells
        are NaN.
    """
    obs = adata.obs
    # Start with object dtype: writing string labels into a float NaN series
    # only worked through silent upcasting, which pandas has deprecated.
    cells = pd.Series(np.nan, index=adata.obs_names, dtype=object)
    for label in clusters:
        # Only the obs table is needed, so avoid slicing the whole AnnData.
        pseudotime = obs.loc[obs[clusters_key] == label, pseudotime_key]
        latest = pseudotime.sort_values(ascending=False).index[:cell_no]
        cells.loc[latest] = label
    return pd.Series(cells, dtype="category")

In [None]:
# Candidate terminal cells: the highest-latent-time cells (default 50 per
# cluster) from seven hand-picked clusters.
finalstates = get_terminal_states(
    adata,
    clusters=['10', '3', '9', '1', '8', '11', '5'],
    clusters_key='clusters',
    pseudotime_key='latent_time',
)

In [None]:
# Register the manually selected cells as terminal states on the estimator,
# with 'clusters' as the cluster annotation key.
g.set_terminal_states(finalstates, cluster_key="clusters")

In [None]:
# Absorption probabilities towards the terminal states, solved with GMRES and
# an ILU preconditioner using 6 jobs.
g.compute_absorption_probabilities( solver="gmres", n_jobs=6, preconditioner='ilu')

In [None]:
# One UMAP panel per lineage (same_plot=False), three columns, with
# perc=[0, 99].
g.plot_absorption_probabilities(same_plot=False, basis="umap", perc=[0, 99], ncols=3)

In [None]:
# All lineages drawn together on a single UMAP (same_plot=True).
g.plot_absorption_probabilities(same_plot=True, basis="umap")

In [None]:
# Compute lineage drivers from the non-raw expression (use_raw=False).
# The cluster key is 'clusters', the column this notebook fills from
# 'louvain_BC' and also passes to set_terminal_states. The previous value
# 'Leiden_ct' is never written to adata.obs here.
g.compute_lineage_drivers(cluster_key="clusters", use_raw=False)

In [None]:
# Keep a handle on the estimator's lineage-driver results for inspection.
lin_drivers = g.lineage_drivers

In [None]:
# Display the '11_corr' entry of the driver results (last expression of the cell).
lin_drivers['11_corr']

In [None]:
# Second latent-time pass, this time passing root and end keys.
# NOTE(review): no visible cell computes initial states, so
# 'initial_states_probs' may be absent from `adata`. Confirm how scVelo
# treats a missing root_key.
scv.tl.recover_latent_time(
    adata, root_key="initial_states_probs", end_key="terminal_states_probs"
)

In [None]:
# UMAP coloured by the recomputed latent time.
sc.pl.umap(adata, color='latent_time')

In [None]:
# scVelo PAGA over the 'clusters' groups, with root/end keys and a time prior.
# NOTE(review): neither 'initial_states_probs' nor 'velocity_pseudotime' is
# produced by any visible cell. Confirm they exist, or that scVelo computes or
# ignores them when missing.
scv.tl.paga(
    adata,
    groups="clusters",
    root_key="initial_states_probs",
    end_key="terminal_states_probs",
    use_time_prior="velocity_pseudotime",
)

In [None]:
# CellRank cluster-fate plot in "paga_pie" mode on the UMAP, one node per
# 'clusters' group. Node and edge sizes are set by the scale arguments, and
# the two legends are placed outside the axes at top right and top left.
cr.pl.cluster_fates(
    adata,
    mode="paga_pie",
    cluster_key="clusters",
    basis="umap",
    legend_kwargs={"loc": "top right out"},
    legend_loc="top left out",
    node_size_scale=5,
    edge_width_scale=1,
    max_edge_width=4,
    title="directed PAGA",
)

In [None]:
# Plot 5 driver genes for lineage "11", three panels per row.
cr.pl.lineage_drivers(adata, lineage="11", n_genes=5, ncols=3)

In [None]:
# Plot 5 driver genes for lineage "5", three panels per row.
cr.pl.lineage_drivers(adata, lineage="5", n_genes=5, ncols=3)

In [None]:
# Choose the DPT root: positional index of the first cell whose
# adata.obs["initial_states"] equals "2", stored in adata.uns["iroot"] before
# running scanpy's DPT.
# NOTE(review): no visible cell writes adata.obs["initial_states"]. Confirm it
# is set (e.g. by an initial-state computation) before this cell runs.
root_idx = np.where(adata.obs["initial_states"] == "2")[0][0]
adata.uns["iroot"] = root_idx
sc.tl.dpt(adata)

# Four panels: clusters, the root cell, latent time and DPT pseudotime.
# Passing the integer root_idx as a colour entry presumably highlights that
# single cell (the panel is titled "root cell").
scv.pl.scatter(
    adata,
    color=["clusters", root_idx, "latent_time", "dpt_pseudotime"],
    fontsize=16,
    cmap="viridis",
    perc=[2, 98],
    colorbar=True,
    rescale_color=[0, 1],
    title=["clusters", "root cell", "latent time", "dpt pseudotime"],
)

In [None]:
# GAM model shared by both heatmap cells.
model = cr.ul.models.GAM(adata)

# Top 100 genes by descending "11_corr" among the stored terminal lineage drivers.
driver_genes_11 = (
    adata.varm['terminal_lineage_drivers']["11_corr"]
    .sort_values(ascending=False)
    .index[:100]
)

# Heatmap of those genes for lineage "11", with absorption probabilities shown.
cr.pl.heatmap(
    adata,
    model,
    genes=driver_genes_11,
    lineages="11",
    show_absorption_probabilities=True,
    backend="loky",
    n_jobs=1,
)

In [None]:
# Top 100 genes by descending "5_corr" among the stored terminal lineage drivers.
driver_genes_5 = (
    adata.varm['terminal_lineage_drivers']["5_corr"]
    .sort_values(ascending=False)
    .index[:100]
)

# Heatmap of those genes for lineage "5", reusing the GAM `model` built above.
cr.pl.heatmap(
    adata,
    model,
    genes=driver_genes_5,
    lineages="5",
    show_absorption_probabilities=True,
    backend="loky",
    n_jobs=1,
)

In [None]:
import smqpp
# Read the ZFIN gene-set file as raw lines. Each line is passed on unparsed to
# smqpp.pathway_score_cal below.
# The encoding is explicit so the result does not depend on the platform default.
with open('../../ZFIN.gmt', encoding='utf-8') as f:
    DBcont = f.readlines()

In [None]:
import anndata

# Re-read the scanpy object; from here on `adata` no longer refers to the
# velocity object used above.
results_file = '../write/EndoCells_v2_BC.h5ad'
adata = sc.read(results_file)

# Swap adata.raw for a fresh AnnData that reuses the old raw matrix and var
# table but carries the current obs annotations.
previous_raw = adata.raw
del adata.raw
adata.raw = anndata.AnnData(
    X=previous_raw.X,
    var=previous_raw.var,
    obs=adata.obs,
)
del previous_raw

In [None]:
# Score the gene sets in DBcont per cell. The result exposes `.columns`, so it
# is presumably a DataFrame with one column per gene set.
scoreBiomarker = smqpp.pathway_score_cal(adata, DBcont)

# Wrap the scores in an AnnData that shares obs and obsm with `adata`, using
# the score column names as var names.
scoreBiomarker = anndata.AnnData(
    scoreBiomarker,
    obs=adata.obs,
    var=pd.DataFrame(scoreBiomarker.columns, index=scoreBiomarker.columns),
    obsm=adata.obsm,
)

# Rank every score across the 'louvain_BC' groups; results are stored under
# 'DE_louvain_BC'.
sc.tl.rank_genes_groups(
    scoreBiomarker,
    'louvain_BC',
    n_genes=scoreBiomarker.shape[1],
    key_added='DE_louvain_BC',
)

In [None]:
from matplotlib.colors import LinearSegmentedColormap
# Grey -> thistle -> red -> dark red colormap used for the pathway-score UMAPs.
cmap = LinearSegmentedColormap.from_list(
    name='gene_cmap',
    colors=['lightgrey', 'thistle', 'red', 'darkred'],
)

In [None]:
# UMAPs of three selected pathway scores, drawn with the custom colormap.
sc.pl.umap(
    scoreBiomarker,
    color=[
        'inflammatory response',
        'extracellular matrix structural constituent',
        'thrombocyte differentiation',
    ],
    color_map=cmap,
)