In [1]:
import scanpy as sc
import anndata
from scipy import io
from scipy.sparse import coo_matrix, csr_matrix
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt

import scvelo as scv
import scanpy as sc
import cellrank as cr
import numpy as np
import pandas as pd
import anndata as ad
import igraph as ig

In [2]:
# Read the merged AnnData object (.h5ad) from the working directory.
adata_final = sc.read_h5ad(
    'cornea_epithelium_final_data_merge.h5ad'
)


In [None]:
# UMAP coloured by the 'leiden_anno' annotation, with cluster labels drawn
# directly on the embedding. Add save='_celltypes_final.pdf' to export it.
sc.pl.umap(
    adata_final,
    color='leiden_anno',
    legend_loc='on data',
    frameon=False,
    title='',
)


In [None]:
# Spliced/unspliced count proportions, broken down by the 'group' key.
scv.pl.proportions(
    adata_final,
    groupby='group',
)

In [5]:
# Per-cell normalisation followed by a log(1 + x) transform; both calls modify
# adata_final in place, so re-running this cell transforms the data twice.
# Alternative kept for reference: sc.pp.normalize_total(adata_final, target_sum=1e4)
# NOTE(review): scanpy documents normalize_per_cell as deprecated in favour of
# normalize_total; the two are not drop-in equivalents, so confirm results
# before switching.
sc.pp.normalize_per_cell(adata_final)
sc.pp.log1p(adata_final)

In [6]:
# Restrict the dataset to a pre-computed list of highly variable genes
# (one gene name per line in the file).
with open("cornea_epithelium_final.val_gene.csv", 'r') as f:
    # Strip whitespace and drop blank lines so a trailing newline or an empty
    # row cannot yield an empty gene name, which would fail the lookup below.
    highly_variable = [line.strip() for line in f if line.strip()]

# Slicing an AnnData returns a view of the parent object. The following cells
# write PCA, neighbour graphs and layers into adata_final in place, so take an
# explicit copy instead of relying on an implicit view-to-copy conversion.
adata_final = adata_final[:, highly_variable].copy()


In [None]:
# PCA -> kNN graph -> moments.
# The neighbour graph is built explicitly with scanpy; moments() is called
# with n_pcs=None / n_neighbors=None, presumably so that scvelo reuses that
# graph rather than computing its own (confirm against the scvelo docs).
# NOTE(review): the same three steps are repeated in the next cell after
# scv.pp.filter_and_normalize.
sc.tl.pca(adata_final)
sc.pp.neighbors(
    adata_final,
    n_neighbors=30,
    n_pcs=30,
)
scv.pp.moments(
    adata_final,
    n_neighbors=None,
    n_pcs=None,
)

In [None]:
# scVelo gene filtering/normalisation, keeping the top 2000 genes
# (n_top_genes=3000 was tried previously), then rebuild PCA, the kNN graph
# and the moments on the filtered data.
scv.pp.filter_and_normalize(
    adata_final,
    min_shared_counts=20,
    n_top_genes=2000,
)
sc.tl.pca(adata_final)
sc.pp.neighbors(
    adata_final,
    n_neighbors=30,
    n_pcs=30,
)
scv.pp.moments(
    adata_final,
    n_neighbors=None,
    n_pcs=None,
)

In [None]:
# Fit the dynamical model (long-running step), requesting 48 parallel jobs.
scv.tl.recover_dynamics(
    adata_final,
    n_jobs=48,
)

In [None]:
# Velocities from the dynamical model, then the velocity graph built on them.
scv.tl.velocity(
    adata_final,
    mode="dynamical",
)
scv.tl.velocity_graph(
    adata_final,
    n_jobs=48,
)

In [None]:
# Velocity stream plot on the UMAP embedding, coloured by cluster annotation
# and by 'initial_size_spliced'; the figure is also written to disk.
# (An earlier version coloured by 'cell_type' instead.)
scv.pl.velocity_embedding_stream(
    adata_final,
    basis="umap",
    color=['leiden_anno', 'initial_size_spliced'],
    title="",
    legend_fontsize=5,
    smooth=1,
    min_mass=0.5,
    save='cornea.epithelium_final.velocity.svg',
)

In [160]:
# Checkpoint: persist the object, including the velocity results, so the
# downstream CellRank cells can start from this file.
adata_final.write(
    'cornea_epithelium_final_data_phate_scVelocity_adata.h5ad'
)

In [3]:
# Resume from the checkpoint written after the velocity computation.
adata_final = sc.read_h5ad(
    'cornea_epithelium_final_data_phate_scVelocity_adata.h5ad'
)


In [None]:
# Terminal states with the GPCCA estimator, two states requested
# (alternatives tried: estimator=cr.tl.estimators.CFLARE, and n_states=1).
cr.tl.terminal_states(
    adata_final,
    estimator=cr.tl.estimators.GPCCA,
    cluster_key="leiden_anno",
    n_states=2,
    weight_connectivities=0.2,
    n_jobs=48,
)

# One panel per terminal state.
cr.pl.terminal_states(
    adata_final,
    same_plot=False,
)

In [None]:
# Initial state with the GPCCA estimator, a single state requested
# (estimator=cr.tl.estimators.CFLARE was tried as an alternative).
cr.tl.initial_states(
    adata_final,
    estimator=cr.tl.estimators.GPCCA,
    cluster_key="leiden_anno",
    n_states=1,
    weight_connectivities=0.2,
    n_jobs=48,
)
cr.pl.initial_states(
    adata_final,
    discrete=True,
)

In [None]:
# Compute fate maps: run CellRank's lineage computation with its defaults.
cr.tl.lineages(
    adata_final,
)


In [None]:
# Show all lineages together in a single panel.
cr.pl.lineages(
    adata_final,
    same_plot=True,
)

In [None]:
# Latent time, with the CellRank initial/terminal state probabilities passed
# as the root and end keys.
scv.tl.recover_latent_time(
    adata_final,
    end_key="terminal_states_probs",
    root_key="initial_states_probs",
)

In [None]:
# Directed PAGA over the 'leiden_anno' groups, using the initial/terminal
# state probabilities as root/end keys and a pseudotime prior.
# NOTE(review): 'velocity_pseudotime' is not created explicitly in this
# notebook; presumably it is written by the latent-time step above — verify.
scv.tl.paga(
    adata_final,
    groups="leiden_anno",
    use_time_prior="velocity_pseudotime",
    end_key="terminal_states_probs",
    root_key="initial_states_probs",
)

In [None]:
# Directed PAGA graph drawn on the UMAP with one pie chart per cluster
# (mode="paga_pie"); the figure is saved as a PDF.
cr.pl.cluster_fates(
    adata_final,
    cluster_key="leiden_anno",
    mode="paga_pie",
    basis="umap",
    title="directed PAGA",
    # Legend placement.
    legend_loc="top left out",
    legend_kwargs={"loc": "top right out"},
    # Node / edge sizing.
    node_size_scale=5,
    edge_width_scale=1,
    max_edge_width=4,
    save="cornea_PAGA_final_3.pdf",
)

In [None]:
# Compute lineage drivers for the lineages found above, using 48 jobs.
cr.tl.lineage_drivers(
    adata_final,
    n_jobs=48,
)


In [None]:
# Plot the top 10 driver genes of the 'adult_cornea.superficial' lineage and
# save the figure as a PDF.
cr.pl.lineage_drivers(
    adata_final,
    lineage="adult_cornea.superficial",
    n_genes=10,
    save="terminal_lineage_drivers_umap.pdf",
)

In [None]:
## Gene expression trends

# Compute DPT, starting from the CellRank-defined root cell: the first cell
# whose 'initial_states' label equals the root state.
root_state = "E_cornea.basal"
root_mask = (adata_final.obs["initial_states"] == root_state).to_numpy()
root_candidates = np.flatnonzero(root_mask)
if root_candidates.size == 0:
    # Fail with an actionable message instead of a bare IndexError.
    raise ValueError(
        f"No cell has initial state {root_state!r} in "
        "adata_final.obs['initial_states']; compute the initial states first "
        "or adjust the root state name."
    )
root_idx = root_candidates[0]
adata_final.uns["iroot"] = root_idx
sc.tl.dpt(adata_final)

# Every other cell of this notebook uses 'leiden_anno' as the cluster key, so
# fall back to it when no 'cell_type' column is present in obs.
cluster_key = "cell_type" if "cell_type" in adata_final.obs else "leiden_anno"

# Four panels: clusters, the root cell (highlighted by its integer index),
# latent time and DPT pseudotime.
scv.pl.scatter(
    adata_final,
    color=[cluster_key, root_idx, "latent_time", "dpt_pseudotime"],
    fontsize=16,
    cmap="viridis",
    perc=[2, 98],
    colorbar=True,
    rescale_color=[0, 1],
    title=["clusters", "root cell", "latent time", "dpt pseudotime"],
   # save="./final/latent_time_dpt_pseudotime.pdf"
)

In [None]:
# Genes whose expression trends are plotted along latent time.
# Previously used lists, kept for reference:
#   ["KRT15", "BCAM", "KRT14","LGALS1","S100A2","RORA","PITX1",
#    "KRT3","KRT12","KRT24","MAL","MAL2","LGALS3","MUC21","MUC16","LYPD2","CTSV","S100A4","CAV1","CAV2"]
#   ["RORA","PITX1","EHF","ELF3","KLF4","KLF5","KLF6","BHLHE40","BHLHE41","HES5","DBP","MAFF","ELK3","GRHL3","IRF6",
#    "OVOL1","ZBTB43","ZNF431","ZNF750","MBD2","NR1D1"]
trend_genes = [
    "KRT15", "BCAM", "WNT6", "CAV1", "CAV2", "KRT14", "IFITM3", "LGALS1",
    "KRT3", "KRT12", "KRT24", "CLU", "LGALS3", "MUC16", "LGALSL", "MAL",
    "MAL2", "LYPD2", "NQO1", "S100A4",
]

# GAM used to model expression along latent time for the
# 'adult_cornea.superficial' lineage.
model = cr.ul.models.GAM(adata_final)

# NOTE(review): the output file is named "key TF trends" although the list
# above differs from the TF list used previously — confirm the intended name.
cr.pl.gene_trends(
    adata_final,
    model=model,
    genes=trend_genes,
    lineages="adult_cornea.superficial",
    data_key="X",
    time_key="latent_time",
    n_test_points=200,
    same_plot=True,
    hide_cells=True,
    ncols=4,
    figsize=(12, 12),
    n_jobs=40,
    save="./final/key TF trends.svg",
)



In [None]:
# Genes shown in the heatmap, in display order (keep_gene_order=True).
# Alternatives tried previously:
#   adata_final.varm['terminal_lineage_drivers']["adult_cornea.superficial_corr"].sort_values(ascending=False).index[:100]
#   adata_final.varm['terminal_lineage_drivers']["limbal.superfcial_corr"].sort_values(ascending=True).index[:100]
#   ["RORA","PITX1"]
heatmap_genes = [
    "RORA", "PITX1", "EHF", "ELF3", "KLF4", "KLF5", "KLF6", "BHLHE40",
    "BHLHE41", "HES5", "DBP", "MAFF", "ELK3", "GRHL3", "IRF6",
    "OVOL1", "ZBTB43", "ZNF431", "ZNF750", "MBD2", "NR1D1",
]

# Fresh GAM instance for the heatmap of the 'adult_cornea.superficial' lineage.
model = cr.ul.models.GAM(adata_final)

# Options left disabled: mode='genes', backward=True, cluster_key="leiden_anno".
cr.pl.heatmap(
    adata_final,
    model,
    genes=heatmap_genes,
    lineages="adult_cornea.superficial",
    keep_gene_order=True,
    show_all_genes=True,
    show_absorption_probabilities=False,
    figsize=(8, 8),
    backend="loky",
    n_jobs=48,
    save="./final/terminal_lineage_TF_heatmap.pdf",
)