Create a new conda environment and install cellrank. Downgrade matplotlib to 3.7.0. Update scvelo to the development version from the main branch of its GitHub repository. Downgrade petsc4py and slepc4py with 'conda install -c conda-forge petsc4py=3.19.0 slepc4py'.

In [None]:
# Import dependencies
import os
import anndata as ad
import numpy as np
import pandas as pd

import cellrank as cr
import scanpy as sc
import scvelo as scv
import seaborn as sns

import matplotlib.pyplot as plt

# Initialize random seeds.
# random.seed() only seeds the standard-library generator; NumPy keeps its own
# global generator, so it is seeded separately with the same value.
import random
random.seed(111)
np.random.seed(111)

# Print date and time:
import datetime
e = datetime.datetime.now()
print(f"Current date and time = {e}")

# set a working directory (machine-specific absolute path; the commented line
# is an alternative location kept for reference)
#wdir = "/ceph/project/tendonhca/akurjan/analysis/"
wdir = "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/"
os.chdir(wdir)

# folder structures (relative paths, resolved against the working directory
# set above)
HARMONY_FOLDERNAME = "foetal/results/Harmony/"
VELOCITY_FOLDERNAME = "foetal/results/Velocity/"
RESULTS_FOLDERNAME = "foetal/results/CellRank/"
FIGURES_FOLDERNAME = "foetal/figures/CellRank/"

# Create the output folders. exist_ok=True makes this a no-op when the folder
# is already there, without a separate exists() check that could race with
# another process creating it.
os.makedirs(RESULTS_FOLDERNAME, exist_ok=True)
os.makedirs(FIGURES_FOLDERNAME, exist_ok=True)

# Set folder for saving figures into
sc.settings.figdir = FIGURES_FOLDERNAME
scv.settings.figdir = FIGURES_FOLDERNAME
    
def savesvg(fname: str, fig, folder: str=FIGURES_FOLDERNAME) -> None:
    """
    Write ``fig`` to ``folder/fname`` as an SVG file.

    ``fig.tight_layout()`` is applied before saving, and the file is written
    with ``format='svg'``. ``folder`` defaults to FIGURES_FOLDERNAME.
    """
    destination = os.path.join(folder, fname)
    fig.tight_layout()
    fig.savefig(destination, format='svg')

# Set other settings
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
# Print package versions so the run records the environment it used.
sc.logging.print_versions()
# Apply scvelo's 'scvelo' figure preset to subsequent plots.
scv.set_figure_params('scvelo')

In [None]:
import warnings

# Suppress every UserWarning from here on. Note this is a global filter: it
# also hides UserWarnings that might point at real problems.
warnings.simplefilter("ignore", category=UserWarning)

# CellRank

## Identifying Probable Terminal and Initial States

In [None]:
# Load the prepped object under the name `harmony`: the assignments below copy
# .uns / .obsp / .layers from `harmony`, which was otherwise never defined
# (NameError), while the first read was bound to `adata` and then overwritten.
# NOTE(review): assumes the prepped file is the intended `harmony` source --
# confirm against the upstream notebook.
harmony = scv.read(os.path.join(VELOCITY_FOLDERNAME, 'allages_tendonfibro_prepped.h5ad'), cache=False)

# Load the velocity object and overlay the annotations from `harmony`.
adata = scv.read(os.path.join(VELOCITY_FOLDERNAME, 'allages_tendonfibro_velocity.h5ad'), cache=True)
adata.var_names_make_unique()
adata.uns = harmony.uns.copy()
adata.obsp = harmony.obsp.copy()
# NOTE(review): replacing layers presumably requires both objects to have the
# same cells and genes in the same order -- verify.
adata.layers = harmony.layers.copy()
adata

In [None]:
# Build a pseudotime kernel from the "palantir_pseudotime" annotation, then
# keep whatever compute_transition_matrix() returns as `pk`.
pk = cr.kernels.PseudotimeKernel(adata, time_key="palantir_pseudotime")
pk = pk.compute_transition_matrix()
print(pk)

In [None]:
# Plot the kernel projection on the "umap" basis, coloured by C_scANVI, and
# save it as an SVG.
pk.plot_projection(
    basis="umap",
    color='C_scANVI',
    legend_loc='right margin',
    frameon=False,
    recompute=True,
    s=10,
    legend_fontsize=6,
    save='allages_tendonfibro_cellrank_projection.svg',
)

In [None]:
# Plot the kernel projection on the "umap_orig" basis, coloured by C_scANVI.
# The output name carries the basis so this figure does not overwrite the one
# saved for the "umap" basis (both previously used the same filename).
pk.plot_projection(
    basis="umap_orig",
    color='C_scANVI',
    legend_loc='right margin',
    frameon=False,
    recompute=True,
    s=10,
    legend_fontsize=6,
    save='allages_tendonfibro_cellrank_projection_umap_orig.svg',
)

### Estimating (Palantir Pseudotime Kernel)

In [None]:
# Wrap the pseudotime kernel in a GPCCA estimator and print its summary.
g = cr.estimators.GPCCA(pk)
print(g)

In [None]:
# Fit the estimator using the C_scANVI cluster labels.
# NOTE(review): n_states=[10, 20] is presumably a [min, max] range for the
# number of macrostates -- confirm against the CellRank documentation.
g.fit(cluster_key="C_scANVI", n_states=[10, 20])

# Plot all macrostates as discrete labels and save the figure.
g.plot_macrostates(
    which="all",
    discrete=True,
    legend_loc="right",
    s=100,
    save='allages_tendonfibro_macrostates_discrete_rightleg.svg',
)

In [None]:
# Predict terminal states from the fitted macrostates, then plot and save them.
g.predict_terminal_states()
g.plot_macrostates(
    which="terminal",
    legend_loc="right",
    s=100,
    save='allages_tendonfibro_terminal_discrete_rightleg.svg',
)

In [None]:
# Predict initial states from the fitted macrostates, then plot and save them.
g.predict_initial_states()
g.plot_macrostates(
    which="initial",
    legend_loc="right",
    s=100,
    save='allages_tendonfibro_initial_discrete_rightleg.svg',
)

In [None]:
# `st` was used below without ever being imported (NameError); bind it here so
# the cell is self-contained.
import scipy.stats as st

# NOTE(review): the "Beta" cluster label and the "Ins1" gene look like
# leftovers from a pancreas example -- confirm they exist in this dataset, or
# replace them with a cluster and gene from the tendon fibroblast data.

# subset to just Beta cells
bdata = adata[adata.obs["C_scANVI"] == "Beta"].copy()

# create an annotation for terminal vs. not-terminal
bdata.obs["maturation_state"] = np.where(
    bdata.obs["term_states_fwd"] == "Beta", "terminal", "not terminal"
)

# show distribution in violin plot
sc.pl.violin(bdata, keys=["Ins1"], groupby="maturation_state")

# use a simple t-test to quantify how different the two distributions are
# (equal_var=False selects Welch's unequal-variance test).
# NOTE(review): `.X.data` presumably assumes X is a scipy sparse matrix, in
# which case only explicitly stored entries enter the test -- verify.
a = bdata[bdata.obs["maturation_state"] == "terminal", "Ins1"].X.data
b = bdata[bdata.obs["maturation_state"] == "not terminal", "Ins1"].X.data
st.ttest_ind(a, b, equal_var=False)

In [None]:
# Second GPCCA estimator on the same kernel, driven step by step instead of
# through fit().
g2 = cr.estimators.GPCCA(pk)
# Compute the Schur decomposition with 20 components.
g2.compute_schur(n_components=20)
# Plot the real part of the spectrum and mark the eigengap.
g2.plot_spectrum(real_only=True, show_eigengap=True)

In [None]:
# Compute 7 macrostates, named after the C_scANVI clusters, and plot all of
# them on the UMAP. Saving is currently disabled (see the commented argument).
g2.compute_macrostates(n_states=7, cluster_key="C_scANVI")
g2.plot_macrostates(
    which="all",
    legend_fontsize=9,
    basis='umap',
    s=100,
    # save='allages_tendonfibro_macrostates_discrete.svg',
    figsize=(5, 4),
)

In [None]:
# Plot the composition of each macrostate with respect to the C_scANVI labels.
g2.plot_macrostate_composition(key="C_scANVI", figsize=(7, 4))

In [None]:
# Plot the coarse-grained transition matrix between macrostates.
g2.plot_coarse_T()

In [None]:
# Predict terminal states for g2 and plot them (figure is not saved).
g2.predict_terminal_states()
g2.plot_macrostates(which="terminal", legend_loc="right", s=100)

In [None]:
# Predict initial states for g2 and plot them (figure is not saved).
g2.predict_initial_states()
g2.plot_macrostates(which="initial", s=100, legend_loc='right margin')

In [None]:
# Compute fate probabilities, then plot them in separate panels
# (same_plot=False).
g2.compute_fate_probabilities()
g2.plot_fate_probabilities(same_plot=False)

In [None]:
# Plot all fate probabilities in a single panel and save the figure.
g2.plot_fate_probabilities(same_plot=True, save='allages_fates_umap.svg')

In [None]:
# Circular projection of the cells, drawn once per key ("age" and "C_scANVI"),
# saved as an SVG.
cr.pl.circular_projection(
    adata,
    keys=["age", "C_scANVI"],
    legend_loc="right",
    figsize=(25, 20),
    save='allages_tendonfibro_circular_projection_pseudokernel.svg',
)

In [None]:
# Clusters of interest; only these groups are highlighted on the UMAP.
states = [
    "ABI3BP GAS2 Fibroblasts 1",
    "ABI3BP GAS2 Fibroblasts 2",
    "COL3A1 PI16 Fibroblasts",
    "COL6A6 FNDC1 Fibroblasts",
]
sc.pl.embedding(
    adata,
    basis="umap",
    color="C_scANVI",
    groups=states,
    legend_loc="right",
)

In [None]:
# Violin plot of the fate probabilities towards the
# "ABI3BP GAS2 Fibroblasts 1_2" lineage, aggregated per C_scANVI cluster and
# restricted to the clusters listed in `states`.
cr.pl.aggregate_fate_probabilities(
    adata,
    mode="violin",
    lineages=["ABI3BP GAS2 Fibroblasts 1_2"],
    cluster_key="C_scANVI",
    clusters=states,
)

In [None]:
# Compute lineage drivers with default arguments and write the resulting table
# to the CellRank results folder as CSV.
lin_drivers = g2.compute_lineage_drivers()
lin_drivers.to_csv(os.path.join(RESULTS_FOLDERNAME, 'allages_tendonfibro_lineagedrivers.csv'))

In [None]:
# Display the terminal-state names; these are the lineage names used below.
g2.terminal_states.cat.categories

In [None]:
# Plot the top 8 driver genes on the UMAP for every terminal state.
# Saving is currently disabled (see the commented argument).
lineages = list(g2.terminal_states.cat.categories)
for i in lineages:
    g2.plot_lineage_drivers(
        i,
        n_genes=8,
        basis='umap',
        vmax=10,
        cmap='viridis',
        # save=f'allages_fibro_palantirlind_{i}.svg'
    )

In [None]:
# Map each terminal-state name to the driver table computed for that lineage
# alone.
lin_drivers_dict = {
    i: g2.compute_lineage_drivers(lineages=i, return_drivers=True)
    for i in g2.terminal_states.cat.categories
}

In [None]:
# Display the terminal-state names again; the cells below use them as keys
# into lin_drivers_dict.
g2.terminal_states.cat.categories

In [None]:
# define set of genes to annotate: the first 15 index entries of each
# lineage's driver table
genes_oi = {
    "ABI3BP GAS2 Fibroblasts 1_1": list(lin_drivers_dict['ABI3BP GAS2 Fibroblasts 1_1'].index[:15]),
    "COL3A1 PI16 Fibroblasts_1": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_1'].index[:15]),
}

# make sure all of these exist in AnnData. all() is required here: asserting a
# bare list only checks that it is non-empty, so a missing gene would pass.
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells. Taking the mean on X directly
# avoids densifying the matrix through the deprecated sparse `.A` attribute.
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="ABI3BP GAS2 Fibroblasts 1_1",
    lineage_y="COL3A1 PI16 Fibroblasts_1",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
)

In [None]:
# define set of genes to annotate: the first 15 index entries of each
# lineage's driver table
genes_oi = {
    "COL3A1 PI16 Fibroblasts_2": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_2'].index[:15]),
    "COL3A1 PI16 Fibroblasts_1": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_1'].index[:15]),
}

# make sure all of these exist in AnnData. all() is required here: asserting a
# bare list only checks that it is non-empty, so a missing gene would pass.
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells. Taking the mean on X directly
# avoids densifying the matrix through the deprecated sparse `.A` attribute.
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot and save it
g2.plot_lineage_drivers_correlation(
    lineage_x="COL3A1 PI16 Fibroblasts_2",
    lineage_y="COL3A1 PI16 Fibroblasts_1",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
    save='COL3_1_vs_COL3_2.svg',
)

In [None]:
# define set of genes to annotate: the first 15 index entries of each
# lineage's driver table
genes_oi = {
    "COL3A1 PI16 Fibroblasts_3": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_3'].index[:15]),
    "COL3A1 PI16 Fibroblasts_2": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_2'].index[:15]),
}

# make sure all of these exist in AnnData. all() is required here: asserting a
# bare list only checks that it is non-empty, so a missing gene would pass.
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells. Taking the mean on X directly
# avoids densifying the matrix through the deprecated sparse `.A` attribute.
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="COL3A1 PI16 Fibroblasts_3",
    lineage_y="COL3A1 PI16 Fibroblasts_2",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
)

In [None]:
# define set of genes to annotate: the first 15 index entries of each
# lineage's driver table
genes_oi = {
    "COL3A1 PI16 Fibroblasts_2": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_2'].index[:15]),
    "FGF14 THBS4 Fibroblasts": list(lin_drivers_dict['FGF14 THBS4 Fibroblasts'].index[:15]),
}

# make sure all of these exist in AnnData. all() is required here: asserting a
# bare list only checks that it is non-empty, so a missing gene would pass.
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells. Taking the mean on X directly
# avoids densifying the matrix through the deprecated sparse `.A` attribute.
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="COL3A1 PI16 Fibroblasts_2",
    lineage_y="FGF14 THBS4 Fibroblasts",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
)

In [None]:
# define set of genes to annotate: the first 15 index entries of each
# lineage's driver table
genes_oi = {
    "COL6A6 FNDC1 Fibroblasts": list(lin_drivers_dict['COL6A6 FNDC1 Fibroblasts'].index[:15]),
    "FGF14 THBS4 Fibroblasts": list(lin_drivers_dict['FGF14 THBS4 Fibroblasts'].index[:15]),
}

# make sure all of these exist in AnnData. all() is required here: asserting a
# bare list only checks that it is non-empty, so a missing gene would pass.
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells. Taking the mean on X directly
# avoids densifying the matrix through the deprecated sparse `.A` attribute.
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="COL6A6 FNDC1 Fibroblasts",
    lineage_y="FGF14 THBS4 Fibroblasts",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
)

In [None]:
# define set of genes to annotate: the first 15 index entries of each
# lineage's driver table
genes_oi = {
    "ABI3BP GAS2 Fibroblasts 1_1": list(lin_drivers_dict['ABI3BP GAS2 Fibroblasts 1_1'].index[:15]),
    "FGF14 THBS4 Fibroblasts": list(lin_drivers_dict['FGF14 THBS4 Fibroblasts'].index[:15]),
}

# make sure all of these exist in AnnData. all() is required here: asserting a
# bare list only checks that it is non-empty, so a missing gene would pass.
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells. Taking the mean on X directly
# avoids densifying the matrix through the deprecated sparse `.A` attribute.
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="ABI3BP GAS2 Fibroblasts 1_1",
    lineage_y="FGF14 THBS4 Fibroblasts",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
)

In [None]:
# define set of genes to annotate: the first 15 index entries of each
# lineage's driver table
genes_oi = {
    "ABI3BP GAS2 Fibroblasts 1_2": list(lin_drivers_dict['ABI3BP GAS2 Fibroblasts 1_2'].index[:15]),
    "FGF14 THBS4 Fibroblasts": list(lin_drivers_dict['FGF14 THBS4 Fibroblasts'].index[:15]),
}

# make sure all of these exist in AnnData. all() is required here: asserting a
# bare list only checks that it is non-empty, so a missing gene would pass.
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells. Taking the mean on X directly
# avoids densifying the matrix through the deprecated sparse `.A` attribute.
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="ABI3BP GAS2 Fibroblasts 1_2",
    lineage_y="FGF14 THBS4 Fibroblasts",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
)

In [None]:
# define set of genes to annotate: the first 15 index entries of each
# lineage's driver table
genes_oi = {
    "COL6A6 FNDC1 Fibroblasts": list(lin_drivers_dict['COL6A6 FNDC1 Fibroblasts'].index[:15]),
    "COL3A1 PI16 Fibroblasts_2": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_2'].index[:15]),
}

# make sure all of these exist in AnnData. all() is required here: asserting a
# bare list only checks that it is non-empty, so a missing gene would pass.
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells. Taking the mean on X directly
# avoids densifying the matrix through the deprecated sparse `.A` attribute.
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="COL6A6 FNDC1 Fibroblasts",
    lineage_y="COL3A1 PI16 Fibroblasts_2",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
)

In [None]:
# define set of genes to annotate: the first 15 index entries of each
# lineage's driver table
genes_oi = {
    "COL6A6 FNDC1 Fibroblasts": list(lin_drivers_dict['COL6A6 FNDC1 Fibroblasts'].index[:15]),
    "COL3A1 PI16 Fibroblasts_1": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_1'].index[:15]),
}

# make sure all of these exist in AnnData. all() is required here: asserting a
# bare list only checks that it is non-empty, so a missing gene would pass.
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells. Taking the mean on X directly
# avoids densifying the matrix through the deprecated sparse `.A` attribute.
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="COL6A6 FNDC1 Fibroblasts",
    lineage_y="COL3A1 PI16 Fibroblasts_1",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
)