In [None]:
# Import dependencies
import os
import anndata as ad
import numpy as np
import pandas as pd
import scanpy as sc
import scvelo as scv
import seaborn as sns

import matplotlib.pyplot as plt

# Initialize random seeds.
# Seed both Python's and NumPy's global generators: seeding `random` alone leaves any
# NumPy-based stochastic step unseeded.
import random
random.seed(111)
np.random.seed(111)

# Print date and time:
import datetime
e = datetime.datetime.now()
print("Current date and time = %s" % e)

# set a working directory (all folder names below are relative to it)
#wdir = "/ceph/project/tendonhca/akurjan/analysis/"
wdir = "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/developmental/"
os.chdir(wdir)

# folder structures
HARMONY_FOLDERNAME = "scVI/Fibroblasts/results/"
RESULTS_FOLDERNAME = "CellRank/Fibroblasts/results/"
FIGURES_FOLDERNAME = "CellRank/Fibroblasts/figures/"

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race
os.makedirs(RESULTS_FOLDERNAME, exist_ok=True)
os.makedirs(FIGURES_FOLDERNAME, exist_ok=True)

# Set folder for saving figures into
sc.settings.figdir = FIGURES_FOLDERNAME
scv.settings.figdir = FIGURES_FOLDERNAME
    
def savesvg(fname: str, fig, folder: str=FIGURES_FOLDERNAME) -> None:
    """
    Write ``fig`` to ``folder/fname`` as an SVG (vector) image.

    ``fig.tight_layout()`` is applied before the figure is saved.

    Parameters
    ----------
    fname
        File name of the image, joined onto ``folder``.
    fig
        Figure object providing ``tight_layout`` and ``savefig``.
    folder
        Destination directory; defaults to ``FIGURES_FOLDERNAME``.
    """
    fig.tight_layout()
    svg_path = os.path.join(folder, fname)
    fig.savefig(svg_path, format='svg')

# Set other settings
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
# Print the versions of the loaded packages for the record
sc.logging.print_versions()
# Apply the 'scvelo' figure-parameter preset
scv.set_figure_params('scvelo')

In [None]:
# Load the fibroblast AnnData file from the scVI results folder
adata_path = os.path.join(HARMONY_FOLDERNAME, 'dev_fibros_palantir.h5ad')
adata = scv.read(adata_path, cache=True)

# De-duplicate gene names in place, then display the object
adata.var_names_make_unique()
adata

In [None]:
# Drop genes detected in fewer than 5 cells
sc.pp.filter_genes(adata, min_cells=5)

# Keep the top 2000 highly variable genes, selected on the 'log1p_norm' layer
# (subset=True restricts adata to those genes)
sc.pp.highly_variable_genes(
    adata,
    layer='log1p_norm',
    flavor='cell_ranger',
    n_top_genes=2000,
    subset=True,
)

# Neighbour graph on the 'X_diff' representation
sc.pp.neighbors(adata, use_rep="X_diff", n_neighbors=30)

In [None]:
# Display the AnnData object after gene filtering and neighbour computation
adata

# CellRank

## Identifying Probable Terminal and Initial States

In [None]:
# Palantir pseudotime per 'C_scANVI_original' group
sc.pl.violin(adata, keys=["palantir_pseudotime"], groupby="C_scANVI_original", rotation=90)

In [None]:
# UMAP coloured by the 'phase' and 'C_scANVI_original' annotations
sc.pl.umap(adata, color=['phase', 'C_scANVI_original'])

### Estimating (Palantir Pseudotime Kernel)

In [None]:
import cellrank as cr
from cellrank.tl.kernels import PseudotimeKernel

# Pseudotime kernel on the Palantir pseudotime; `pk` is bound to whatever
# compute_transition_matrix() returns, as before
pk = PseudotimeKernel(adata, time_key="palantir_pseudotime")
pk = pk.compute_transition_matrix()

In [None]:
# Random walks on the pseudotime kernel, started in the "FGF14 SCX Fibroblasts"
# group of 'C_scANVI_original' and drawn on the UMAP
pk.plot_random_walks(
    n_sims=300,
    max_iter=1000,
    seed=0,
    start_ixs={"C_scANVI_original": "FGF14 SCX Fibroblasts"},
    basis="umap",
    s=100,
    figsize=(7, 7),
    save="random_walks_palantirpseudo.svg",
)

In [None]:
from cellrank.tl.estimators import GPCCA

# GPCCA estimator on the pseudotime kernel
g2 = GPCCA(pk)
# Schur decomposition with 20 components
g2.compute_schur(n_components=20)
# Plot the real part of the spectrum with the eigengap marked, to help choose the number of macrostates
g2.plot_spectrum(real_only=True, show_eigengap=True)

In [None]:
# 9 macrostates, with 'C_scANVI_original' as the cluster key
g2.compute_macrostates(cluster_key="C_scANVI_original", n_states=9)

# Discrete macrostate assignment on the force-directed layout
g2.plot_macrostates(
    basis="draw_graph_fa",
    discrete=True,
    s=100,
    legend_loc="right margin",
    legend_fontsize=9,
    figsize=(5, 4),
    save="devfibros_macrostates_discrete.svg",
)

In [None]:
# Plot the coarse-grained transition matrix between the macrostates of g2
g2.plot_coarse_T()

In [None]:
# Genes to show on the UMAP, one panel each
marker_genes = [
    'PRG4', 'CREB5', 'MKX', 'EGR1', 'PTCH2', 'EBF2', 'COL4A1',
    'POSTN', 'NEGR1', 'COL6A6', 'PDGFRA', 'BMP5', 'TSHZ2', 'FSTL5',
]

# Values come from the 'log1p_norm' layer; colour scale runs from 0 to the 99th percentile
sc.pl.embedding(
    adata,
    basis='umap',
    color=marker_genes,
    layer='log1p_norm',
    use_raw=False,
    cmap="Reds",
    vmin=0,
    vmax="p99",
    sort_order=False,
    frameon=False,
)

In [None]:
# One panel per macrostate, three panels per row
g2.plot_macrostates(
    basis="draw_graph_fa",
    same_plot=False,
    ncols=3,
    save="devfibros_macrostates_separate.svg",
)

In [None]:
# Macrostate names kept for reference when selecting terminal states.
# The original cell wrapped this list in backticks, which is a SyntaxError in a
# Python code cell and stops "Run All"; it is now a plain, valid list.
# NOTE(review): presumably copied from the output of g2.compute_macrostates above —
# the names may change if the analysis is re-run.
MACROSTATE_NAMES = [
    'FGF14 THBS4 Fibroblasts_1', 'COL3A1 PI16 Fibroblasts_1', 'COL3A1 PI16 Fibroblasts_2',
    'COL3A1 PI16 Fibroblasts_3', 'COL3A1 PI16 Fibroblasts_4', 'FGF14 THBS4 Fibroblasts_2',
    'ABI3BP GAS2 Fibroblasts_1', 'COL3A1 PI16 Fibroblasts_5', 'ABI3BP GAS2 Fibroblasts_2',
]
MACROSTATE_NAMES

In [None]:
# Compute terminal states with the estimator's default settings
g2.compute_terminal_states()
# Alternative (disabled): set the terminal states by hand from a list of macrostate names
#g2.set_terminal_states_from_macrostates([
#                                         'COL3A1 PI16 Fibroblasts_2', 'FGF14 THBS4 Fibroblasts_2', 
#                                         'ABI3BP GAS2 Fibroblasts_1', 'COL3A1 PI16 Fibroblasts_5', 
#                                         'ABI3BP GAS2 Fibroblasts_2'
#                                       ])
# Number of cells per value of the 'terminal_states' column
adata.obs['terminal_states'].value_counts()

In [None]:
# Absorption probabilities for g2
g2.compute_absorption_probabilities()

# Circular projection coloured by 'age' and 'C_scANVI_original'
cr.pl.circular_projection(
    adata,
    keys=["age", "C_scANVI_original"],
    s=10,
    alpha=0.9,
    legend_loc="right",
    figsize=(25, 25),
    save="devfibros_circular_projection.svg",
)

In [None]:
# Same projection, coloured by 'phase' and 'C_scANVI'
cr.pl.circular_projection(
    adata,
    keys=["phase", "C_scANVI"],
    s=5,
    legend_loc="right",
    figsize=(25, 25),
    save="devfibros_circular_projection2.svg",
)

In [None]:
# Lineage drivers for g2, written to the results folder as CSV
lin_drivers2 = g2.compute_lineage_drivers()
drivers_csv = os.path.join(RESULTS_FOLDERNAME, 'devfibros_palantirkernel_lineagedrivers.csv')
lin_drivers2.to_csv(drivers_csv)

In [None]:
# One driver-gene figure (8 genes, UMAP) per category of adata.obs['terminal_states']
lineages = list(adata.obs['terminal_states'].cat.categories)
for lineage_name in lineages:
    g2.plot_lineage_drivers(
        lineage_name,
        n_genes=8,
        basis='umap',
        vmax=10,
        cmap='plasma',
        save=f'devfibros_palantirlind_{lineage_name}.svg',
    )
    # echo the lineage just plotted
    print(lineage_name)

In [None]:
# Driver table per terminal state of g2, kept in a dict and also written to CSV
lin_drivers_dict = {}
for state in g2.terminal_states.cat.categories:
    lin_drivers_dict[state] = g2.compute_lineage_drivers(lineages=state, return_drivers=True)
    csv_name = f'allages_tendonfibro_pseudokernel_lineagedrivers_{state}.csv'
    lin_drivers_dict[state].to_csv(os.path.join(RESULTS_FOLDERNAME, csv_name))

In [None]:
# Compute lineage drivers with default arguments; the return value is the cell's output
g2.compute_lineage_drivers()

In [None]:
# define set of genes to annotate: first 15 index entries of each lineage's driver table
genes_oi = {
    "COL3A1 PI16 Fibroblasts_2": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_2'].index[:15]),
    "COL3A1 PI16 Fibroblasts_1": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_1'].index[:15])
}

# make sure all of these exist in AnnData
# (all() is required: asserting on the bare list is always true for a non-empty list)
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells
# (reduce first, then convert: works for sparse and dense X without densifying the matrix)
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="COL3A1 PI16 Fibroblasts_2",
    lineage_y="COL3A1 PI16 Fibroblasts_1",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
    save='COL3_1_vs_COL3_2.svg'
)

In [None]:
# define set of genes to annotate: first 15 index entries of each lineage's driver table
genes_oi = {
    "COL3A1 PI16 Fibroblasts_3": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_3'].index[:15]),
    "COL3A1 PI16 Fibroblasts_2": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_2'].index[:15])
}

# make sure all of these exist in AnnData
# (all() is required: asserting on the bare list is always true for a non-empty list)
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells
# (reduce first, then convert: works for sparse and dense X without densifying the matrix)
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="COL3A1 PI16 Fibroblasts_3",
    lineage_y="COL3A1 PI16 Fibroblasts_2",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
    save='COL3_3_vs_COL3_2.svg'
)

In [None]:
# define set of genes to annotate: first 15 index entries of each lineage's driver table
genes_oi = {
    "COL3A1 PI16 Fibroblasts_1": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_1'].index[:15]),
    "COL3A1 PI16 Fibroblasts_3": list(lin_drivers_dict['COL3A1 PI16 Fibroblasts_3'].index[:15])
}

# make sure all of these exist in AnnData
# (all() is required: asserting on the bare list is always true for a non-empty list)
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells
# (reduce first, then convert: works for sparse and dense X without densifying the matrix)
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="COL3A1 PI16 Fibroblasts_1",
    lineage_y="COL3A1 PI16 Fibroblasts_3",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
    save='COL3_1_vs_COL3_3.svg'
)

In [None]:
# define set of genes to annotate: first 15 index entries of each lineage's driver table
# NOTE(review): these lineage names must be keys of lin_drivers_dict (i.e. terminal-state
# categories of g2) — confirm they match the current run.
genes_oi = {
    "ABI3BP GAS2 Fibroblasts 1_1": list(lin_drivers_dict['ABI3BP GAS2 Fibroblasts 1_1'].index[:15]),
    "ABI3BP GAS2 Fibroblasts 1_2": list(lin_drivers_dict['ABI3BP GAS2 Fibroblasts 1_2'].index[:15])
}

# make sure all of these exist in AnnData
# (all() is required: asserting on the bare list is always true for a non-empty list)
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells
# (reduce first, then convert: works for sparse and dense X without densifying the matrix)
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="ABI3BP GAS2 Fibroblasts 1_1",
    lineage_y="ABI3BP GAS2 Fibroblasts 1_2",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
    save='ABI3_1_1_vs_ABI3_1_2.svg'
)

In [None]:
# define set of genes to annotate: first 15 index entries of each lineage's driver table
# NOTE(review): these lineage names must be keys of lin_drivers_dict (i.e. terminal-state
# categories of g2) — confirm they match the current run.
genes_oi = {
    "COL6A6 FNDC1 Fibroblasts": list(lin_drivers_dict['COL6A6 FNDC1 Fibroblasts'].index[:15]),
    "FGF14 THBS4 Fibroblasts": list(lin_drivers_dict['FGF14 THBS4 Fibroblasts'].index[:15])
}

# make sure all of these exist in AnnData
# (all() is required: asserting on the bare list is always true for a non-empty list)
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells
# (reduce first, then convert: works for sparse and dense X without densifying the matrix)
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g2.plot_lineage_drivers_correlation(
    lineage_x="COL6A6 FNDC1 Fibroblasts",
    lineage_y="FGF14 THBS4 Fibroblasts",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
    save='COL6_vs_FGF14.svg'
)

In [None]:
# Backward pseudotime kernel (backward=True) on the Palantir pseudotime
pk_bk = PseudotimeKernel(adata, time_key="palantir_pseudotime", backward=True)
pk_bk = pk_bk.compute_transition_matrix()

# GPCCA on the backward kernel: Schur decomposition, then the spectrum plot
g_bk2 = GPCCA(pk_bk)
g_bk2.compute_schur(n_components=20)
g_bk2.plot_spectrum(show_eigengap=True, real_only=True)

In [None]:
# A single macrostate for the backward estimator, keyed on 'C_scANVI'
g_bk2.compute_macrostates(cluster_key="C_scANVI", n_states=1)

# Discrete view on the UMAP
g_bk2.plot_macrostates(
    basis="umap",
    discrete=True,
    legend_fontsize=9,
    figsize=(5, 4),
    save="allages_tendonfibro_macrostates_initial2.svg",
)

In [None]:
# Same macrostate, drawn with same_plot=False
g_bk2.plot_macrostates(
    basis="umap",
    same_plot=False,
    legend_fontsize=9,
    figsize=(5, 4),
    save="allages_tendonfibro_macrostates_initial_discrete2.svg",
)

In [None]:
# Terminal states and absorption probabilities of the backward estimator
# (presumably what populates the 'initial_states' column read below — confirm)
g_bk2.compute_terminal_states()
g_bk2.compute_absorption_probabilities()
# Number of cells per value of the 'initial_states' column
adata.obs['initial_states'].value_counts()

In [None]:
# Latent time, with the initial/terminal state probabilities as root/end keys
scv.tl.recover_latent_time(
    adata,
    end_key="terminal_states_probabilities",
    root_key="initial_states_probabilities",
)

# PAGA on the 'C_scANVI' groups with the Palantir pseudotime as time prior
scv.tl.paga(
    adata,
    groups="C_scANVI",
    use_time_prior="palantir_pseudotime",
    root_key="initial_states_probabilities",
    end_key="terminal_states_probabilities",
    threshold_root_end_prior=0.9,
)

In [None]:
# Bar plots of cluster fates per 'C_scANVI' cluster (forward direction)
cr.pl.cluster_fates(
    adata, mode="bar", cluster_key="C_scANVI", backward=False,
    ncols=5, figsize=(15, 3),
    save="allages_tendonfibro_palantirpseudotime_directedPAGA_bar2.svg",
)

In [None]:
# Directed PAGA with per-cluster fate pies, laid out on the UMAP
cr.pl.cluster_fates(
    adata, mode="paga_pie", cluster_key="C_scANVI", backward=False,
    basis="umap",
    title="directed PAGA",
    figsize=(7, 7),
    node_size_scale=7, edge_width_scale=10, max_edge_width=10,
    legend_loc="top left out",
    legend_kwargs={"loc": "top right out"},
    save="allages_tendonfibro_palantir_pseudotime_directedPAGA_umap2.svg",
)

In [None]:
# Directed PAGA with per-cluster fate pies, laid out on the force-directed graph
cr.pl.cluster_fates(
    adata, mode="paga_pie", cluster_key="C_scANVI", backward=False,
    basis="draw_graph_fa",
    title="directed PAGA",
    figsize=(7, 7),
    node_size_scale=7, edge_width_scale=10, max_edge_width=10,
    legend_loc="top left out",
    legend_kwargs={"loc": "top right out"},
    save="allages_tendonfibro_palantir_pseudotime_directedPAGA_fa2.svg",
)

In [None]:
# Latent-time recovery is left disabled in this cell:
#scv.tl.recover_latent_time(
#    adata, root_key="initial_states_probabilities", end_key="terminal_states_probabilities"
#)

# PAGA on the 'fibro_louvain02' groups with the Palantir pseudotime as time prior
scv.tl.paga(
    adata,
    groups="fibro_louvain02",
    use_time_prior="palantir_pseudotime",
    root_key="initial_states_probabilities",
    end_key="terminal_states_probabilities",
    threshold_root_end_prior=0.9,
)

In [None]:
# Bar plots of cluster fates per 'fibro_louvain02' cluster (figure not saved)
cr.pl.cluster_fates(
    adata, mode="bar", cluster_key="fibro_louvain02", backward=False,
    ncols=5, figsize=(15, 3),
    #save='allages_tendonfibro_velocitypseudotime_directedPAGA_bar.svg',
)

In [None]:
# Directed PAGA with fate pies for the 'fibro_louvain02' clusters on the UMAP (figure not saved)
cr.pl.cluster_fates(
    adata, mode="paga_pie", cluster_key="fibro_louvain02", backward=False,
    basis="umap",
    title="directed PAGA",
    figsize=(7, 7),
    node_size_scale=7, edge_width_scale=10, max_edge_width=10,
    legend_loc="top left out",
    legend_kwargs={"loc": "top right out"},
    #save='allages_tendonfibro_palantir_pseudotime_directedPAGA_umap2.svg',
)

In [None]:
# PAGA on the 'C_scANVI' groups, this time with 'velocity_pseudotime' as time prior
scv.tl.paga(
    adata,
    groups="C_scANVI",
    use_time_prior="velocity_pseudotime",
    root_key="initial_states_probabilities",
    end_key="terminal_states_probabilities",
    threshold_root_end_prior=0.9,
)

# Bar plots of cluster fates per 'C_scANVI' cluster
cr.pl.cluster_fates(
    adata, mode="bar", cluster_key="C_scANVI", backward=False,
    ncols=5, figsize=(15, 3),
    save="allages_tendonfibro_velocitypseudotime_directedPAGA_bar.svg",
)

In [None]:
# Directed PAGA with fate pies for the 'C_scANVI' clusters on the UMAP
cr.pl.cluster_fates(
    adata, mode="paga_pie", cluster_key="C_scANVI", backward=False,
    basis="umap",
    title="directed PAGA",
    figsize=(7, 7),
    node_size_scale=7, edge_width_scale=10, max_edge_width=10,
    legend_loc="top left out",
    legend_kwargs={"loc": "top right out"},
    save="allages_tendonfibro_velocitypseudotime_directedPAGA_umap.svg",
)

In [None]:
# PAGA on the 'fibro_louvain02' groups with 'velocity_pseudotime' as time prior
scv.tl.paga(
    adata,
    groups="fibro_louvain02",
    use_time_prior="velocity_pseudotime",
    root_key="initial_states_probabilities",
    end_key="terminal_states_probabilities",
    threshold_root_end_prior=0.9,
)

In [None]:
# Bar plots of cluster fates per 'fibro_louvain02' cluster (figure not saved)
cr.pl.cluster_fates(
    adata, mode="bar", cluster_key="fibro_louvain02", backward=False,
    ncols=5, figsize=(15, 15),
    #save='allages_tendonfibro_velocitypseudotime_directedPAGA_bar.svg',
)

In [None]:
# Directed PAGA with fate pies for the 'fibro_louvain02' clusters on the UMAP (figure not saved)
cr.pl.cluster_fates(
    adata, mode="paga_pie", cluster_key="fibro_louvain02", backward=False,
    basis="umap",
    title="directed PAGA",
    figsize=(7, 7),
    node_size_scale=7, edge_width_scale=10, max_edge_width=10,
    legend_loc="top left out",
    legend_kwargs={"loc": "top right out"},
    #save='allages_tendonfibro_velocitypseudotime_directedPAGA_umap.svg',
)

In [None]:
import warnings
# From here on, every warning is suppressed for the rest of the session
warnings.filterwarnings("ignore")

# GAM with 6 knots, used to fit the expression trends
model = cr.models.GAM(adata, n_knots=6)

# Genes whose trends are drawn along the Palantir pseudotime
trend_genes = ["MKX", "TNMD", "FMOD", "COL1A1", "COL3A1", "KERA"]
cr.pl.gene_trends(
    adata,
    model=model,
    genes=trend_genes,
    data_key="MAGIC_imputed_data",
    time_key="palantir_pseudotime",
    same_plot=True,
    ncols=2,
    hide_cells=True,
    weight_threshold=(1e-3, 1e-3),
    save="allages_tendonfibro_palantir_pseudotime_genetrendstenocyte.svg",
)

In [None]:
# Recompute the per-lineage driver tables of g2 (all of them first, as before)
lin_drivers_dict = {
    state: g2.compute_lineage_drivers(lineages=state, return_drivers=True)
    for state in g2.terminal_states.cat.categories
}

# One heatmap per lineage over the first 40 rows of its driver table
for state in g2.terminal_states.cat.categories:
    top_genes = lin_drivers_dict[state].head(40).index
    cr.pl.heatmap(
        adata,
        model=model,  # the `model` object defined earlier in the notebook
        lineages=state,
        genes=top_genes,
        cluster_key="C_scANVI",
        data_key="MAGIC_imputed_data",
        time_key="palantir_pseudotime",
        show_all_genes=True,
        weight_threshold=(1e-3, 1e-3),
        figsize=(12, 10),
        save=f'{state}_lineage_heatmap.svg',
    )

### Estimating (Velocity + Connectivities Kernel)

In [None]:
# Forward velocity + connectivity kernel.
# `combined_kernel` was used here before any cell defined it, and VelocityKernel /
# ConnectivityKernel were never imported, so this cell failed on a fresh kernel.
from cellrank.tl.kernels import ConnectivityKernel, VelocityKernel

# NOTE(review): VelocityKernel presumably needs RNA-velocity results stored in adata —
# confirm they are present in the loaded file.
vk = VelocityKernel(adata).compute_transition_matrix()
ck = ConnectivityKernel(adata).compute_transition_matrix()
# 0.8 / 0.2 weights mirror the backward kernel built under "Identifying Initial States"
combined_kernel = 0.8 * vk + 0.2 * ck

# GPCCA estimator on the combined kernel: Schur decomposition, then the spectrum plot
g = GPCCA(combined_kernel)
g.compute_schur(n_components=20)
g.plot_spectrum(real_only=True, show_eigengap=True)

Given that the eigengap is at component 2 and components 0, 1, and 2 have relatively high Re values, I specify three macrostates. This choice captures the clear separation of the first two macrostates and allows for a third state to capture finer-scale transitions. ABI3BP GAS2 Fibroblasts 1 and COL3A1 PI16 Fibroblasts are the two dominant macrostates.

In [None]:
# 6 macrostates for the velocity-based estimator, keyed on 'C_scANVI'
g.compute_macrostates(cluster_key="C_scANVI", n_states=6)

# Discrete view on the UMAP (figure not saved)
g.plot_macrostates(
    basis="umap",
    discrete=True,
    s=100,
    legend_fontsize=9,
    #save='20w_macrostates_discrete.svg',
    figsize=(5, 4),
)

In [None]:
# Discrete macrostates of g on the UMAP (figure not saved)
g.plot_macrostates(
    basis="umap",
    discrete=True,
    s=100,
    legend_fontsize=9,
    #save='allages_tendonfibro_macrostates_velocity_umap.svg',
    figsize=(5, 4),
)

In [None]:
# Macrostates of g drawn with same_plot=False on the UMAP (figure not saved)
g.plot_macrostates(
    basis="umap",
    same_plot=False,
    #save='allages_tendonfibro_macrostates_separated_velocity_umap.svg',
)

In [None]:
#g.predict()

In [None]:
# Compute terminal states of g with the estimator's default settings
g.compute_terminal_states()
# Alternative (disabled): set the terminal states by hand from macrostate names
#g.set_terminal_states_from_macrostates(['COL3A1 PI16 Fibroblasts_1', 'ABI3BP GAS2 Fibroblasts 1_2'])
# Number of cells per value of the 'terminal_states' column
adata.obs['terminal_states'].value_counts()

In [None]:
# Absorption probabilities for g
g.compute_absorption_probabilities()

# Circular projection coloured by 'phase' and 'C_scANVI' (figure not saved)
cr.pl.circular_projection(
    adata,
    keys=["phase", "C_scANVI"],
    s=100,
    legend_loc="right",
    #save='20w_circular_projection.svg',
    figsize=(25, 25),
)

In [None]:
# Circular projection coloured by 'age' and 'C_scANVI' (figure not saved)
cr.pl.circular_projection(
    adata,
    keys=["age", "C_scANVI"],
    s=100,
    legend_loc="right",
    #save='20w_circular_projection.svg',
    figsize=(25, 25),
)

### Computing lineage drivers
We can compute the driver genes for all or just a subset of lineages. We can also restrict this to some subset of clusters by specifying clusters=... (not shown below). In the resulting dataframe, we also see the p-value, the corrected p-value (q-value) and the 95% confidence interval for the correlation statistic.

In [None]:
# Lineage drivers of the velocity-based estimator, computed with default arguments
lin_drivers = g.compute_lineage_drivers()

In [None]:
# One driver-gene figure (8 genes, UMAP) per category of adata.obs['terminal_states']
lineages = list(adata.obs['terminal_states'].cat.categories)
for lineage_name in lineages:
    g.plot_lineage_drivers(
        lineage_name,
        n_genes=8,
        basis='umap',
        cmap='magma',
        save=f'allages_fibro_velocitylind_{lineage_name}.svg',
    )

In [None]:
# Per-lineage driver tables. `lin_drivers_abi3` / `lin_drivers_fgf14` were used here without
# being defined in any cell; compute them from `g` the same way the per-lineage tables are
# obtained for `g2` elsewhere in this notebook.
# NOTE(review): assumes the returned table lists the strongest drivers first, as the
# `.index[:15]` selection below relies on — confirm.
lin_drivers_abi3 = g.compute_lineage_drivers(
    lineages="ABI3BP GAS2 Fibroblasts 1", return_drivers=True
)
lin_drivers_fgf14 = g.compute_lineage_drivers(
    lineages="FGF14 THBS4 Fibroblasts", return_drivers=True
)

# define set of genes to annotate
genes_oi = {
    "ABI3BP GAS2 Fibroblasts 1": list(lin_drivers_abi3.index[:15]),
    "FGF14 THBS4 Fibroblasts": list(lin_drivers_fgf14.index[:15])
}

# make sure all of these exist in AnnData
# (all() is required: asserting on the bare list is always true for a non-empty list)
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells
# (reduce first, then convert: works for sparse and dense X without densifying the matrix)
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g.plot_lineage_drivers_correlation(
    lineage_x="FGF14 THBS4 Fibroblasts",
    lineage_y="ABI3BP GAS2 Fibroblasts 1",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
)

In [None]:
# Per-lineage driver tables. `lin_drivers_abi3` / `lin_drivers_col3` were used here without
# being defined in any cell; compute them from `g` the same way the per-lineage tables are
# obtained for `g2` elsewhere in this notebook.
# NOTE(review): assumes the returned table lists the strongest drivers first, as the
# `.index[:15]` selection below relies on — confirm.
lin_drivers_abi3 = g.compute_lineage_drivers(
    lineages="ABI3BP GAS2 Fibroblasts 1", return_drivers=True
)
lin_drivers_col3 = g.compute_lineage_drivers(
    lineages="COL3A1 PI16 Fibroblasts", return_drivers=True
)

# define set of genes to annotate
genes_oi = {
    "ABI3BP GAS2 Fibroblasts 1": list(lin_drivers_abi3.index[:15]),
    "COL3A1 PI16 Fibroblasts": list(lin_drivers_col3.index[:15])
}

# make sure all of these exist in AnnData
# (all() is required: asserting on the bare list is always true for a non-empty list)
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells
# (reduce first, then convert: works for sparse and dense X without densifying the matrix)
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g.plot_lineage_drivers_correlation(
    lineage_x="ABI3BP GAS2 Fibroblasts 1",
    lineage_y="COL3A1 PI16 Fibroblasts",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
)

In [None]:
# Per-lineage driver tables. `lin_drivers_col3` / `lin_drivers_fgf14` were used here without
# being defined in any cell; compute them from `g` the same way the per-lineage tables are
# obtained for `g2` elsewhere in this notebook.
# NOTE(review): assumes the returned table lists the strongest drivers first, as the
# `.index[:15]` selection below relies on — confirm.
lin_drivers_col3 = g.compute_lineage_drivers(
    lineages="COL3A1 PI16 Fibroblasts", return_drivers=True
)
lin_drivers_fgf14 = g.compute_lineage_drivers(
    lineages="FGF14 THBS4 Fibroblasts", return_drivers=True
)

# define set of genes to annotate
genes_oi = {
    "COL3A1 PI16 Fibroblasts": list(lin_drivers_col3.index[:15]),
    "FGF14 THBS4 Fibroblasts": list(lin_drivers_fgf14.index[:15])
}

# make sure all of these exist in AnnData
# (all() is required: asserting on the bare list is always true for a non-empty list)
assert all(
    gene in adata.var_names for genes in genes_oi.values() for gene in genes
), "Did not find all genes"

# compute mean gene expression across all cells
# (reduce first, then convert: works for sparse and dense X without densifying the matrix)
adata.var["mean expression"] = np.asarray(adata.X.mean(axis=0)).ravel()

# visualize in a scatter plot
g.plot_lineage_drivers_correlation(
    lineage_x="FGF14 THBS4 Fibroblasts",
    lineage_y="COL3A1 PI16 Fibroblasts",
    adjust_text=True,
    gene_sets=genes_oi,
    color="mean expression",
    legend_loc="none",
    figsize=(5, 5),
    dpi=150,
    fontsize=9,
    size=50,
)

### Identifying Initial States

In [None]:
# VelocityKernel / ConnectivityKernel were never imported in this notebook, so this
# cell raised NameError on a fresh kernel; import them where they are first needed.
from cellrank.tl.kernels import ConnectivityKernel, VelocityKernel

# Backward (backward=True) velocity and connectivity kernels
vk_bk = VelocityKernel(adata, backward=True).compute_transition_matrix()
ck_bk = ConnectivityKernel(adata, backward=True).compute_transition_matrix()
# Weighted combination: 80% velocity, 20% connectivity.
# Note: this rebinds the name `combined_kernel` to the backward kernel.
combined_kernel = 0.8 * vk_bk + 0.2 * ck_bk
print(combined_kernel)

In [None]:
# GPCCA estimator on the kernel currently bound to `combined_kernel`
g_bk = GPCCA(combined_kernel)
# Schur decomposition with 20 components
g_bk.compute_schur(n_components=20)
# Real part of the spectrum with the eigengap marked
g_bk.plot_spectrum(real_only=True, show_eigengap=True)

In [None]:
# A single macrostate for g_bk, keyed on 'C_scANVI'
g_bk.compute_macrostates(cluster_key="C_scANVI", n_states=1)

# Macrostate on the UMAP (figure not saved)
g_bk.plot_macrostates(
    basis="umap",
    legend_fontsize=9,
    #save='20w_macrostates_initial.svg',
    figsize=(5, 4),
)

In [None]:
# Discrete view of the g_bk macrostate on the UMAP
# (the save name and s=100 are both part of the disabled line below)
g_bk.plot_macrostates(
    basis="umap",
    discrete=True,
    legend_fontsize=9,
    #save='20w_macrostates_initialdiscrete.svg', s=100,
    figsize=(5, 4),
)

In [None]:
# Terminal states and absorption probabilities of g_bk
# (presumably what populates the 'initial_states' column read below — confirm)
g_bk.compute_terminal_states()
g_bk.compute_absorption_probabilities()
# Number of cells per value of the 'initial_states' column
adata.obs['initial_states'].value_counts()

In [None]:
# Score every cell on a small gene panel; the result is stored as 'tenocyte_score'
tenocyte_genes = ["SCX", "MKX", "TNMD", "FMOD", "COL1A1"]
sc.tl.score_genes(adata, gene_list=tenocyte_genes, score_name="tenocyte_score")

# Copy the macrostates of g, and their colours, into AnnData
adata.obs["macrostates"] = g.macrostates
adata.uns["macrostates_colors"] = g.macrostates_memberships.colors

# Violin plot of the score per macrostate
sc.pl.violin(adata, keys="tenocyte_score", groupby="macrostates", rotation=90)

In [None]:
# Score every cell on COL3A1 + LUM; the result is stored as 'col3lum_score'
col3lum_genes = ["COL3A1", "LUM"]
sc.tl.score_genes(adata, gene_list=col3lum_genes, score_name="col3lum_score")

# Copy the macrostates of g, and their colours, into AnnData
adata.obs["macrostates"] = g.macrostates
adata.uns["macrostates_colors"] = g.macrostates_memberships.colors

# Violin plot of the score per macrostate
sc.pl.violin(adata, keys="col3lum_score", groupby="macrostates", rotation=90)

# PAGA

In [None]:
# Latent time, with the initial/terminal state probabilities as root/end keys
scv.tl.recover_latent_time(
    adata,
    end_key="terminal_states_probabilities",
    root_key="initial_states_probabilities",
)

# PAGA on the 'C_scANVI' groups with 'velocity_pseudotime' as time prior
scv.tl.paga(
    adata,
    groups="C_scANVI",
    use_time_prior="velocity_pseudotime",
    root_key="initial_states_probabilities",
    end_key="terminal_states_probabilities",
    threshold_root_end_prior=0.9,
)

In [None]:
# Bar plots of cluster fates per 'C_scANVI' cluster (figure not saved)
cr.pl.cluster_fates(
    adata, mode="bar", cluster_key="C_scANVI", backward=False,
    ncols=5, figsize=(15, 3),
    #save='20w_directedPAGA_bar.svg',
)

In [None]:
# Directed PAGA with fate pies for the 'C_scANVI' clusters on the UMAP (figure not saved)
cr.pl.cluster_fates(
    adata, mode="paga_pie", cluster_key="C_scANVI", backward=False,
    basis="umap",
    title="directed PAGA",
    figsize=(7, 7),
    node_size_scale=7, edge_width_scale=2, max_edge_width=3,
    legend_loc="top left out",
    legend_kwargs={"loc": "top right out"},
    #save='20w_directedPAGA_umap.svg',
)

We use pie charts to show cell fates averaged per cluster. Edges between clusters are given by transcriptomic similarity between the clusters, just as in normal PAGA.

In [None]:
# GAM with 6 knots, passed as `model` to the gene-trend and heatmap plots that follow
model = cr.models.GAM(adata, n_knots=6)

In [None]:
# GAM with 6 knots, used to fit the expression trends
model = cr.models.GAM(adata, n_knots=6)

# Genes whose trends are drawn along 'dpt_pseudotime', read from adata.X
trend_genes = ["SCX", "MKX", "COL1A1", "COL3A1", "FMOD", "KERA", "LUM"]
cr.pl.gene_trends(
    adata,
    model=model,
    genes=trend_genes,
    data_key="X",
    time_key="dpt_pseudotime",
    same_plot=True,
    ncols=2,
    hide_cells=True,
    weight_threshold=(1e-3, 1e-3),
)

In [None]:
# `lin_drivers_col3` was used here without being defined in any cell of the notebook;
# compute it from `g` the same way the per-lineage tables are obtained for `g2` elsewhere.
# NOTE(review): assumes the returned table lists the strongest drivers first, as the
# `.head(40)` selection below relies on — confirm.
lin_drivers_col3 = g.compute_lineage_drivers(
    lineages="COL3A1 PI16 Fibroblasts", return_drivers=True
)

# plot heatmap
cr.pl.heatmap(
    adata,
    model=model,  # use the model from before
    lineages="COL3A1 PI16 Fibroblasts",
    cluster_key="C_scANVI",
    data_key="X",
    genes=lin_drivers_col3.head(40).index,
    time_key="velocity_pseudotime",
    figsize=(12, 10),
    show_all_genes=True,
    weight_threshold=(1e-3, 1e-3),
)

In [None]:
# `lin_drivers_fgf14` was used here without being defined in any cell of the notebook;
# compute it from `g` the same way the per-lineage tables are obtained for `g2` elsewhere.
# NOTE(review): assumes the returned table lists the strongest drivers first, as the
# `.head(40)` selection below relies on — confirm.
lin_drivers_fgf14 = g.compute_lineage_drivers(
    lineages="FGF14 THBS4 Fibroblasts", return_drivers=True
)

# plot heatmap
cr.pl.heatmap(
    adata,
    model=model,  # use the model from before
    lineages="FGF14 THBS4 Fibroblasts",
    cluster_key="C_scANVI",
    data_key="X",
    genes=lin_drivers_fgf14.head(40).index,
    time_key="velocity_pseudotime",
    figsize=(12, 10),
    show_all_genes=True,
    weight_threshold=(1e-3, 1e-3),
)

In [None]:
# `lin_drivers_abi3` was used here without being defined in any cell of the notebook;
# compute it from `g` the same way the per-lineage tables are obtained for `g2` elsewhere.
# NOTE(review): assumes the returned table lists the strongest drivers first, as the
# `.head(40)` selection below relies on — confirm.
lin_drivers_abi3 = g.compute_lineage_drivers(
    lineages="ABI3BP GAS2 Fibroblasts 1", return_drivers=True
)

# plot heatmap
cr.pl.heatmap(
    adata,
    model=model,  # use the model from before
    lineages="ABI3BP GAS2 Fibroblasts 1",
    cluster_key="C_scANVI",
    data_key="X",
    genes=lin_drivers_abi3.head(40).index,
    time_key="velocity_pseudotime",
    figsize=(12, 10),
    show_all_genes=True,
    weight_threshold=(1e-3, 1e-3),
)

# SCFATES

In [None]:
import scFates as scf

# Fit a 200-node tree with the "ppt" method on the 'msdiff' representation (CPU, fixed seed)
scf.tl.tree(
    adata,
    method="ppt",
    Nodes=200,
    use_rep="msdiff",
    device="cpu",
    seed=1,
    ppt_lambda=100,
    ppt_sigma=0.025,
    ppt_nsteps=200,
)

In [None]:
# Draw the fitted graph on the UMAP
scf.pl.graph(adata, basis='umap')

In [None]:
# Set the root to 89.
# NOTE(review): hard-coded value, presumably a node index picked from the graph plot —
# re-check it whenever the tree is refitted.
scf.tl.root(adata,89)

In [None]:
# Compute pseudotime with 10 jobs, n_map=1000 and a fixed seed
scf.tl.pseudotime(adata,n_jobs=10,n_map=1000,seed=42)