In [None]:
# Import dependencies
%matplotlib inline
import os
import numpy as np
import scanpy as sc
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import anndata as ad

import harmony
import harmony.core
import harmony.plot
import palantir
import scvi

matplotlib.rcParams['font.family'] = 'sans-serif'

# Initialize random seeds
# random.seed() only covers Python's stdlib RNG. numpy and scvi-tools (torch)
# keep their own RNG state, so they are seeded as well to make the stochastic
# steps below (model training, sampling) repeatable.
import random
SEED = 111
random.seed(SEED)
np.random.seed(SEED)
scvi.settings.seed = SEED

# Print date and time (recorded in the output for provenance):
import datetime
e = datetime.datetime.now()
print("Current date and time = %s" % e)

# set a working directory
# The default is a machine-specific absolute path; set the ANALYSIS_WDIR
# environment variable to run the notebook elsewhere without editing this cell.
# All folder constants below are relative to this directory.
wdir = os.environ.get(
    "ANALYSIS_WDIR",
    "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks",
)
os.chdir(wdir)

# folder structures (relative to the working directory)
SCVI_FOLDERNAME = "foetal/results/scVI/"
RESULTS_FOLDERNAME = "foetal/results/Harmony"
FIGURES_FOLDERNAME = "foetal/figures/Harmony"

# exist_ok=True creates missing directories and is a no-op for existing ones,
# which avoids the separate exists() check (and the race between check and create).
for _folder in (RESULTS_FOLDERNAME, FIGURES_FOLDERNAME):
    os.makedirs(_folder, exist_ok=True)

# Set folder for saving figures into
# (this is where scanpy writes files requested through the `save=` argument of its plotting calls)
sc.settings.figdir = FIGURES_FOLDERNAME

def savesvg(fname: str, fig, folder: str=FIGURES_FOLDERNAME) -> None:
    """
    Write ``fig`` to ``folder/fname`` as an SVG (vector) file.

    ``fig`` must expose ``savefig`` (e.g. a matplotlib Figure); ``folder``
    defaults to the notebook's figures directory.
    """
    target_path = os.path.join(folder, fname)
    fig.savefig(target_path, format='svg')

# Set other settings
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
# Record the package versions in the notebook output for provenance
sc.logging.print_versions()
# dpi sets the resolution of inline figures, dpi_save the resolution of figures written to disk
sc.set_figure_params(dpi=150, fontsize=10, dpi_save=600)

# Subsetting and scVI/scANVI integration
Run this section in the `scvi-env` conda environment.

In [None]:
# Load the AnnData object from the scVI results folder
# (presumably the output of an earlier scANVI integration run — confirm)
adata = sc.read_h5ad(os.path.join(SCVI_FOLDERNAME, 'dev_scANVI.h5ad'))
adata

In [None]:
adata.obs['C_scANVI'].value_counts()

In [None]:
# Put the 'counts' layer back into X; the copy keeps X and the layer from sharing memory,
# so later in-place changes to X leave the stored counts untouched
adata.X = adata.layers['counts'].copy()

In [None]:
# Keep the labels as loaded so they can still be inspected after merging
adata.obs['C_scANVI_orig'] = adata.obs['C_scANVI']

# Collapse the two ABI3BP GAS2 sub-clusters into one label in a single pass;
# every other label is passed through unchanged.
# (Two further merges were tried and are intentionally NOT applied:
#  'COL6A6 FNDC1 Fibroblasts' -> 'COL3A1 PI16 Fibroblasts' and
#  'Smooth Myocytes' -> 'Mural Cells'.)
abi3bp_subclusters = ['ABI3BP GAS2 Fibroblasts 1', 'ABI3BP GAS2 Fibroblasts 2']
adata.obs['C_scANVI'] = np.where(
    adata.obs['C_scANVI'].isin(abi3bp_subclusters),
    'ABI3BP GAS2 Fibroblasts',
    adata.obs['C_scANVI'],
)

print(adata.obs['C_scANVI'].value_counts())

In [None]:
sc.pl.umap(adata, color=['C_scANVI', 'age', 'sampletype'],
          frameon=False, wspace=0.5)

In [None]:
# Gene names must be plain, unique strings: cast the index to str, then let AnnData
# append a numeric suffix to duplicates so that gene lookups by name are unambiguous
adata.var_names = adata.var_names.astype(str)
adata.var_names_make_unique()

In [None]:
# Fibroblast populations carried forward into the trajectory analysis
# ('NEGR1 SCN7A Fibroblasts' is deliberately left out)
keep = [
    'ABI3BP GAS2 Fibroblasts',
    'COL3A1 PI16 Fibroblasts',
    'COL6A6 FNDC1 Fibroblasts',
    'FGF14 THBS4 Fibroblasts',
]
# Boolean mask over cells; .copy() turns the resulting view into a standalone object
is_kept = adata.obs['C_scANVI'].isin(keep)
adata = adata[is_kept].copy()
adata

# Data Preparation (count reorganisation to meet Harmony assumptions)

In [None]:
data_df = pd.DataFrame(adata.X.toarray(), index=adata.obs_names, columns=adata.var_names)
data_df

In [None]:
layer_df = pd.DataFrame(adata.layers['counts'].toarray(), index=adata.obs_names, columns=adata.var_names)
layer_df

In [None]:
adata.obs

In [None]:
# sort by categorical age
adata.obs['age'] = pd.Categorical(adata.obs['age'], categories=['12w', '17w', '20w'], ordered=True)
# Sort on the integer category codes with a stable algorithm so that cells keep
# their original relative order within each age group (the default quicksort
# gives no such guarantee). Ages outside the listed categories become NaN
# (code -1) and are placed first instead of producing invalid positions.
age_sorted_positions = np.argsort(adata.obs['age'].cat.codes.to_numpy(), kind='stable')
# .copy() materialises the reordered object; without it `adata` stays a view of
# the unsorted parent and is silently copied by the first in-place operation.
adata = adata[age_sorted_positions].copy()
adata.obs

In [None]:
data_df = pd.DataFrame(adata.X.toarray(), index=adata.obs_names, columns=adata.var_names)
data_df

In [None]:
layer_df = pd.DataFrame(adata.layers['counts'].toarray(), index=adata.obs_names, columns=adata.var_names)
layer_df

The cells above check that reordering `adata.obs` also reorders the count matrices (`adata.X` and `adata.layers['counts']`) correctly.

In [None]:
adata.obs

In [None]:
del data_df, layer_df

In [None]:
adata.obs['C_scANVI'].value_counts()

In [None]:
sc.pp.filter_genes(adata, min_counts=5, inplace=True)

In [None]:
sc.pp.filter_cells(adata, min_genes=200)

In [None]:
# Freeze the current state (filtered genes/cells, X as set from the 'counts' layer above)
# in .raw before X is replaced by normalised values
adata.raw = adata

In [None]:
# Library-size normalisation without touching X: with inplace=False scanpy returns a dict
# whose "X" entry is the normalised matrix. target_sum=None scales every cell to the
# median total count across cells.
scaled_counts = sc.pp.normalize_total(adata, target_sum=None, inplace=False)
# log1p-transform the normalised matrix and keep the result as its own layer
adata.layers["log1p_norm"] = sc.pp.log1p(scaled_counts["X"], copy=True)
# Quick look at the top-left corner as a sanity check
print(adata.layers["log1p_norm"][0:5, 0:5])

In [None]:
adata.X = adata.layers['log1p_norm'].copy()

In [None]:
# Flag highly variable genes on the log-normalised X (the 'cell_ranger' flavor expects
# logarithmised data). With batch_key the selection is done per 'libbatch' and then combined;
# subset=False only annotates adata.var and keeps all genes.
sc.pp.highly_variable_genes(adata, n_top_genes=4500, flavor="cell_ranger", batch_key='libbatch', subset=False)
sc.pl.highly_variable_genes(adata)

In [None]:
sc.pp.scale(adata, max_value=10)
print(adata.X[0:5,0:5])

In [None]:
# Keep the scaled matrix as a layer, then run a 50-component PCA on X.
# This first PCA is exploratory: the plots below colour it by technical covariates.
adata.layers['scaled'] = adata.X.copy()
sc.pp.pca(adata, n_comps=50, svd_solver="arpack")

In [None]:
sc.pl.pca_loadings(adata, components='1,2,3')

In [None]:
sc.pl.pca(adata, components=['1,2', '3,4', '5,6', '7,8'], ncols=2, color='phase')

In [None]:
sc.pl.pca(adata, components=['1,2', '3,4', '5,6', '7,8'], ncols=2, color='pct_counts_mt')

In [None]:
sc.pl.pca(adata, components=['1,2', '3,4', '5,6', '7,8'], ncols=2, color='total_counts')

In [None]:
# The exploratory scaling above overwrote X; restore the log-normalised values so that
# the regression in the next section starts from unscaled data
adata.X = adata.layers['log1p_norm'].copy()

# Regressing out biological variation

In [None]:
# Regress the listed per-cell covariates out of X; X is replaced by the residuals.
# Assumes these columns already exist in adata.obs (computed upstream) — TODO confirm.
sc.pp.regress_out(adata, ['pct_counts_mt', 'pct_counts_ribo', 'S_score', 'G2M_score'])

In [None]:
# Re-select highly variable genes, this time on the regressed X.
# NOTE(review): flavor='cell_ranger' is documented as expecting logarithmised data, whereas X now
# holds regression residuals — confirm that selecting HVGs on residuals is intended.
sc.pp.highly_variable_genes(adata, n_top_genes=4500, flavor="cell_ranger", batch_key='libbatch', subset=False)
sc.pl.highly_variable_genes(adata)

In [None]:
# Names of the genes currently flagged as highly variable, as a plain list
is_hvg = adata.var['highly_variable']
hvgenes = adata.var_names[is_hvg].tolist()
len(hvgenes)

In [None]:
sc.pp.scale(adata, max_value=10)
print(adata.X[0:5,0:5])

In [None]:
adata.layers['scaled'] = adata.X.copy()
sc.pp.pca(adata, n_comps=50, svd_solver="arpack")

In [None]:
sc.pl.pca_loadings(adata, components='1,2,3')

In [None]:
sc.pl.pca(adata, components=['1,2', '3,4', '5,6', '7,8'], ncols=2, color='phase')

In [None]:
sc.pl.pca(adata, components=['1,2', '3,4', '5,6', '7,8'], ncols=2, color='libbatch')

In [None]:
sc.pl.pca_variance_ratio(adata, log=True, n_pcs=50)

In [None]:
sc.pp.neighbors(adata, use_rep="X_pca", metric='correlation', n_neighbors=15)
sc.tl.umap(adata)
sc.pl.umap(adata, color='C_scANVI')

In [None]:
sc.pl.umap(adata, color=['C_scANVI', 'age', 'sampletype'], ncols=2,
          frameon=False, wspace=0.5)

In [None]:
# Collect the selected genes per selection method, starting with the
# cell-ranger flavour currently stored in adata.var['highly_variable']
var_genes = {'cell_ranger': adata.var_names[adata.var['highly_variable']]}

In [None]:
# Alternative gene selection from scvi-tools, computed on the 'counts' layer per 'libbatch'.
# NOTE: the results are written into adata.var and REPLACE the 'highly_variable' column;
# the cell-ranger selection now only survives in var_genes['cell_ranger'].
scvi.data.poisson_gene_selection(adata, layer='counts', batch_key='libbatch', n_top_genes=4500)
adata.var

In [None]:
var_genes['scvi'] = adata.var_names[adata.var['highly_variable']]

In [None]:
# Import only the plotting function instead of `import *`, so no unrelated
# names are pulled into the notebook namespace.
from venny4py.venny4py import venny4py

# One Python set of gene names per selection method, as required by the `sets` argument
sets = {method: set(genes) for method, genes in var_genes.items()}

# Venn diagram of the overlap between the gene selections
venny4py(sets=sets)

In [None]:
# Annotate every gene with the selection method(s) that picked it.
# Assignments are applied in order, so later entries win: genes chosen by
# both methods end up as 'Both'.
adata.var['HVG'] = 'None'  # default for genes picked by neither method
hvg_assignments = [
    (var_genes['scvi'], 'High Dropout'),
    (var_genes['cell_ranger'], 'Highly Variable+Expressed'),
    (sets['scvi'].intersection(sets['cell_ranger']), 'Both'),
]
for selected_genes, category in hvg_assignments:
    adata.var.loc[list(selected_genes), 'HVG'] = category

# Colour used for each category in the mean-variance plots
gene_category_colors = {'Both': 'magenta', 'Highly Variable+Expressed': 'cyan', 
                        'High Dropout': 'black', 'None': 'grey'}

adata.var['HVG'].value_counts()

In [None]:
def plot_mean_variance(countslayers, log_scale=False):
    """
    Plot per-gene variance against per-gene mean, one panel per layer.

    Reads the notebook-level ``adata`` and ``gene_category_colors``: points are
    coloured by the category stored in ``adata.var['HVG']``.

    Parameters
    ----------
    countslayers : sequence of str
        Keys of ``adata.layers`` to plot; one side-by-side panel is drawn per key.
    log_scale : bool, default False
        If True, both axes use a logarithmic scale and the axis labels say so.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    num_rows = 1
    # One column per requested layer (previously fixed at 2, which broke for any
    # other number of layers); at least one so an empty list still yields a figure.
    num_cols = max(len(countslayers), 1)

    # squeeze=False always returns a 2-D array of axes, so a single panel can be
    # indexed the same way as several.
    fig, axes = plt.subplots(num_rows, num_cols, figsize=(5 * num_cols, 5), squeeze=False)

    # The colours do not depend on the layer, so compute them once
    point_colors = adata.var['HVG'].map(gene_category_colors)

    for idx, layer in enumerate(countslayers):
        values = adata.layers[layer]
        # Densify once per layer (it was done twice before) and accept layers
        # that are already dense arrays.
        if hasattr(values, 'toarray'):
            values = values.toarray()
        values = np.asarray(values)

        mean_counts = np.mean(values, axis=0)
        variance = np.var(values, axis=0)

        ax = axes[0, idx]
        ax.scatter(mean_counts, variance,
                   c=point_colors,
                   edgecolors='none')

        ax.set_xlabel('Mean Counts')
        ax.set_ylabel('Variance')
        if log_scale:
            ax.set_xscale('log')
            ax.set_yscale('log')
            ax.set_xlabel('Log Mean Counts')
            ax.set_ylabel('Log Variance')
        ax.set_title(f'Mean-Variance Plot ({layer})')

    plt.tight_layout()
    plt.show()
    
plot_mean_variance(['counts', 'log1p_norm'], log_scale=True)

In [None]:
# Subset to the cell-ranger HVGs. adata.var['highly_variable'] cannot be used for this any
# more: poisson_gene_selection has since overwritten it with the scvi selection, so the
# cell-ranger genes are taken from the list saved in var_genes.
adata_hvg = adata[:, adata.var_names.isin(var_genes['cell_ranger'])] #cell-ranger hvgs
adata_hvg

In [None]:
adata_hvg.var

In [None]:
print(adata_hvg.X.shape)
print(adata_hvg.raw.X.shape)

In [None]:
# Register the fields scVI reads from `adata`: the 'counts' layer, the batch label
# and the additional categorical covariates.
# NOTE(review): this registers the full `adata` (all genes), not the `adata_hvg` subset
# built above — confirm that training on every gene is intended.
scvi.model.SCVI.setup_anndata(adata,
                              layer="counts", 
                              batch_key="libbatch",
                              categorical_covariate_keys=["sample", "type"], # effects that are taken into account that we are not necessarily interested in
                              #continuous_covariate_keys=['S_score', 'G2M_score']
                             )

In [None]:
# Build the scVI model: 2 hidden layers of 128 units, a 30-dimensional latent space,
# zero-inflated negative binomial likelihood with a per-gene, per-batch dispersion
vae = scvi.model.SCVI(adata, n_hidden = 128, n_latent=30, n_layers=2, 
                      dropout_rate=0.1, dispersion="gene-batch", gene_likelihood='zinb')
# Print the registered fields as a sanity check before training
vae.view_anndata_setup(adata)

In [None]:
%%time
# Train on the GPU with a 90/10 train/validation split. Validation runs every epoch so that
# early stopping can watch the validation ELBO and stop after 20 epochs without improvement.
# Only `accelerator` selects the device: `use_gpu` is the deprecated predecessor of
# `accelerator`/`devices`, and passing both is redundant and fails on releases that removed it.
vae.train(max_epochs = 400, train_size = 0.9, validation_size = 0.1,
          accelerator='gpu',
          check_val_every_n_epoch=1,
          early_stopping=True,
          early_stopping_patience=20,
          early_stopping_monitor="elbo_validation",
         )

In [None]:
# Put the training and validation ELBO side by side in a NEW frame. Adding a column to
# vae.history["elbo_train"] directly would modify the history object held by the model.
train_test_results = pd.concat(
    [vae.history["elbo_train"], vae.history["elbo_validation"]], axis=1
)
train_test_results.iloc[10:].plot(logy=True)  # exclude first 10 epochs
plt.show()

In [None]:
vae.save(os.path.join(RESULTS_FOLDERNAME, "FoetalFibroblastsGrouped_SampleTypeLibbatch_ZinbGeneBatch/"), overwrite=True)

In [None]:
# Store the scVI latent representation of every cell
adata.obsm["X_scVI"] = vae.get_latent_representation()
# use scVI latent space for UMAP generation
# (this replaces the neighbour graph and UMAP computed from the PCA earlier)
sc.pp.neighbors(adata, use_rep="X_scVI", metric='correlation', n_neighbors=50)
sc.tl.umap(adata)

In [None]:
def plot_umaps(anndata, parameters: list, filename: "str | None" = None):
    """
    Draw one UMAP per entry of ``parameters``, stacked vertically in one figure.

    Parameters
    ----------
    anndata
        Object passed to ``sc.pl.umap``; it must already contain a UMAP embedding.
    parameters : list
        Values passed one at a time as ``color=`` to ``sc.pl.umap``; each is also
        used as the title of its panel.
    filename : str, optional
        If given, the figure is saved through ``savesvg`` under this name.
        If omitted, the figure is only displayed.

    Returns
    -------
    None
    """
    n_plots = len(parameters)
    # squeeze=False keeps `axs` two-dimensional even for a single panel, so a
    # one-element `parameters` list no longer fails on indexing.
    fig, axs = plt.subplots(n_plots, 1, figsize=(7, 4*n_plots), squeeze=False)
    for ax, param in zip(axs[:, 0], parameters):
        sc.pl.umap(anndata, color=param, ax=ax, show=False, frameon=False)
        ax.set_title(param)
    plt.tight_layout()
    # Saving is optional so the function can also be used for a quick look
    if filename is not None:
        savesvg(filename, fig)
    plt.show()

In [None]:
plot_umaps(adata, ['sampletype', 'sample', 'age', 'libbatch', 'type', 'phase', 'sex', 'C_scANVI'], 
           filename = 'dev_UMAP_plots_tendonfibros_scVI_regressed_libbatchSampleTypePhaseScores.svg'
          )

In [None]:
# Pass the output filename explicitly: with the argument commented out the call did not
# match the plot_umaps signature, and the figure had no file of its own.
plot_umaps(adata, ['sampletype', 'sample', 'age', 'libbatch', 'type', 'phase', 'sex', 'C_scANVI'], 
           filename = 'dev_UMAP_plots_tendonfibros_scVI_newgrouped.svg'
          )

In [None]:
# Initialise scANVI from the trained scVI model, using the C_scANVI labels as supervision.
# Cells labelled "Unknown" would be treated as unlabelled.
lvae = scvi.model.SCANVI.from_scvi_model(
    vae,
    adata=adata,
    labels_key="C_scANVI",
    unlabeled_category="Unknown",
)

# Short fine-tuning run on the GPU with early stopping on the validation ELBO.
# Only `accelerator` selects the device: `use_gpu` is the deprecated predecessor of
# `accelerator`/`devices`, and passing both is redundant and fails on releases that removed it.
lvae.train(max_epochs=25, train_size = 0.9, validation_size = 0.1,
          accelerator='gpu',
          check_val_every_n_epoch=1,
          early_stopping=True,
          early_stopping_patience=10,
          early_stopping_monitor="elbo_validation")

In [None]:
# NOTE: this overwrites the C_scANVI_orig column created earlier in the notebook. From here on
# it holds the labels scANVI was trained on, while C_scANVI holds the model's predictions.
# Re-running this cell would copy the predictions into C_scANVI_orig.
adata.obs["C_scANVI_orig"] = adata.obs["C_scANVI"]
adata.obs["C_scANVI"] = lvae.predict(adata)
# Latent representation from the scANVI model
adata.obsm["X_scANVI"] = lvae.get_latent_representation(adata)

In [None]:
sc.pp.neighbors(adata, use_rep="X_scANVI", metric='correlation')
sc.tl.umap(adata)
sc.pl.umap(adata, color=['C_scANVI', 'C_scANVI_orig'], frameon=False,
          wspace=0.5)

In [None]:
plot_umaps(adata, ['sampletype', 'sample', 'age', 'libbatch', 'type', 'phase', 'sex', 'C_scANVI_orig', 'C_scANVI'], 
           filename = 'dev_UMAP_plots_tendonfibros_scANVI_grouped.svg')

In [None]:
# Checkpoint: the Harmony section below reloads this file (it is run in a different conda environment)
adata.write(os.path.join(RESULTS_FOLDERNAME, '{}.h5ad'.format('dev_tendonfibros_scANVI2')))

# Harmony
Use seacells_env environment

In [None]:
adata = sc.read_h5ad(os.path.join(RESULTS_FOLDERNAME, '{}.h5ad'.format('dev_tendonfibros_scANVI2')))

In [None]:
adata.obs

In [None]:
# specify the timepoint information for Harmony
# (cast to plain strings so the labels match the entries of timepoint_connections below)
tp = adata.obs['age'].astype(str)

# specify the timepoint connections for the augmented affinity matrix:
# each row links one timepoint to the next one (12w -> 17w, 17w -> 20w)
timepoint_connections = pd.DataFrame({'from_timepoint': ['12w', '17w'], 'to_timepoint': ['17w', '20w']})

Specifying the number of neighbors (`n_neighbors`) sets the level of granularity at which cell relationships are identified. A small value such as 10 gives a local, fine-grained view of cell-to-cell connections, whereas a larger value captures more distant, global similarities.

My data is fairly sparse, so the k value needs to be larger to capture meaningful similarities.

In [None]:
# Harmony input: one row per cell (indexed by cell name), one column per scANVI latent dimension
data_df=pd.DataFrame(adata.obsm["X_scANVI"],index=adata.obs_names)
# data_df=pd.DataFrame(adata.obsm["X_pca"][:,:15],index=adata.obs_names) - not batch-corrected!
data_df

In [None]:
adata.var_names_make_unique()

In [None]:
# generate the augmented affinity matrix (aug_aff) and the non-augmented matrix (aff)
# pc_components=None: data_df is already a low-dimensional embedding, so Harmony is
# presumably told to use it as-is instead of running its own PCA — confirm against the Harmony docs
aug_aff, aff = harmony.core.augmented_affinity_matrix(data_df, tp, timepoint_connections, 
                                                      n_neighbors=100, pc_components=None)

# add the augmented affinity matrix to obsm
# (the non-augmented `aff` is not stored)
adata.obsm['X_aug_aff'] = aug_aff

In [None]:
# computes force directed layout coordinates from the augmented aff matrix
layout = harmony.plot.force_directed_layout(aug_aff, data_df.index)

In [None]:
# add layout coordinates to the anndata object
adata.obsm['X_force_directed_layout'] = layout
# Keep a second copy reduced to its bare values (.values) under the key used as `basis` in the plots below
adata.obsm['force_directed_array'] = adata.obsm['X_force_directed_layout'].values
# `save=` writes the figure into sc.settings.figdir; scanpy prefixes the given suffix with the plot name
sc.pl.embedding(adata, basis='force_directed_array', color='age',
               frameon=False,
               save='_dev_harmony_FibroblastsByAge_2.png')

In [None]:
# One figure per age group, each highlighting only that group on the
# force-directed layout ...
for age_group in ['12w', '17w', '20w']:
    sc.pl.embedding(
        adata,
        basis='force_directed_array',
        color='age',
        frameon=False,
        groups=[age_group],
        save='_dev_harmony_fibroblatsage_split' + age_group + '2.svg',
    )

# ... followed by the same layout coloured by cell type
sc.pl.embedding(
    adata,
    basis='force_directed_array',
    color='C_scANVI_orig',
    frameon=False,
    save='_dev_harmony_fibroblasts_celltype2.svg',
)

In [None]:
adata.obsm['X_umap_orig'] = adata.obsm['X_umap']

In [None]:
adata.write(os.path.join(RESULTS_FOLDERNAME, 'allages_tendonfibro_pre2.h5ad'))

## Palantir trajectory detection
Palantir is an algorithm developed by the Pe'er lab to align cells along differentiation trajectories. Palantir models differentiation as a stochastic process where stem cells differentiate to terminally differentiated cells by a series of steps through a low dimensional phenotypic manifold. Palantir effectively captures the continuity in cell states and the stochasticity in cell fate determination.

The first step in Palantir trajectory detection is to project data onto diffusion maps. Harmony augmented affinity matrix is used as the input for identifying diffusion maps. Please see https://github.com/dpeerlab/Palantir for more details on Palantir

In [None]:
adata = sc.read_h5ad(os.path.join(RESULTS_FOLDERNAME, 'allages_tendonfibro_pre2.h5ad'))

In [None]:
# Diffusion maps computed from the Harmony augmented affinity matrix.
# NOTE(review): if the stored matrix is sparse it is likely treated as a precomputed kernel,
# in which case `knn` has no effect — confirm against the Palantir documentation.
dm_res = palantir.utils.run_diffusion_maps(adata.obsm["X_aug_aff"], knn=80)
#dm_res = palantir.utils.run_diffusion_maps(pd.DataFrame(adata.obsm["X_aug_aff"].toarray(),index=adata.obs_names))
#dm_res = palantir.utils.run_diffusion_maps(pd.DataFrame(adata.obsm["X_scANVI"],index=adata.obs_names),knn=100)

# Keep the "T" matrix and the eigenvectors on the AnnData object so they are saved with it
adata.obsp["T"]=dm_res["T"]
adata.obsm["X_diff"]=dm_res['EigenVectors'].values

In [None]:
# Neighbour graph on the diffusion components, then PAGA connectivity between the C_scANVI groups
sc.pp.neighbors(adata,n_neighbors=80,use_rep="X_diff")
sc.tl.paga(adata,"C_scANVI")
# Edges weaker than `threshold` are not drawn
sc.pl.paga(adata,threshold=.02, node_size_scale=2,
           fontsize=6, fontoutline=1, frameon=False, 
           save='_allages_tendonfibro2.png'
          )

In [None]:
# Recompute the UMAP, initialised from the PAGA layout. This relies on the PAGA plot above
# having been drawn, because that call stores the node positions used here.
sc.tl.umap(adata,
           spread=.6, 
           init_pos='paga'
           )
sc.pl.umap(adata, color='C_scANVI_orig')

In [None]:
sc.pl.umap(adata, color='age')

In [None]:
# Multiscale space derived from the diffusion-map result; stored in obsm as a plain array
# (ms_data.values) so it is written out with the AnnData object
ms_data = palantir.utils.determine_multiscale_space(dm_res)
adata.obsm["X_msdiff"]=ms_data.values
ms_data

In [None]:
%%time
# tSNE on the multiscale diffusion space; the learning rate scales with the number of cells (n_cells / 12)
sc.tl.tsne(adata,use_rep="X_msdiff",
           perplexity=50,
           learning_rate=adata.shape[0]/12
          )

In [None]:
sc.pl.tsne(adata,color="C_scANVI", 
           frameon=False,
           save='_allages_tendonfibro_msdiff2.svg'
          )
#plt.savefig(os.path.join(FIGURES_FOLDERNAME, '20pcw_tendonfibro_msdiff_TSNE.svg'), format='svg')
#plt.savefig(os.path.join(FIGURES_FOLDERNAME, '20pcw_tendonfibro_msdiff_TSNEpng.png'), format='png')
# Image(filename=os.path.join(FIGURES_FOLDERNAME,'msdiff_TSNEpng.png'))

In [None]:
# Rebuild the neighbour graph on the multiscale space (this replaces the graph computed on X_diff),
# then compute a force-directed graph layout starting from the tSNE coordinates
sc.pp.neighbors(adata, n_neighbors=30, use_rep="X_msdiff")
sc.tl.draw_graph(adata, init_pos="X_tsne", 
                 maxiter=500)

In [None]:
sc.pl.draw_graph(adata,
                 color="age", frameon=False,
                 #save='_allagestendonfibro__age_msdiff.svg'
                )

In [None]:
sc.pl.draw_graph(adata,
                 color="C_scANVI_orig", frameon=False,
                 #save='_allagestendonfibro_msdiff.svg'
                )
#plt.savefig(os.path.join(FIGURES_FOLDERNAME, '20pcw_tendonfibro_msdiff_graph.png'), format='png')
# Image(filename=os.path.join(FIGURES_FOLDERNAME,'msdiff_graph.png'))

In [None]:
adata.write(os.path.join(RESULTS_FOLDERNAME, 'allages_tendonfibro2.h5ad'))

# Palantir Pseudotime

In [None]:
adata = sc.read_h5ad(os.path.join(RESULTS_FOLDERNAME, 'allages_tendonfibro2.h5ad'))

In [None]:
# Rebuild the multiscale-space DataFrame from the array saved in obsm, indexed by cell name
# (used as the `data` input of run_palantir further down)
ms_data = pd.DataFrame(adata.obsm['X_msdiff'], index=adata.obs_names)
ms_data

In [None]:
sc.pl.embedding(adata, basis='msdiff', color=['MKI67', 'DIAPH2', 'TOP2A', 'CENPK', 'CENPP', 'C_scANVI'], 
                components=["1, 2"], ncols=3)

In [None]:
sc.pl.embedding(adata, basis='msdiff', color=['MKI67', 'DIAPH2', 'TOP2A', 'CENPK', 'CENPP'], 
                components=["3, 4"], ncols=3)

In [None]:
sc.pl.embedding(adata, basis='msdiff', color=['MKX', 'SCX', 'TNMD', 'GAS2', 'ABI3BP', 'SPARC', 'age'], 
                components=["1, 2"], ncols=3)

In [None]:
sc.pl.embedding(adata, basis='msdiff', color=['MKX', 'SCX', 'GAS2', 'ABI3BP', 'SPARC', 'age'], 
                components=["2, 3"], ncols=3)

In [None]:
sc.pl.embedding(adata, basis='msdiff', color=['MKX', 'SCX', 'TNMD', 'EGR1', 'GAS2', 'ABI3BP', 'SPARC', 'age', 'C_scANVI'], 
                components=["3, 4"], ncols=3)

In [None]:
sc.pl.embedding(adata, basis='msdiff', color=['VIM', 'NES', 'THY1', 'ITGB1', 'TPPP3', 'CD44', 'age', 'C_scANVI_orig'], 
                components=["1, 2"], ncols=3)

In [None]:
sc.pl.embedding(adata, basis='msdiff', color=['VIM', 'NES', 'THY1', 'ITGB1', 'TPPP3', 'CD44', 'age', 'C_scANVI'], 
                components=["2,3"], ncols=3)

In [None]:
sc.pl.embedding(adata, basis='msdiff', color=['CDKN1B', 'CDKN1A', 'MKI67', 'CD34', 'CDK4', 'age', 'C_scANVI'], 
                components=["3,4"], ncols=3)

In [None]:
adata.var_names_make_unique()
adata.obs_names_make_unique()

In [None]:
# Find a candidate start cell: low COL1A1 and TNMD expression together with high NANOG expression.
# The three criteria are combined into ONE per-cell score before taking the argmax.
# Adding the three separate argmin/argmax positions together (as was done before) gives an
# unrelated cell index that can even lie beyond the last cell.
candidate_score = (
    adata.obs_vector('NANOG')
    - adata.obs_vector('COL1A1')
    - adata.obs_vector('TNMD')
)
max_expression_cell_index = int(np.argmax(candidate_score))
cell_id_with_highest_expression = adata.obs_names[max_expression_cell_index]
print("Cell ID with highest gene expression:", cell_id_with_highest_expression)

In [None]:
adata.X = adata.layers['log1p_norm'].copy()

In [None]:
# Replace .raw with a snapshot of the current object, so that calls using use_raw=True
# read whatever X holds at this point
adata.raw = adata.copy()

In [None]:
print(adata.raw.X[0:10, 0:10])

In [None]:
# Genes whose combined score is used to choose the start cell
gene_list = ['SCX', 'MKX', 'EGR1'] 

#'CENPK', 'CENPP', 'DIAPH3'
#adding 'NES', 'ITGB1' and markers of senescence 'CDKN1B', 'CDKN1A', 'CD34', 'CDK4' still gives the same cell
# Score every cell on gene_list using the values in adata.raw; the result is stored in obs['tnp_score']
sc.tl.score_genes(adata, gene_list, score_name='tnp_score', use_raw=True)
# The start cell is the cell with the highest score (idxmax returns its cell name)
cell_id_with_highest_expression = adata.obs['tnp_score'].idxmax()
print("Cell ID with highest gene expression:", cell_id_with_highest_expression)

In [None]:
# Helper column for plotting only: every cell gets 0.2 (light grey with the 'Greys' colormap)
# and the chosen start cell gets 1 (black), so it stands out in the embeddings
adata.obs['startcell'] = 0.2
#ind = adata.obsm['X_msdiff'][:,5].argmax()]
ind = cell_id_with_highest_expression
adata.obs.loc[ind, 'startcell'] = 1
# Show the annotations of the chosen cell
print(adata.obs[adata.obs.index == ind][['C_scANVI_orig', 'phase', 'age']])
# Use sc.pl.embedding to visualize with color_column
sc.pl.embedding(adata, basis='msdiff', 
                color='startcell', 
                components=["4,5"],
                cmap='Greys', vmin=0, s=50)

In [None]:
sc.pl.embedding(adata, basis='tsne', color=['startcell', 'phase', 'C_scANVI_orig'],
                cmap='Greys', vmin=0, s=50)

In [None]:
sc.pl.embedding(adata, basis='umap', color=['startcell', 'phase', 'C_scANVI_orig'],
                cmap='Greys', vmin=0, s=10)

In [None]:
# Run Palantir on the multiscale space, starting from the cell chosen above (`ind`).
# The result object provides pseudotime, entropy, waypoints and branch probabilities (read below).
pr_res = palantir.core.run_palantir(data=ms_data, early_cell=ind, 
                           knn=100, use_early_cell_as_start=True, 
                           n_jobs=10, num_waypoints=1200)

In [None]:
palantir.plot.plot_palantir_results(adata, pr_res, s=3, embedding_basis='X_umap')
plt.show()

In [None]:
palantir.plot.plot_palantir_results(adata, pr_res, s=3, embedding_basis='X_draw_graph_fa')
plt.show()

In [None]:
# Copy the Palantir results onto the AnnData object so they are saved with it:
# per-cell values go to obs, the waypoints to uns and the branch probabilities to obsm
adata.obs['palantir_pseudotime'] = pr_res.pseudotime
adata.obs['palantir_entropy'] = pr_res.entropy
adata.uns['palantir_waypoints'] = pr_res.waypoints.values
adata.obsm['palantir_fate_probabilities'] = pr_res.branch_probs

In [None]:
adata

In [None]:
# Drop helper/QC columns that should not be written to disk. errors='ignore' skips columns
# that are already gone, so the cell can be re-run (a plain `del` raises KeyError on the
# second run or when one of the columns was never created).
adata.obs = adata.obs.drop(columns=['outlier', 'startcell', 'mt_outlier'], errors='ignore')

In [None]:
adata.write(os.path.join(RESULTS_FOLDERNAME, 'allages_tendonfibro2.h5ad'))

In [None]:
adata=sc.read_h5ad(os.path.join(RESULTS_FOLDERNAME, 'allages_tendonfibro2.h5ad'))

In [None]:
# Leiden clustering at low resolution on the neighbour graph stored in the loaded file
# (presumably the graph last built on X_msdiff before the file was written — confirm)
sc.tl.leiden(adata, resolution=0.2, key_added='fibro_leiden02')
sc.pl.umap(adata, color=['fibro_leiden02'], frameon=False)

In [None]:
adata.obsp

In [None]:
# Recompute the diffusion-map result: only its "T" matrix and eigenvectors were stored on the
# AnnData object, while the imputation below takes the full `dm_res` object
dm_res = palantir.utils.run_diffusion_maps(adata.obsm["X_aug_aff"], knn=80)
#dm_res = palantir.utils.run_diffusion_maps(pd.DataFrame(adata.obsm["X_scANVI"],index=adata.obs_names),knn=100)

In [None]:
adata.X

In [None]:
dm_res

In [None]:
imp_df = palantir.utils.run_magic_imputation(adata, dm_res=dm_res)

In [None]:
masks = palantir.presults.select_branch_cells(adata, eps=0)

In [None]:
palantir.plot.plot_branch_selection(adata)
plt.show()

In [None]:
adata.var_names_make_unique()
sc.pl.violin(
    adata,
    keys=["palantir_pseudotime"],
    groupby="C_scANVI",
    rotation=-90,
)

In [None]:
#del adata.varm
# NOTE(review): this writes 'allages_tendonfibro.h5ad' (no "2" suffix), unlike the
# 'allages_tendonfibro2.h5ad' file read and written above — confirm the different name is intended.
adata.write(os.path.join(RESULTS_FOLDERNAME, 'allages_tendonfibro.h5ad'))