In [None]:
# Import dependencies
%matplotlib inline
import os
import numpy as np
import scanpy as sc
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import anndata as ad

import harmony
import harmony.core
import harmony.plot
import palantir
#import scvi

matplotlib.rcParams['font.family'] = 'sans-serif'

# Initialize random seeds.
# Both Python's `random` module and NumPy's global RNG are seeded: seeding
# only `random` leaves any NumPy-based random draws made later in the
# notebook non-reproducible between runs.
import random
SEED = 111
random.seed(SEED)
np.random.seed(SEED)

# Print date and time (provenance of this run):
import datetime
e = datetime.datetime.now()
print(f"Current date and time = {e}")

# set a working directory
# NOTE(review): absolute, machine-specific path; all folder names below are
# resolved relative to it. Adjust when running on another machine.
wdir = "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/embryonic ScAndSp/"
os.chdir( wdir )

# folder structures (relative to `wdir`)
SCVI_FOLDERNAME = "scVI/results/"        # input: scVI/scANVI results are read from here
RESULTS_FOLDERNAME = "Harmony/results/"  # output: .h5ad files written by this notebook
FIGURES_FOLDERNAME = "Harmony/figures/"  # output: figures written by this notebook

# Create the output folders; exist_ok avoids the check-then-create race
for output_dir in (RESULTS_FOLDERNAME, FIGURES_FOLDERNAME):
    os.makedirs(output_dir, exist_ok=True)

# Set folder for saving figures into (used by scanpy's `save=` argument)
sc.settings.figdir = FIGURES_FOLDERNAME

def savesvg(fname: str, fig, folder: str=FIGURES_FOLDERNAME) -> None:
    """
    Write `fig` to disk in SVG (vector) format.

    fname  -- file name of the image, joined onto `folder`
    fig    -- object exposing `savefig` (e.g. a matplotlib figure)
    folder -- destination directory; defaults to FIGURES_FOLDERNAME
    """
    target_path = os.path.join(folder, fname)
    fig.savefig(target_path, format='svg')

# Scanpy runtime settings
# verbosity levels: 0 = errors, 1 = warnings, 2 = info, 3 = hints
sc.settings.verbosity = 3
# Record the package versions used for this run
sc.logging.print_versions()
# Figure defaults: on-screen resolution, resolution of saved files, font size
sc.set_figure_params(
    dpi=150,
    dpi_save=600,
    fontsize=10,
)

In [None]:
# Load the scANVI-annotated tendon subset from the scVI results folder
scanvi_h5ad_path = os.path.join(SCVI_FOLDERNAME, 'tendonsubset_scANVI.h5ad')
adata = sc.read_h5ad(scanvi_h5ad_path)
adata

In [None]:
# Number of cells per value of the `C_scANVI` annotation column
adata.obs['C_scANVI'].value_counts()

In [None]:
# Cast variable (gene) names to plain strings, then de-duplicate them in place
adata.var_names = adata.var_names.astype(str)
adata.var_names_make_unique()

In [None]:
# Capture the category order of `norm_sample_stage`; it is reused below to
# build the ordered `age` categorical.
# NOTE(review): this assumes the stored category order is already chronological - confirm.
cat = list(adata.obs['norm_sample_stage'].cat.categories)
cat

In [None]:
# Inspect the per-cell stage labels before the cells are re-ordered
adata.obs['norm_sample_stage']

In [None]:
# Sort cells by categorical age.
# `age` is an ordered categorical built from `norm_sample_stage`, using the
# category order captured in `cat`.
adata.obs['age'] = pd.Categorical(adata.obs['norm_sample_stage'], categories=cat, ordered=True)

# A stable sort keeps the original relative order of cells within each stage,
# so the resulting cell order is deterministic across runs.
age_sorted_positions = adata.obs['age'].argsort(kind='stable').to_numpy()

# Indexing an AnnData returns a view; `.copy()` materialises it so that the
# later `adata.obsm[...] = ...` assignments modify a real object instead of
# triggering an implicit copy-on-write of the view.
adata = adata[age_sorted_positions].copy()
adata.obs

In [None]:
# Inspect the per-cell stage labels again, now that the cells have been re-ordered
adata.obs['norm_sample_stage']

In [None]:
# NOTE(review): `var_names_make_unique()` was already called right after loading
# and only cells were re-ordered since; this call looks redundant - confirm.
adata.var_names_make_unique()

In [None]:
# List the stage labels exactly as stored; the labels used for the timepoint
# connections must match these strings.
adata.obs['norm_sample_stage'].cat.categories

In [None]:
# Stage labels in the order in which consecutive timepoints are connected.
# NOTE(review): several labels carry a trailing space ('pcw7.2-1 ') while others
# do not ('pcw8.4-2'). They are kept byte-for-byte here; confirm that they match
# the categories of adata.obs['norm_sample_stage'] exactly.
stage_sequence = [
    'pcw6.1',
    'pcw7.2-1 ',
    'pcw7.2-2 ',
    'pcw8.4-1 ',
    'pcw8.4-2',
    'pcw9.0-1 ',
    'pcw9.0-2 ',
    'pcw9.3-1 ',
    'pcw9.3-2',
]

# One row per pair of consecutive stages: column 0 is the earlier stage,
# column 1 the following one. The frame is built in a single step instead of
# being grown row by row with a manual counter.
timepoint_connections = pd.DataFrame(
    list(zip(stage_sequence[:-1], stage_sequence[1:])),
    columns=[0, 1],
)
timepoint_connections

In [None]:
# Per-cell timepoint labels as plain strings (taken from `norm_sample_stage`)
tp = adata.obs['norm_sample_stage'].astype(str)
tp

In [None]:
# Wrap the X_scANVI representation in a DataFrame indexed by cell name
data_df = pd.DataFrame(
    data=adata.obsm["X_scANVI"],
    index=adata.obs_names,
)
data_df

Specifying the number of neighbors (`n_neighbors`) sets the level of granularity used when identifying cell-to-cell relationships. A smaller value, such as 10, gives a more local, fine-grained view of the connections between cells, whereas a larger value captures more distant, global similarities. The cell below uses `n_neighbors=20`.

In [None]:
# Harmony returns two matrices: the augmented affinity matrix (aug_aff)
# and the non-augmented one (aff)
aug_aff, aff = harmony.core.augmented_affinity_matrix(
    data_df,
    tp,
    timepoint_connections,
    n_neighbors=20,
    pc_components=None,
)

# Keep the augmented affinity matrix on the AnnData object (obsm slot)
adata.obsm['X_aug_aff'] = aug_aff

In [None]:
# Compute force-directed layout coordinates from the augmented affinity matrix.
# The second argument passes the cell names (index of `data_df`).
layout = harmony.plot.force_directed_layout(aug_aff, data_df.index)

In [None]:
# Store the layout on the AnnData object twice: once as returned by Harmony,
# and once as a plain array (its `.values`) under the key used for plotting
adata.obsm['X_force_directed_layout'] = layout
layout_coords = adata.obsm['X_force_directed_layout'].values
adata.obsm['force_directed_array'] = layout_coords

# Plot the layout coloured by sample stage and save it via scanpy
sc.pl.embedding(
    adata,
    basis='force_directed_array',
    color='norm_sample_stage',
    frameon=False,
    save='_embryoTendon_Harmony_Age.png',
)

In [None]:
# Harmony's per-timepoint plot of the layout, saved as both SVG and PNG
harmony.plot.plot_timepoints(layout, tp)
for out_name, out_format in (('harmony_plot.svg', 'svg'), ('harmony_plot.png', 'png')):
    plt.savefig(os.path.join(FIGURES_FOLDERNAME, out_name), format=out_format)

In [None]:
# Keep the UMAP coordinates that came with the loaded object under a separate
# key; `sc.tl.umap` is run again further down in this notebook.
adata.obsm['X_umap_orig'] = adata.obsm['X_umap']

In [None]:
# Checkpoint: save the object with the Harmony results.
# The same file path is written again later in the notebook, which overwrites this file.
adata.write(os.path.join(RESULTS_FOLDERNAME, 'tendonsubset_harmony.h5ad'))

## Palantir trajectory detection
Palantir is an algorithm developed by the Pe'er lab to align cells along differentiation trajectories. Palantir models differentiation as a stochastic process where stem cells differentiate to terminally differentiated cells by a series of steps through a low dimensional phenotypic manifold. Palantir effectively captures the continuity in cell states and the stochasticity in cell fate determination.

The first step in Palantir trajectory detection is to project the data onto diffusion maps. Here, the Harmony augmented affinity matrix is used as the input for computing the diffusion maps. See https://github.com/dpeerlab/Palantir for more details on Palantir.

In [None]:
# Diffusion maps computed from the Harmony augmented affinity matrix
# NOTE(review): confirm whether `knn` has any effect when a precomputed
# affinity matrix is passed instead of a data matrix.
aug_affinity = adata.obsm["X_aug_aff"]
dm_res = palantir.utils.run_diffusion_maps(aug_affinity, knn=30)
# Alternatives that were tried (kept for reference):
#dm_res = palantir.utils.run_diffusion_maps(pd.DataFrame(adata.obsm["X_aug_aff"].toarray(),index=adata.obs_names))
#dm_res = palantir.utils.run_diffusion_maps(pd.DataFrame(adata.obsm["X_scANVI"],index=adata.obs_names),knn=100)
#adata.obsp["T"]=dm_res["T"]

# Store the diffusion eigenvectors as a plain array
adata.obsm["X_diff"] = dm_res['EigenVectors'].values

In [None]:
# Neighbour graph on the diffusion components, then PAGA over the C_scANVI groups
sc.pp.neighbors(adata, use_rep="X_diff", n_neighbors=20)
sc.tl.paga(adata, groups="C_scANVI")
sc.pl.paga(
    adata,
    threshold=.02,
    node_size_scale=2,
    fontsize=6,
    fontoutline=1,
    frameon=False,
    save='_xdiff.png',
)

In [None]:
# Recompute the UMAP, initialised from the PAGA positions, and colour it by C_scANVI
sc.tl.umap(
    adata,
    init_pos='paga',
    spread=.6,
)
sc.pl.umap(
    adata,
    color='C_scANVI',
)

In [None]:
# Same UMAP coloured by `ageint`.
# NOTE(review): `ageint` is not created in the cells shown here; presumably it
# is already present in the loaded .h5ad - confirm.
sc.pl.umap(adata, color='ageint')

In [None]:
# Palantir multiscale space derived from the diffusion-map result;
# its values are stored on the AnnData object as a plain array
ms_data = palantir.utils.determine_multiscale_space(dm_res)
multiscale_coords = ms_data.values
adata.obsm["X_msdiff"] = multiscale_coords
ms_data

In [None]:
%%time
# t-SNE on the multiscale diffusion space; the learning rate is set to
# (number of cells) / 12
tsne_learning_rate = adata.shape[0]/12
sc.tl.tsne(
    adata,
    use_rep="X_msdiff",
    perplexity=50,
    learning_rate=tsne_learning_rate,
)

In [None]:
# t-SNE coloured by C_scANVI, saved as SVG through scanpy
sc.pl.tsne(
    adata,
    color="C_scANVI",
    frameon=False,
    save='_msdiff.svg',
)
#plt.savefig(os.path.join(FIGURES_FOLDERNAME, '20pcw_tendonfibro_msdiff_TSNE.svg'), format='svg')
#plt.savefig(os.path.join(FIGURES_FOLDERNAME, '20pcw_tendonfibro_msdiff_TSNEpng.png'), format='png')
# Image(filename=os.path.join(FIGURES_FOLDERNAME,'msdiff_TSNEpng.png'))

In [None]:
# Rebuild the neighbour graph on the multiscale space (this replaces the graph
# computed earlier on X_diff), then compute a force-directed graph layout
# initialised from the t-SNE coordinates
sc.pp.neighbors(adata, use_rep="X_msdiff", n_neighbors=20)
sc.tl.draw_graph(
    adata,
    init_pos="X_tsne",
    maxiter=500,
)

In [None]:
# Force-directed graph layout coloured by `ageint`
sc.pl.draw_graph(
    adata,
    color="ageint",
    frameon=False,
    #save='_allagestendonfibro__age_msdiff.svg'
)

In [None]:
# Force-directed graph layout coloured by `C_scANVI`
sc.pl.draw_graph(
    adata,
    color="C_scANVI",
    frameon=False,
    #save='_allagestendonfibro_msdiff.svg'
)
#plt.savefig(os.path.join(FIGURES_FOLDERNAME, '20pcw_tendonfibro_msdiff_graph.png'), format='png')
# Image(filename=os.path.join(FIGURES_FOLDERNAME,'msdiff_graph.png'))

In [None]:
# Save again to the same path as the earlier checkpoint, overwriting it with
# the object that now also holds the results added since then.
adata.write(os.path.join(RESULTS_FOLDERNAME, 'tendonsubset_harmony.h5ad'))

In [None]:
# Rebuild `ms_data` from the stored X_msdiff array, indexed by cell name
ms_data = pd.DataFrame(
    data=adata.obsm['X_msdiff'],
    index=adata.obs_names,
)
ms_data

In [None]:
# Multiscale components 1 and 2, one panel per entry in `color_keys`
color_keys = ['MKI67', 'DIAPH2', 'TOP2A', 'CENPK', 'CENPP', 'C_scANVI']
sc.pl.embedding(
    adata,
    basis='msdiff',
    color=color_keys,
    components=["1, 2"],
    ncols=3,
)

In [None]:
# Multiscale components 1 and 2, one panel per entry in `color_keys`
color_keys = ['MKX', 'SCX', 'TNMD', 'GAS2', 'ABI3BP', 'KERA', 'age', 'ageint']
sc.pl.embedding(
    adata,
    basis='msdiff',
    color=color_keys,
    components=["1, 2"],
    ncols=3,
)

In [None]:
# Multiscale components 2 and 3, one panel per entry in `color_keys`
color_keys = ['MKX', 'SCX', 'GAS2', 'ABI3BP', 'SPARC', 'age']
sc.pl.embedding(
    adata,
    basis='msdiff',
    color=color_keys,
    components=["2, 3"],
    ncols=3,
)

In [None]:
# Multiscale components 1 and 2, one panel per entry in `color_keys`
color_keys = ['VIM', 'NES', 'THY1', 'ITGB1', 'TPPP3', 'CD44', 'TWIST2', 'C_scANVI']
sc.pl.embedding(
    adata,
    basis='msdiff',
    color=color_keys,
    components=["1, 2"],
    ncols=3,
)