### Notebook for identifying the initial and terminal states of fetal stem cells with CellRank
- **Developed by:** Anna Maguza
- **Place:** Wuerzburg Institute of Systems Immunology
- **Date:** 6th November 2023

### Import packages

In [1]:
import numpy as np
import pandas as pd

import cellrank as cr
import scanpy as sc
from cellrank.kernels import RealTimeKernel

from moscot.problems.time import TemporalProblem


In [2]:
import matplotlib.pyplot as plt

### Set up the cells

In [3]:
# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline

In [4]:
# Logging verbosity for scanpy and CellRank.
sc.settings.verbosity = 3
cr.settings.verbosity = 2

# Figure defaults: no axis frame, 100 dpi.
sc.settings.set_figure_params(dpi=100, frameon=False)

# Record the package versions of this session for reproducibility.
sc.logging.print_versions()

-----
anndata     0.10.3
scanpy      1.9.6
-----
PIL                         10.1.0
absl                        NA
anyio                       NA
arrow                       1.3.0
asttokens                   NA
attr                        23.1.0
attrs                       23.1.0
babel                       2.13.1
brotli                      1.1.0
cellrank                    2.0.0
certifi                     2023.07.22
cffi                        1.16.0
charset_normalizer          3.3.2
chex                        0.1.8
cloudpickle                 3.0.0
colorama                    0.4.6
comm                        0.1.4
cycler                      0.12.1
cython_runtime              NA
dateutil                    2.8.2
debugpy                     1.8.0
decorator                   5.1.1
defusedxml                  0.7.1
docrep                      0.3.2
etils                       1.5.1
executing                   2.0.1
fastjsonschema              NA
flax                        0.7.5
fqd

In [5]:
import warnings

warnings.simplefilter("ignore", category=UserWarning)

### Data Upload

In [6]:
# Load the AnnData object from disk.
# The path is stored in `input_path` rather than `input`, so the Python
# builtin `input()` is not shadowed for the rest of the kernel session.
# NOTE(review): the path is relative to the notebook's working directory.
input_path = 'FetalSC_data/Fetal_healthy_stem_cells_CellRank_experimental_time.h5ad'
adata = sc.read_h5ad(input_path)

# Identify initial and terminal states

### Initialize an estimator

In [7]:
# Rebuild the RealTimeKernel from what is already stored in `adata` under the
# key "T_fwd" (presumably written by an earlier notebook -- confirm).
# NOTE(review): the name `vk` is kept because the next cell references it.
vk = RealTimeKernel.from_adata(adata, key="T_fwd")

In [8]:
# Wrap the kernel in a GPCCA estimator; `g` is used by every cell below.
g = cr.estimators.GPCCA(vk)
# Show the estimator summary (no initial/terminal states are set yet).
print(g)

GPCCA[kernel=RealTimeKernel[n=7817], initial_states=None, terminal_states=None]


In [9]:
# Fit the estimator: compute the Schur decomposition and the macrostates.
# `n_states=[4, 12]` is an interval; the number of macrostates is picked
# inside it via the minChi criterion (see the log output of this cell).
# NOTE(review): `cluster_key="cluster"` presumably refers to `adata.obs["cluster"]`,
# whose labels are used to name the macrostates -- confirm.
g.fit(
    cluster_key="cluster",
    n_states=[4, 12],
)

Computing Schur decomposition
Adding `adata.uns['eigendecomposition_fwd']`
       `.schur_vectors`
       `.schur_matrix`
       `.eigendecomposition`
    Finish (0:00:00)
Calculating minChi criterion in interval `[4, 12]`
Computing `6` macrostates
Adding `.macrostates`
       `.macrostates_memberships`
       `.coarse_T`
       `.coarse_initial_distribution
       `.coarse_stationary_distribution`
       `.schur_vectors`
       `.schur_matrix`
       `.eigendecomposition`
    Finish (0:00:00)


GPCCA[kernel=RealTimeKernel[n=7817], initial_states=None, terminal_states=None]

In [None]:
# Plot all macrostates found by `g.fit`, with discrete (per-state) colouring.
g.plot_macrostates(
    which="all",
    discrete=True,
    legend_loc="right",
    s=100,
)

In [None]:
# Classify macrostates as terminal, then plot only the terminal ones.
g.predict_terminal_states()
g.plot_macrostates(
    which="terminal",
    legend_loc="right",
    s=100,
)

In [None]:
# The previous plot shows only the cells most confidently assigned to each
# terminal state. Here the underlying continuous distribution of each
# terminal macrostate is plotted instead.
# Each cell is coloured by the terminal state it most likely belongs to;
# higher colour intensity reflects greater confidence in the assignment.
g.plot_macrostates(
    which="terminal",
    discrete=False,
)

In [None]:
# Classify macrostates as initial, then plot only the initial ones.
# NOTE(review): `allow_overlap=True` presumably permits an initial state to
# coincide with a terminal state -- confirm against the CellRank docs.
g.predict_initial_states(allow_overlap=True)
g.plot_macrostates(
    which="initial",
    legend_loc="right",
    s=100,
)

In [14]:
# Display the estimator summary, which now lists the predicted initial and
# terminal states.
g

GPCCA[kernel=RealTimeKernel[n=7817], initial_states=['ASS1+_SLC40A1+_SC_4'], terminal_states=['ASS1+_SLC40A1+_SC_1', 'ASS1+_SLC40A1+_SC_3', 'RPS10+_RPS17+_SC_2']]

In [None]:
# Plot the coarse-grained transition matrix (`.coarse_T`, computed by `g.fit`),
# i.e. the transition matrix at the level of macrostates rather than single cells.
g.plot_coarse_T()