In [None]:
import scanpy as sc
import numpy as np
import scvi
import matplotlib.pyplot as plt

In [None]:
# Read the source AnnData object from the local .h5ad file
# (per its file name, this holds all human cells before any subsetting).
H5AD_PATH = './20251008_Cevrim_Human_All_Cells.h5ad'
adata_full = sc.read_h5ad(H5AD_PATH)

In [None]:
# Keep only the two populations of interest: decidual cells and
# endometrial fibroblasts, as labelled in obs['cell_type_annotations'].
cell_types_to_keep = ['decidual cells', 'endometrial fibroblasts']
cell_type_mask = adata_full.obs['cell_type_annotations'].isin(cell_types_to_keep)

# .copy() materializes the subset as an independent object rather than a view
# of adata_full, so later in-place edits do not touch the full dataset.
adata = adata_full[cell_type_mask, :].copy()

In [None]:
# Build the object used for HVG selection: a copy of `adata` with
# mitochondrial and ribosomal genes removed, so those genes can never be
# selected as highly variable.
# (Assumes var['mito'] and var['ribo'] are boolean flag columns.)
# This filtered copy must be the object passed to the HVG call below. It was
# previously overwritten by an unfiltered `adata.copy()` right after being
# created, which silently discarded the gene filter.
keep_gene_mask = (~adata.var['mito']) & (~adata.var['ribo'])
adata_hvg = adata[:, keep_gene_mask].copy()

# perform highly-variable gene selection on the 'raw' layer, batched by sample;
# subset=True trims adata_hvg down to the selected genes in place
sc.pp.highly_variable_genes(
    adata_hvg,
    flavor='seurat_v3',
    layer='raw',
    batch_key='sample',
    n_top_genes=3000,
    subset=True
)

# add back highly-variable genes to the larger adata object
adata.uns['hvg'] = adata_hvg.uns['hvg'].copy()
hvg_names = adata_hvg.var.index[adata_hvg.var['highly_variable'].to_numpy()]
adata.var['highly_variable'] = adata.var.index.isin(list(hvg_names))
# genes that are not in adata_hvg (filtered out or not selected) get NaN here
hvg_rank_by_gene = dict(zip(adata_hvg.var.index, adata_hvg.var['highly_variable_rank']))
adata.var['highly_variable_rank'] = adata.var.index.map(hvg_rank_by_gene)

In [None]:
# Fix the global scvi-tools seed before the model is built so that weight
# initialisation and the subsequent training are reproducible on a fresh
# Restart & Run All.
SCVI_SEED = 0
scvi.settings.seed = SCVI_SEED

# create integrated scvi model on the HVG-restricted object, reading counts
# from the 'raw' layer and treating 'mck' as the batch covariate
# NOTE(review): HVG selection used batch_key='sample' while the model uses
# batch_key='mck' -- confirm this difference is intentional.
scvi.model.SCVI.setup_anndata(adata_hvg, layer="raw", batch_key='mck')
model = scvi.model.SCVI(adata_hvg, n_layers=2, n_latent=30, gene_likelihood="nb")

In [None]:
# Fit the scVI model with early stopping switched on.
model.train(
    early_stopping=True,
)

In [None]:
# Pull the per-cell latent embedding out of the trained model and attach it to
# the main object. adata_hvg was derived from adata by gene selection only, so
# its rows (cells) line up with adata's rows.
SCVI_LATENT_KEY = "X_scVI"
latent_representation = model.get_latent_representation()
adata.obsm[SCVI_LATENT_KEY] = latent_representation

In [None]:
# Make the 'log1p' layer the active expression matrix; a copy is stored so
# that .X and the layer are separate arrays.
log1p_matrix = adata.layers['log1p']
adata.X = log1p_matrix.copy()

In [None]:
# Build the neighbors graph from the scVI latent space stored in obsm['X_scVI'].
sc.pp.neighbors(
    adata,
    use_rep='X_scVI',
)

In [None]:
# Compute the UMAP embedding (read back later from obsm['X_umap']).
sc.tl.umap(adata)

In [None]:
# Quick visual check of the embedding, one panel per obs column.
umap_overview_keys = ['cell_type_coarse', 'mck']
sc.pl.umap(adata, color=umap_overview_keys)

In [None]:
# 'mck' has to be categorical so that its categories can be paired, in order,
# with a list of colors.
adata.obs['mck'] = adata.obs['mck'].astype('category')
cats = list(adata.obs['mck'].cat.categories)

# custom color per category; any category not listed falls back to grey
color_map = {
    "mck_5": "#6EAB95",  # teal
    "mck_6": "#F3B36D",  # peach-gold
}
fallback_color = "#BBBBBB"

palette = [color_map.get(category, fallback_color) for category in cats]
# stored under '<obs key>_colors', in the same order as the categories
adata.uns['mck_colors'] = palette

outdir = "."

# plot on a white background and save to disk without displaying inline
white_background = {"figure.facecolor": "white", "axes.facecolor": "white"}
umap_kwargs = dict(
    color="mck",
    size=10,
    frameon=False,
    legend_loc="on data",
    title="UMAP colored by mck",
    show=False,
)
with plt.rc_context(white_background):
    sc.pl.umap(adata, **umap_kwargs)
    fig = plt.gcf()
    fig.savefig(f"{outdir}/integrated_pseudotime_mck.png", dpi=300, bbox_inches="tight")
    # close the figure so it is not kept open (or rendered) after saving
    plt.close(fig)

In [None]:
# 'cell_type_coarse' has to be categorical so that its categories can be
# paired, in order, with a list of colors.
adata.obs['cell_type_coarse'] = adata.obs['cell_type_coarse'].astype('category')
cats = list(adata.obs['cell_type_coarse'].cat.categories)

# custom color per category; any category not listed falls back to grey
# NOTE(review): these keys are the labels used to filter on
# 'cell_type_annotations' -- confirm 'cell_type_coarse' uses the same labels,
# otherwise every category is drawn in the grey fallback color.
color_map = {
    "decidual cells": "#00FFFF",          # cyan
    "endometrial fibroblasts": "#FF00FF", # magenta
}
fallback_color = "#BBBBBB"
palette = [color_map.get(category, fallback_color) for category in cats]
adata.uns['cell_type_coarse_colors'] = palette  # Scanpy expects this naming convention

outdir = "."

# plot on a white background and save to disk without displaying inline
white_background = {"figure.facecolor": "white", "axes.facecolor": "white"}
umap_kwargs = dict(
    color="cell_type_coarse",
    size=10,
    frameon=False,
    legend_loc="on data",
    title="UMAP colored by cell_type_coarse",
    show=False,
)
with plt.rc_context(white_background):
    sc.pl.umap(adata, **umap_kwargs)
    fig = plt.gcf()
    fig.savefig(f"{outdir}/integrated_pseudotime_celltype.png", dpi=300, bbox_inches="tight")
    # close the figure so it is not kept open (or rendered) after saving
    plt.close(fig)


# pseudotime!

In [None]:
# Compute a diffusion map with 10 components; the DPT step further down
# is run with the same number of diffusion components.
sc.tl.diffmap(
    adata,
    n_comps=10,
)

## choose a root

In [None]:
# Candidate root-cell barcodes; the human one is selected here.
mouse_root_bc = 'CCGAACGCACTCTCGT'
human_root_bc = 'AACAACCGTCAAGCGA'
root_bc = human_root_bc

# Translate the barcode into a positional index and record it in uns['iroot'].
root_idx = adata.obs.index.get_loc(root_bc)
adata.uns['iroot'] = root_idx

# Flag the root cell in obs: False everywhere, True only at the root position.
adata.obs['is_root'] = False
is_root_col = adata.obs.columns.get_loc('is_root')
adata.obs.iloc[root_idx, is_root_col] = True

In [None]:
# Draw the uncolored embedding and overlay the chosen root cell as a red dot.
embedding = 'X_umap'
ax = sc.pl.embedding(
    adata,
    basis=embedding,
    color=None,
    frameon=False,
    show=False,
)

# coordinates of the root cell in the embedding
root_xy = adata.obsm[embedding][root_idx]
ax.scatter(*root_xy, s=60, c='red', edgecolor='black', linewidth=0.6)

print(f"Using cell #{root_idx!r} ({adata.obs_names[root_idx]}) as root.")

plt.show()

In [None]:
# Run diffusion pseudotime with 10 diffusion components.
sc.tl.dpt(
    adata,
    n_dcs=10,
)

# The result is read from obs['dpt_pseudotime']; preview the first rows.
adata.obs['dpt_pseudotime'].head()

In [None]:
# Show the UMAP embedding colored by diffusion pseudotime.
sc.pl.embedding(
    adata,
    basis='X_umap',
    color='dpt_pseudotime',
    cmap='viridis',
)

In [None]:
outdir = "."

# plot pseudotime on a white background and save to disk without displaying inline
white_background = {"figure.facecolor": "white", "axes.facecolor": "white"}
umap_kwargs = dict(
    color="dpt_pseudotime",
    cmap="viridis",     # standard viridis
    size=10,
    frameon=False,
    legend_loc=None,    # continuous variable → colorbar, not legend
    title="UMAP colored by pseudotime (DPT)",
    show=False,
)
with plt.rc_context(white_background):
    sc.pl.umap(adata, **umap_kwargs)
    fig = plt.gcf()
    fig.savefig(f"{outdir}/integrated_pseudotime_dpt.png", dpi=300, bbox_inches="tight")
    # close the figure so it is not kept open (or rendered) after saving
    plt.close(fig)