In [None]:
import scanpy as sc
import numpy as np
import scvi
import matplotlib.pyplot as plt

In [None]:
# Load the input AnnData object from an .h5ad file (path is relative to the
# notebook's working directory).
adata = sc.read_h5ad('20251008_Cevrim_XMens_Only_Fib_Dec_Cells.h5ad')

In [None]:
# Keep a copy of the loaded object; the filtered subset used for the rest of
# the analysis is derived from `adata_full` further down.
adata_full = adata.copy()

In [None]:
# Replace X with a copy of the 'raw' layer.
# NOTE(review): `adata` is reassigned from `adata_full` in the subsetting cell,
# and `adata_full` was copied before this assignment, so this change does not
# appear to reach any later step -- confirm whether this cell is still needed.
adata.X = adata.layers['raw'].copy()

In [None]:
# Values of obs['timepoint'] whose cells are retained.
timepoints_to_keep = [
    '1_dpi_late',
    '4_dpi_bleeding',
]

# Values of obs['cell_type_fine'] whose cells are dropped; an empty list drops
# nothing. Example of a label that could be listed here:
#   'decidual cells (apoptotic)'
cell_types_to_remove = list()

In [None]:
# Boolean masks over the cells of `adata_full`:
#   timepoint_mask -- True where the cell's timepoint is one we keep
#   cell_type_mask -- True where the cell's fine type is NOT in the removal list
timepoint_mask = adata_full.obs['timepoint'].isin(timepoints_to_keep)
cell_type_mask = ~adata_full.obs['cell_type_fine'].isin(cell_types_to_remove)

# A cell is kept only if it passes both filters.
mask = timepoint_mask & cell_type_mask

# Fail early with a readable message if nothing matched (e.g. a mistyped
# label), rather than letting an empty subset break a later step.
if not mask.any():
    raise ValueError(
        f"No cells match timepoints {timepoints_to_keep!r} after removing "
        f"cell types {cell_types_to_remove!r}; check the labels in adata_full.obs."
    )

In [None]:
# Subset to the selected cells; .copy() gives a standalone object so later
# modifications do not touch `adata_full`.
adata = adata_full[mask].copy()

In [None]:
# Display the summary of the filtered object.
adata

In [None]:
# Highly-variable gene (HVG) selection is run on a working copy so that
# `adata` keeps its full gene set.
# NOTE(review): no mitochondrial/ribosomal gene filtering is applied in this
# cell -- confirm whether it was meant to be.
adata_hvg = adata.copy()

print("Performing highly-variable gene selection (flavor='seurat_v3') on the aggregate data...")
# Selection uses the 'raw' layer and passes 'sample' as the batch key;
# subset=True leaves only the selected genes in `adata_hvg`.
sc.pp.highly_variable_genes(
    adata_hvg,
    n_top_genes=3000,
    flavor='seurat_v3',
    layer='raw',
    batch_key='sample',
    subset=True,
)

# Copy the HVG results back onto the full-gene object.
adata.uns['hvg'] = adata_hvg.uns['hvg'].copy()

# Flag each gene of `adata` that is among the selected HVGs.
adata.var['highly_variable'] = adata.var_names.isin(
    adata_hvg.var.loc[adata_hvg.var['highly_variable']].index
)

# Look up each gene's HVG rank by name; genes absent from `adata_hvg` get NaN.
adata.var['highly_variable_rank'] = adata.var_names.map(
    adata_hvg.var['highly_variable_rank'].to_dict()
)

In [None]:
# Plot the UMAP colored by fine cell type.
# NOTE(review): no UMAP has been computed yet at this point in the notebook, so
# this presumably relies on an embedding already stored in the loaded file --
# confirm.
sc.pl.umap(adata, color = 'cell_type_fine')

In [None]:
# Register the HVG-subset object with scVI: counts come from the 'raw' layer
# and 'sample' is used as the batch key.
scvi.model.SCVI.setup_anndata(adata_hvg, layer="raw", batch_key = 'sample')

In [None]:
# Fix the scvi-tools random seed before the model is built so that weight
# initialisation and training are reproducible across notebook runs.
scvi.settings.seed = 0

# scVI model on the HVG-subset object: 2 hidden layers, 30 latent dimensions,
# negative-binomial gene likelihood.
model = scvi.model.SCVI(adata_hvg, n_layers=2, n_latent=30, gene_likelihood="nb")

In [None]:
# Train the scVI model with early stopping enabled.
model.train(early_stopping=True)

In [None]:
# Key under which the scVI latent representation is stored in adata.obsm.
SCVI_LATENT_KEY = "X_scVI"

# The model was built on `adata_hvg`, but the latent matrix is written to
# `adata`. Make sure both objects still hold the same cells in the same order,
# otherwise rows would be silently assigned to the wrong cells (for example
# after re-running cells out of order).
if not adata_hvg.obs_names.equals(adata.obs_names):
    raise ValueError(
        "adata_hvg and adata do not contain the same cells in the same order; "
        "re-run the notebook from the subsetting step before extracting the latent space."
    )

print("Extracting latent representation from the trained model...")
adata.obsm[SCVI_LATENT_KEY] = model.get_latent_representation()

In [None]:
# Set X to a copy of the 'log1p' layer for the steps that follow.
# (Presumably log-normalised expression, judging by the layer name -- confirm.)
adata.X = adata.layers['log1p'].copy()

In [None]:
# Display the object summary to check the newly added fields.
adata

In [None]:
# Build the neighbor graph on the scVI latent space stored in adata.obsm.
sc.pp.neighbors(adata, use_rep=SCVI_LATENT_KEY)

In [None]:
# Compute a UMAP embedding for the subset (uses the neighbor graph on `adata`).
sc.tl.umap(adata)

In [None]:
# UMAP colored by coarse cell type and by timepoint (one panel each).
sc.pl.umap(adata, color = ['cell_type_coarse','timepoint'])

In [None]:
# UMAP colored by fine cell type.
sc.pl.umap(adata, color = ['cell_type_fine'])

# Diffusion pseudotime (DPT)

In [None]:
# List the distinct fine cell-type labels present in the subset.
adata.obs['cell_type_fine'].unique()

In [None]:
# Calculate the diffusion map with 10 components; the later sc.tl.dpt call
# is run with n_dcs=10.
sc.tl.diffmap(adata, n_comps=10)

In [None]:
# UMAP colored by the 'leiden_scVI_3.0' labels.
# NOTE(review): this column is not computed anywhere in the visible notebook,
# so it presumably comes from the loaded file and predates the neighbor graph
# built above -- confirm.
sc.pl.embedding(adata, basis = 'X_umap', color = 'leiden_scVI_3.0')

In [None]:
# Show the per-cell 'leiden_scVI_3.0' labels.
adata.obs['leiden_scVI_3.0']

## Choose a root cell for pseudotime

In [None]:
## One way to choose a root: randomly select it from a subset of interest.

# Population to draw the root from: cells whose `category` label equals `grouping`.
category = 'cell_type_fine'
grouping = 'endometrial fibroblasts (outer)'

# Positional (0-based) indices of the candidate cells within `adata`.
uninduced_mask    = adata.obs[category] == grouping
uninduced_indices = np.flatnonzero(uninduced_mask)

# Without this guard an empty population makes np.random.choice fail with an
# error that does not mention which label was missing.
if uninduced_indices.size == 0:
    raise ValueError(
        f"No cells with {category} == {grouping!r}; cannot choose a root cell."
    )

# Randomly choose a root cell from the cells of interest; the fixed seed makes
# the same cell come out on every run.
np.random.seed(42)
root_idx = np.random.choice(uninduced_indices)

# Store the root's positional index under 'iroot' for the pseudotime step.
adata.uns['iroot'] = root_idx

In [None]:
# Boolean per-cell flag: True only at the root cell's positional index.
adata.obs['is_root'] = np.arange(adata.n_obs) == root_idx

In [None]:
# Draw the UMAP without coloring and keep the Axes (show=False) so the root
# cell can be overlaid on it.
ax = sc.pl.embedding(
    adata,
    basis='X_umap',
    color=None,
    frameon=False,
    show=False,
)

# Mark the root cell with a red, black-edged dot at its UMAP coordinates.
ax.scatter(*adata.obsm['X_umap'][root_idx], s=60, c='red', edgecolor='black', linewidth=0.6)

# Report which cell was picked (positional index and cell name).
print(f"Using cell #{root_idx!r} ({adata.obs_names[root_idx]}) as root.")

plt.show()

In [None]:
# Run diffusion pseudotime using 10 diffusion components; the root cell is the
# one whose index was stored in adata.uns['iroot'].
sc.tl.dpt(adata, n_dcs=10)

# Pseudotime lives in adata.obs['dpt_pseudotime']; preview the first rows.
adata.obs['dpt_pseudotime'].head()

In [None]:
# UMAP colored by diffusion pseudotime (viridis colormap).
sc.pl.embedding(adata,basis = 'X_umap', color='dpt_pseudotime', cmap='viridis')

In [None]:
# Save the processed subset (latent space, UMAP, root flag, pseudotime) to disk.
# The output path is relative to the notebook's working directory.
adata.write_h5ad('mouse_integrated.h5ad')