In [None]:
import scanpy as sc
import numpy as np
import scvi
import matplotlib.pyplot as plt

In [None]:
# Load the AnnData object from an .h5ad file given as a path relative to the
# current working directory.
adata = sc.read_h5ad('20251008_Cevrim_XMens_Only_Fib_Dec_Cells.h5ad')

In [None]:
# Replace the active matrix with a copy of the 'raw' layer; the copy keeps later
# in-place edits of adata.X from touching that layer.
# presumably layers['raw'] holds unnormalised counts — confirm against the input file
adata.X = adata.layers['raw'].copy()

In [None]:
# Select highly variable genes (HVGs) on a copy of `adata` from which
# mitochondrial and ribosomal genes have been removed, so that those genes
# cannot be chosen as HVGs. `adata` itself is left untouched.
# assumes adata.var['mito'] and adata.var['ribo'] are boolean columns — TODO confirm
print("Filtering out mitochondrial and ribosomal genes for HVG selection...")
keep_genes = (~adata.var['mito']) & (~adata.var['ribo'])
# The filtered copy must not be re-assigned from `adata.copy()` afterwards:
# doing so silently restores the mito/ribo genes and makes the filter a no-op.
adata_hvg = adata[:, keep_genes].copy()
print(f"  Retained {adata_hvg.shape[0]} cells and {adata_hvg.shape[1]} genes for HVG selection.")

# Rank genes with the 'seurat_v3' flavor on the 'raw' layer and keep the top
# 3000; subset=True drops every other gene from adata_hvg.
# NOTE(review): batch_key='sample' IS passed here, so the selection is
# batch-aware per sample even though the messages describe it as an
# "aggregate" selection — confirm which behaviour is intended.
print("Performing highly-variable gene selection (flavor='seurat_v3') on the aggregate data...")
sc.pp.highly_variable_genes(
    adata_hvg,
    flavor='seurat_v3',
    layer='raw',
    batch_key='sample',
    n_top_genes=3000,
    subset=True
)



In [None]:
# Pull the latent embedding out of the trained model and attach it to
# adata.obsm under a single named key.
# NOTE(review): `model` is not defined in any cell visible in this notebook, so
# this cell fails on a fresh kernel unless the training/loading cell is restored.
# presumably the model was set up on the same cells, in the same order, as
# `adata` — verify before trusting the row alignment.
SCVI_LATENT_KEY = "X_scVI_fibroblasts"
print("Extracting latent representation from the trained model...")
latent = model.get_latent_representation()
adata.obsm[SCVI_LATENT_KEY] = latent

In [None]:
# Normalise adata.X by per-cell total counts, then apply log1p; both calls
# modify `adata` in place (no return value is captured).
# NOTE(review): the next cell overwrites adata.X with layers['log1p'], so the
# matrix computed here is discarded unless it is stored somewhere first —
# confirm whether this cell is still needed.
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

In [None]:
# Make a copy of the 'log1p' layer the active expression matrix.
# NOTE(review): this replaces the matrix produced by the normalize_total/log1p
# cell just above, and raises KeyError if the input file has no 'log1p' layer.
# If the intent was to SAVE the fresh normalisation, the assignment is reversed
# (adata.layers['log1p'] = adata.X.copy()) — confirm.
adata.X = adata.layers['log1p'].copy()

In [None]:
# Build the neighbour graph from the embedding stored in
# adata.obsm['X_scVI_fibroblasts'] (the key written by the latent-extraction
# cell as SCVI_LATENT_KEY) instead of from adata.X.
sc.pp.neighbors(adata, use_rep = 'X_scVI_fibroblasts')

In [None]:
# Compute the UMAP embedding; the result is read back later from
# adata.obsm['X_umap'] when the root cell is plotted.
sc.tl.umap(adata)

In [None]:
# Show the UMAP twice, coloured by the obs columns 'cell_type_coarse' and
# 'timepoint' respectively.
sc.pl.umap(adata, color = ['cell_type_coarse','timepoint'])

# pseudotime!

In [None]:
# Calculate the diffusion map with 10 components; the DPT cell further down
# requests the same number via n_dcs=10, so keep the two values in sync.
sc.tl.diffmap(adata, n_comps=10)

Choose a root cell for the pseudotime ordering.

In [None]:
## One way to choose a root: pick it at random from a subset of interest.

# Cells of interest are those whose adata.obs[category] equals `grouping`.
category = 'timepoint'
grouping = 'uninduced'

uninduced_mask    = adata.obs[category] == grouping
uninduced_indices = np.flatnonzero(uninduced_mask)   # positional (row) indices

# Fail with an explicit message when the label is absent or misspelled;
# np.random.choice would otherwise raise a generic "a must be non-empty" error.
if uninduced_indices.size == 0:
    raise ValueError(
        f"No cells with adata.obs[{category!r}] == {grouping!r}; cannot choose a root cell."
    )

# Seed immediately before the draw so re-running this cell picks the same root.
np.random.seed(42)
# Store a plain Python int rather than a NumPy scalar so the index prints and
# serialises as an ordinary integer.
root_idx = int(np.random.choice(uninduced_indices))
adata.uns['iroot'] = root_idx

In [None]:
# Add a boolean obs column that is True only for the chosen root cell.
adata.obs['is_root'] = False                  # every cell starts as non-root
is_root_col = adata.obs.columns.get_loc('is_root')
# root_idx is a positional row index, hence iloc rather than loc.
adata.obs.iloc[root_idx, is_root_col] = True

In [None]:
# Draw the uncoloured UMAP and overlay the root cell as a red marker.
# show=False makes scanpy hand back the Axes so the marker can be added to it.
ax = sc.pl.umap(adata, color=None, frameon=False, show=False)
ax.scatter(
    *adata.obsm['X_umap'][root_idx],   # unpack the root cell's embedding row as x, y
    s=60, c='red', edgecolor='black', linewidth=0.6
)

# Format the index as a plain integer: repr() of a NumPy scalar prints as
# "np.int64(123)" under NumPy >= 2, which made the message hard to read.
print(f"Using cell #{int(root_idx)} ({adata.obs_names[root_idx]}) as root.")

plt.show()

In [None]:
# Run diffusion pseudotime using 10 diffusion components (matches n_comps=10 in
# the diffusion-map cell). The root cell is the one stored in
# adata.uns['iroot'] by the root-selection cell.
sc.tl.dpt(adata, n_dcs=10)

# The per-cell pseudotime is stored in adata.obs['dpt_pseudotime']; show the
# first rows as a quick sanity check.
adata.obs['dpt_pseudotime'].head()

In [None]:
# Colour the UMAP by diffusion pseudotime using the viridis colormap.
sc.pl.umap(adata, color='dpt_pseudotime', cmap='viridis')

In [None]:
# Persist the annotated object (embedding, root flag, pseudotime) to the
# current working directory.
# NOTE(review): the name 'unintegrated_mouse' does not obviously match the
# input file or the scVI-based embedding used above — confirm this is the
# intended output path and not a leftover from another notebook.
adata.write_h5ad('./unintegrated_mouse.h5ad')