# Analysis of RPM (CellTagged Pre- and Post-Cre) and RPMA (CellTagged Pre-Cre) basal-organoid-derived allograft tumours
## Ireland et al 2024 BioRxiv (Accepted at Nature, 2025)
## Updated 05.2025
### Related to Fig. 3e-l (Final Fig. 2e-l), Fig. 4c-j (Final Fig. 3c-j), Extended Data Fig. 5c,e,f, and Extended Data Fig. 6a-f


In [None]:
#Import other relevant packages
import numpy as np
import pandas as pd
from matplotlib import rcParams
import os
import scanpy as sc

import matplotlib as mpl
import matplotlib.pyplot as plt

#For nice color schemes
import cmocean

#For barplots
import seaborn as sns

#Import scVI
import scvi
from scvi.model.utils import mde

scvi.settings.verbosity = 40

#Import scVI
import scvi
from scvi.model.utils import mde

scvi.settings.verbosity = 40

In [None]:
# Scanpy logging level: 0 = errors, 1 = warnings, 2 = info, 3 = hints
sc.settings.verbosity = 3
# Print scanpy's header (package/version information) for the record
sc.logging.print_header()
# Default look for scanpy figures in this notebook
sc.settings.set_figure_params(facecolor='white', dpi=80)

In [None]:
# Switch to the project working directory. All relative paths used below
# (10x matrices read in, .h5ad/.csv files written out) resolve against it,
# so adjust this hard-coded path when running on another system.
os.chdir('/work/asi16')

## First, perform clustering of all RPM and RPMA allograft tumours for UMAP projection as in Fig. 3

## 1. Read in RPM and RPMA Allografts celltagged pre Cre

In [None]:
# Read in the new RPMA TBO allograft sample and the RPM TBO allograft sample
# (both CellTagged pre-Cre) from their filtered 10x feature-barcode matrices.
# Genes are indexed by symbol and the parsed matrices are cached on disk.
RPMA_allo = sc.read_10x_mtx(
    '8972_RPMA_TBO_scRNAseq_Allo_031824/031924_RPMA_TBO_Allo_count_nocellplex/filtered_feature_bc_matrix',
    var_names='gene_symbols',
    cache=True,
)
RPM_allo = sc.read_10x_mtx(
    'MedGenome_FASTQ_123123/RPM_TBO_Allo/042024_RPM_TBO_Allo_CustomCount/filtered_feature_bc_matrix',
    var_names='gene_symbols',
    cache=True,
)


## 2. Read in new RPM Allografts celltagged post-Cre

In [None]:
# 042525: read in the new RPM allografts that were CellTagged post-Cre.
# "Allo3" is the sample from prior analyses in Fig. 2 (Final Fig. 1);
# "Allo4" is an additional biological replicate.
RPM_allo3 = sc.read_10x_mtx(
    '042225_RPM_TBO_CellTagPostCre/042225_RPMTBOAllo_CTpostCre_1/outs/filtered_feature_bc_matrix',
    var_names='gene_symbols',
    cache=True,
)
RPM_allo4 = sc.read_10x_mtx(
    '042225_RPM_TBO_CellTagPostCre/042325_RPM_Allo2_CTpostCre/outs/filtered_feature_bc_matrix',
    var_names='gene_symbols',
    cache=True,
)


In [None]:
# Annotate every sample with constant per-sample metadata in .obs.
# Columns: Genotype, GenoCT (genotype + CellTag timing), Model, Cre
# (CellTag timing relative to Cre), UnID (unique sample ID) and Batch.
# Note that the two post-Cre samples share a single Batch label.
sample_metadata = [
    (RPMA_allo, {
        'Genotype': 'RPMA',
        'GenoCT': 'RPMA_CTpreCre',
        'Model': 'Allograft',
        'Cre': 'CT_pre-Cre',
        'UnID': 'RPMA_Allo',
        'Batch': 'RPMA_Allo',
    }),
    (RPM_allo, {
        'Genotype': 'RPM',
        'GenoCT': 'RPM_CTpreCre',
        'Model': 'Allograft',
        'Cre': 'CT_pre-Cre',
        'UnID': 'RPM_Allo_New',
        'Batch': 'RPM_Allo_New',
    }),
    (RPM_allo3, {
        'Genotype': 'RPM',
        'GenoCT': 'RPM_CTpostCre',
        'Model': 'Allograft',
        'Cre': 'CT_post-Cre',
        'UnID': 'RPM_Allo3',
        'Batch': 'RPM_CTpostCre',
    }),
    (RPM_allo4, {
        'Genotype': 'RPM',
        'GenoCT': 'RPM_CTpostCre',
        'Model': 'Allograft',
        'Cre': 'CT_post-Cre',
        'UnID': 'RPM_Allo4',
        'Batch': 'RPM_CTpostCre',
    }),
]

for sample, annotations in sample_metadata:
    for column, value in annotations.items():
        sample.obs[column] = value

## 2. Concatenate all RPM and RPMA basal-organoid-derived allograft datasets

In [None]:
# Merge the four allograft datasets into a single AnnData object.
# RPMA_allo is the base object; genes are combined with an outer join and
# cell barcodes are left unmodified (index_unique=None).
other_samples = [RPM_allo, RPM_allo3, RPM_allo4]
adata = RPMA_allo.concatenate(other_samples, join="outer", index_unique=None)

In [None]:
# The samples were concatenated with index_unique=None, so barcodes shared
# between samples would be duplicated in the index; make them unique here.
adata.obs_names_make_unique()

In [None]:
# Number of cells per sample (UnID) before QC filtering.
# GroupBy.size() counts rows per group directly, without applying a Python
# function to each sub-DataFrame.
adata.obs.groupby("UnID").size()

## 3. Perform QC

In [None]:
# --- QC metrics ---
# Flag mitochondrial genes (symbols starting with 'mt-') in .var['mito'] and
# compute per-cell / per-gene QC metrics, including pct_counts_mito.
is_mito = adata.var_names.str.startswith('mt-')
adata.var['mito'] = is_mito
sc.pp.calculate_qc_metrics(adata, qc_vars=['mito'], inplace=True, percent_top=None, log1p=False)

# Remove barcodes with fewer than 200 detected genes.
# (No gene-level min_cells filter is applied at this stage.)
sc.pp.filter_cells(adata, min_genes=200)

# Re-annotate and recompute the QC metrics on the remaining cells
is_mito = adata.var_names.str.startswith('mt-')
adata.var['mito'] = is_mito
sc.pp.calculate_qc_metrics(adata, qc_vars=['mito'], inplace=True, percent_top=None, log1p=False)

In [None]:
# Distribution of the main QC metrics across all cells
qc_keys = ['n_genes_by_counts', 'total_counts', 'pct_counts_mito']
sc.pl.violin(adata, qc_keys, multi_panel=True, jitter=0.4)

# Library size against mitochondrial fraction, then against genes detected
for y_key in ('pct_counts_mito', 'n_genes_by_counts'):
    sc.pl.scatter(adata, x='total_counts', y=y_key)

In [None]:
# Filter cells on QC thresholds. A cell is kept only if it has
#   2000 < n_genes_by_counts < 8000, total_counts > 2000, pct_counts_mito < 15.
# All thresholds are combined into one boolean mask and the result is copied,
# so `adata` is a real AnnData object rather than a view of a view (later
# cells write to adata.obs and adata.layers).
qc_obs = adata.obs
passes_qc = (
    (qc_obs.n_genes_by_counts < 8000)
    & (qc_obs.n_genes_by_counts > 2000)
    & (qc_obs.total_counts > 2000)
    & (qc_obs.pct_counts_mito < 15)
)
adata = adata[passes_qc.to_numpy(), :].copy()

In [None]:
# Number of cells per sample (UnID) remaining after QC filtering
adata.obs.groupby("UnID").size()

In [None]:
# Number of cells per Batch label remaining after QC filtering
adata.obs.groupby("Batch").size()

In [None]:
# Number of cells per genotype / CellTag-timing group after QC filtering
adata.obs.groupby("GenoCT").size()

In [None]:
# Number of cells per genotype (RPM vs RPMA) after QC filtering
adata.obs.groupby("Genotype").size()

## 4. ID HVGs via Poisson gene selection

In [None]:
# Prepare inputs for HVG selection and scVI.
# Per-cell library size on a log scale: log(1 + total_counts)
adata.obs["log1p_total_counts"] = np.log1p(adata.obs["total_counts"])

# 'counts' layer: a copy of X as it is at this point
adata.layers["counts"] = adata.X.copy()

# 'norm' layer: a second copy of X, normalised per cell to a total of 1e4.
# Only this layer is normalised; X itself is left untouched.
adata.layers['norm'] = adata.X.copy()
sc.pp.normalize_total(adata, layer="norm", target_sum=1e4)

In [None]:
# Highly variable genes via scanpy ('seurat_v3' flavour on the 'counts' layer).
# Genes are only flagged in .var (subset=False), not removed.
# Note: running this with a batch_key that has few cells per batch gives a
# "reciprocal condition number" error, so no batch_key is used here.
sc.pp.highly_variable_genes(
    adata,
    flavor="seurat_v3",
    layer="counts",
    n_top_genes=5000,
    subset=False,
)

In [None]:
# Per-gene mean of X and the fraction of cells in which the gene is zero
n_cells = adata.shape[0]
gene_means = np.array(adata.X.mean(0))[0]
n_cells_expressing = np.array((adata.X > 0).sum(0))[0]
adata.var['mean_'] = gene_means
adata.var['frac_zero'] = 1 - n_cells_expressing / n_cells

# Scatter of mean expression (log x-axis) against fraction of zeros
fig, ax = plt.subplots(figsize=(9,6))
ax.scatter(adata.var.mean_, adata.var.frac_zero, s=1)
ax.set_xscale("log")

In [None]:
# Poisson gene selection (scvi-tools): top 5000 genes, computed per Batch.
# inplace=False returns the result table instead of writing into adata.var.
df_poisson = scvi.data.poisson_gene_selection(
    adata,
    batch_key="Batch",
    n_top_genes=5000,
    inplace=False,
)

# Inspection helpers; their values are only displayed if run on their own.
# Selected genes ordered by zero-enrichment rank:
df_poisson[df_poisson["highly_variable"]].sort_values('prob_zero_enrichment_rank')
# Overlap between the Poisson selection and scanpy's HVG flags:
pd.crosstab(df_poisson["highly_variable"], adata.var["highly_variable"])

# Boolean per-gene mask of the Poisson-selected genes
is_hvg = df_poisson["highly_variable"]

# Keep the full selection table alongside the genes
adata.varm['df_poisson'] = df_poisson

# Gene-subsetted copy that scVI is trained on
adata_query = adata[:, is_hvg].copy()
print(adata_query)

## 5. Set up and train scVI model on HVGs

In [None]:
# Register the HVG-subsetted data with scVI: model the 'counts' layer, treat
# 'Batch' as the batch variable and pct_counts_mito as a continuous covariate.
continuous_covariates = ["pct_counts_mito"]
scvi.model.SCVI.setup_anndata(
    adata_query,
    batch_key='Batch',
    layer="counts",
    continuous_covariate_keys=continuous_covariates,
)

# Instantiate the scVI model with default settings
model = scvi.model.SCVI(adata_query)

In [None]:
# Train the scVI model.
# Up to 500 epochs, with early stopping (patience of 20); progress bar,
# model summary and checkpointing are enabled.
train_kwargs = {
    "max_epochs": 500,
    "early_stopping": True,
    "early_stopping_patience": 20,
    "enable_checkpointing": True,
    "enable_model_summary": True,
    "enable_progress_bar": True,
}

# Be sure a GPU is enabled before running this
model.train(**train_kwargs)

In [None]:
# Extract the scVI latent representation of the cells and store it on the
# full object, where it is used for the neighbour graph and UMAP.
latent_key = "X_scVI_1.1"
latent = model.get_latent_representation()
adata.obsm[latent_key] = latent

## 6. Perform leiden clustering

In [None]:
# Neighbour graph on the scVI latent space, followed by the UMAP embedding
rep_key = "X_scVI_1.1"
sc.pp.neighbors(adata, use_rep=rep_key)
sc.tl.umap(adata, min_dist=0.5)

# Leiden clustering on that neighbour graph, stored in .obs['leiden_scVI_1.1']
sc.tl.leiden(adata, resolution=1, key_added="leiden_scVI_1.1")

## 7. Visualize clustering, QC metrics, and marker expression to determine tumour vs non-tumour clusters

In [None]:
# UMAPs coloured by each QC metric, two panels per row
qc_metrics = ["n_genes_by_counts", "total_counts", "pct_counts_mito", "log1p_total_counts"]
sc.pl.umap(adata, color=qc_metrics, ncols=2, s=3, cmap="cubehelix_r")

In [None]:
# One UMAP per annotation: cluster labels are drawn on the data, sample and
# genotype annotations get a legend in the right margin.
umap_panels = [
    ("leiden_scVI_1.1", dict(legend_loc="on data", legend_fontsize='large')),
    ("UnID", dict(legend_loc="right margin")),
    ("Genotype", dict(legend_loc="right margin")),
    ("GenoCT", dict(legend_loc="right margin")),
]
for obs_key, legend_kwargs in umap_panels:
    fig, ax = plt.subplots(figsize=(8, 6))
    sc.pl.umap(adata, color=obs_key, ax=ax, s=30, frameon=False, save=False, **legend_kwargs)

# Additional per-cluster QC plots
adata.obs['cluster'] = adata.obs["leiden_scVI_1.1"].copy()

# Boxen plots of log1p total counts and of percent mitochondrial counts
for qc_metric in ("log1p_total_counts", "pct_counts_mito"):
    fig, ax = plt.subplots(figsize=(20,6))
    sns.boxenplot(data=adata.obs, x="cluster", y=qc_metric, ax=ax)


In [None]:
# Dot plot of key cell-type markers per Leiden cluster.
# Markers are grouped by the cell type they indicate; the groups are then
# flattened, in order, into the gene list passed to the dot plot.
marker_groups = {
    "fibroblast": ["Col14a1", "Acta2", "Myh11", "Tagln", "Mustn1"],
    "lipofibroblast/osteoblastic": ["Lpl", "Lipa", "Pparg", "Plin2", "Ear1", "Spp1"],
    "macrophage/myeloid": ["Ptprc", "Mertk", "Mrc1", "Ly75", "Adgre1", "Itgax", "Cd68",
                           "Csf1r", "Mafb", "Msr1", "Arg1", "Adgre4"],
    "monocyte": ["Cx3cr1", "Itgam", "Cd14"],
    "neutrophil": ["S100a9", "S100a8", "Mmp9", "Csf3r", "Cxcr2", "Ly6g"],
    "DC": ["Batf3", "Xcr1", "Clec9a", "Ccl17", "Ccl22"],
    "T cell": ["Cd3d", "Cd3e", "Cd3g", "Cd28", "Cd8a", "Cd4", "Foxp3"],
    "NK": ["Gzma", "Ncr1", "Gzmb"],
    "B cell": ["Fcmr", "Cd19", "Fcer2a", "Pax5", "Cd22", "Cd79b", "Cd79a"],
    "plasma": ["Slamf7", "Prdm1"],
    "endothelial": ["Mcam", "Pecam1", "Icam2", "Cd36", "Cd93"],
    "AT1": ["Pdpn", "Cav1", "Cav2", "Hopx", "Timp3", "Sema3f", "Serpine1"],
    "AT2": ["Abca3", "Muc1", "Sftpa1", "Sftpb", "Sftpd", "Scd1"],
    "club": ["Scgb1a1", "Cyp2f2", "Scgb3a2", "Scgb3a1", "Lypd2"],
    "goblet": ["Muc5ac", "Muc5b"],
    "ciliated": ["Tubb4a", "Foxa3", "Foxj1", "Rfx2", "Rfx3", "Trp73"],
    "basal": ["Krt5", "Krt17", "Krt15", "Trp63", "Id1", "Icam1", "Epas1", "Aqp3", "Sfn",
              "Perp", "Fxyd3", "Sdc1", "Gstm2", "F3", "Abi3bp", "Adh7"],
    "NE/neuronal": ["Bex2", "Ascl1", "Meis2", "Hes6", "Hoxb5", "Foxa2", "Sox4", "Rora",
                    "Isl1", "Id4", "Neurod1", "Neurod4", "Nhlh1", "Nhlh2"],
    "tuft": ["Pou2f3", "Trpm5", "Ascl2", "Ehf",
             "Lrmp", "Gng13", "Ltc4s", "Alox5ap", "Avil", "Alox5", "Atp2a3", "Plk2"],
    "ionocyte": ["Cftr", "Ascl3", "Stap1", "Atp6v1c2", "Pparg", "Rasd1", "Slc12a2"],
    "lung lineage": ["Gja1", "Nkx2-1", "Epcam"],
    "stem-like": ["Yap1", "Wwtr1", "Sox2", "Cd44", "Hes1"],
    "tumor markers": ["Venus", "fLuc", "Top2a", "Mki67", "GFP.CDS", "CellTag.UTR"],
}
more_types = [gene for group in marker_groups.values() for gene in group]

sc.set_figure_params(scanpy=True, fontsize=20)

# Values come from the 'norm' layer and are scaled per gene
# (standard_scale='var'); the figure is built but not shown (show=False).
sc.pl.dotplot(
    adata,
    var_names=more_types,
    groupby='leiden_scVI_1.1',
    layer="norm",
    use_raw=False,
    standard_scale='var',
    color_map="cmo.dense",
    var_group_rotation=35,
    figsize=[28,10],
    show=False,
    save=False,
)

In [None]:
# Rank marker genes for every Leiden cluster (Wilcoxon test on the 'norm'
# layer) to help decide which clusters to filter out.
sc.tl.rank_genes_groups(
    adata,
    groupby='leiden_scVI_1.1',
    method='wilcoxon',
    use_raw=False,
    layer='norm',
)

# Top 50 ranked gene names per cluster (one column per cluster)
ranked_names = pd.DataFrame(adata.uns['rank_genes_groups']['names'])
ranked_names.head(50)

In [None]:
# Checkpoint: save the iteration-1 object (QC-filtered cells with the
# X_scVI_1.1 latent space, UMAP and leiden_scVI_1.1 clusters) to disk.
adata.write_h5ad("042525_RPM_RPMA_TBOAllo_CellTagAnalysis_New_5kHVG.h5ad")

In [None]:
# First, remove obvious non-tumour clusters and low-QC clusters:
#   27 - fibroblast
#   26 - endothelial
#   22 - immune
#   24 - fibroblast
#   14, 18 - low QC
bad_clust = ['27','26','22','24','14','18']

# Boolean mask of the cells that are NOT in any of the clusters above
in_bad_cluster = adata.obs['leiden_scVI_1.1'].isin(bad_clust)
to_keep = ~in_bad_cluster

# Carry the retained cells over into a new, independent AnnData object
adata2 = adata[to_keep].copy()

### From here, continue iterating through runs of scvi modeling until no clear low quality cell clusters or non-tumor cells are observed.
### Start back up at "ID HVG and set up and train scvi model" and run through subsetting out "bad clusters".
### Each time clusters are removed, model is run again to recluster.

## ITERATION 2
### Final iteration for UMAP presented for the first time in Fig. 3e (Final Fig. 2e)

In [None]:
# Iteration 2: highly variable genes via scanpy on the cleaned object
# ('seurat_v3' flavour on the 'counts' layer; genes are flagged, not removed).
# Note: running this with a batch_key that has few cells per batch gives a
# "reciprocal condition number" error, so no batch_key is used here.
sc.pp.highly_variable_genes(
    adata2,
    flavor="seurat_v3",
    layer="counts",
    n_top_genes=5000,
    subset=False,
)

In [None]:
# Per-gene mean of X and the fraction of cells in which the gene is zero
n_cells = adata2.shape[0]
gene_means = np.array(adata2.X.mean(0))[0]
n_cells_expressing = np.array((adata2.X > 0).sum(0))[0]
adata2.var['mean_'] = gene_means
adata2.var['frac_zero'] = 1 - n_cells_expressing / n_cells

# Scatter of mean expression (log x-axis) against fraction of zeros
fig, ax = plt.subplots(figsize=(9,6))
ax.scatter(adata2.var.mean_, adata2.var.frac_zero, s=1)
ax.set_xscale("log")

In [None]:
# Poisson gene selection (scvi-tools) on the cleaned object: top 5000 genes,
# computed per Batch. inplace=False returns the result table.
df_poisson = scvi.data.poisson_gene_selection(
    adata2,
    batch_key="Batch",
    n_top_genes=5000,
    inplace=False,
)

# Inspection helpers; their values are only displayed if run on their own.
# Selected genes ordered by zero-enrichment rank:
df_poisson[df_poisson["highly_variable"]].sort_values('prob_zero_enrichment_rank')
# Overlap between the Poisson selection and scanpy's HVG flags:
pd.crosstab(df_poisson["highly_variable"], adata2.var["highly_variable"])

# Boolean per-gene mask of the Poisson-selected genes
is_hvg = df_poisson["highly_variable"]

# Keep the full selection table alongside the genes
adata2.varm['df_poisson'] = df_poisson

# Gene-subsetted copy that scVI is trained on
adata_query = adata2[:, is_hvg].copy()
print(adata_query)

In [None]:
# Register the iteration-2 HVG subset with scVI: model the 'counts' layer,
# treat 'Batch' as the batch variable and pct_counts_mito as a continuous
# covariate.
continuous_covariates = ["pct_counts_mito"]
scvi.model.SCVI.setup_anndata(
    adata_query,
    batch_key='Batch',
    layer="counts",
    continuous_covariate_keys=continuous_covariates,
)

# Fresh scVI model for this iteration
model = scvi.model.SCVI(adata_query)

In [None]:
# Train the iteration-2 scVI model.
# Up to 500 epochs, with early stopping (patience of 20); progress bar,
# model summary and checkpointing are enabled.
train_kwargs = {
    "max_epochs": 500,
    "early_stopping": True,
    "early_stopping_patience": 20,
    "enable_checkpointing": True,
    "enable_model_summary": True,
    "enable_progress_bar": True,
}

# Be sure a GPU is enabled before running this
model.train(**train_kwargs)

In [None]:
# Extract the iteration-2 scVI latent representation and store it on adata2,
# where it is used for the neighbour graph and UMAP.
latent_key = "X_scVI_1.2"
latent = model.get_latent_representation()
adata2.obsm[latent_key] = latent

### Perform final leiden clustering

In [None]:
# Neighbour graph on the iteration-2 scVI latent space, then the UMAP embedding
rep_key = "X_scVI_1.2"
sc.pp.neighbors(adata2, use_rep=rep_key)
sc.tl.umap(adata2, min_dist=1)

# Final Leiden clustering, stored in .obs['leiden_scVI_1.2']
sc.tl.leiden(adata2, resolution=0.75, key_added="leiden_scVI_1.2")


### Visualize UMAP, clustering, sample distribution, QC metrics, and cluster markers

In [None]:
# One UMAP per annotation: cluster labels are drawn on the data, sample and
# genotype/CellTag annotations get a legend in the right margin.
umap_panels = [
    ("leiden_scVI_1.2", dict(legend_loc="on data", legend_fontsize='large')),
    ("UnID", dict(legend_loc="right margin")),
    ("GenoCT", dict(legend_loc="right margin")),
]
for obs_key, legend_kwargs in umap_panels:
    fig, ax = plt.subplots(figsize=(8, 6))
    sc.pl.umap(adata2, color=obs_key, ax=ax, s=30, frameon=False, save=False, **legend_kwargs)

# Additional per-cluster QC plots
adata2.obs['cluster'] = adata2.obs["leiden_scVI_1.2"].copy()

# Boxen plots of log1p total counts and of percent mitochondrial counts
for qc_metric in ("log1p_total_counts", "pct_counts_mito"):
    fig, ax = plt.subplots(figsize=(20,6))
    sns.boxenplot(data=adata2.obs, x="cluster", y=qc_metric, ax=ax)


In [None]:
# Dot plot of key cell-type markers per final Leiden cluster.
# Markers are grouped by the cell type they indicate; the groups are then
# flattened, in order, into the gene list passed to the dot plot.
marker_groups = {
    "fibroblast": ["Col14a1", "Acta2", "Myh11", "Tagln", "Mustn1"],
    "lipofibroblast/osteoblastic": ["Lpl", "Lipa", "Pparg", "Plin2", "Ear1", "Spp1"],
    "macrophage/myeloid": ["Ptprc", "Mertk", "Mrc1", "Ly75", "Adgre1", "Itgax", "Cd68",
                           "Csf1r", "Mafb", "Msr1", "Arg1", "Adgre4"],
    "monocyte": ["Cx3cr1", "Itgam", "Cd14"],
    "neutrophil": ["S100a9", "S100a8", "Mmp9", "Csf3r", "Cxcr2", "Ly6g"],
    "DC": ["Batf3", "Xcr1", "Clec9a", "Ccl17", "Ccl22"],
    "T cell": ["Cd3d", "Cd3e", "Cd3g", "Cd28", "Cd8a", "Cd4", "Foxp3"],
    "NK": ["Gzma", "Ncr1", "Gzmb"],
    "B cell": ["Fcmr", "Cd19", "Fcer2a", "Pax5", "Cd22", "Cd79b", "Cd79a"],
    "plasma": ["Slamf7", "Prdm1"],
    "endothelial": ["Mcam", "Pecam1", "Icam2", "Cd36", "Cd93"],
    "AT1": ["Pdpn", "Cav1", "Cav2", "Hopx", "Timp3", "Sema3f", "Serpine1"],
    "AT2": ["Abca3", "Muc1", "Sftpa1", "Sftpb", "Sftpd", "Scd1"],
    "club": ["Scgb1a1", "Cyp2f2", "Scgb3a2", "Scgb3a1", "Lypd2"],
    "goblet": ["Muc5ac", "Muc5b"],
    "ciliated": ["Tubb4a", "Foxa3", "Foxj1", "Rfx2", "Rfx3", "Trp73"],
    "basal": ["Krt5", "Krt17", "Krt15", "Trp63", "Id1", "Icam1", "Epas1", "Aqp3", "Sfn",
              "Perp", "Fxyd3", "Sdc1", "Gstm2", "F3", "Abi3bp", "Adh7"],
    "NE/neuronal": ["Bex2", "Ascl1", "Meis2", "Hes6", "Hoxb5", "Foxa2", "Sox4", "Rora",
                    "Isl1", "Id4", "Neurod1", "Neurod4", "Nhlh1", "Nhlh2"],
    "tuft": ["Pou2f3", "Trpm5", "Ascl2", "Ehf",
             "Lrmp", "Gng13", "Ltc4s", "Alox5ap", "Avil", "Alox5", "Atp2a3", "Plk2"],
    "ionocyte": ["Cftr", "Ascl3", "Stap1", "Atp6v1c2", "Pparg", "Rasd1", "Slc12a2"],
    "lung lineage": ["Gja1", "Nkx2-1", "Epcam"],
    "stem-like": ["Yap1", "Wwtr1", "Sox2", "Cd44", "Hes1"],
    "tumor markers": ["Venus", "fLuc", "Top2a", "Mki67", "GFP.CDS", "CellTag.UTR"],
}
more_types = [gene for group in marker_groups.values() for gene in group]

sc.set_figure_params(scanpy=True, fontsize=20)

# Values come from the 'norm' layer and are scaled per gene
# (standard_scale='var'); the figure is built but not shown (show=False).
sc.pl.dotplot(
    adata2,
    var_names=more_types,
    groupby='leiden_scVI_1.2',
    layer="norm",
    use_raw=False,
    standard_scale='var',
    color_map="cmo.dense",
    var_group_rotation=35,
    figsize=[28,10],
    show=False,
    save=False,
)

In [None]:
# UMAPs of the cleaned object coloured by each QC metric, two panels per row
qc_metrics = ["n_genes_by_counts", "total_counts", "pct_counts_mito", "log1p_total_counts"]
sc.pl.umap(adata2, color=qc_metrics, ncols=2, s=3, cmap="cubehelix_r")

In [None]:
# Feature plots of stromal, immune and endothelial markers, used to check
# for remaining non-tumour cells. Markers are grouped by cell type and
# flattened, in order, into the list of genes to plot.
marker_groups = {
    "fibroblast": ["Col14a1", "Acta2", "Myh11", "Tagln", "Mustn1"],
    "lipofibroblast/osteoblastic": ["Lpl", "Lipa", "Pparg", "Plin2", "Ear1", "Fabp1", "Spp1"],
    "macrophage/myeloid": ["Ptprc", "Mertk", "Marco", "Mrc1", "Ly75", "Adgre1",
                           "Itgax", "Cd68", "Csf1r", "Mafb", "Msr1", "Arg1", "Adgre4", "Clec4a1"],
    "monocyte": ["Cx3cr1", "Itgam", "Cd14"],
    "neutrophil": ["S100a9", "S100a8", "Mmp9", "Csf3r", "Cxcr2", "Ly6g"],
    "DC": ["Batf3", "Xcr1", "Clec9a", "Ccl17", "Ccl22"],
    "T cell": ["Cd3d", "Cd3e", "Cd3g", "Cd28", "Cd8a", "Cd4", "Foxp3"],
    "NK": ["Gzma", "Ncr1", "Gzmb"],
    "B cell": ["Fcmr", "Cd19", "Fcer2a", "Pax5", "Cd22", "Cd79b", "Cd79a"],
    "plasma": ["Slamf7", "Prdm1"],
    "endothelial": ["Mcam", "Pecam1", "Icam2", "Cd36", "Cd93"],
}
more_types = [gene for group in marker_groups.values() for gene in group]

# One UMAP panel per gene (4 per row), coloured by the 'norm' layer with the
# colour scale capped at the 99.5th percentile.
sc.pl.umap(
    adata2,
    color=more_types,
    layer="norm",
    use_raw=False,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc= "on data",
    ncols=4,
    s=30,
    frameon=False,
    save=False,
)

In [None]:
# Feature plots of mesenchymal / EMT-associated genes
more_types = ["Vim", "Zeb1", "Zeb2", "Cd44", "Twist1", "Twist2", "Snai1", "Snai2"]

# One UMAP panel per gene (4 per row), coloured by the 'norm' layer with the
# colour scale capped at the 99.5th percentile.
sc.pl.umap(
    adata2,
    color=more_types,
    layer="norm",
    use_raw=False,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc= "on data",
    ncols=4,
    s=30,
    frameon=False,
    save=False,
)

In [None]:
# Checkpoint: save the final (iteration 2) object with the X_scVI_1.2 latent
# space, UMAP and leiden_scVI_1.2 clusters.
adata2.write_h5ad("042725_RPM_RPMA_TBOAllo_CellTagAnalysis_New_1.2.h5ad")

In [None]:
# Reload the saved iteration-2 object so the analysis can be resumed from
# here without re-running the steps above.
adata2=sc.read_h5ad("042725_RPM_RPMA_TBOAllo_CellTagAnalysis_New_1.2.h5ad")

In [None]:
# Rank marker genes for every final Leiden cluster (Wilcoxon test on the
# 'norm' layer); used for the Supplementary tables.
sc.tl.rank_genes_groups(
    adata2,
    groupby='leiden_scVI_1.2',
    method='wilcoxon',
    use_raw=False,
    layer='norm',
)

# Top 50 ranked gene names per cluster (one column per cluster)
ranked_names = pd.DataFrame(adata2.uns['rank_genes_groups']['names'])
ranked_names.head(50)

In [None]:
# Table of ranked marker-gene names: one column per Leiden cluster,
# rows in rank order.
ranked = adata2.uns['rank_genes_groups']
marker_genes = pd.DataFrame(ranked['names'])

# Write the table to CSV without the row index
marker_genes.to_csv('042725_adata2_RPM_RPMA_TBO_allNEW_cluster_marker_genes.csv', index=False)

### Plot cell state marker genes (Ext Data Fig. 5f)

In [None]:
# Panels to draw: the Leiden cluster labels first, then NE marker genes
genes = ['leiden_scVI_1.2', 'Chga', 'Calca', 'Scg2', 'Pcsk1', 'Uchl1', 'Ddc',
         'Snca', 'Snap25', 'Bex2', 'Ascl1', 'Meis2', 'Hes6', 'Insm1', 'Hoxb5']

plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Build the multi-panel figure from the 'norm' layer without displaying it
# yet, so the panel titles can be restyled first.
umap_kwargs = dict(
    color=genes,
    ncols=5,
    s=50,
    layer="norm",
    use_raw=False,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    show=False,
    return_fig=True,
)
g = sc.pl.umap(adata2, **umap_kwargs)

# Enlarge and italicise the title of every axes in the figure
title_style = dict(fontsize=45, fontstyle='italic')
for ax in g.axes:
    ax.set_title(ax.get_title(), **title_style)

plt.tight_layout()
plt.show()


In [None]:
# Neuronal genes to show on the UMAP
genes = ['Neurod1', 'Nhlh1','Nhlh2','Neurod2','Neurod4']

plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Build the multi-panel figure from the 'norm' layer without displaying it
# yet, so the panel titles can be restyled first.
umap_kwargs = dict(
    color=genes,
    ncols=5,
    s=50,
    layer="norm",
    use_raw=False,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    show=False,
    return_fig=True,
)
g = sc.pl.umap(adata2, **umap_kwargs)

# Enlarge and italicise the title of every axes in the figure
title_style = dict(fontsize=45, fontstyle='italic')
for ax in g.axes:
    ax.set_title(ax.get_title(), **title_style)

plt.tight_layout()
plt.show()

In [None]:
# Tuft markers to show on the UMAP (four panels per row)
genes = ['Pou2f3','Trpm5','Ascl2','Avil',
         'Hmx2','Ehf','Hmx3','Lrmp',
         'Gng13','Ltc4s','Rgs13','Alox5ap']

plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Build the multi-panel figure from the 'norm' layer without displaying it
# yet, so the panel titles can be restyled first.
umap_kwargs = dict(
    color=genes,
    ncols=4,
    s=50,
    layer="norm",
    use_raw=False,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    show=False,
    return_fig=True,
)
g = sc.pl.umap(adata2, **umap_kwargs)

# Enlarge and italicise the title of every axes in the figure
title_style = dict(fontsize=45, fontstyle='italic')
for ax in g.axes:
    ax.set_title(ax.get_title(), **title_style)

plt.tight_layout()
plt.show()

In [None]:
# Stem-like / mesenchymal genes to show on the UMAP
genes = ['Yap1', 'Cd44','Sox2','Hes1','Twist1','Snai1','Snai2','Vim','Sox9']

plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Build the multi-panel figure from the 'norm' layer without displaying it
# yet, so the panel titles can be restyled first.
umap_kwargs = dict(
    color=genes,
    ncols=5,
    s=50,
    layer="norm",
    use_raw=False,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    show=False,
    return_fig=True,
)
g = sc.pl.umap(adata2, **umap_kwargs)

# Enlarge and italicise the title of every axes in the figure
title_style = dict(fontsize=45, fontstyle='italic')
for ax in g.axes:
    ax.set_title(ax.get_title(), **title_style)

plt.tight_layout()
plt.show()

In [None]:
# Ionocyte genes to show on the UMAP
genes = ['Cftr','Slc12a2', 'Stap1','Tfcp2l1','Pparg','Atp6v1c2',
         'Cd81','Esyt1', 'Ascl3','Foxi1']

plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Build the multi-panel figure from the 'norm' layer without displaying it
# yet, so the panel titles can be restyled first.
umap_kwargs = dict(
    color=genes,
    ncols=5,
    s=50,
    layer="norm",
    use_raw=False,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    show=False,
    return_fig=True,
)
g = sc.pl.umap(adata2, **umap_kwargs)

# Enlarge and italicise the title of every axes in the figure
title_style = dict(fontsize=45, fontstyle='italic')
for ax in g.axes:
    ax.set_title(ax.get_title(), **title_style)

plt.tight_layout()
plt.show()

In [None]:
# TIP markers to show on the UMAP
genes = ['Serpinf1','Gsta2']

plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Build the multi-panel figure from the 'norm' layer without displaying it
# yet, so the panel titles can be restyled first.
umap_kwargs = dict(
    color=genes,
    ncols=5,
    s=50,
    layer="norm",
    use_raw=False,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    show=False,
    return_fig=True,
)
g = sc.pl.umap(adata2, **umap_kwargs)

# Enlarge and italicise the title of every axes in the figure
title_style = dict(fontsize=45, fontstyle='italic')
for ax in g.axes:
    ax.set_title(ax.get_title(), **title_style)

plt.tight_layout()
plt.show()

In [None]:
# Visualize hillock markers
genes = ['Krt13','Krt6a','Dsg3', 'Serpinb2','Lgals3']

# Set the figure parameters explicitly, as every other marker-panel cell in
# this notebook does, so this cell renders the same way when it is run on
# its own instead of depending on state left behind by the previous cell.
plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Build the multi-panel figure from the 'norm' layer without displaying it
# yet (show=False, return_fig=True), so the titles can be restyled first.
g = sc.pl.umap(
    adata2,
    color=genes,
    use_raw=False,
    legend_loc="on data",
    color_map="cmo.dense",
    ncols=5,
    frameon=False,
    vmax="p99.5",
    layer="norm",
    s=50,
    show=False,
    return_fig=True
)

# Make gene names (titles) larger and italic
for ax in g.axes:
    ax.set_title(ax.get_title(), fontsize=45, fontstyle='italic')

plt.tight_layout()
plt.show()

In [None]:
# A2 state markers to show on the UMAP
genes = ['Calca','Grp','Foxa2','Epcam','Cdh1','Itga3','Flnb','Elf3','Krt7','Nr0b2']
# Alternative gene panel, currently not plotted:
#['Elf3','Nr0b1','Tcf3','Foxa1','Foxa2','Smad4','Fli1','Gata4','Neurod2','Olig2']

plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Build the multi-panel figure from the 'norm' layer without displaying it
# yet, so the panel titles can be restyled first.
umap_kwargs = dict(
    color=genes,
    ncols=5,
    s=50,
    layer="norm",
    use_raw=False,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    show=False,
    return_fig=True,
)
g = sc.pl.umap(adata2, **umap_kwargs)

# Enlarge and italicise the title of every axes in the figure
title_style = dict(fontsize=45, fontstyle='italic')
for ax in g.axes:
    ax.set_title(ax.get_title(), **title_style)

plt.tight_layout()
plt.show()

In [None]:
# Basal genes to show on the UMAP
genes = ['Krt5', 'Krt17','Krt15','Trp63','Icam1','Epas1','Aqp3','Sfn','Perp','Fxyd3','Sdc1','Gstm2','F3',
         'Adh7','Epcam']

plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Build the multi-panel figure from the 'norm' layer without displaying it
# yet, so the panel titles can be restyled first.
umap_kwargs = dict(
    color=genes,
    ncols=5,
    s=50,
    layer="norm",
    use_raw=False,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    show=False,
    return_fig=True,
)
g = sc.pl.umap(adata2, **umap_kwargs)

# Enlarge and italicise the title of every axes in the figure
title_style = dict(fontsize=45, fontstyle='italic')
for ax in g.axes:
    ax.set_title(ax.get_title(), **title_style)

plt.tight_layout()
plt.show()

In [None]:
# Atoh1-associated genes to show on the UMAP
genes = ['Atoh1','Ush2a','Lhx3','Rasd2','Pou4f3']

plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Build the multi-panel figure from the 'norm' layer without displaying it
# yet, so the panel titles can be restyled first.
umap_kwargs = dict(
    color=genes,
    ncols=5,
    s=50,
    layer="norm",
    use_raw=False,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    show=False,
    return_fig=True,
)
g = sc.pl.umap(adata2, **umap_kwargs)

# Enlarge and italicise the title of every axes in the figure
title_style = dict(fontsize=45, fontstyle='italic')
for ax in g.axes:
    ax.set_title(ax.get_title(), **title_style)

plt.tight_layout()
plt.show()

### Visualize cell fate markers per cluster (for Fig. 3g (Final Fig. 2g))

In [None]:
# Dot plot for Fig. 3g (Final Fig. 2g): cell-fate markers per Leiden cluster.
# Markers are grouped by fate and flattened, in order, into the gene list.
marker_groups = {
    "basal": ["Trp63", "Krt5", "Krt15", "Krt17"],
    "NE": ["Ascl1", "Syp", "Chga", "Insm1", "Chgb", "Myt1", "Sez6", "Foxa2", "Mycl"],
    "neuronal": ["Neurod1", "Nhlh1", "Nhlh2", "Neurod2"],
    "Atoh1/inner ear": ["Atoh1", "Ush2a", "Lhx3", "Rasd2", "Pou4f3"],
    "tuft": ["Pou2f3", "Trpm5", "Ascl2", "Lrmp", "Gng13", "Avil", "Alox5", "Atp2a3"],
    "ionocyte": ["Cftr", "Ascl3", "Stap1", "Pparg"],
    "stem-like/mesenchymal/Yap": ["Yap1", "Wwtr1", "Sox2", "Cd44", "Hes1", "Vim"],
    "proliferation": ["Top2a", "Mki67", "Ube2c", "Aspm"],
    "tumor markers": ["Myc", "fLuc"],
}
more_types = [gene for group in marker_groups.values() for gene in group]

sc.set_figure_params(scanpy=True, fontsize=17)

# Values come from the 'norm' layer; clusters are ordered by a dendrogram
# (dendrogram=True). The figure is built but not shown (show=False).
sc.pl.dotplot(
    adata2,
    var_names=more_types,
    groupby='leiden_scVI_1.2',
    layer="norm",
    use_raw=False,
    dendrogram=True,
    smallest_dot=10,
    color_map="cmo.dense",
    var_group_rotation=35,
    figsize=[20,5],
    show=False,
    save=False,
)

### Visualize drug targets for Fig. 6h (Final Fig. 5h)

In [None]:
# Drug-target genes to show on the UMAP
genes = ['Ncam1','Dll3','Sez6','Tacstd2','Cd276']

plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Build the multi-panel figure from the 'norm' layer without displaying it
# yet, so the panel titles can be restyled first.
umap_kwargs = dict(
    color=genes,
    ncols=5,
    s=50,
    layer="norm",
    use_raw=False,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    show=False,
    return_fig=True,
)
g = sc.pl.umap(adata2, **umap_kwargs)

# Enlarge and italicise the title of every axes in the figure
title_style = dict(fontsize=45, fontstyle='italic')
for ax in g.axes:
    ax.set_title(ax.get_title(), **title_style)

plt.tight_layout()
plt.show()

In [None]:
# Number of cells per sample (UnID) in the final object
adata2.obs.groupby("UnID").size()

In [None]:
# Number of cells per genotype (RPM vs RPMA) in the final object
adata2.obs.groupby("Genotype").size()

### Add an annotation to Fig. 3g (Final Fig. 2g) DotPlot to show frequency of RPM vs RPMA cells per leiden cluster

In [None]:
# Genotype (RPM vs RPMA) composition of each Leiden cluster.
df = adata2.obs[['Genotype', 'leiden_scVI_1.2']]

# Cell counts: one row per Leiden cluster, one column per genotype
frequency_table = pd.crosstab(df['leiden_scVI_1.2'], df['Genotype'])

# Convert counts to within-cluster proportions (each row sums to 1)
frequency_table_normalized = frequency_table.div(frequency_table.sum(axis=1), axis=0)

# Stacked bar chart. The axes are created explicitly and handed to
# DataFrame.plot; calling plt.figure() first and then .plot() without `ax`
# leaves an empty extra figure, because .plot() opens a figure of its own.
fig, ax = plt.subplots(figsize=(12, 8))
frequency_table_normalized.plot(kind='bar', stacked=True, ax=ax)

# Customize plot: the x-axis holds the Leiden clusters and the stacked
# colours (legend entries) are the genotypes.
#ax.set_title('Leiden cluster composition per basal model', fontsize=20)
ax.set_xlabel('Leiden cluster', fontsize=20)
ax.set_ylabel('Proportion', fontsize=20)
ax.legend(title='Genotype', bbox_to_anchor=(1.05, 1), fontsize=16)
# Rotate the tick labels before tight_layout so the layout accounts for them
plt.xticks(rotation=45, ha='right')
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
import matplotlib.pyplot as plt

# One pie chart per Leiden cluster showing its RPM vs RPMA cell counts,
# drawn in a custom cluster order.

# Define your custom cluster and genotype orders
custom_cluster_order = ['8', '7', '18', '9', '4', '20', '11', '14', '1', '19',
                        '0', '6', '17', '15', '3', '2', '13', '12', '10', '5', '16']
custom_genotype_order = ['RPM', 'RPMA']  # Adjust as needed

# Define consistent colors
genotype_colors = {
    'RPM': '#5e3c99',   # purple
    'RPMA': '#ff7f0e'    # orange
}

# Cell counts: one row per Leiden cluster, one column per genotype
df = adata2.obs[['Genotype', 'leiden_scVI_1.2']]
frequency_table = pd.crosstab(df['leiden_scVI_1.2'], df['Genotype'])

# Ensure correct order (raises KeyError if a listed cluster/genotype is absent)
frequency_table = frequency_table.loc[custom_cluster_order, custom_genotype_order]

# One 4x4-inch panel per cluster. The panel count and figure width are
# derived from the table, so the layout follows custom_cluster_order if the
# number of clusters changes (21 clusters -> 84x4 inches).
n_clusters = len(frequency_table.index)
fig, axes = plt.subplots(1, n_clusters, figsize=(4 * n_clusters, 4))
# plt.subplots returns a bare Axes when there is only one panel
axes = axes if isinstance(axes, (list, np.ndarray)) else [axes]

# Wedge colours follow the genotype column order and are the same for every pie
colors = [genotype_colors[gt] for gt in frequency_table.columns]

for i, cluster in enumerate(frequency_table.index):
    ax = axes[i]
    data = frequency_table.loc[cluster]
    ax.set_title(f'Cluster {cluster}', fontsize=10)
    ax.pie(data, startangle=90, colors=colors)
    ax.axis('equal')  # Keep pies circular

plt.tight_layout()
plt.show()



In [None]:
# Reload the saved iteration-2 object as the starting point for the FA
# projection and pseudotime analysis. This discards any in-memory changes
# made to adata2 since the file was written.
adata2=sc.read_h5ad("042725_RPM_RPMA_TBOAllo_CellTagAnalysis_New_1.2.h5ad")

## Perform FA projection and pseudotime for Figure 4 and Extended Data Fig 6

In [None]:
# Compute the force-directed graph layout ('fa' = ForceAtlas2) for adata2,
# with a fixed random seed and 1000 layout iterations.
# `color` is an argument of the plotting function sc.pl.draw_graph, not of
# sc.tl.draw_graph, where extra keywords are forwarded to the layout backend;
# it has been removed here. Colouring is done in the plotting cell below.
sc.tl.draw_graph(adata2, layout='fa', random_state=0, maxiter=1000)

### Project data in FA space (Fig. 4c (Final Fig. 3c))

In [None]:
# Colours for the Leiden clusters in the FA projection (21 entries)
palette = [
    "#E41A1C",  # strong red
    "#377EB8",  # medium blue
    "#4DAF4A",  # green
    "#984EA3",  # purple
    "#FF7F00",  # orange
    "#FFFF33",  # yellow
    "#A65628",  # brown
    "#e7298a",  # pink
    "#666666",  # grey
    "#66C2A5",  # teal
    "#FC8D62",  # salmon
    "#8DA0CB",  # soft blue
    "#E78AC3",  # soft pink (different from the pink above)
    "#A6D854",  # light green with a yellowish tint
    "#FFD92F",  # lemon yellow
    "#E5C494",  # light brown
    "#B3B3B3",  # light grey
    "#1B9E77",  # deep teal
    "#D95F02",  # dark orange
    "#7570B3",  # strong purple
    "#66A61E",  # olive green (distinct from the green above)
]

# Global default figure size; increase width and height as needed
plt.rcParams['figure.figsize'] = [6, 6]

# FA projection coloured by Leiden cluster, with cluster labels on the data
sc.pl.draw_graph(
    adata2,
    color='leiden_scVI_1.2',
    palette=palette,
    title='Leiden cluster',
    legend_loc='on data',
    legend_fontsize='x-large',
    legend_fontweight='medium',
    frameon=False,
    size=15,  # optional: larger points for better visibility
)


### Calculate diffusion pseudotime (dpt) with root defined as the basal-like cluster, 17
### For Figure 4g

In [None]:
# Root cell for diffusion pseudotime: the first cell (in .obs order) that
# belongs to Leiden cluster '17'. This raises IndexError if no cell carries
# that label.
adata2.uns['iroot'] = np.flatnonzero(adata2.obs['leiden_scVI_1.2']  == '17')[0]

# Diffusion pseudotime from the root cell; writes .obs['dpt_pseudotime'].
# NOTE(review): no explicit sc.tl.diffmap call is visible before this line;
# confirm that relying on dpt's default diffusion-map settings is intended.
sc.tl.dpt(adata2)

# plt.get_cmap replaces plt.cm.get_cmap, which is deprecated and no longer
# available in recent matplotlib releases.
cmap = plt.get_cmap("Spectral")
sc.pl.draw_graph(adata2, color=['leiden_scVI_1.2', 'dpt_pseudotime','Genotype'], legend_loc='right margin', color_map=cmap)

In [None]:
# Checkpoint: save the object including the FA layout and diffusion pseudotime
adata2.write_h5ad("042925_RPM_RPMA_TBOAllo_CellTagAnalysis_New_1.2_wFAprojection_DPT.h5ad")

In [None]:
# Read the saved FA/DPT object back into a separate variable (adata2_1);
# the in-memory adata2 is left untouched.
adata2_1=sc.read_h5ad("042925_RPM_RPMA_TBOAllo_CellTagAnalysis_New_1.2_wFAprojection_DPT.h5ad")

In [None]:
# Sanity check on the reloaded object: the second entry of X.shape (number of
# columns) should equal the number of gene names printed on the next line.
print(adata2_1.X.shape)        # Should match Dim[2]
print(len(adata2_1.var_names)) # Should match columns in X


In [None]:
# 21-colour palette for the categorical panels of the plot below.
palette=[
  "#E41A1C", # strong red
  "#377EB8", # medium blue
  "#4DAF4A", # green
  "#984EA3", # purple
  "#FF7F00", # orange
  "#FFFF33", # yellow
  "#A65628", # brown
  "#e7298a", # pink
  "#666666", # grey
  "#66C2A5", # teal
  "#FC8D62", # salmon
  "#8DA0CB", # soft blue
  "#E78AC3", # soft pink (different from 8)
  "#A6D854", # light green (but yellowish tint, not green)
  "#FFD92F", # lemon yellow
  "#E5C494", # light brown
  "#B3B3B3", # light grey
  "#1B9E77", # deep teal
  "#D95F02", # dark orange
  "#7570B3", # strong purple
  "#66A61E"  # olive green (NOT same green as before)
]

# plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# plt.get_cmap returns the same colormap and works across versions.
cmap = plt.get_cmap("Spectral")
# FA embedding coloured by cluster, pseudotime and genotype.
sc.pl.draw_graph(adata2, color=['leiden_scVI_1.2', 'dpt_pseudotime','Genotype'], legend_loc='right margin', color_map=cmap, palette=palette)

In [None]:
# Pseudotime-only FA plot; `cmap` is the colormap object created in an earlier cell.
sc.pl.draw_graph(
    adata2,
    color=['dpt_pseudotime'],
    title='Diffusion pseudotime',
    color_map=cmap,
    legend_loc='right margin',
    frameon=False,
    size=15,  # point size
)

### Perform CellRank analyses by leiden cluster for Fig. 4i,j (Final Fig. 3i,j) and Extended Data Fig. 6e,f

In [None]:
# Assign a fate label to every cell from its Leiden cluster.
# The table is written fate -> clusters and then inverted into the
# cluster -> fate dictionary (`cluster_map`) that is applied to .obs.
fate_to_clusters = {
    'NE_Neuronal': ['0', '8'],
    'NE': ['1', '7', '11', '14', '18', '19', '20'],
    'SL-stem_prolif': ['2', '3', '12', '13'],
    'Neuronal': ['4', '9', '15'],
    'SL-Mesenchymal': ['5'],
    'ATOH1': ['6'],
    'SL-Epith_basal_hillock': ['10'],
    'Tuft': ['16'],
    'Basal': ['17'],
}
cluster_map = {
    cluster: fate
    for fate, clusters in fate_to_clusters.items()
    for cluster in clusters
}

# New .obs column holding the fate label of each cell.
adata2.obs['cell_type'] = adata2.obs['leiden_scVI_1.2'].map(cluster_map)


### Visualize FA map by fate, as assigned above, as depicted in Fig. 4c (Final Fig. 3c)

In [None]:
# Fate colours. Defined in this cell so it runs top-to-bottom: the original
# used `pheno_col` here before the cell that first assigned it.
# Presumably applied in the (alphabetical) category order of 'cell_type' — confirm.
pheno_col = [
    "#66A61E",  # olive green
    "#40E0D0",  # turquoise
    "red",
    "#68228B",  # darkorchid4
    "#1E90FF",  # dodgerblue
    "#00868B",  # turquoise4
    "#00868B",  # turquoise4
    "#00868B",  # turquoise4
    "#FFA500",  # orange
]

# FA embedding coloured by assigned fate; `cmap` comes from an earlier cell.
sc.pl.draw_graph(adata2, color=['cell_type'], legend_loc='right margin', color_map=cmap,
                 palette=pheno_col, frameon=False, size=15)

In [None]:
# Nine fate colours (three entries share turquoise4).
# Presumably applied in the category order of 'cell_type' — confirm.
pheno_col = (
    ["#66A61E", "#40E0D0", "red", "#68228B", "#1E90FF"]  # olive, turquoise, red, darkorchid4, dodgerblue
    + ["#00868B"] * 3                                     # turquoise4, three times
    + ["#FFA500"]                                         # orange
)

# Pseudotime and fate side by side in FA space; `cmap` comes from an earlier cell.
sc.pl.draw_graph(
    adata2,
    color=['dpt_pseudotime', 'cell_type'],
    color_map=cmap,
    palette=pheno_col,
    legend_loc='right margin',
)



### Visualize key fate genes in FA space as in Extended Data Fig. 6d

In [None]:
# NOTE(review): sc.set_figure_params(scanpy=True, ...) may reset
# rcParams['figure.figsize'], which would override the size set on the line
# before it. Order kept as in the original so the rendered figure is unchanged — confirm.
plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Build the figure without showing it so the panel titles can be restyled first.
# The handle is named `fig` (not `g`) so that re-running this cell cannot
# overwrite the CellRank estimator, which is stored in `g` further down.
fig = sc.pl.draw_graph(adata2, color=['Ascl1','Neurod1','Pou2f3','Atoh1','Yap1','Trp63'], legend_loc='right margin', color_map="cmo.dense",
                  ncols=6, frameon=False, size=35, vmax='p99.5',  layer="norm",show=False,return_fig=True)

# Make gene names (titles) larger and italic
for ax in fig.axes:
    ax.set_title(ax.get_title(), fontsize=45, fontstyle='italic')

plt.tight_layout()
plt.show()





In [None]:
# Checkpoint: save the object including the 'cell_type' fate column added above.
adata2.write_h5ad("050125_RPM_RPMA_TBOAllo_CellTagAnalysis_New_1.2_fate_FAprojection_DPT_final.h5ad")

In [None]:
# Reload the checkpoint; the CellRank analysis below starts from this object.
adata2=sc.read_h5ad("050125_RPM_RPMA_TBOAllo_CellTagAnalysis_New_1.2_fate_FAprojection_DPT_final.h5ad")

In [None]:
# Snapshot the current object in adata2.raw before .X is normalised below.
# NOTE(review): assumes .X still holds raw counts at this point — confirm.
adata2.raw = adata2.copy()

In [None]:
# Display the matrix stored in .raw as a quick check.
adata2.raw.X

In [None]:
# Normalise each cell to a total of 1e4, then log1p-transform adata2.X in place.
# adata2.raw (set above) keeps the pre-normalisation values.
sc.pp.normalize_total(adata2, target_sum=1e4)
sc.pp.log1p(adata2)

### Perform CellRank analyses following published tutorial

In [None]:
from cellrank.kernels import PseudotimeKernel

# Kernel built on the diffusion pseudotime stored in .obs['dpt_pseudotime'].
pk = PseudotimeKernel(adata2, time_key="dpt_pseudotime")

In [None]:
# Compute the kernel's transition matrix (library defaults).
pk.compute_transition_matrix()

In [None]:
# Simulate 100 random walks that start in Leiden cluster '17' (the cluster
# used as DPT root above) and draw them on the FA embedding.
walk_start = {"leiden_scVI_1.2": "17"}
pk.plot_random_walks(
    n_sims=100,
    start_ixs=walk_start,
    basis="X_draw_graph_fa",
    seed=0,  # fixed seed for reproducible walks
    legend_loc="right",
    dpi=150,
)

In [None]:
from cellrank.estimators import GPCCA
# GPCCA estimator built on the pseudotime kernel; `g` is reused by all cells below.
g = GPCCA(pk)
print(g)

In [None]:
# Schur decomposition of the transition matrix, then plot the real part of the
# spectrum (presumably inspected to choose the number of macrostates — confirm).
g.compute_schur()
g.plot_spectrum(real_only=True)

In [None]:
# Fit 9 macrostates, associated with the Leiden clusters via cluster_key, and show them in FA space.
g.fit(n_states=9, cluster_key="leiden_scVI_1.2")
g.plot_macrostates(which="all",basis='X_draw_graph_fa')

In [None]:
# Predict terminal states among the macrostates and plot only those.
g.predict_terminal_states(allow_overlap=True)
g.plot_macrostates(which="terminal",basis='X_draw_graph_fa')

In [None]:
# Same terminal-state plot with a custom nine-colour palette.
terminal_palette = [
    "#B3B3B3", "#1B9E77", "#FC8D62",
    "#8DA0CB", "#FFFF33", "#A65628",
    "#E41A1C", "#66C2A5", "#A6D854",
]
g.plot_macrostates(
    which="terminal",
    basis='X_draw_graph_fa',
    palette=terminal_palette,
)

In [None]:
# Compute, for every cell, the probability of reaching each terminal state,
# then plot those fate probabilities on the FA embedding.
g.compute_fate_probabilities()
g.plot_fate_probabilities(legend_loc="right",basis="X_draw_graph_fa")


In [None]:
# Fate-probability plot repeated with a custom nine-colour palette.
fate_palette = [
    "#B3B3B3", "#1B9E77", "#FC8D62",
    "#8DA0CB", "#FFFF33", "#A65628",
    "#E41A1C", "#66C2A5", "#A6D854",
]
g.plot_fate_probabilities(
    basis="X_draw_graph_fa",
    legend_loc="right",
    palette=fate_palette,
)


### Visualize CellRank results in circle plots (Fig. 4i (Final Fig. 3i))

In [None]:
import cellrank as cr

# 21-colour palette for 'leiden_scVI_1.2'
# (presumably one colour per Leiden cluster 0-20, in category order — confirm).
palette = [
    "#E41A1C", "#377EB8", "#4DAF4A",  # strong red, medium blue, green
    "#984EA3", "#FF7F00", "#FFFF33",  # purple, orange, yellow
    "#A65628", "#e7298a", "#666666",  # brown, pink, grey
    "#66C2A5", "#FC8D62", "#8DA0CB",  # teal, salmon, soft blue
    "#E78AC3", "#A6D854", "#FFD92F",  # soft pink, yellowish light green, lemon yellow
    "#E5C494", "#B3B3B3", "#1B9E77",  # light brown, light grey, deep teal
    "#D95F02", "#7570B3", "#66A61E",  # dark orange, strong purple, olive green
]

# Circular projection of the fate probabilities, cells coloured by Leiden cluster.
cr.pl.circular_projection(
    adata2,
    keys="leiden_scVI_1.2",
    palette=palette,
    legend_loc="right",
)

In [None]:
# One colour per Leiden cluster, so clusters that share a fate in cluster_map
# share a colour. Listed in cluster order 0-20 (assumes the categories of
# 'leiden_scVI_1.2' are ordered 0..20 — confirm).
pheno = [
    '#68228B',  # 0
    'red',      # 1
    '#00868B',  # 2
    '#00868B',  # 3
    '#1E90FF',  # 4
    '#00868B',  # 5
    '#66A61E',  # 6
    'red',      # 7
    '#68228B',  # 8
    '#1E90FF',  # 9
    '#00868B',  # 10
    'red',      # 11
    '#00868B',  # 12
    '#00868B',  # 13
    'red',      # 14
    '#1E90FF',  # 15
    '#FFA500',  # 16
    '#40E0D0',  # 17
    'red',      # 18
    'red',      # 19
    'red',      # 20
]

# Circular projection again, now coloured with the fate colours above.
cr.pl.circular_projection(
    adata2,
    keys="leiden_scVI_1.2",
    palette=pheno,
    legend_loc="right",
)

### Compute all predicted CellRank drivers for export to Supplementary table, by Leiden cluster

In [None]:
# Driver statistics for all lineages at once (no `lineages` filter); preview the first 50 rows.
all_drivers = g.compute_lineage_drivers()
all_drivers.head(50)

In [None]:
# Export the full driver table to CSV.
all_drivers.to_csv("050825_AllDrivers_RPM_RPMA_Allo_CellRank_ByLeiden_log1pnorm.csv")

### Use GAM model to compute specific lineage drivers and generate heatmaps in Fig. 4j (Final Fig. 3j) and Extended Data Fig. 6f

In [None]:
# GAM used to fit the expression trends in the heatmaps below
# (cubic splines, 10 knots, up to 6000 fitting iterations).
model = cr.models.GAM(adata2, max_iter=6000, spline_order=3,n_knots=10)


In [None]:
# Driver genes for lineage "16" (cluster 16 is annotated 'Tuft' in cluster_map).
# The original call also passed `model=model`. That is not a documented
# parameter of compute_lineage_drivers (it is swallowed by **kwargs), so it is
# dropped; the GAM is only consumed by cr.pl.heatmap.
# NOTE(review): confirm against the installed CellRank version.
tuft_drivers = g.compute_lineage_drivers(lineages="16")


In [None]:
# Index labels of the first 50 rows of the tuft driver table, passed as `genes=` to the heatmap below.
genes = tuft_drivers.head(50).index
genes

In [None]:
# Heatmap of GAM-fitted expression along pseudotime for the selected driver
# genes of lineage "16" (Tuft in cluster_map).
cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["16"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 15),
)

In [None]:
# Driver genes for lineage "11" (cluster 11 is annotated 'NE' in cluster_map).
# `model=model` was dropped: it is not a documented parameter of
# compute_lineage_drivers (swallowed by **kwargs) — confirm against the
# installed CellRank version.
ne_drivers = g.compute_lineage_drivers(lineages="11")

# First 50 rows of the driver table, used as `genes=` in the heatmap below.
genes = ne_drivers.head(50).index
genes

In [None]:
# Heatmap of GAM-fitted expression along pseudotime for the selected driver
# genes of lineage "11" (NE in cluster_map).
cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["11"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 15),
)

In [None]:
# Driver genes for lineage "9" (cluster 9 is annotated 'Neuronal' in cluster_map).
# `model=model` was dropped: it is not a documented parameter of
# compute_lineage_drivers (swallowed by **kwargs) — confirm against the
# installed CellRank version.
neuronal_drivers = g.compute_lineage_drivers(lineages="9")

# First 50 rows of the driver table, used as `genes=` in the heatmap below.
genes = neuronal_drivers.head(50).index
genes

In [None]:
# Heatmap of GAM-fitted expression along pseudotime for the selected driver
# genes of lineage "9" (Neuronal in cluster_map).
cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["9"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 15),
)

In [None]:
# Driver genes for lineage "0" (cluster 0 is annotated 'NE_Neuronal' in cluster_map).
# `model=model` was dropped: it is not a documented parameter of
# compute_lineage_drivers (swallowed by **kwargs) — confirm against the
# installed CellRank version.
ne_n_drivers = g.compute_lineage_drivers(lineages="0")

# First 50 rows of the driver table, used as `genes=` in the heatmap below.
genes = ne_n_drivers.head(50).index
genes

In [None]:
# Heatmap of GAM-fitted expression along pseudotime for the selected driver
# genes of lineage "0" (NE_Neuronal in cluster_map).
cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["0"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 15),
)

In [None]:
# Driver genes for lineage "6" (cluster 6 is annotated 'ATOH1' in cluster_map).
# `model=model` was dropped: it is not a documented parameter of
# compute_lineage_drivers (swallowed by **kwargs) — confirm against the
# installed CellRank version.
atoh_drivers = g.compute_lineage_drivers(lineages="6")

# First 50 rows of the driver table, used as `genes=` in the heatmap below.
genes = atoh_drivers.head(50).index
genes

In [None]:
# Heatmap of GAM-fitted expression along pseudotime for the selected driver
# genes of lineage "6" (ATOH1 in cluster_map).
cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["6"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 15),
)

In [None]:
# Driver genes for lineage "17" (cluster 17 is annotated 'Basal' in cluster_map).
# `model=model` was dropped: it is not a documented parameter of
# compute_lineage_drivers (swallowed by **kwargs) — confirm against the
# installed CellRank version.
basal_drivers = g.compute_lineage_drivers(lineages="17")

# First 50 rows of the driver table, used as `genes=` in the heatmap below.
genes = basal_drivers.head(50).index
genes

In [None]:
# Heatmap of GAM-fitted expression along pseudotime for the selected driver
# genes of lineage "17" (Basal in cluster_map).
cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["17"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 15),
)

In [None]:
# Driver genes for lineage "10" (cluster 10 is annotated
# 'SL-Epith_basal_hillock' in cluster_map).
# `model=model` was dropped: it is not a documented parameter of
# compute_lineage_drivers (swallowed by **kwargs) — confirm against the
# installed CellRank version.
epith_hillock_drivers = g.compute_lineage_drivers(lineages="10")

# First 50 rows of the driver table, used as `genes=` in the heatmap below.
genes = epith_hillock_drivers.head(50).index
genes

In [None]:
# Heatmap of GAM-fitted expression along pseudotime for the selected driver
# genes of lineage "10" (SL-Epith_basal_hillock in cluster_map).
cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["10"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 15),
)

In [None]:
# Driver genes for lineage "13" (cluster 13 is annotated 'SL-stem_prolif' in cluster_map).
# `model=model` was dropped: it is not a documented parameter of
# compute_lineage_drivers (swallowed by **kwargs) — confirm against the
# installed CellRank version.
stem_drivers = g.compute_lineage_drivers(lineages="13")

# First 50 rows of the driver table, used as `genes=` in the heatmap below.
genes = stem_drivers.head(50).index
genes

In [None]:
# Heatmap of GAM-fitted expression along pseudotime for the selected driver
# genes of lineage "13" (SL-stem_prolif in cluster_map).
cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["13"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 15),
)

In [None]:
# Driver genes for lineage "5" (cluster 5 is annotated 'SL-Mesenchymal' in cluster_map).
# `model=model` was dropped: it is not a documented parameter of
# compute_lineage_drivers (swallowed by **kwargs) — confirm against the
# installed CellRank version.
mes_drivers = g.compute_lineage_drivers(lineages="5")

# First 50 rows of the driver table, used as `genes=` in the heatmap below.
genes = mes_drivers.head(50).index
genes

In [None]:
# Heatmap of GAM-fitted expression along pseudotime for the selected driver
# genes of lineage "5" (SL-Mesenchymal in cluster_map).
cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["5"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 15),
)

## Try top 100, clustered

In [None]:
# First 100 rows of the tuft driver table, plotted on a taller canvas.
# NOTE(review): the section heading says "clustered", but cluster_genes=False
# is passed here and in the cells below — confirm which is intended.
genes = tuft_drivers.head(100).index

cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["16"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 20),
)

In [None]:
# First 100 rows of the NE (lineage "11") driver table, plotted on a taller canvas.
genes = ne_drivers.head(100).index

cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["11"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 20),
)

In [None]:
# First 100 rows of the Neuronal (lineage "9") driver table, plotted on a taller canvas.
genes = neuronal_drivers.head(100).index

cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["9"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 20),
)

In [None]:
# First 100 rows of the NE_Neuronal (lineage "0") driver table, plotted on a taller canvas.
genes = ne_n_drivers.head(100).index

cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["0"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 20),
)

In [None]:
# First 100 rows of the ATOH1 (lineage "6") driver table, plotted on a taller canvas.
genes = atoh_drivers.head(100).index

cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["6"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 20),
)

In [None]:
# First 100 rows of the Basal (lineage "17") driver table, plotted on a taller canvas.
genes = basal_drivers.head(100).index

cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["17"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 20),
)

In [None]:
# First 100 rows of the SL-stem_prolif (lineage "13") driver table, plotted on a taller canvas.
genes = stem_drivers.head(100).index

cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["13"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 20),
)

In [None]:
# First 100 rows of the SL-Mesenchymal (lineage "5") driver table, plotted on a taller canvas.
genes = mes_drivers.head(100).index

cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["5"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 20),
)

In [None]:
# First 100 rows of the SL-Epith_basal_hillock (lineage "10") driver table, plotted on a taller canvas.
genes = epith_hillock_drivers.head(100).index

cr.pl.heatmap(
    adata2,
    model=model,
    lineages=["10"],
    genes=genes,
    time_key="dpt_pseudotime",
    cluster_key="leiden_scVI_1.2",
    show_fate_probabilities=False,
    show_all_genes=True,
    cluster_genes=False,
    cbar=False,
    return_models=False,
    figsize=(10, 20),
)

## End of analysis in Scanpy; move to Seurat in R for calculation of gene signatures and additional plot generation/visualization. Additional CellTag analyses were also performed in R. To do this, convert the h5ad AnnData object produced by this script to a Seurat object in R.