# Analysis of RPM and RPMA basal organoid-derived allografts

## Ireland et al 2024 BioRxiv

### Figs 3 and 4; Extended Data Figs 5 and 6

In [None]:
#Import other relevant packages
import numpy as np
import pandas as pd
from matplotlib import rcParams
import os
import scanpy as sc

import matplotlib as mpl
import matplotlib.pyplot as plt

#For nice color schemes
import cmocean

#For barplots
import seaborn as sns

#Import scVI
import scvi
from scvi.model.utils import mde

scvi.settings.verbosity = 40

In [None]:
#Import scVI
import scvi
from scvi.model.utils import mde

scvi.settings.verbosity = 40

In [None]:
# Configure scanpy logging and the default figure appearance for this notebook.
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
# Emit scanpy's header into the cell output.
sc.logging.print_header()
# 80 dpi figures on a white background.
sc.settings.set_figure_params(dpi=80, facecolor='white')

In [None]:
# Set the working directory; the relative paths used below are resolved against it.
os.chdir('/work/asi16')

## 1. Read in RPM and RPMA basal-organoid-derived allograft tumor data

In [None]:
# 040824: load the new RPMA TBO allograft sample and the re-aligned RPM TBO allograft samples.
def _load_filtered_matrix(sample_dir):
    """Read a 10x filtered_feature_bc_matrix directory with genes indexed by symbol."""
    return sc.read_10x_mtx(sample_dir, var_names='gene_symbols', cache=True)


RPMA_allo = _load_filtered_matrix(
    '8972_RPMA_TBO_scRNAseq_Allo_031824/031924_RPMA_TBO_Allo_count_nocellplex/outs/filtered_feature_bc_matrix'
)
RPM_allo = _load_filtered_matrix(
    'MedGenome_FASTQ_123123/RPM_TBO_Allo/042024_RPM_TBO_Allo_CustomCount/outs/filtered_feature_bc_matrix'
)
RPM_allo2 = _load_filtered_matrix(
    '02_2024_TBO_Analyses/Old_Xeno/042024_custom_RPMTBO_FXeno_Old/outs/filtered_feature_bc_matrix'
)


In [None]:
# 032024: label every cell of each sample with its genotype and sample identifiers.
# All samples share the same Model and Cre labels; UnID and Batch carry the same value.
_sample_labels = (
    (RPMA_allo, 'RPMA', 'RPMA_Allo'),
    (RPM_allo, 'RPM', 'RPM_Allo_New'),
    (RPM_allo2, 'RPM', 'RPM_Allo_Old'),
)
for _sample, _genotype, _sample_id in _sample_labels:
    _sample.obs['Genotype'] = _genotype
    _sample.obs['Model'] = 'Allograft'
    _sample.obs['Cre'] = 'Cre'
    _sample.obs['UnID'] = _sample_id
    _sample.obs['Batch'] = _sample_id


## 2. Concatenate tumor data

In [None]:
# 04 2024: concatenate the RPM (new and old) and RPMA allografts into one AnnData object.
# The join is requested as "outer" across the three samples.
# NOTE(review): index_unique=None leaves barcodes unsuffixed, so a barcode present in
# more than one sample would produce duplicate obs_names -- confirm none collide, since
# later steps select cells by barcode.
adata= RPM_allo.concatenate([RPM_allo2,RPMA_allo], index_unique=None, join="outer")

In [None]:
adata

In [None]:
# Number of cells per batch. GroupBy.size() counts rows directly instead of calling
# len() on every group through apply().
adata.obs.groupby("Batch").size()

## 3. Perform QC

In [None]:
# Begin QC and clustering following Scanpy tutorial

In [None]:
# QC metrics on the unfiltered object
# Flag mitochondrial genes (symbols starting with 'mt-') in a boolean 'mito' column of .var
adata.var['mito'] = adata.var_names.str.startswith('mt-')  # annotate the group of mitochondrial genes as 'mito'
# Compute QC metrics in place, including the metrics for the 'mito' gene set
sc.pp.calculate_qc_metrics(adata, qc_vars=['mito'], percent_top=None, log1p=False, inplace=True)

In [None]:
# QC filtering of the combined RPM/RPMA object, following the scanpy workflow:
# drop cells with fewer than 200 detected genes, then genes detected in fewer than 3 cells
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

# Re-flag mitochondrial genes and recompute the QC metrics on the filtered object
adata.var['mito'] = adata.var_names.str.startswith('mt-')  # annotate the group of mitochondrial genes as 'mito'
sc.pp.calculate_qc_metrics(adata, qc_vars=['mito'], percent_top=None, log1p=False, inplace=True)

In [None]:
# Distribution of each QC metric, one violin panel per metric
qc_metrics = ['n_genes_by_counts', 'total_counts', 'pct_counts_mito']
sc.pl.violin(adata, qc_metrics, jitter=0.4, multi_panel=True)

# Total counts against mitochondrial percentage, then against detected genes
for y_metric in ('pct_counts_mito', 'n_genes_by_counts'):
    sc.pl.scatter(adata, x='total_counts', y=y_metric)

In [None]:
# Keep only cells that pass every QC threshold:
#   500 < detected genes < 9000, total counts > 1000, mitochondrial counts < 15 %.
# The thresholds are combined into one boolean mask and the selection is copied, so that
# `adata` is an independent object rather than a view; the following cells add obs
# columns and layers to it.
qc_pass = (
    (adata.obs.n_genes_by_counts < 9000)
    & (adata.obs.n_genes_by_counts > 500)
    & (adata.obs.total_counts > 1000)
    & (adata.obs.pct_counts_mito < 15)
)
adata = adata[qc_pass, :].copy()

In [None]:
# Number of cells remaining per sample (UnID) after QC filtering. GroupBy.size() counts
# rows directly instead of calling len() on every group through apply().
adata.obs.groupby("UnID").size()

In [None]:
# Prep for HVG selection and scVI
# Store log1p of each cell's total counts as an obs column (the expression matrix itself
# is not transformed here)
adata.obs["log1p_total_counts"] = np.log1p(adata.obs["total_counts"])

In [None]:
# Create layers: "counts" and "norm" both start as copies of X;
# "norm" is then scaled in place to 1e4 total counts per cell.
for layer_name in ("counts", "norm"):
    adata.layers[layer_name] = adata.X.copy()
sc.pp.normalize_total(adata, layer="norm", target_sum=1e4)

In [None]:
# Highly variable gene selection via scanpy (seurat_v3 flavor, on the "counts" layer),
# computed per batch.
# Note: running with a batch_key whose batches contain few cells raises a
# "reciprocal condition number" error.
# NOTE(review): subset=False only annotates genes; no cell visible here subsets the object
# to the flagged genes before scVI is set up -- confirm that training on all genes is intended.
sc.pp.highly_variable_genes(
    adata,n_top_genes=10000,
    subset=False,
    layer="counts",
    flavor="seurat_v3",
    batch_key="Batch"
)

## 4. Set up and train model (scvi)

In [None]:
# Register the object with scVI: model the "counts" layer, use 'Batch' as the batch
# key, and pass the mitochondrial percentage as a continuous covariate.
scvi.model.SCVI.setup_anndata(
    adata,
    layer="counts",
    batch_key='Batch',
    continuous_covariate_keys=["pct_counts_mito"]
)


In [None]:
model = scvi.model.SCVI(adata)

In [None]:
model

In [None]:
model.train()

## 5. Perform leiden clustering

In [None]:
# Take the latent representation from the trained model and store it on the object
latent = model.get_latent_representation()
rep_key = "X_scVI_1.1"
adata.obsm[rep_key] = latent

# Neighbor graph on the scVI latent space, followed by the UMAP layout
sc.pp.neighbors(adata, use_rep=rep_key)
sc.tl.umap(adata, min_dist=0.5)

# Leiden clustering on the neighbor graph
sc.tl.leiden(adata, resolution=1.0, key_added="leiden_scVI_1.1")

## 6. Visualize data and marker genes to identify tumor cells only

In [None]:
# UMAPs colored by each QC covariate, laid out two per row
qc_covariates = ["n_genes_by_counts", "total_counts", "pct_counts_mito", "log1p_total_counts"]
sc.pl.umap(adata, color=qc_covariates, cmap="cubehelix_r", s=3, ncols=2)

In [None]:
# Options shared by the three overview UMAPs
umap_style = dict(s=10, frameon=False, save=False)

# Genotype
fig, ax = plt.subplots(figsize=(6, 6))
sc.pl.umap(adata, color="Genotype", cmap="cmo.matter", vmax="p99.99", ax=ax, **umap_style)

# Leiden clusters, labelled on the embedding
fig, ax = plt.subplots(figsize=(6, 6))
sc.pl.umap(
    adata,
    color="leiden_scVI_1.1",
    legend_loc="on data",
    legend_fontsize='xx-large',
    ax=ax,
    **umap_style,
)

# Sample of origin, with a fixed color per sample
fig, ax = plt.subplots(figsize=(6, 6))
sample_colors = {'RPMA_Allo' :'purple','RPM_Allo_New' :'gold','RPM_Allo_Old' :'teal'}
sc.pl.umap(
    adata,
    color="UnID",
    legend_loc="right margin",
    palette=sample_colors,
    title='Condition',
    ax=ax,
    **umap_style,
)

# Additional QC plots: per-cluster distributions of log1p total counts
# and mitochondrial percentage
adata.obs['cluster'] = adata.obs["leiden_scVI_1.1"].copy()
for qc_metric in ("log1p_total_counts", "pct_counts_mito"):
    fig, ax = plt.subplots(figsize=(20,6))
    sns.boxenplot(data=adata.obs, x="cluster", y=qc_metric, ax=ax)

In [None]:
# Remove non-tumor and poor qual clusters and re-cluster after saving
adata.write_h5ad("02_2024_TBO_Analyses/adata_RPM_RPMA_TBO_newandold_scVI_1.h5ad")

In [None]:
adata=sc.read_h5ad("02_2024_TBO_Analyses/adata_RPM_RPMA_TBO_newandold_scVI_1.h5ad")
# Clusters 23 here in leiden_scVI_1.1 are fibroblasts; 24=endothelial; 13, 21, 22 immune 

In [None]:
# Feature plots of non-tumor lineage markers, grouped by cell type, used to spot
# stromal, immune and endothelial clusters
more_types=["Col14a1", "Acta2","Myh11","Tagln","Mustn1", #fibroblast
              "Lpl","Lipa","Pparg","Plin2","Ear1","Fabp1","Spp1", #lipofibroblast/osteoblastic
              "Ptprc","Mertk","Marco","Mrc1","Ly75","Adgre1",
            "Itgax","Cd68","Csf1r","Mafb","Msr1","Arg1","Adgre4","Clec4a1", #Macs/Myeloid
              "Cx3cr1","Itgam","Cd14", #Monocytes
              "S100a9","S100a8","Mmp9","Csf3r","Cxcr2","Ly6g", #Neuts
              "Batf3","Xcr1","Clec9a","Ccl17","Ccl22", #DC
              "Cd3d","Cd3e","Cd3g","Cd28","Cd8a","Cd4","Foxp3", # Tcell
              "Gzma","Ncr1","Gzmb", #NK
              "Fcmr","Cd19","Fcer2a","Pax5","Cd22","Cd79b","Cd79a", #B cells
              "Slamf7", "Prdm1", #Plasma
              "Mcam","Pecam1","Icam2","Cd36","Cd93"] #Endothelial

# One UMAP per marker from the "norm" layer, four panels per row,
# color scale capped at the 99.5th percentile
sc.pl.umap(
    adata,
    color=more_types,
    use_raw=False,
    legend_loc= "on data",
    color_map="cmo.dense",
    ncols=4,s=30,
    frameon=False,
    vmax="p99.5",
    layer="norm",
    save=False
)

In [None]:
# Feature plots: Leiden clusters plus selected markers and reporter/transgene features
genes = [
    'leiden_scVI_1.1', 'Trp63', 'Sox2',
    'Mki67', 'Top2a', 'Ascl1',
    'Neurod1', 'Pou2f3', 'Cftr',
    'Ptprc', 'Pecam1', 'Acta2',
    'GFP.CDS', 'CellTag.UTR', 'Venus', 'fLuc',
]

# Values come from the "norm" layer; color scale capped at the 99.5th percentile
sc.pl.umap(
    adata,
    color=genes,
    layer="norm",
    use_raw=False,
    ncols=4,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    save=False,
)

In [None]:
# Find marker genes for each Leiden cluster (Wilcoxon test on the "norm" layer) to aid filtering
sc.tl.rank_genes_groups(adata, 'leiden_scVI_1.1', method='wilcoxon', layer='norm', use_raw=False)
# Show the first 50 ranked gene names per cluster
pd.DataFrame(adata.uns['rank_genes_groups']['names']).head(50)

In [None]:
# Identify and subset out low-quality clusters, doublets, and obvious non-tumor clusters (Ptprc+).
# Clusters 2 and 5 look low quality.
# NOTE(review): an earlier note said cluster 10 would be kept "for now", but '10' is listed
# in bad_clust below and is therefore removed -- confirm which is intended.
# Cluster 23: fibroblast; cluster 24: endothelial; clusters 13, 21, 22: immune.

bad_clust=['2','5','10','23','24','13','21', '22']

# Boolean mask selecting cells whose Leiden cluster is not in bad_clust
to_keep=(~adata.obs['leiden_scVI_1.1'].isin(bad_clust))

# Copy the retained cells into a new AnnData object
adata_2 = adata[to_keep].copy()

### From here, continue iterating through rounds of scVI modeling until no clearly low-quality clusters or non-tumor cells remain.
### Restart at "Set up and train model (scvi)" and run through the step that subsets out the "bad clusters".
### Each time clusters are removed, the model is trained again and the cells are re-clustered.

## ITERATION 2 (Final iteration for Fig. 3e,f, RPM and RPMA tumor cells only)

In [None]:
# Re-run clustering on the filtered object (adata_2)

# Highly variable gene selection via scanpy (seurat_v3 flavor, on the "counts" layer),
# computed per batch.
# Note: running with a batch_key whose batches contain few cells raises a
# "reciprocal condition number" error.
# NOTE(review): subset=False only annotates genes; no cell visible here subsets the object
# to the flagged genes before scVI is set up -- confirm that training on all genes is intended.
sc.pp.highly_variable_genes(
    adata_2,n_top_genes=10000,
    subset=False,
    layer="counts",
    flavor="seurat_v3",
    batch_key="Batch"
)


In [None]:
# Register the filtered object with scVI: "counts" layer, 'Batch' as the batch key,
# mitochondrial percentage as a continuous covariate.
scvi.model.SCVI.setup_anndata(adata_2, layer="counts", batch_key='Batch', continuous_covariate_keys=["pct_counts_mito"])


In [None]:
# Build a new scVI model on the filtered object and train it with default settings
# (this rebinds `model`, replacing the model from the first iteration)
model = scvi.model.SCVI(adata_2)
model.train()

In [None]:
# Take the latent representation from the retrained model; it is the input for the
# neighbor graph and UMAP of this iteration
latent = model.get_latent_representation()

# Store it under a key specific to iteration 2
adata_2.obsm["X_scVI_1.2"] = latent


In [None]:
# Neighbor graph on the iteration-2 scVI latent space, followed by the UMAP layout
rep_key = "X_scVI_1.2"
sc.pp.neighbors(adata_2, use_rep=rep_key)
sc.tl.umap(adata_2, min_dist=0.5)

# Leiden clustering on the neighbor graph
sc.tl.leiden(adata_2, resolution=.5, key_added="leiden_scVI_1.2")

In [None]:
# UMAPs of the filtered object colored by each QC covariate, two per row
qc_covariates = ["n_genes_by_counts", "total_counts", "pct_counts_mito", "log1p_total_counts"]
sc.pl.umap(adata_2, color=qc_covariates, cmap="cubehelix_r", s=3, ncols=2)

## Visualize data for Fig. 3 and Ext Data Fig 5 plots

In [None]:
# Visualize data (Fig. 3e,f)

# Options shared by the three overview UMAPs
umap_style = dict(s=10, frameon=False, save=False)

# Genotype
fig, ax = plt.subplots(figsize=(6, 6))
sc.pl.umap(adata_2, color="Genotype", cmap="cmo.matter", vmax="p99.99", ax=ax, **umap_style)

# Leiden clusters, legend in the right margin
fig, ax = plt.subplots(figsize=(6, 6))
sc.pl.umap(
    adata_2,
    color="leiden_scVI_1.2",
    legend_loc="right margin",
    legend_fontsize='xx-large',
    ax=ax,
    **umap_style,
)

# Sample of origin, with a fixed color per sample
fig, ax = plt.subplots(figsize=(6, 6))
sample_colors = {'RPMA_Allo' :'purple','RPM_Allo_New' :'gold','RPM_Allo_Old' :'teal'}
sc.pl.umap(
    adata_2,
    color="UnID",
    legend_loc="right margin",
    palette=sample_colors,
    title='Condition',
    ax=ax,
    **umap_style,
)

# Additional QC plots: per-cluster distributions of log1p total counts
# and mitochondrial percentage
adata_2.obs['cluster'] = adata_2.obs["leiden_scVI_1.2"].copy()
for qc_metric in ("log1p_total_counts", "pct_counts_mito"):
    fig, ax = plt.subplots(figsize=(20,6))
    sns.boxenplot(data=adata_2.obs, x="cluster", y=qc_metric, ax=ax)

In [None]:
# Feature plots: mesenchymal / stem markers
genes = ['Yap1', 'Cd44', 'Sox2', 'Hes1', 'Twist1', 'Snai1', 'Snai2', 'Vim', 'Sox9']

# NOTE(review): sc.set_figure_params(scanpy=True) may reset figure.figsize, in which case
# the rcParams assignment just before it has no effect -- confirm the intended panel size.
plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Values come from the "norm" layer; color scale capped at the 99.5th percentile
sc.pl.umap(
    adata_2,
    color=genes,
    layer="norm",
    use_raw=False,
    ncols=5,
    s=40,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    save=False,
)

In [None]:
# Save for R/Seurat import and signature analyses
adata_2.write_h5ad("02_2024_TBO_Analyses/040924_adata_RPM_RPMA_TBO_newandold_scVI_2.h5ad")

In [None]:
# Reload the iteration-2 object from the same location it was written to above
# (inside 02_2024_TBO_Analyses, relative to the working directory).
adata_2 = sc.read_h5ad("02_2024_TBO_Analyses/040924_adata_RPM_RPMA_TBO_newandold_scVI_2.h5ad")

In [None]:
# Visualize basal genes

genes = ['Krt5', 'Krt17','Krt15','Trp63','Icam1','Epas1','Aqp3','Sfn','Perp','Fxyd3','Sdc1','Gstm2','F3',
         'Adh7','Epcam']
plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Plot the depth-normalized "norm" layer, as the other marker panels in this notebook do,
# rather than whatever is stored in X; color scale capped at the 99.5th percentile.
sc.pl.umap(
    adata_2,
    color=genes,
    use_raw=False,
    legend_loc="on data",
    color_map="cmo.dense",
    ncols=5,
    frameon=False,
    vmax="p99.5",
    layer="norm",
    s=50,
    save=False,
)

In [None]:
# Visualize key markers
genes = ['Ascl1', 'Neurod1','Pou2f3','Cftr','Top2a','Mki67','Ube2c','Aspm']
plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Plot the depth-normalized "norm" layer, as the other marker panels in this notebook do,
# rather than whatever is stored in X; color scale capped at the 99.5th percentile.
sc.pl.umap(
    adata_2,
    color=genes,
    use_raw=False,
    legend_loc="on data",
    color_map="cmo.dense",
    ncols=4,
    frameon=False,
    vmax="p99.5",
    layer="norm",
    s=50,
    save=False,
)

In [None]:
# Neuroendocrine (NE) marker panel, shown next to the Leiden clusters
genes = [
    'leiden_scVI_1.2', 'Chga', 'Calca', 'Scg2', 'Pcsk1',
    'Uchl1', 'Ddc', 'Snca', 'Snap25', 'Bex2',
    'Ascl1', 'Meis2', 'Hes6', 'Insm1', 'Hoxb5',
    'Foxa2', 'Sox4', 'Rora', 'Isl1', 'Id4',
]
plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Values come from the "norm" layer; color scale capped at the 99.5th percentile
sc.pl.umap(
    adata_2,
    color=genes,
    layer="norm",
    use_raw=False,
    ncols=5,
    s=50,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    save=False,
)

In [None]:
# Neuronal transcription-factor panel, shown next to the Leiden clusters
genes = [
    'leiden_scVI_1.2', 'Ascl1', 'Neurod1', 'Nhlh1', 'Nhlh2',
    'Neurod2', 'Neurod4', 'Neurog1', 'Neurog2', 'Neurog3',
]
plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Values come from the "norm" layer; color scale capped at the 99.5th percentile
sc.pl.umap(
    adata_2,
    color=genes,
    layer="norm",
    use_raw=False,
    ncols=5,
    s=50,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    save=False,
)

In [None]:
# Tuft marker panel (followed by four proliferation genes), shown next to the Leiden clusters
genes = [
    'leiden_scVI_1.2', 'Pou2f3', 'Trpm5', 'Ascl2', 'Spib',
    'Hmx2', 'Ehf', 'Hmx3', 'Lrmp', 'Gnb3',
    'Gng13', 'Ltc4s', 'Rgs13', 'Alox5ap', 'Avil',
    'Alox5', 'Atp2a3', 'Plk2', 'Sox9', 'Gnat3',
    'Tas1r3', 'Mki67', 'Top2a', 'Aspm', 'Ube2c',
]
plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Values come from the "norm" layer; color scale capped at the 99.5th percentile
sc.pl.umap(
    adata_2,
    color=genes,
    layer="norm",
    use_raw=False,
    ncols=5,
    s=50,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    save=False,
)

In [None]:
# Stem-like / mesenchymal panel, shown next to the Leiden clusters
genes = ['leiden_scVI_1.2', 'Sox2', 'Cd44', 'Yap1', 'Wwtr1', 'Notch1', 'Notch2', 'Hes1']
plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Values come from the "norm" layer; color scale capped at the 99.5th percentile
sc.pl.umap(
    adata_2,
    color=genes,
    layer="norm",
    use_raw=False,
    ncols=5,
    s=50,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    save=False,
)

In [None]:
# Ionocyte marker panel
genes = [
    'Cftr', 'Slc12a2', 'Stap1', 'Atp6v1c2', 'Ldhb',
    'Tfcp2l1', 'Pparg', 'Moxd1', 'Rasd1', 'P2ry14',
    'Cd81', 'Esyt1', 'Asgr1', 'Smbd1', 'Ascl3',
    'Serpinf1', 'Gsta2',
]

plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Values come from the "norm" layer; color scale capped at the 99.5th percentile
sc.pl.umap(
    adata_2,
    color=genes,
    layer="norm",
    use_raw=False,
    ncols=5,
    s=50,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    save=False,
)

In [None]:
# Mature ionocyte markers (Waghray et al., Rajagopal lab)
genes = [
    'Atp6v0d1', 'Atp6v1b1', 'Atp6v0a4', 'Atp6v1c2',
    'Atp1a1', 'Cftr', 'Clcnka', 'Clcnkb',
]

plt.rcParams['figure.figsize'] = [10, 8]
sc.set_figure_params(scanpy=True, fontsize=24)

# Values come from the "norm" layer; color scale capped at the 99.5th percentile
sc.pl.umap(
    adata_2,
    color=genes,
    layer="norm",
    use_raw=False,
    ncols=5,
    s=50,
    vmax="p99.5",
    color_map="cmo.dense",
    legend_loc="on data",
    frameon=False,
    save=False,
)

In [None]:
# Dot plot for Fig. 3g: marker genes grouped by cell state, per Leiden cluster

more_types=["Trp63","Krt5","Krt15","Krt17",#basal
            "Ascl1", "Syp","Chga","Insm1","Chgb","Myt1","Sez6","Foxa2","Mycl", #NE
              "Neurod1","Nhlh1","Nhlh2","Neurod2", #neuronal
            'Pou2f3','Trpm5','Ascl2','Lrmp','Gng13','Avil','Alox5','Atp2a3', #tuft
              "Cftr", "Ascl3", 'Stap1','Pparg', #ionocyte
              'Yap1','Wwtr1','Sox2','Cd44','Hes1',"Vim", # Stem-like/#Mesenchymal/Yap
             "Top2a","Mki67", "Ube2c","Aspm", #Proliferation
            'Myc',"fLuc"] #tumor markers

# Smaller font for the dot plot than for the UMAP panels
sc.set_figure_params(scanpy=True, fontsize=17) 
# Values come from the "norm" layer; a dendrogram over the clusters is requested.
# NOTE(review): no sc.tl.dendrogram call is visible before this point -- confirm how the
# dendrogram for 'leiden_scVI_1.2' is obtained.
sc.pl.dotplot(
    adata_2,figsize=[15,3],
    var_names=more_types,
    groupby='leiden_scVI_1.2',
    use_raw=False,
    layer="norm",show=False,
    color_map="cmo.dense", var_group_rotation=35,smallest_dot=10,dendrogram=True,
    save=False) 

# Diffusion pseudotime and FA projection 

## Fig. 4 and Ext Data Fig 6

In [None]:
######### Now do scanpy scvi pseudotime and fa projection #####

In [None]:
# Reload the iteration-2 object from the location it was written to earlier in this
# notebook (inside 02_2024_TBO_Analyses, relative to the working directory).
adata_2 = sc.read_h5ad("02_2024_TBO_Analyses/040924_adata_RPM_RPMA_TBO_newandold_scVI_2.h5ad")

In [None]:
# Do trajectory/cellrank on everything..

In [None]:
# Compute the ForceAtlas2 ("fa") graph layout for all cells.
# sc.tl.draw_graph only computes coordinates (stored in .obsm['X_draw_graph_fa']); color,
# legend, layer and similar options belong to sc.pl.draw_graph and are not passed here.
sc.tl.draw_graph(adata_2, layout="fa")

In [None]:
sc.pl.draw_graph(adata_2, color=["leiden_scVI_1.2",'Genotype'], legend_loc="right margin")

In [None]:
# Pull the FA layout coordinates into a DataFrame so that outlier cells can be identified
# (this cell only extracts and displays the coordinates; nothing is removed here).
# The DataFrame gets a default integer index, not the cell barcodes.
test=pd.DataFrame(adata_2.obsm['X_draw_graph_fa'])
test

In [None]:
test.to_csv("FA_project_all.csv")

In [None]:
bc=pd.DataFrame(adata_2.obs_names)

In [None]:
bc.to_csv("FA_barcodes_all.csv")

In [None]:
# Barcodes of the cells to exclude from the FA map (rebinds `bc` to a plain list).
# NOTE(review): cells are matched by barcode alone; if the same barcode occurs in more than
# one sample, every matching cell is dropped -- confirm obs_names are unique.
bc=['TCTACATCAGTCAACT-1',
'AGGCTGCCAAACCATC-1',
'AGATAGAAGTCAGGGT-1',
'TTGCCTGAGGAGTATT-1',
'TGAGACTAGCTCCACG-1',
'TGCTGAACAAGTGGCA-1',
'GAGAGGTAGTAGAGTT-1',
'GTTAGACGTTTGGCTA-1',
'CATTTCAAGAAGCGGG-1',
'TACCTGCTCCACCTGT-1',
'TTCACGCGTACAAACA-1',
'CATTCATCAGGAGGTT-1',
'GACTTCCTCGTGCAGC-1',
'GCCATTCAGTCACGAG-1']


# Boolean mask selecting every cell whose barcode is not in the list above
to_keep=(~adata_2.obs_names.isin(bc))

# Copy the retained cells into a new AnnData object
sub = adata_2[to_keep].copy()

In [None]:
sub.obs_names_make_unique()

In [None]:
# Recompute the ForceAtlas2 ("fa") graph layout on the subset without the outlier cells.
# sc.tl.draw_graph only computes coordinates (stored in .obsm['X_draw_graph_fa']); color,
# legend, layer and similar options belong to sc.pl.draw_graph and are not passed here.
sc.tl.draw_graph(sub, layout="fa")

In [None]:
sc.pl.draw_graph(sub, color=["leiden_scVI_1.2",'Genotype'], legend_loc="right margin")

In [None]:
# Root the pseudotime at the first cell (by position) assigned to Leiden cluster '10'
sub.uns['iroot'] = np.flatnonzero(sub.obs['leiden_scVI_1.2']  == '10')[0]

# Diffusion pseudotime from that root.
# NOTE(review): no sc.tl.diffmap call is visible before this point -- confirm that the
# diffusion map sc.tl.dpt relies on is available or computed automatically.
sc.tl.dpt(sub)
# FA layout colored by cluster, pseudotime and genotype
sc.pl.draw_graph(sub, color=['leiden_scVI_1.2', 'dpt_pseudotime','Genotype'], legend_loc='right margin', color_map="viridis")

In [None]:
# Write data to add to R/Seurat analyses
sub.write_h5ad("092824_RPM_RPMA_Allo_dpt_cellrank2.h5ad")

In [None]:
sub=sc.read_h5ad("092824_RPM_RPMA_Allo_dpt_cellrank2.h5ad")

In [None]:
sc.pl.draw_graph(sub, color=['Ascl1', 'Neurod1','Pou2f3','Yap1'], legend_loc='right margin', color_map="cmo.dense", layer='norm',s=60)

In [None]:
# FA layout colored by cluster, Yap1 and genotype. Yap1 is read from the "norm" layer,
# matching the gene panels drawn on this layout in the preceding cell.
sc.pl.draw_graph(
    sub,
    color=['leiden_scVI_1.2', 'Yap1', 'Genotype'],
    legend_loc='right margin',
    color_map="cmo.dense",
    layer='norm',
)

In [None]:
# Generate cluster signatures.
# X is normalized and log-transformed in place. sc.pp.log1p records itself in
# .uns["log1p"], which is used here as a guard so that re-running this cell does not
# transform the matrix a second time.
if "log1p" not in adata_2.uns:
    sc.pp.normalize_total(adata_2)
    sc.pp.log1p(adata_2)

# Rank genes per Leiden cluster with a t-test on the transformed X
sc.tl.rank_genes_groups(adata_2, 'leiden_scVI_1.2', method='t-test')


In [None]:
sc.pl.rank_genes_groups(adata_2, n_genes=20, sharey=False)

In [None]:
# Extract the top 500 marker genes per Leiden cluster (Wilcoxon test)
sc.tl.rank_genes_groups(adata_2, 'leiden_scVI_1.2', method='wilcoxon', n_genes=500)

result = adata_2.uns['rank_genes_groups']
groups = result['names'].dtype.names

# Two columns per cluster: "<cluster>_n" holds the gene names, "<cluster>_p" the p-values
marker_columns = {}
for group in groups:
    for key in ('names', 'pvals'):
        marker_columns[f"{group}_{key[0]}"] = result[key][group]
markergenes = pd.DataFrame(marker_columns)

In [None]:
markergenes.to_csv('/hpc/home/asi16/leiden_TBOAllo_scRNAseq_100724.csv' )

# Perform CellRank on dpt

## Fig 4 and Ext Data Fig 6

In [None]:
# Perform cellrank for likelihoods
import cellrank as cr

sc.settings.set_figure_params(frameon=False, dpi=100)
cr.settings.verbosity = 2

In [None]:
import warnings

warnings.simplefilter("ignore", category=UserWarning)

In [None]:
from cellrank.kernels import PseudotimeKernel

In [None]:
pk = PseudotimeKernel(sub, time_key="dpt_pseudotime")
pk

In [None]:
pk.compute_transition_matrix()

In [None]:
# Visualize random walks on the Markov chain, starting from the basal-like cells
# (Leiden cluster "10"): 150 simulated walks with a fixed seed, drawn on the FA layout
pk.plot_random_walks(
    seed=0,
    n_sims=150,
    basis="X_draw_graph_fa",
    start_ixs={"leiden_scVI_1.2": "10"},
    legend_loc="right",
    dpi=150,
)

In [None]:
from cellrank.estimators import GPCCA
g = GPCCA(pk)
print(g)

In [None]:
g.compute_schur()
g.plot_spectrum(real_only=True)

In [None]:
g.fit(n_states=11, cluster_key="leiden_scVI_1.2")
g.plot_macrostates(which="all",basis='X_draw_graph_fa')

In [None]:
g.set_initial_states(states=["10"])

In [None]:
g.predict_terminal_states(allow_overlap=True)
g.plot_macrostates(which="terminal",basis='X_draw_graph_fa')

In [None]:
# How likely it is for each cell to reach its terminal state
g.compute_fate_probabilities()

In [None]:
g.plot_fate_probabilities(legend_loc="right",basis="X_draw_graph_fa")
                          #palette=['turquoise','#00868b','#00868b','orange','red','#00868b','blue','blue','purple','#00868b',"#00868b"])

In [None]:
cr.pl.circular_projection(sub, keys="leiden_scVI_1.2", legend_loc="right",palette=['#00868b','#00868b','purple','red','blue','#00868b','#00868b','#00868b','#00868b','blue','turquoise','orange'])

In [None]:
# Compute the driver table for lineage "3" and export it in full
# (the first 50 rows are displayed in the next cell)
ne_drivers = g.compute_lineage_drivers(lineages="3")
ne_drivers.to_csv("ne_drivers_rpm_rpma.csv")


In [None]:
ne_drivers.head(50)

In [None]:
tuft_drivers = g.compute_lineage_drivers(lineages="11")
tuft_drivers.head(50)


In [None]:
tuft_drivers.to_csv("tuft_drivers_rpm_rpma.csv")

In [None]:
n_drivers = g.compute_lineage_drivers(lineages="9")
n_drivers.head(50)

In [None]:
n_drivers.to_csv("n_drivers_rpm_rpma.csv")

In [None]:
b_drivers = g.compute_lineage_drivers(lineages="10")
b_drivers.head(50)

In [None]:
b_drivers.to_csv("basal_drivers_rpm_rpma.csv")

In [None]:
an_drivers = g.compute_lineage_drivers(lineages="2")
an_drivers.head(50)

In [None]:
an_drivers.to_csv("AN_drivers_rpm_rpma.csv")

In [None]:
drivers8 = g.compute_lineage_drivers(lineages="8")
drivers8.head(50)

In [None]:
drivers8.to_csv("C8_drivers_iono_rpm_rpma.csv")

In [None]:
TN_drivers = g.compute_lineage_drivers(lineages=["6"])
TN_drivers.head(50)


In [None]:
TN_drivers.to_csv("TN_drivers_c6_rpm_rpma.csv")


In [None]:
all_drivers = g.compute_lineage_drivers()
all_drivers.head(50)

In [None]:
all_drivers.to_csv("092824_AllDrivers_RPM_RPMA_Allo_CellRank.csv")

In [None]:
model = cr.models.GAM(sub, max_iter=6000)

In [None]:
# compute putative drivers for the tuft trajectory
tuft_drivers = g.compute_lineage_drivers(lineages="11")
genes=tuft_drivers.head(50).index

# plot heatmap
cr.pl.heatmap(
    sub,
    model=model,lineages=["11"],
    data_key="norm",
    genes=genes,
    time_key="dpt_pseudotime",
    cbar=False,
    show_all_genes=True
)

In [None]:
# plot heatmap
cr.pl.heatmap(
    sub,
    model=model,lineages=["11"],
    data_key="norm",
    genes=genes,
    time_key="dpt_pseudotime",
    show_all_genes=True, cluster_key="leiden_scVI_1.2", figsize=(12, 16))


In [None]:
# Compute putative drivers for lineage "8" and take the gene names of the first 50 rows.
# NOTE(review): the variable is called basal_drivers, but lineage "8" is exported elsewhere
# in this notebook as "C8_drivers_iono" (presumably ionocyte-like); the basal lineage
# used elsewhere is "10".
basal_drivers = g.compute_lineage_drivers(lineages="8")
genes=basal_drivers.head(50).index

# Heatmap of those genes along dpt_pseudotime for lineage "8", from the "norm" layer
cr.pl.heatmap(
    sub,
    model=model,lineages=["8"],
    data_key="norm",
    genes=genes, cbar=False,
    time_key="dpt_pseudotime",
    show_fate_probabilities=True,
    show_all_genes=True, cluster_key="leiden_scVI_1.2", figsize=(10, 15), save="c8_drivers.png")

In [None]:
# Compute putative drivers for lineage "5_1" and take the gene names of the first 50 rows.
# NOTE(review): the variable is called basal_drivers, but the lineage used here is "5_1"
# (the figure is saved as "c5smg_drivers.png"); the basal lineage used elsewhere is "10".
basal_drivers = g.compute_lineage_drivers(lineages="5_1")
genes=basal_drivers.head(50).index

# Heatmap of those genes along dpt_pseudotime for lineage "5_1", from the "norm" layer
cr.pl.heatmap(
    sub,
    model=model,lineages=["5_1"],
    data_key="norm",
    genes=genes, cbar=False,
    time_key="dpt_pseudotime",
    show_fate_probabilities=True,
    show_all_genes=True, cluster_key="leiden_scVI_1.2", figsize=(10, 15), save="c5smg_drivers.png")

In [None]:
# Compute putative drivers for lineage "5_2" and take the gene names of the first 50 rows.
# NOTE(review): the variable is called basal_drivers, but the lineage used here is "5_2"
# (the figure is saved as "c5_2_smg_drivers.png"); the basal lineage used elsewhere is "10".
basal_drivers = g.compute_lineage_drivers(lineages="5_2")
genes=basal_drivers.head(50).index

# Heatmap of those genes along dpt_pseudotime for lineage "5_2", from the "norm" layer
cr.pl.heatmap(
    sub,
    model=model,lineages=["5_2"],
    data_key="norm",
    genes=genes, cbar=False,
    time_key="dpt_pseudotime",
    show_fate_probabilities=True,
    show_all_genes=True, cluster_key="leiden_scVI_1.2", figsize=(10, 15), save="c5_2_smg_drivers.png")

In [None]:
# compute putative drivers for the basal trajectory
basal_drivers = g.compute_lineage_drivers(lineages="10")
genes=basal_drivers.head(50).index

# plot heatmap
cr.pl.heatmap(
    sub,
    model=model,lineages=["10"],
    data_key="norm",
    genes=genes, cbar=False,
    time_key="dpt_pseudotime",
    show_fate_probabilities=True,
    show_all_genes=True, cluster_key="leiden_scVI_1.2", figsize=(10, 15), save="basal_drivers.png")

In [None]:
# compute putative drivers for the tuft trajectory
tuft_drivers = g.compute_lineage_drivers(lineages="11")
genes=tuft_drivers.head(50).index

# plot heatmap
cr.pl.heatmap(
    sub,
    model=model,lineages=["11"],
    data_key="norm",
    genes=genes, cbar=False,
    time_key="dpt_pseudotime",
    show_fate_probabilities=True,
    show_all_genes=True, cluster_key="leiden_scVI_1.2", figsize=(10, 15), save="Tuft_Drivers.png")

In [None]:
model = cr.models.GAM(sub, max_iter=20000)

In [None]:
# Compute putative drivers for lineage "3" and take the gene names of the first 50 rows.
# Lineage "3" is the one exported earlier in this notebook as ne_drivers; the tuft
# lineage used elsewhere is "11".
a_drivers = g.compute_lineage_drivers(lineages="3")
genes=a_drivers.head(50).index

# Heatmap of those genes along dpt_pseudotime for lineage "3", from the "norm" layer
cr.pl.heatmap(
    sub,
    model=model,lineages=["3"],
    data_key="norm",
    genes=genes,cbar=False,
    show_fate_probabilities=True,
    time_key="dpt_pseudotime",
    show_all_genes=True, cluster_key="leiden_scVI_1.2", figsize=(10, 15),save="a_drivers.png")

In [None]:
# Compute putative drivers for lineage "2" (exported earlier in this notebook as
# "AN_drivers"; not the tuft lineage) and take the gene names of the first 200 rows
an_drivers = g.compute_lineage_drivers(lineages="2")
genes=an_drivers.head(200).index

# Heatmap of those genes along dpt_pseudotime for lineage "2", from the "norm" layer
cr.pl.heatmap(
    sub,
    model=model,lineages=["2"],
    data_key="norm",
    show_fate_probabilities=True,
    genes=genes,cbar=False,
    time_key="dpt_pseudotime",
    show_all_genes=True, cluster_key="leiden_scVI_1.2", figsize=(10, 15),save="an_drivers.png")

In [None]:
# Compute putative drivers for lineage "4" (not the tuft lineage, which is "11" elsewhere
# in this notebook) and take the gene names of the first 50 rows
n_drivers = g.compute_lineage_drivers(lineages="4")
genes=n_drivers.head(50).index

# Heatmap of those genes along dpt_pseudotime for lineage "4", from the "norm" layer
cr.pl.heatmap(
    sub,
    model=model,lineages=["4"],
    data_key="norm",
    show_fate_probabilities=True,
    genes=genes,cbar=False,
    time_key="dpt_pseudotime",
    show_all_genes=True, cluster_key="leiden_scVI_1.2", figsize=(10, 15),save="n4_drivers.png")

In [None]:
# Compute putative drivers for lineage "9" (exported earlier in this notebook as
# "n_drivers"; not the tuft lineage) and take the gene names of the first 200 rows
n_drivers = g.compute_lineage_drivers(lineages="9")
genes=n_drivers.head(200).index

# Heatmap of those genes along dpt_pseudotime for lineage "9", from the "norm" layer
cr.pl.heatmap(
    sub,
    model=model,lineages=["9"],
    data_key="norm",
    show_fate_probabilities=True,
    genes=genes,cbar=False,
    time_key="dpt_pseudotime",
    show_all_genes=True, cluster_key="leiden_scVI_1.2", figsize=(10, 15),save="n9_drivers.png")

In [None]:
# Compute putative drivers for lineage "6" (exported earlier in this notebook as
# "TN_drivers_c6"; not the tuft lineage) and take the gene names of the first 50 rows
tn_drivers = g.compute_lineage_drivers(lineages="6")
genes=tn_drivers.head(50).index

# Heatmap of those genes along dpt_pseudotime for lineage "6", from the "norm" layer
cr.pl.heatmap(
    sub,
    model=model,lineages=["6"],
    data_key="norm",
    genes=genes,cbar=False,
    show_fate_probabilities=True,
    time_key="dpt_pseudotime",
    show_all_genes=True, cluster_key="leiden_scVI_1.2", figsize=(10, 15),save="tn6_drivers.png")

In [None]:
# Compute putative drivers for lineage "0" (not the tuft lineage, which is "11" elsewhere
# in this notebook) and take the gene names of the first 50 rows.
# This rebinds tn_drivers, which another cell uses for lineage "6".
tn_drivers = g.compute_lineage_drivers(lineages="0")
genes=tn_drivers.head(50).index

# Heatmap of those genes along dpt_pseudotime for lineage "0", from the "norm" layer
cr.pl.heatmap(
    sub,
    model=model,lineages=["0"],
    data_key="norm",
    genes=genes,cbar=False,
    show_fate_probabilities=True,
    time_key="dpt_pseudotime",
    show_all_genes=True, cluster_key="leiden_scVI_1.2", figsize=(10, 15),save="tn0_drivers.png")

In [None]:
genes

## Done