# Downstream analyses healthy

Hélène Reich 05/08/2024

- EEC subclusters and DGE
- ISC,TA and Progenitors subclusters and DGE
- Paneth, Paneth-Goblet and Progenitors subclusters and DGE

In [None]:
# General
import scipy as sci
import numpy as np
import pandas as pd
import logging
import time
import pickle
from itertools import chain
import h5py
import scipy.sparse as sparse
import anndata as ad
import gc
import scipy.stats as stats
import torch

# Plotting
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.colors as mcolors
from matplotlib import rcParams
from matplotlib.pyplot import rc_context
from matplotlib import cm
import seaborn as sb

# Analysis
import scanpy as sc
import scanpy.external as sce
import scvi


In [None]:
# Warnings
import warnings
warnings.filterwarnings('ignore') #(action='once')

## setup matplotlib

In [None]:
# Settings

## Directory
# NOTE: base_dir must keep its trailing '/' because the two paths below are
# built by plain string concatenation.
base_dir = '/mnt/hdd/Notebooks/Gut_project/'
sc.settings.figdir = base_dir + 'Figures'
sc.settings.cachedir = base_dir + 'Cache'

## Scanpy settings
sc.settings.verbosity = 3
# Print the scanpy header and the versions of the loaded packages so the
# environment used for this run is recorded in the notebook output.
sc.logging.print_header()
sc.logging.print_versions()

# NOTE(review): this repeats the warnings setup of the "# Warnings" cell above;
# it is redundant when the cells are run in order.
import warnings
warnings.filterwarnings('ignore')

In [None]:
%run utils.ipynb

In [None]:
mymap = load_RdOrYl_cmap_settings(transparent=False)

### load Adata

In [None]:
adata = sc.read_h5ad('joint_diseased_healthy_with_layers_metadata_corrected_anno_updated.h5ad')

In [None]:
## add metadata
# Load the sample-level metadata sheet (read_excel_metadata comes from utils.ipynb).
metadata_df = read_excel_metadata('/mnt/hdd/data/metadata_mouse_gut.xlsx')

# Remove the rows whose kit is 'Multiome_ATAC_v1'.
atac_rows = metadata_df.index[metadata_df['kit'] == 'Multiome_ATAC_v1']
metadata_df.drop(index=atac_rows, inplace=True)
# Kept for reference (disabled): also drop the control conditions.
#metadata_df.drop(metadata_df[metadata_df['condition'].isin(['Ctr','Ctr/WT'])].index, inplace=True)

# Use the folder name as index so that metadata can be looked up per sample.
metadata_df.set_index('folder name', inplace=True)

# Columns that are not carried over to the cell-level annotation.
unused_columns = [
    'Sample Pooling - confounded with Project?',
    'date',
    'Project Name',
    'Link_id',
    'sample name',
    'Cell Count [cells/µl]',
    'Viable Cells [%]',
    'Lib. Concentration [ng/µl]',
    'Lib. Molarity [nM]',
    'Average Lib. Size [bp]',
    'cDNA Cycles',
    'Lib. Cycles',
    '10x Sample Index',
    'Sequencing Depth [reads/cell]',
    'exclusion, reason',
]
metadata_df.drop(columns=unused_columns, inplace=True)

In [None]:
# Copy every metadata column onto the cells: each cell's 'sample' value is
# looked up in the index of metadata_df.
for col in metadata_df.columns:
    try:
        per_cell_values = adata.obs['sample'].apply(
            lambda sample_id: metadata_df.at[sample_id, col]
        )
    except KeyError as err:
        # A lookup failed (e.g. a sample id that is not in the metadata index):
        # report it and leave this column out of adata.obs.
        print(f'no such key: {err} in col {col}')
    else:
        adata.obs[col] = per_cell_values

In [None]:
metadata = ['Project','kit','enriched','diet','condition','line','strain', 'phase','pretty name']

In [None]:
sc.pl.umap(adata, color=["atlas"]+ metadata, size =6, legend_fontsize=8.5, ncols = 5, wspace = 0.4,frameon=True, save='Umap_transfer_learned_superposed_metadata.png')

In [None]:
sc.pl.umap(adata, color=["Bmi1","Tert", "Hopx", "Lrig1"],size =4,legend_fontsize=8.5, layer='log_dca_counts', ncols = 4, wspace = 0.2,frameon=True,cmap=mymap, save='Umap_transfer_learned_superposed_plus4markers.png')

In [None]:
genes = ['Ctse', 'Slc26a3', 'Golm1', 'Tff2', 'Muc1', 'Dmbt1']

In [None]:
sc.pl.umap(adata, color=genes,size =4,legend_fontsize=8.5, layer='log_dca_counts', ncols = 4, wspace = 0.2,frameon=True,cmap=mymap, save='Umap_transfer_learned_superposed_Mcellmarkers.png')

In [None]:
genes = ['Gata4','Nr1i3','Onecut2', 'Creb3l3', 'Gna11', 'Arg2']

In [None]:
sc.pl.umap(adata, color=genes,size =4,legend_fontsize=8.5, layer='log_dca_counts', ncols = 10, wspace = 0.2,frameon=True,cmap=mymap, save='Umap_transfer_learned_superposed_proxmarkers.png')

In [None]:
genes = ['Osr2' ,'Bex1', 'Jund', 'Nr1h4']

In [None]:
sc.pl.umap(adata, color=genes,size =4,legend_fontsize=8.5,  layer='log_dca_counts',ncols = 10, wspace = 0.2,frameon=True,cmap=mymap, save='Umap_transfer_learned_superposed_distmarkers.png')

In [None]:
genes = ['Sis' ,'Slc2a2', 'Ada', 'Lama3']

In [None]:
sc.pl.umap(adata, color=genes,size =4,legend_fontsize=8.5, ncols = 10, layer='log_dca_counts', wspace = 0.2,frameon=True,cmap=mymap, save='Umap_transfer_learned_superposed_cryptvillusmarkers.png')

### joint analysis

In [None]:
adata.X = adata.layers['sct_logcounts']

In [None]:
sc.pl.umap(adata, color=['Ghrl', 'condition'],layer= 'log_dca_counts', size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=5, cmap = mymap, legend_fontsize=9)

### ISC to EEC

In [None]:
# Labels of 'cell_type_annotation_lv1' used to subset the stem-cell-to-EEC
# lineage (stem/TA cells, shared progenitors and the EEC subtypes).
# NOTE(review): 'TA (prox.))' ends with a doubled closing parenthesis. isin()
# silently ignores labels that do not exist, so if the annotation is actually
# 'TA (prox.)' these cells are missing from the subset - confirm against
# adata.obs['cell_type_annotation_lv1'].cat.categories.
EECs = ['ISC','TA','TA (prox.))','Goblet/EEC prog. (early)',
'K-cell (Gip+)',
'EC (mature)',
'EC (immature)',
'EEC (Peptide/immature)' ,
'L/I-cell (Glp1+/Cck+)' ,
'EEC prog. (mid)',
'EC prog. (late)',
'D-cell (Sst+)',
'EEC prog. (late/Peptide)',
'EC 2' ,
'X-cell (Ghrl+)']
# Label groups for the Paneth and progenitor sub-analyses named in the notebook header.
Paneth = ['Paneth','Paneth prog.', 'Goblet-Paneth-like', 'Goblet-Paneth-like(cycling)']
Progenitors = ['Goblet/EEC prog. (early)','Paneth prog.', 'Tuft prog.','Tuft prog. 2']

In [None]:
adata_EEC = adata[adata.obs['cell_type_annotation_lv1'].isin(EECs)].copy()

In [None]:
adata_EEC.X = adata_EEC.layers['sct_logcounts']

In [None]:
sc.pl.violin(adata_EEC,groupby='condition',keys='Ghrl',rotation=90)

In [None]:
sc.pl.umap(adata_EEC, color=['Ghrl', 'atlas'],layer= 'log_dca_counts', size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=5, cmap = mymap, legend_fontsize=9)

#### re cluster and stacked violin

In [None]:
sc.pp.neighbors(adata_EEC, use_rep='X_scarches_emb')
sc.tl.leiden(adata_EEC) #, resolution=1.5)

In [None]:
sc.tl.paga(adata_EEC, groups='leiden')

In [None]:
# Drop any cached Leiden palette so that the next plot generates colours that
# match the new clustering. pop() with a default does not fail when no palette
# has been stored yet (fresh subset, or cells run out of order).
adata_EEC.uns.pop('leiden_colors', None)

In [None]:
sc.pl.paga(adata_EEC,  fontsize=5, save = 'paga_joint_EEC_subs.png')#, fontoutline=True, threshold=0.05, max_edge_width=3, min_edge_width=0.01, node_size_scale=3,

In [None]:
sc.tl.umap(adata_EEC, init_pos='paga', min_dist=0.2)

#### mindist 0.2

In [None]:
sc.pl.umap(adata_EEC, color=['leiden', 'doublet_calls','atlas','Ghrl', 'Lgr5','Arg2','Ada','Sis'],layer= 'log_dca_counts', size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=2, cmap = mymap, legend_fontsize=9)

#### default mindist

In [None]:
sc.pl.umap(adata_EEC, color=['leiden', 'mt_frac','doublet_calls'],layer= 'log_dca_counts', size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=5, cmap = mymap, legend_fontsize=9)

#### original UMAP

In [None]:
sc.pl.umap(adata_EEC, color=['leiden'],layer= 'log_dca_counts', size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=5, cmap = mymap, legend_fontsize=9)

In [None]:
# Create a custom colormap
custom_cmap = mcolors.ListedColormap(adata_EEC.uns['leiden_colors'], name='leiden_cmap')

In [None]:
markers = ['Sox4','Neurog3','Neurod1','Ghrl','Sst','Gcg','Gal','Gip','Cck','Sct','Tac1','Tph1','Nts','Reg4']
sc.pl.stacked_violin(adata_EEC, markers, groupby='leiden', layer='sct_logcounts',cmap = mymap, row_palette = adata_EEC.uns['leiden_colors'])#, palette=adata.uns['leiden_colors'])#, dendrogram=True)

In [None]:
sc.pl.stacked_violin(adata_EEC, markers, groupby='cell_type_annotation_lv1', layer='sct_logcounts',cmap = mymap)#, palette=adata.uns['leiden_colors'])#, dendrogram=True)

In [None]:
# Marker panel as stacked violins, one row per (Leiden cluster, atlas) combination,
# drawn from the 'sct_logcounts' layer with the notebook colormap.
plot = sc.pl.stacked_violin(
    adata_EEC,
    var_names=markers,
    groupby=['leiden', 'atlas'],
    layer='sct_logcounts',
    cmap=mymap,
    return_fig=True,
)
# Add the per-group totals next to the rows, then render the figure.
plot.add_totals().show()

In [None]:
# Marker panel as stacked violins, one row per (annotated cell type, atlas) combination,
# drawn from the 'sct_logcounts' layer with the notebook colormap.
plot = sc.pl.stacked_violin(
    adata_EEC,
    var_names=markers,
    groupby=['cell_type_annotation_lv1', 'atlas'],
    layer='sct_logcounts',
    cmap=mymap,
    return_fig=True,
)
# Add the per-group totals next to the rows, then render the figure.
plot.add_totals().show()

In [None]:
# Marker panel as stacked violins, one row per (annotated cell type, 'enriched' level)
# combination, drawn from the 'sct_logcounts' layer with the notebook colormap.
plot = sc.pl.stacked_violin(
    adata_EEC,
    var_names=markers,
    groupby=['cell_type_annotation_lv1', 'enriched'],
    layer='sct_logcounts',
    cmap=mymap,
    return_fig=True,
)
# Add the per-group totals next to the rows, then render the figure.
plot.add_totals().show()

In [None]:
# Number of cells per (cell type, 'enriched' level) inside the adata_EEC subset.
cell_counts = (
    adata_EEC.obs
    .groupby(['cell_type_annotation_lv1', 'enriched'])
    .size()
    .reset_index(name='cell_count')
)

# Denominator for each row: all subset cells that share the row's 'enriched' level.
total_counts = cell_counts.groupby('enriched')['cell_count'].transform('sum')

# Share of each cell type within its 'enriched' level, in percent.
cell_counts['percentage'] = cell_counts['cell_count'].div(total_counts).mul(100)

# Grouped bar chart: one bar group per cell type, one bar per 'enriched' level.
plt.figure(figsize=(12, 6))
sb.barplot(data=cell_counts, x='cell_type_annotation_lv1', y='percentage', hue='enriched')
plt.xticks(rotation=90)
plt.xlabel('Cell Type')
plt.ylabel('Percentage of Cells (per EECs)')
plt.title('Percentage of Cells (per EECs) per Cell Type by Enriched Level')
plt.legend(title='Enriched Level')
plt.show()

In [None]:
# Number of cells per (cell type, Status) inside the adata_EEC subset.
cell_counts = (
    adata_EEC.obs
    .groupby(['cell_type_annotation_lv1', 'Status'])
    .size()
    .reset_index(name='cell_count')
)

# Denominator for each row: all subset cells that share the row's Status.
total_counts = cell_counts.groupby('Status')['cell_count'].transform('sum')

# Share of each cell type within its Status, in percent.
cell_counts['percentage'] = cell_counts['cell_count'].div(total_counts).mul(100)

# Grouped bar chart: one bar group per cell type, one bar per Status.
plt.figure(figsize=(12, 6))
sb.barplot(data=cell_counts, x='cell_type_annotation_lv1', y='percentage', hue='Status')
plt.xticks(rotation=90)
plt.xlabel('Cell Type')
plt.ylabel('Percentage of Cells (per EECs)')
plt.title('Percentage of Cells (per EECs) per Cell Type by Status')
plt.legend(title='Status')
plt.show()

In [None]:
# Numerator: cells per (cell type, 'enriched' level) inside the adata_EEC subset.
cell_counts = adata_EEC.obs.groupby(['cell_type_annotation_lv1', 'enriched']).size().reset_index(name='cell_count')
# Same table for the full dataset; it only serves to derive the denominators.
cell_counts_total = adata.obs.groupby(['cell_type_annotation_lv1', 'enriched']).size().reset_index(name='cell_count')

# Total number of cells in the full dataset per 'enriched' level.
cells_per_level = cell_counts_total.groupby('enriched')['cell_count'].sum().to_dict()

# Look the denominator up by the row's 'enriched' VALUE. The two tables have
# different rows (subset vs. full dataset), so dividing them by positional
# index is only correct when both happen to list their rows in the same order.
# astype(object) keeps map() from returning a categorical, which cannot be divided.
total_counts = cell_counts['enriched'].astype(object).map(cells_per_level)

# Calculate percentages (relative to all cells of that 'enriched' level)
cell_counts['percentage'] = (cell_counts['cell_count'] / total_counts) * 100

# Grouped bar chart: one bar group per cell type, one bar per 'enriched' level.
plt.figure(figsize=(12, 6))
sb.barplot(
    data=cell_counts,
    x='cell_type_annotation_lv1', y='percentage', hue='enriched',
)
plt.xticks(rotation=90)
plt.title('Percentage of Cells (total) per Cell Type by Enriched Level')
plt.ylabel('Percentage of Cells (total)')
plt.xlabel('Cell Type')
plt.legend(title='Enriched Level')
plt.show()

In [None]:
# Numerator: cells per (cell type, Status) inside the adata_EEC subset.
cell_counts = adata_EEC.obs.groupby(['cell_type_annotation_lv1', 'Status']).size().reset_index(name='cell_count')
# Same table for the full dataset; it only serves to derive the denominators.
cell_counts_total = adata.obs.groupby(['cell_type_annotation_lv1', 'Status']).size().reset_index(name='cell_count')

# Total number of cells in the full dataset per Status.
cells_per_status = cell_counts_total.groupby('Status')['cell_count'].sum().to_dict()

# Look the denominator up by the row's Status VALUE. The two tables have
# different rows (subset vs. full dataset), so dividing them by positional
# index is only correct when both happen to list their rows in the same order.
# astype(object) keeps map() from returning a categorical, which cannot be divided.
total_counts = cell_counts['Status'].astype(object).map(cells_per_status)

# Calculate percentages (relative to all cells of that Status)
cell_counts['percentage'] = (cell_counts['cell_count'] / total_counts) * 100

# Grouped bar chart: one bar group per cell type, one bar per Status.
plt.figure(figsize=(12, 6))
sb.barplot(
    data=cell_counts,
    x='cell_type_annotation_lv1', y='percentage', hue='Status',
)
plt.xticks(rotation=90)
plt.title('Percentage of Cells (total) per Cell Type by Status')
plt.ylabel('Percentage of Cells (total)')
plt.xlabel('Cell Type')
plt.legend(title='Status')
plt.show()

#### Diffusion Pseudotime

In [None]:
sc.tl.score_genes(adata_EEC, gene_list=['Lgr5','Olfm4','Slc12a2','Clca3b','Cps1'], score_name='ISC_score', use_raw=False)

In [None]:
sc.pl.umap(adata_EEC, color=['ISC_score'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo', title='ISC score', save = 'umap_progenitor_score_with_ISC.png')

In [None]:
sc.tl.diffmap(adata_EEC, n_comps=20)

In [None]:
sc.pl.diffmap(adata_EEC, components=['0,1','1,2','3,4','5,6','7,8','9,10','11,12','13,14','15,16','17,18','19,20'], color='ISC_score', color_map='turbo')

In [None]:
sc.pl.diffmap(adata_EEC, components=['0,1','1,2','3,4','5,6','7,8','9,10','11,12','13,14','15,16','17,18','19,20'], color='cell_type_annotation_lv1', wspace=1.8)

In [None]:
sc.pl.diffmap(adata_EEC, components=['9,1'], color=['phase','ISC_score','cell_type_annotation_lv1', 'Spdef'])

In [None]:
# Choose the DPT root: among the cells annotated as 'ISC', take the one with
# the smallest value in column 8 of the diffusion map.
stem_mask = (adata_EEC.obs['cell_type_annotation_lv1'] == 'ISC').to_numpy()
stem_positions = np.flatnonzero(stem_mask)  # row numbers of the ISC cells

# Despite its name, max_stem_id is the argmin within the ISC-only slice.
max_stem_id = np.argmin(adata_EEC.obsm['X_diffmap'][stem_mask, 8])

# Translate the position inside the slice back to a row number of adata_EEC.
root_id = stem_positions[max_stem_id]
adata_EEC.uns['iroot'] = root_id

In [None]:
import cellrank as cr
import scvelo as scv

In [None]:
scv.pl.scatter(
    adata_EEC,
    basis='diffmap',
    c=[root_id, 'phase', 'cell_type_annotation_lv1'],
    legend_loc='right',
    components=['8,0'], wspace=0.75
)

scv.pl.scatter(
    adata_EEC,
    basis='umap',
    c=[root_id, 'phase', 'cell_type_annotation_lv1','Lgr5'],
    legend_loc='right',
    components=['1, 0'], wspace=0.75
)


In [None]:
adata_EEC.obs_names[root_id]

In [None]:
sc.tl.dpt(adata_EEC, n_dcs=20)

In [None]:
adata_EEC.obs['dpt_pseudotime_g2m'] = adata_EEC.obs['dpt_pseudotime'].copy()

In [None]:
sc.pl.umap(adata_EEC, color=['dpt_pseudotime_g2m'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata_EEC, use_raw=False, keys=['dpt_pseudotime_g2m'], groupby='cell_type_annotation_lv1', rotation=90)

In [None]:
dptk = cr.kernels.PseudotimeKernel(adata_EEC, time_key='dpt_pseudotime_g2m')
dptk.compute_transition_matrix(n_jobs=-1)
dptk.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none',save = 'transition_dpt_EEC.png')
#dptk.write('adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_dpt_pseudotime.plk', write_adata=False)

In [None]:
sc.tl.embedding_density(adata, basis='umap', groupby='Status')

In [None]:
adata.obs['Status'].value_counts()


In [None]:
sc.pl.embedding_density(adata, basis='umap', key='umap_density_Status', group='diseased_Celsr1_mut')
sc.pl.embedding_density(adata, basis='umap', key='umap_density_Status', group='healthy_Dock7')
sc.pl.embedding_density(adata, basis='umap', key='umap_density_Status', group='diseased_VSG')
sc.pl.embedding_density(adata, basis='umap', key='umap_density_Status', group='diseased')

In [None]:
adata.obs['Status'].value_counts()
sc.pl.embedding_density(adata, basis='umap', key='umap_density_Status', group='healthy', frameon=True)
sc.pl.embedding_density(adata, basis='umap', key='umap_density_Status', group='diseased', frameon=True)

In [None]:
sc.pl.umap(adata,color='Igfbp4',cmap=mymap,layer='log_dca_counts', frameon=True)

In [None]:
sc.pl.umap(adata,color='Slc18a1',cmap=mymap,layer='log_dca_counts', frameon=True)

In [None]:
adata.obs['Status_strain'] = adata.obs['Status'].astype(str) + '_' + adata.obs['strain'].astype(str)

In [None]:
plot_composition(adata, y_key='cell_type_annotation_lv1', x_key='Status', x_rotation=90)

In [None]:
adata.obs['Status_strain'] = adata.obs['Status_strain'].astype('category')

In [None]:
plot_composition(adata, y_key='cell_type_annotation_lv1', x_key='Status_strain', x_rotation=90)

## only EECs

In [None]:
EECs = ['Goblet/EEC prog. (early)',
'K-cell (Gip+)',
'EC (mature)',
'EC (immature)',
'EEC (Peptide/immature)' ,
'L/I-cell (Glp1+/Cck+)' ,
'EEC prog. (mid)',
'EC prog. (late)',
'D-cell (Sst+)',
'EEC prog. (late/Peptide)',
'EC 2' ,
'X-cell (Ghrl+)']
Paneth = ['Paneth','Paneth prog.', 'Goblet-Paneth-like', 'Goblet-Paneth-like(cycling)']
Progenitors = ['Goblet/EEC prog. (early)','Paneth prog.', 'Tuft prog.','Tuft prog. 2']

### joint analysis

In [None]:
adata_EEC = adata[adata.obs['cell_type_annotation_lv1'].isin(EECs)].copy()

In [None]:
plot_composition(adata_EEC, y_key='cell_type_annotation_lv1', x_key='Status_strain', x_rotation=90)

In [None]:
plot_composition(adata_EEC, y_key='cell_type_annotation_lv1', x_key='Status', x_rotation=90)

In [None]:
adata_EEC.X = adata_EEC.layers['sct_logcounts']

In [None]:
adata

In [None]:
adata_EEC

In [None]:
sc.pl.violin(adata_EEC,groupby='Status',keys='Ghrl',rotation=90)

In [None]:
sc.pl.violin(adata_EEC,groupby='Status',keys='Gcg',rotation=90)

In [None]:
sc.pl.violin(adata_EEC,groupby='Status',keys='Sst',rotation=90)

In [None]:
sc.pl.violin(adata_EEC,groupby='condition',keys='Ghrl',rotation=90)

In [None]:
sc.pl.violin(adata_EEC,groupby='condition',keys='Sst',rotation=90)

In [None]:
sc.pl.violin(adata_EEC,groupby='condition',keys='Gcg',rotation=90)

In [None]:
sc.pl.violin(adata_EEC,groupby='condition',keys='Gip',rotation=90)

In [None]:
sc.pl.violin(adata_EEC,groupby='condition',keys='Cck',rotation=90)

In [None]:
sc.pl.umap(adata_EEC, color=['Ghrl', 'Status', 'atlas','line','doublet_calls', 'condition'],layer= 'log_dca_counts', size=5, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=2, cmap = mymap, legend_fontsize=9,save ='umaps_joint_EEC_analysis_Ghrl_metadata.png')

## re cluster and stacked violin

In [None]:
sc.pp.neighbors(adata_EEC, use_rep='X_scarches_emb')
sc.tl.leiden(adata_EEC, resolution=1.5)

In [None]:
sc.tl.paga(adata_EEC, groups='cell_type_annotation_lv1')

In [None]:
# Drop any cached Leiden palette so that the next plot generates colours that
# match the new clustering. pop() with a default does not fail when no palette
# has been stored yet (fresh subset, or cells run out of order).
adata_EEC.uns.pop('leiden_colors', None)

In [None]:
sc.pl.paga(adata_EEC, fontsize=5, save = 'paga_joint_EEC_subs_cell_type.png')#, fontoutline=True, threshold=0.05, max_edge_width=3, min_edge_width=0.01, node_size_scale=3,

In [None]:
sc.pl.paga(adata_EEC,  fontsize=5, save = 'paga_joint_EEC_subs.png')#, fontoutline=True, threshold=0.05, max_edge_width=3, min_edge_width=0.01, node_size_scale=3,

In [None]:
sc.tl.umap(adata_EEC, init_pos='paga', min_dist=0.3)

#### min_dist 0.3

In [None]:
sc.pl.umap(adata_EEC, color=['leiden','cell_type_annotation_lv1', 'doublet_calls','Ang4','atlas','Ghrl'],layer= 'log_dca_counts', size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=2, cmap = mymap, legend_fontsize=9, save='umap_joint_EECs.png')

#### mindist = 0.3 and paga based on celltype

In [None]:
sc.pl.umap(adata_EEC, color=['Spdef','Neurog3','Ghrl','cell_type_annotation_lv1', 'doublet_calls','Ang4','atlas','leiden'], layer='log_dca_counts', use_raw=False,size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, cmap=mymap,legend_fontsize=9)

#### default mindist

In [None]:
sc.pl.umap(adata_EEC, color=['leiden', 'mt_frac','doublet_calls'],layer= 'log_dca_counts', size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=5, cmap = mymap, legend_fontsize=9)

### metadata

In [None]:
adata_EEC.obs['doublet_calls'] = adata_EEC.obs['doublet_calls'].astype('category')

In [None]:
# Store an explicit palette for the 'doublet_calls' categories: 8 hex colours
# (with alpha) sampled from mymap.
# NOTE(review): np.linspace(0, 2, 8) puts half of the sample points above 1.0.
# If mymap is a matplotlib colormap, float inputs above 1 all map to the same
# "over" colour, so the last four entries would be identical - confirm whether
# np.linspace(0, 1, 8) was intended.
adata_EEC.uns['doublet_calls_colors'] = np.array([mpl.colors.to_hex(color, keep_alpha=True) for color in mymap(np.linspace(0,2,8))])

In [None]:
sc.pl.umap(adata_EEC, color=['atlas','Project','enriched','phase','kit','Status','line','strain', 'doublet_calls', 'enrichment proportion'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=5, legend_fontsize=8, color_map=mymap,wspace = 0.4, save = 'EEC_subs_joint_metadata.png', frameon= True)

In [None]:
plot_composition(adata_EEC, y_key='doublet_calls', x_key='leiden', x_rotation=90)

In [None]:
adata_EEC = adata_EEC[~adata_EEC.obs['leiden'].isin(['19','20'])].copy() #20 because apart

### Without doublets and the outlier cluster

In [None]:
sc.pl.umap(adata_EEC, color=['leiden'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, legend_fontsize=8, color_map=mymap,wspace = 0.4)

### recluster

In [None]:
sc.pp.neighbors(adata_EEC, use_rep='X_scarches_emb')
sc.tl.leiden(adata_EEC, resolution=1.5)

In [None]:
sc.tl.paga(adata_EEC, groups='cell_type_annotation_lv1')

In [None]:
# Drop any cached Leiden palette so that the next plot generates colours that
# match the new clustering. pop() with a default does not fail when no palette
# has been stored yet (fresh subset, or cells run out of order).
adata_EEC.uns.pop('leiden_colors', None)

In [None]:
sc.pl.paga(adata_EEC,  fontsize=5, save = 'paga_joint_EEC_subs_wo_19and20.png')#, fontoutline=True, threshold=0.05, max_edge_width=3, min_edge_width=0.01, node_size_scale=3,

In [None]:
sc.pl.paga(adata_EEC,  fontsize=5, save = 'paga_joint_EEC_subs_wo_15and16.png')#, fontoutline=True, threshold=0.05, max_edge_width=3, min_edge_width=0.01, node_size_scale=3,

In [None]:
sc.tl.umap(adata_EEC, init_pos='paga', min_dist=0.2)

#### mindist 0.2

In [None]:
sc.pl.umap(adata_EEC, color=['leiden','cell_type_annotation_lv1', 'doublet_calls','Neurog3','atlas','Ghrl'],layer= 'log_dca_counts', size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=2, cmap = mymap, legend_fontsize=9, save='umap_joint_EECs_wo_1920.png')

In [None]:
sc.pl.umap(adata_EEC, color=['Project','enriched','phase','kit','line','strain', 'doublet_calls', 'enrichment proportion'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, legend_fontsize=8, color_map=mymap,wspace = 0.4, save = 'EEC_subs_joint_metadata_wo_1920.png', frameon= True)

In [None]:
sc.pl.umap(adata_EEC, color=['leiden'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,legend_loc='on data', frameon= True)

## rank genes

In [None]:
sc.tl.rank_genes_groups(adata_EEC, 'leiden', method='wilcoxon', layer = 'sct_logcounts', use_raw=False)

In [None]:
adata_EEC.X= adata_EEC.layers['sct_logcounts']

In [None]:
sc.tl.dendrogram(adata_EEC, groupby='leiden')

In [None]:
sc.pl.rank_genes_groups_dotplot(adata_EEC, n_genes=5, key="rank_genes_groups", groupby="leiden")


### TFs

In [None]:
with open('TF_mouse_all.txt', 'r') as file:
    TFs = file.read().splitlines()

In [None]:
ranked_genes = adata_EEC.uns['rank_genes_groups']['names']

In [None]:
# For every cluster, collect the transcription factors found among its N
# top-ranked genes. The result maps the cluster's position in the ranking
# table (0, 1, 2, ...) to the TFs in rank order.
N = 10
tf_lookup = set(TFs)  # set membership instead of scanning the TF list per gene

differentially_expressed_tfs = {}
for n, group in enumerate(ranked_genes.dtype.names):
    # Only the first N entries can qualify, so the rest of the ranking is skipped.
    top_genes = ranked_genes[group][:N]
    # dict.fromkeys removes repeated names while keeping the rank order.
    differentially_expressed_tfs[n] = list(
        dict.fromkeys(gene for gene in top_genes if gene in tf_lookup)
    )

print(differentially_expressed_tfs)

In [None]:
# Flatten the per-cluster TF lists and remove duplicates. dict.fromkeys keeps
# the first-seen order, so the gene order in the plots below is the same on
# every run (list(set(...)) reorders strings from one session to the next).
all_tfs = list(dict.fromkeys(
    gene for genes in differentially_expressed_tfs.values() for gene in genes
))

In [None]:
np.max(adata_EEC.X)

In [None]:
#sc.pl.rank_genes_groups_dotplot(adata_EEC, var_names = all_tfs)
sc.pl.dotplot(adata_EEC, all_tfs, groupby='leiden',dendrogram=True, layer='sct_logcounts',use_raw=False, save='EEC_joint_TFs_without1920.png')

In [None]:
sc.pl.umap(adata_EEC, color=['cell_type_annotation_lv1','leiden'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,wspace=0.6, frameon= True)

In [None]:
adata_EEC

#### exclude 18

In [None]:
adata_EEC = adata_EEC[~adata_EEC.obs['leiden'].isin(['18'])].copy() #18 because apart and only 55 cells

In [None]:
adata_EEC

In [None]:
sc.pp.neighbors(adata_EEC, use_rep='X_scarches_emb')
sc.tl.leiden(adata_EEC, resolution=1.5)

In [None]:
sc.tl.paga(adata_EEC, groups='cell_type_annotation_lv1')

In [None]:
# Drop any cached Leiden palette so that the next plot generates colours that
# match the new clustering. pop() with a default does not fail when no palette
# has been stored yet (fresh subset, or cells run out of order).
adata_EEC.uns.pop('leiden_colors', None)

In [None]:
sc.pl.paga(adata_EEC,  fontsize=5, save = 'paga_joint_EEC_subs_wo_1920and18.png')#, fontoutline=True, threshold=0.05, max_edge_width=3, min_edge_width=0.01, node_size_scale=3,

In [None]:
sc.tl.umap(adata_EEC, init_pos='paga', min_dist=0.4)

#### min_dist 0.4

In [None]:
# Store an explicit palette for the 'doublet_calls' categories: 8 hex colours
# (with alpha) sampled from mymap.
# NOTE(review): np.linspace(0, 2, 8) puts half of the sample points above 1.0.
# If mymap is a matplotlib colormap, float inputs above 1 all map to the same
# "over" colour, so the last four entries would be identical - confirm whether
# np.linspace(0, 1, 8) was intended.
adata_EEC.uns['doublet_calls_colors'] = np.array([mpl.colors.to_hex(color, keep_alpha=True) for color in mymap(np.linspace(0,2,8))])

In [None]:
sc.pl.umap(adata_EEC, color=['leiden','cell_type_annotation_lv1', 'doublet_calls','Neurog3','atlas','Ghrl'],layer= 'log_dca_counts', size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=2, cmap = mymap, legend_fontsize=8.5, save='umap_joint_EECs_wo_192018.png')

In [None]:
sc.pl.umap(adata_EEC, color=['Project','enriched','phase','kit','line','strain', 'doublet_calls', 'enrichment proportion'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, legend_fontsize=8, color_map=mymap,wspace = 0.4, save = 'EEC_subs_joint_metadata_wo_192018.png', frameon= True)

## rank genes

In [None]:
sc.tl.rank_genes_groups(adata_EEC, 'leiden', method='wilcoxon', layer = 'sct_logcounts', use_raw=False)

In [None]:
adata_EEC.X= adata_EEC.layers['sct_logcounts']

In [None]:
sc.tl.dendrogram(adata_EEC, groupby='leiden')

In [None]:
sc.pl.rank_genes_groups_dotplot(adata_EEC, n_genes=5, key="rank_genes_groups", groupby="leiden")


# Fine annotation level 2 EEC

First, exclude the cells with a Goblet signature from the progenitor clusters;
then recluster and annotate the clusters at an appropriate resolution.

In [None]:
sc.tl.leiden(adata_EEC, resolution=1.5)

In [None]:
adata_EEC.X =adata_EEC.layers['log_dca_counts']

In [None]:
sc.pl.umap(adata_EEC, color=['Spdef','Neurog3','leiden'], use_raw=False,size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap)

In [None]:
sc.tl.leiden(adata_EEC, restrict_to=('leiden', ['3','1','12','13']), resolution=1.5, key_added='leiden_sub_goblet')

In [None]:
sc.pl.umap(adata_EEC, color=['Spdef','Neurog3','leiden_sub_goblet'], use_raw=False,size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap)

In [None]:

# Gene whose expression is compared across the Leiden sub-clusters; changing it
# here updates both the plotted data and the title.
gene_of_interest = 'Spdef'

with rc_context({'figure.figsize': (6, 4)}):
    # show=False keeps the figure open so that title and axis labels can be set.
    sc.pl.violin(adata_EEC, use_raw=False, keys=[gene_of_interest], groupby='leiden_sub_goblet', rotation=90, show=False)
    plt.title(f'Expression of {gene_of_interest} per Leiden cluster')
    plt.xlabel('Leiden Cluster')
    plt.ylabel('Expression Level')
    plt.show()

In [None]:
# Sub-clusters of the restricted Leiden run (parents '3', '1', '12', '13') that
# are removed as goblet-like - presumably chosen from the Spdef plots above.
# The restricted run labels its sub-clusters '<parents joined by "-">,<sub id>',
# i.e. '3-1-12-13,<k>'; the former last entry '3-11-8-13,12' used a different
# prefix and therefore never matched any cell.
Goblet_clusters = [
    f'3-1-12-13,{sub_id}'
    for sub_id in ('0', '4', '5', '7', '8', '10', '11', '12')
]
# .copy() turns the filtered view into an independent AnnData, as in the other
# subsetting cells, so the following neighbors/leiden calls do not write to a view.
adata_EEC = adata_EEC[~adata_EEC.obs['leiden_sub_goblet'].isin(Goblet_clusters)].copy()
adata_EEC

In [None]:
sc.pp.neighbors(adata_EEC, use_rep='X_scarches_emb')
sc.tl.leiden(adata_EEC, resolution=1.5)

In [None]:
sc.tl.paga(adata_EEC, groups='cell_type_annotation_lv1')

In [None]:
sc.pl.paga(adata_EEC,  fontsize=4)#, fontoutline=True, threshold=0.05, max_edge_width=3, min_edge_width=0.01, node_size_scale=3,

In [None]:
sc.tl.umap(adata_EEC, init_pos='paga', min_dist=0.4)

In [None]:
sc.pl.umap(adata_EEC, color=['atlas','Project','enriched','phase','kit','Status','line','strain', 'doublet_calls', 'enrichment proportion'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=5, legend_fontsize=8, color_map=mymap,wspace = 0.4, save = 'EEC_subs_without192018_GC_joint_metadata.png', frameon= True)

In [None]:
sc.pl.umap(adata_EEC, color=['Status','condition','cell_type_annotation_lv1','leiden'], size=10, add_outline=True,legend_fontsize=9, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,wspace=0.6, frameon=True)

In [None]:
sc.pl.umap(adata_EEC, color=['cell_type_annotation_lv1'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,wspace=0.6, save = 'EEC_subs_joint_without_GC_anno.png', legend_fontsize = 9, frameon=True)


In [None]:
sc.pl.umap(adata_EEC, color=['leiden'], size=8, add_outline=True, alpha=1, outline_width=(0.3, 0.0), legend_loc='on data', save = 'EEC_subs_joint_without_GC_leiden.png', legend_fontsize = 16, frameon=True)


In [None]:
del adata_EEC.raw
gc.collect()

In [None]:
sc.tl.rank_genes_groups(adata_EEC, 'leiden', method='wilcoxon', layer = 'sct_logcounts', use_raw=False)

# to visualize the results

sc.pl.rank_genes_groups(adata_EEC)

In [None]:
sc.tl.dendrogram(adata_EEC, groupby='leiden')

In [None]:
adata_EEC.X = adata_EEC.layers['sct_logcounts']

In [None]:
sc.pl.rank_genes_groups_dotplot(adata_EEC, n_genes=5, key="rank_genes_groups", groupby="leiden")


In [None]:
adata_EEC = adata_EEC[~adata_EEC.obs['leiden'].isin(['17'])].copy() # because apart and low quality cells

In [None]:
sc.tl.rank_genes_groups(adata_EEC, 'leiden', method='wilcoxon', layer = 'sct_logcounts', use_raw=False)

# to visualize the results

sc.pl.rank_genes_groups(adata_EEC)

In [None]:
sc.tl.dendrogram(adata_EEC, groupby='leiden')

In [None]:
adata_EEC.X = adata_EEC.layers['sct_logcounts']

In [None]:
sc.pl.rank_genes_groups_dotplot(adata_EEC, n_genes=5, key="rank_genes_groups", groupby="leiden")


In [None]:
sc.pl.umap(adata_EEC, color=['atlas','Status','Project','enriched','phase','kit','condition','line','strain', 'enrichment proportion'], size=5, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=5, legend_fontsize=8, color_map=mymap,wspace = 0.4, save = 'EEC_subs_without19201817_GC_joint_metadata.png', frameon= True)

In [None]:
sc.pl.umap(adata_EEC, color=['Status','kit','condition','cell_type_annotation_lv1','leiden'], size=2, add_outline=True,legend_fontsize=9, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,wspace=0.6, frameon=True)

### TFs

In [None]:
with open('TF_mouse_all.txt', 'r') as file:
    TFs = file.read().splitlines()

In [None]:
ranked_genes = adata_EEC.uns['rank_genes_groups']['names']

In [None]:
# For every cluster, collect the transcription factors found among its N
# top-ranked genes. The result maps the cluster's position in the ranking
# table (0, 1, 2, ...) to the TFs in rank order.
N = 10
tf_lookup = set(TFs)  # set membership instead of scanning the TF list per gene

differentially_expressed_tfs = {}
for n, group in enumerate(ranked_genes.dtype.names):
    # Only the first N entries can qualify, so the rest of the ranking is skipped.
    top_genes = ranked_genes[group][:N]
    # dict.fromkeys removes repeated names while keeping the rank order.
    differentially_expressed_tfs[n] = list(
        dict.fromkeys(gene for gene in top_genes if gene in tf_lookup)
    )

print(differentially_expressed_tfs)

In [None]:
# Flatten the per-cluster TF lists and remove duplicates. dict.fromkeys keeps
# the first-seen order, so the gene order in the plots below is the same on
# every run (list(set(...)) reorders strings from one session to the next).
all_tfs = list(dict.fromkeys(
    gene for genes in differentially_expressed_tfs.values() for gene in genes
))

In [None]:
np.max(adata_EEC.X)

In [None]:
#sc.pl.rank_genes_groups_dotplot(adata_EEC, var_names = all_tfs)
sc.pl.dotplot(adata_EEC, all_tfs, groupby='leiden',dendrogram=True, layer='sct_logcounts',use_raw=False, save='EEC_joint_TFs_without192018_GC.png')

In [None]:
sc.pl.umap(adata_EEC, color=['cell_type_annotation_lv1','leiden'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,wspace=0.6, frameon= True)

In [None]:
sc.pl.umap(adata_EEC, color=all_tfs, size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=5, cmap=mymap, layer='log_dca_counts', save= 'umap_joint_TFs_EEC_wo19201817GC.png', frameon= True)

In [None]:
sc.pl.umap(adata_EEC, color=['Ghrl','Sst','Gcg','Gip','Pyy','Cck', 'Nts','Sct','Tac1','Tph1','Npw', 'Spdef','Reg4'],layer= 'log_dca_counts',size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=5, cmap=mymap, save = 'EEC_subs_joint_hormones_expr_wo19201817GC.png', frameon= True)

In [None]:
adata_EEC.obs['leiden'].value_counts()

### Composition plots

In [None]:
sc.pl.umap(adata_EEC,color='pretty name')

In [None]:
plot_composition(adata_EEC, y_key='pretty name', x_key='leiden', x_rotation=90)

In [None]:
plot_composition(adata_EEC, y_key='Status', x_key='cell_type_annotation_lv1', x_rotation=90)

In [None]:
plot_composition(adata_EEC, y_key='condition', x_key='cell_type_annotation_lv1', x_rotation=90)

In [None]:
pd.set_option('display.max_columns', 50)

In [None]:
plot_composition(adata_EEC, y_key='atlas', x_key='cell_type_annotation_lv1', x_rotation=90)

In [None]:
plot_composition(adata_EEC, y_key='atlas', x_key='leiden', x_rotation=90)

In [None]:
plot_composition(adata_EEC, y_key='Status', x_key='leiden', x_rotation=90)

In [None]:
plot_composition(adata_EEC, y_key='condition', x_key='leiden', x_rotation=90)

In [None]:
plot_composition(adata_EEC, y_key='enriched', x_key='leiden', x_rotation=90)

In [None]:
plot_composition(adata_EEC, y_key='line', x_key='leiden', x_rotation=90)

In [None]:
plot_composition(adata_EEC, y_key='strain', x_key='leiden', x_rotation=90)

In [None]:
plot_composition(adata_EEC, y_key='enrichment proportion', x_key='leiden', x_rotation=90)

In [None]:
plot_composition(adata_EEC, y_key='Project', x_key='leiden', x_rotation=90)

In [None]:
plot_composition(adata_EEC, y_key='kit', x_key='leiden', x_rotation=90)

In [None]:
adata_EEC.obs['condition'].value_counts()

In [None]:
hormones= ['Ghrl','Sst','Gcg','Gip','Cck', 'Nts','Sct','Tac1','Tph1','Reg4']

In [None]:
for hormone in hormones:
    sc.pl.violin(adata_EEC,groupby='atlas',keys=hormone,rotation=90, show=False)

plt.show()

### Re-annotation

In [None]:
annotation_key = 'cell_type_annotation_lv1'

In [None]:
clusters_manual_dict = {'12':'Cck ++ I-cells',
                        '16':'N-cells (Nts+)',
                        '10':'Reg4 ++ ECs',
                        '5':'Tac1 ++ ECs',
                        '3':'EC3 (Glis3+)',
                        '7':'EC3 (Glis3+)',
                        '13':'EC2 (Igfbp4+)',
                        '11':'Goblet/EEC prog. (early, Hmgb2 +)'}

In [None]:
# Overwrite the annotation of the manually curated Leiden clusters.
# Cast to plain strings first so that labels which are not existing categories can be assigned.
adata_EEC.obs[annotation_key] = adata_EEC.obs[annotation_key].astype(str)
for cluster, label in clusters_manual_dict.items():
    # Single .loc assignment instead of chained indexing (obs[col][mask] = value):
    # the chained form may write to a temporary copy and does nothing under
    # pandas copy-on-write.
    adata_EEC.obs.loc[adata_EEC.obs['leiden'] == cluster, annotation_key] = label

In [None]:
# Colour palette for 'cell_type_annotation_lv1' (17 entries).
# NOTE(review): the label after each colour is the author's note on the intended cell type.
# Several labels do not match the current category names (e.g. 'EC (Tac1+)' vs 'Tac1 ++ ECs'),
# and colours are presumably matched to categories by position — confirm that the category
# order in use at plotting time agrees with this list.
adata_EEC.uns['cell_type_annotation_lv1_colors'] = [
    '#a8e6a1',  # light green        | Goblet/EEC prog. (early)
    '#f7c4eb',  # pink               | Goblet/EEC prog. (early, hmgb2)
    '#d9edf7',  # very light blue    | EEC prog. (mid)
    '#a1d4f5',  # lighter blue       | EEC prog. (late/Peptide)
    '#5cadd6',  # medium blue        | EEC (Peptide/immature)
    '#53b2ae',  # teal               | X-cell (Ghrl+)
    '#3f84cf',  # darker blue        | K-cell (Gip+)
    '#2762b8',  # dark blue          | L/I-cell (Glp1+/Cck+)
    '#1c4fa3',  # indigo             | Cck ++ I-cells
    '#27458e',  # dark indigo        | N-cells (Nts+)
    '#001f4f',  # darkest blue       | D-cell (Sst+)
    '#e1d8f5',  # light violet       | EC prog. (late)
    '#c4a7e9',  # medium violet      | EC (immature)
    '#a97edb',  # violet             | EC (Tac1+)
    '#8152b5',  # purple             | EC (Npw+/Glis3+)
    '#725dae',  # darker purple      | EC (Reg4++)
    '#df5a90',  # pink               | EC (Igfbp4+)
]


In [None]:
sc.pl.umap(adata_EEC, color=['cell_type_annotation_lv1'], size=8, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,wspace=0., legend_fontsize = 9, frameon=True)

In [None]:
adata_EEC.obs[annotation_key].cat.categories

In [None]:
adata_EEC.obs[annotation_key].value_counts()

In [None]:
adata_EEC.obs[annotation_key] = adata_EEC.obs[annotation_key].cat.reorder_categories([
'Goblet/EEC prog. (early)','Goblet/EEC prog. (early, Hmgb2 +)', 'EEC prog. (mid)', 'EEC prog. (late/Peptide)', 'EEC (Peptide/immature)', 
'X-cell (Ghrl+)',  'K-cell (Gip+)', 'L/I-cell (Glp1+/Cck+)','Cck ++ I-cells','N-cells (Nts+)', 'D-cell (Sst+)',
'EC prog. (late)', 'EC (immature)','EC3 (Glis3+)','Reg4 ++ ECs', 'Tac1 ++ ECs', 'EC2 (Igfbp4+)'])

In [None]:
sc.tl.umap(adata_EEC,n_components=3,min_dist=0.3)

In [None]:
adata_EEC.obsm['X_umap']

In [None]:
sc.pl.umap(adata_EEC, color=['leiden'], size=2, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,wspace=0.6, projection='3d', legend_fontsize = 8, frameon=True)

In [None]:
sc.pl.umap(adata_EEC, color=['cell_type_annotation_lv1'], size=1, add_outline=True, alpha=1, outline_width=(0.3, 0.0), cmap=mymap,wspace=0.6, projection='3d', legend_fontsize = 8)

In [None]:
sc.tl.paga(adata_EEC,groups='cell_type_annotation_lv1')

In [None]:
sc.pl.paga(adata_EEC, fontsize=4)

In [None]:
sc.tl.umap(adata_EEC,init_pos='paga',min_dist=0.3)

In [None]:
sc.pl.umap(adata_EEC, color=['cell_type_annotation_lv1'], size=5, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,wspace=0.6, save = 'EEC_subs_joint_without_GC_anno2.png', legend_fontsize = 9, frameon=True)

#### without recalculation

In [None]:
sc.pl.umap(adata_EEC, color=['leiden'], size=8, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,wspace=0., legend_fontsize = 9, frameon=True)

In [None]:
sc.pl.umap(adata_EEC, color=['cell_type_annotation_lv1'], size=8, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,wspace=0., legend_fontsize = 9, frameon=True)

In [None]:
sc.pl.umap(adata_EEC, color=['cell_type_annotation_lv1'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,wspace=0.6, save = 'EEC_subs_joint_without_GC_anno2.png', legend_fontsize = 9, frameon=True)

In [None]:
with rc_context({'figure.figsize':(10,4)}):
    sc.pl.violin(adata, keys=['Xist'], groupby='pretty name', rotation=90)

## stacked violin plots

In [None]:
del adata_EEC.uns['leiden_colors']

In [None]:
sc.pl.umap(adata_EEC, color=['leiden'], size=10, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=3, cmap=mymap,wspace=0.6, frameon= True)

In [None]:
# Create a custom colormap
custom_cmap = mcolors.ListedColormap(adata_EEC.uns['leiden_colors'], name='leiden_cmap')

In [None]:
markers = ['Sox4','Hmgb2','Spdef','Dll1','Neurog3','Neurod1','Ghrl','Sst','Gcg','Gip','Cck','Nts','Sct','Isl1','Tac1','Tph1','Glis3','Reg4','Igfbp4', 'Slc18a1','Lmx1a']
sc.pl.stacked_violin(adata_EEC, markers, groupby='cell_type_annotation_lv1', layer='sct_logcounts',cmap = mymap,save='stacked_violin_new_anno_markers.png')#, palette=adata.uns['leiden_colors'])#, dendrogram=True)

In [None]:
markers = ['Sox4','Hmgb2','Spdef','Dll1','Neurog3','Neurod1','Ghrl','Sst','Gcg','Gip','Cck','Nts','Sct','Tac1','Tph1','Glis3','Reg4','Igfbp4']
sc.pl.stacked_violin(adata_EEC, markers, groupby='leiden', layer='sct_logcounts',cmap = mymap, row_palette = adata_EEC.uns['leiden_colors'])#, palette=adata.uns['leiden_colors'])#, dendrogram=True)

In [None]:
markers = ['Sox4','Hmgb2','Spdef','Dll1','Neurog3','Neurod1','Ghrl','Sst','Gcg','Gip','Cck','Nts','Sct','Tac1','Tph1','Glis3','Reg4','Igfbp4']
sc.pl.stacked_violin(adata_EEC, markers, groupby='leiden', layer='sct_logcounts',cmap = mymap, row_palette = adata_EEC.uns['leiden_colors'])#, palette=adata.uns['leiden_colors'])#, dendrogram=True)

In [None]:
sc.pl.stacked_violin(adata_EEC, markers, groupby='cell_type_annotation_lv1', layer='sct_logcounts',cmap = mymap)#, palette=adata.uns['leiden_colors'])#, dendrogram=True)

In [None]:
plot=sc.pl.stacked_violin(
    adata_EEC,
    markers,
    groupby=['leiden', 'atlas'],  # Group by both 'leiden' clusters and 'atlas' categories
    layer='sct_logcounts',
    cmap=mymap,  # Use a red colormap
 return_fig=True)
plot.add_totals().show()

In [None]:
plot = sc.pl.stacked_violin(
    adata_EEC,
    markers,
    groupby=['cell_type_annotation_lv1', 'atlas'],  # Group by both 'leiden' clusters and 'atlas' categories
    layer='sct_logcounts',
    cmap=mymap,  # Use a red colormap
return_fig=True)
plot.add_totals().show()

In [None]:
plot = sc.pl.stacked_violin(
    adata_EEC,
    markers,
    groupby=['cell_type_annotation_lv1', 'enriched'],  # Group by both 'leiden' clusters and 'atlas' categories
    layer='sct_logcounts',
    cmap=mymap,  # Use a red colormap
return_fig=True)
plot.add_totals().show()

### statistical testing

In [None]:
# Reset .X to the raw counts. Use a copy: without it .X and layers['raw_counts'] are the
# same object, so the in-place normalize_total / log1p applied to .X afterwards can
# overwrite the raw-count layer (which is read again later to compute library sizes).
adata_EEC.X = adata_EEC.layers['raw_counts'].copy()

In [None]:
sc.pp.normalize_total(adata_EEC, target_sum=1e6)
sc.pp.log1p(adata_EEC)
sc.pp.pca(adata_EEC)

In [None]:
adata_EEC.obs["lib_size"] = np.sum(adata_EEC.layers["raw_counts"], axis=1)
adata_EEC.obs["log_lib_size"] = np.log(adata_EEC.obs["lib_size"])

In [None]:
sc.pl.pca(adata_EEC, color=adata_EEC.obs, ncols=1, size=10)

### Diffusion Pseudotime

In [None]:
sc.tl.score_genes(adata_EEC, gene_list=['Lgr5','Olfm4','Slc12a2','Clca3b','Cps1','Spdef','Neurog3','Sox4'], score_name='ISC_score', use_raw=False)

In [None]:
sc.pl.umap(adata_EEC, color=['ISC_score', 'phase'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo', title='ISC score', save = 'umap_joint_EEC_progenitor_score_phase.png')

In [None]:
sc.tl.diffmap(adata_EEC, n_comps=20)

In [None]:
sc.pl.diffmap(adata_EEC, components=['0,1','1,2','3,4','5,6','7,8','9,10','11,12','13,14','15,16','17,18','19,20'], color='ISC_score', color_map='turbo')

In [None]:
sc.pl.diffmap(adata_EEC, components=['0,1','1,2','3,4','5,6','7,8','9,10','11,12','13,14','15,16','17,18','19,20'], color='cell_type_annotation_lv1', wspace=1.8)

In [None]:
sc.pl.diffmap(adata_EEC, components=['11,1'], color=['phase','ISC_score','cell_type_annotation_lv1', 'Spdef'])

In [None]:
# Choose the root cell for pseudotime: among the 'Goblet/EEC prog. (early)' cells,
# take the one with the largest value in column 10 of the diffusion map.
stem_mask = np.isin(
    adata_EEC.obs['cell_type_annotation_lv1'],
    'Goblet/EEC prog. (early)',
)
# Position of that cell within the masked subset ...
max_stem_id = np.argmax(adata_EEC.obsm['X_diffmap'][stem_mask, 10])
# ... mapped back to its row index in the full object.
root_id = np.flatnonzero(stem_mask)[max_stem_id]
adata_EEC.uns['iroot'] = root_id

In [None]:
import cellrank as cr
import scvelo as scv

In [None]:
scv.pl.scatter(
    adata_EEC,
    basis='diffmap',
    c=[root_id, 'phase', 'cell_type_annotation_lv1'],
    legend_loc='right',
    components=['10,0'], wspace=0.75
)

scv.pl.scatter(
    adata_EEC,
    basis='umap',
    c=[root_id, 'phase', 'cell_type_annotation_lv1'],
    legend_loc='right',
    components=['1, 0'], wspace=0.75
)


In [None]:
adata_EEC.obs_names[root_id]

In [None]:
sc.tl.dpt(adata_EEC, n_dcs=20)

In [None]:
sc.pl.umap(adata_EEC, color=['dpt_pseudotime'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata_EEC, use_raw=False, keys=['dpt_pseudotime'], groupby='leiden', rotation=90)

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata_EEC, use_raw=False, keys=['dpt_pseudotime'], groupby='cell_type_annotation_lv1', rotation=90)

In [None]:
dptk = cr.kernels.PseudotimeKernel(adata_EEC, time_key='dpt_pseudotime')
dptk.compute_transition_matrix(n_jobs=-1)
dptk.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none',save = 'transition_dpt_EEC_only_joint.png')
#dptk.write('adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_dpt_pseudotime.plk', write_adata=False)

In [None]:
dptk.plot_projection(color='leiden', recompute=True, legend_loc='none',save = 'transition_dpt_EEC_only_joint_leiden.png')


In [None]:
adata_EEC.obs.drop(['sample number Minas'],axis=1, inplace=True)

#### save object

In [None]:
adata_EEC.write('adata_EEC_joint_dpt_filtered.h5ad')

In [None]:
adata_EEC= sc.read_h5ad('adata_EEC_joint_dpt_filtered.h5ad')

In [None]:
adata_EEC_filt = adata_EEC.copy()

### Diffusion Pseudotime with all GC/EEC prog.

In [None]:
sc.tl.score_genes(adata_EEC, gene_list=['Lgr5','Olfm4','Slc12a2','Clca3b','Cps1','Spdef','Neurog3','Sox4'], score_name='ISC_score', use_raw=False)

In [None]:
sc.pl.umap(adata_EEC, color=['ISC_score'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo', title='ISC score', save = 'umap_progenitor_score.png')

In [None]:
sc.tl.diffmap(adata_EEC, n_comps=20)

In [None]:
sc.pl.diffmap(adata_EEC, components=['0,1','1,2','3,4','5,6','7,8','9,10','11,12','13,14','15,16','17,18','19,20'], color='ISC_score', color_map='turbo')

In [None]:
sc.pl.diffmap(adata_EEC, components=['0,1','1,2','3,4','5,6','7,8','9,10','11,12','13,14','15,16','17,18','19,20'], color='cell_type_annotation_lv1', wspace=1.8)

In [None]:
sc.pl.diffmap(adata_EEC, components=['11,1'], color=['phase','ISC_score','cell_type_annotation_lv1', 'Spdef'])

In [None]:
# Choose the root cell for pseudotime: among the 'Goblet/EEC prog. (early)' cells,
# take the one with the smallest value in column 10 of the diffusion map.
stem_mask = np.isin(
    adata_EEC.obs['cell_type_annotation_lv1'],
    'Goblet/EEC prog. (early)',
)
# NOTE: despite its name, `max_stem_id` holds the argmin position within the masked subset.
max_stem_id = np.argmin(adata_EEC.obsm['X_diffmap'][stem_mask, 10])
# Map the subset position back to the row index in the full object.
root_id = np.flatnonzero(stem_mask)[max_stem_id]
adata_EEC.uns['iroot'] = root_id

In [None]:
import cellrank as cr
import scvelo as scv

In [None]:
scv.pl.scatter(
    adata_EEC,
    basis='diffmap',
    c=[root_id, 'phase', 'cell_type_annotation_lv1'],
    legend_loc='right',
    components=['10,0'], wspace=0.75
)

scv.pl.scatter(
    adata_EEC,
    basis='umap',
    c=[root_id, 'phase', 'cell_type_annotation_lv1'],
    legend_loc='right',
    components=['1, 0'], wspace=0.75
)


In [None]:
adata_EEC.obs_names[root_id]

In [None]:
sc.tl.dpt(adata_EEC, n_dcs=20)

In [None]:
adata_EEC.obs['dpt_pseudotime_g2m'] = adata_EEC.obs['dpt_pseudotime'].copy()

In [None]:
sc.pl.umap(adata_EEC, color=['dpt_pseudotime_g2m'], size=7, add_outline=True, alpha=1, outline_width=(0.3, 0.0), ncols=4, color_map='turbo')

In [None]:
with rc_context({'figure.figsize': (6, 4)}):
    sc.pl.violin(adata_EEC, use_raw=False, keys=['dpt_pseudotime_g2m'], groupby='cell_type_annotation_lv1', rotation=90)

In [None]:
dptk = cr.kernels.PseudotimeKernel(adata_EEC, time_key='dpt_pseudotime_g2m')
dptk.compute_transition_matrix(n_jobs=-1)
dptk.plot_projection(color='cell_type_annotation_lv1', recompute=True, legend_loc='none',save = 'transition_dpt_EEC_only.png')
#dptk.write('adata_markedDoublets_mergedPeaks_normalized_initialAnno_rmDoublets_integrated_imputed_annotated_crKernel_dpt_pseudotime.plk', write_adata=False)