CRUCIAL NOTE!
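This notebook writes its results back to the same `.h5ad` files it reads from the previous script, to save disk space. The existing content is preserved and the new UMAP embeddings are added to `.obsm`, but be aware that `obs['leiden']` is replaced by a copy of `obs['cell_type']` and that the input files are modified in place.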

In [None]:
import scanpy as sc
import anndata as ad
import matplotlib.pyplot as plt

In [None]:
# Verbosity 3 makes scanpy log hints in addition to errors, warnings and info.
sc.settings.verbosity = 3
# Resolution (dots per inch) of rendered figures.
fig_res = 300
# `frameon` is a boolean flag, so pass False rather than the integer 0.
sc.set_figure_params(dpi=fig_res, frameon=False)

In [None]:
# Directory holding the processed external datasets. The trailing slash matters
# because file names are appended to it with plain string concatenation.
path = '/home/achmed/cache/external_datasets/'

In [None]:
# Load the three processed AnnData objects (integrated, healthy and IM
# epithelium) in one pass; the file names are appended to `path`.
epithelium, healthy_epithelium, IM_epithelium = (
    sc.read(path + file_name)
    for file_name in (
        'external_epithelium_proc.h5ad',
        'external_healthy_epithelium_proc.h5ad',
        'external_IM_epithelium_proc.h5ad',
    )
)

In [None]:
# Evaluate the object's string form so the notebook displays a summary of the
# integrated epithelium object.
str(epithelium)

# Calculating all the things

In [None]:
# Mirror the cell-type annotation into obs['leiden'] for every dataset.
# sc.tl.paga is later called without a `groups` argument, so it needs a
# 'leiden' column to group the cells by. Any existing 'leiden' column is replaced.
for dataset in (epithelium, healthy_epithelium, IM_epithelium):
    dataset.obs['leiden'] = dataset.obs['cell_type'].copy()

## Integrated epithelium

In [None]:
# Parameter sweep for the integrated epithelium: for every neighbourhood size,
# build a kNN graph and a PAGA layout, then compute one PAGA-initialised UMAP
# per min_dist value. Each embedding is stored in `epithelium.obsm` under
# 'X_umap_neighbors_<n_neighbors>_min_dist_<min_dist>md'.
# NOTE: despite its name, `n_components_list` holds n_neighbors values; the
# name is kept because the plotting cell iterates over it.
n_components_list = [10, 15, 30, 50, 100]
min_dist_list = [0.1, 0.3, 0.5]

for n_neighbors in n_components_list:
    # Work on a throwaway copy so the neighbour graph, PAGA results and the
    # 'X_umap' slot written by scanpy never touch the original object.
    adata_copy = epithelium.copy()
    sc.pp.neighbors(adata_copy, n_neighbors=n_neighbors, use_rep='X_pca_harmony')
    # PAGA is called without `groups`, so it relies on obs['leiden'].
    sc.tl.paga(adata_copy)
    # Plotting PAGA stores the node positions that init_pos='paga' starts from.
    sc.pl.paga(adata_copy)

    for min_dist in min_dist_list:
        # The neighbourhood size is already baked into the graph built above.
        # `n_components` of sc.tl.umap is the embedding dimensionality, not a
        # neighbour count, so it is left at its default (2) rather than being
        # fed the neighbour count.
        sc.tl.umap(adata_copy, min_dist=min_dist, init_pos='paga')
        # Copy the result out before the next sc.tl.umap call overwrites 'X_umap'.
        obsm_name = f'X_umap_neighbors_{n_neighbors}_min_dist_{min_dist}md'
        epithelium.obsm[obsm_name] = adata_copy.obsm['X_umap'].copy()


In [None]:
# Grid of all stored UMAPs for the integrated epithelium: one row per
# neighbourhood size and one column per min_dist value.
n_rows, n_cols = len(n_components_list), len(min_dist_list)
# Size the grid from both lists (5 x 4 inches per panel) instead of assuming
# three columns; squeeze=False keeps `axes` two-dimensional even when a list
# has a single entry, so axes[i, j] always works.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 4 * n_rows), squeeze=False)

for i, n_neighbors in enumerate(n_components_list):
    for j, min_dist in enumerate(min_dist_list):
        # Same key that the sweep cell used when storing the embedding.
        basis = f'X_umap_neighbors_{n_neighbors}_min_dist_{min_dist}md'
        ax = axes[i, j]
        sc.pl.embedding(epithelium, basis=basis, color='cell_type', ax=ax, show=False)
        ax.set_title(basis)

plt.tight_layout()
plt.show()


Judging by the grid above, I think the best embedding is the one with 30 neighbors and a minimum distance of 0.5 (stored as `X_umap_neighbors_30_min_dist_0.5md`).

In [None]:
# t-SNE of the integrated epithelium, coloured by cell type.
# NOTE(review): no t-SNE is computed in this notebook; presumably 'X_tsne'
# was stored by the upstream script. Confirm.
sc.pl.tsne(epithelium, color='cell_type')

In [None]:
# Saves to the same file the object was read from, replacing it on disk.
epithelium.write_h5ad(path + 'external_epithelium_proc.h5ad')

In [None]:
# Colour a stored embedding of the integrated epithelium by five genes and by
# 'Detailed_Cell_Type'; use_raw=False takes expression values from .X, not .raw.
# NOTE(review): this basis key does not follow the
# 'X_umap_neighbors_<n>_min_dist_<d>md' naming used by the sweep in this
# notebook; presumably it was stored by an upstream script. Confirm it exists.
sc.pl.embedding(
    epithelium,
    basis='X_umap_MinDist_0.3_N_Neighbors_15',
    color=['STMN1', 'LGR5', 'OLFM4', 'MKI67', 'SMOC2', 'Detailed_Cell_Type'],
    use_raw=False,
)

## Healthy epithelium

In [None]:
# Parameter sweep for the healthy epithelium: for every neighbourhood size,
# build a kNN graph and a PAGA layout, then compute one PAGA-initialised UMAP
# per min_dist value. Each embedding is stored in `healthy_epithelium.obsm`
# under 'X_umap_neighbors_<n_neighbors>_min_dist_<min_dist>md'.
# NOTE: despite its name, `n_components_list` holds n_neighbors values; the
# name is kept because the plotting cell iterates over it.
n_components_list = [10, 15, 30, 50, 100]
min_dist_list = [0.1, 0.3, 0.5]

for n_neighbors in n_components_list:
    # Work on a throwaway copy so the neighbour graph, PAGA results and the
    # 'X_umap' slot written by scanpy never touch the original object.
    adata_copy = healthy_epithelium.copy()
    sc.pp.neighbors(adata_copy, n_neighbors=n_neighbors, use_rep='X_pca_harmony')
    # PAGA is called without `groups`, so it relies on obs['leiden'].
    sc.tl.paga(adata_copy)
    # Plotting PAGA stores the node positions that init_pos='paga' starts from.
    sc.pl.paga(adata_copy)

    for min_dist in min_dist_list:
        # The neighbourhood size is already baked into the graph built above.
        # `n_components` of sc.tl.umap is the embedding dimensionality, not a
        # neighbour count, so it is left at its default (2) rather than being
        # fed the neighbour count.
        sc.tl.umap(adata_copy, min_dist=min_dist, init_pos='paga')
        # Copy the result out before the next sc.tl.umap call overwrites 'X_umap'.
        obsm_name = f'X_umap_neighbors_{n_neighbors}_min_dist_{min_dist}md'
        healthy_epithelium.obsm[obsm_name] = adata_copy.obsm['X_umap'].copy()


In [None]:
# Grid of all stored UMAPs for the healthy epithelium: one row per
# neighbourhood size and one column per min_dist value.
n_rows, n_cols = len(n_components_list), len(min_dist_list)
# Size the grid from both lists (5 x 4 inches per panel) instead of assuming
# three columns; squeeze=False keeps `axes` two-dimensional even when a list
# has a single entry, so axes[i, j] always works.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 4 * n_rows), squeeze=False)

for i, n_neighbors in enumerate(n_components_list):
    for j, min_dist in enumerate(min_dist_list):
        # Same key that the sweep cell used when storing the embedding.
        basis = f'X_umap_neighbors_{n_neighbors}_min_dist_{min_dist}md'
        ax = axes[i, j]
        sc.pl.embedding(healthy_epithelium, basis=basis, color='cell_type', ax=ax, show=False)
        ax.set_title(basis)

plt.tight_layout()
plt.show()


Judging by the grid above, I think the best embedding is the one with 30 neighbors and a minimum distance of 0.5 (stored as `X_umap_neighbors_30_min_dist_0.5md`).

In [None]:
# t-SNE of the healthy epithelium, coloured by cell type.
# NOTE(review): no t-SNE is computed in this notebook; presumably 'X_tsne'
# was stored by the upstream script. Confirm.
sc.pl.tsne(healthy_epithelium, color='cell_type')

In [None]:
# Saves to the same file the object was read from, replacing it on disk.
healthy_epithelium.write_h5ad(path + 'external_healthy_epithelium_proc.h5ad')

In [None]:
# Colour a stored embedding of the healthy epithelium by five genes and by
# 'Detailed_Cell_Type'; use_raw=False takes expression values from .X, not .raw.
# NOTE(review): this basis key does not follow the
# 'X_umap_neighbors_<n>_min_dist_<d>md' naming used by the sweep in this
# notebook; presumably it was stored by an upstream script. Confirm it exists.
sc.pl.embedding(
    healthy_epithelium,
    basis='X_umap_MinDist_0.3_N_Neighbors_15',
    color=['STMN1', 'LGR5', 'OLFM4', 'MKI67', 'SMOC2', 'Detailed_Cell_Type'],
    use_raw=False,
)

## Now the IM epithelium

In [None]:
# Parameter sweep for the IM epithelium: for every neighbourhood size, build a
# kNN graph and a PAGA layout, then compute one PAGA-initialised UMAP per
# min_dist value. Each embedding is stored in `IM_epithelium.obsm` under
# 'X_umap_neighbors_<n_neighbors>_min_dist_<min_dist>md'.
# NOTE: despite its name, `n_components_list` holds n_neighbors values; the
# name is kept because the plotting cell iterates over it.
n_components_list = [10, 15, 30, 50, 100]
min_dist_list = [0.1, 0.3, 0.5]

for n_neighbors in n_components_list:
    # Work on a throwaway copy so the neighbour graph, PAGA results and the
    # 'X_umap' slot written by scanpy never touch the original object.
    adata_copy = IM_epithelium.copy()
    sc.pp.neighbors(adata_copy, n_neighbors=n_neighbors, use_rep='X_pca_harmony')
    # PAGA is called without `groups`, so it relies on obs['leiden'].
    sc.tl.paga(adata_copy)
    # Plotting PAGA stores the node positions that init_pos='paga' starts from.
    sc.pl.paga(adata_copy)

    for min_dist in min_dist_list:
        # The neighbourhood size is already baked into the graph built above.
        # `n_components` of sc.tl.umap is the embedding dimensionality, not a
        # neighbour count, so it is left at its default (2) rather than being
        # fed the neighbour count.
        sc.tl.umap(adata_copy, min_dist=min_dist, init_pos='paga')
        # Copy the result out before the next sc.tl.umap call overwrites 'X_umap'.
        obsm_name = f'X_umap_neighbors_{n_neighbors}_min_dist_{min_dist}md'
        IM_epithelium.obsm[obsm_name] = adata_copy.obsm['X_umap'].copy()


In [None]:
# Grid of all stored UMAPs for the IM epithelium: one row per neighbourhood
# size and one column per min_dist value.
n_rows, n_cols = len(n_components_list), len(min_dist_list)
# Size the grid from both lists (5 x 4 inches per panel) instead of assuming
# three columns; squeeze=False keeps `axes` two-dimensional even when a list
# has a single entry, so axes[i, j] always works.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 4 * n_rows), squeeze=False)

for i, n_neighbors in enumerate(n_components_list):
    for j, min_dist in enumerate(min_dist_list):
        # Same key that the sweep cell used when storing the embedding.
        basis = f'X_umap_neighbors_{n_neighbors}_min_dist_{min_dist}md'
        ax = axes[i, j]
        sc.pl.embedding(IM_epithelium, basis=basis, color='cell_type', ax=ax, show=False)
        ax.set_title(basis)

plt.tight_layout()
plt.show()


Judging by the grid above, I think the best embedding is the one with 30 neighbors and a minimum distance of 0.5 (stored as `X_umap_neighbors_30_min_dist_0.5md`).

In [None]:
# t-SNE of the IM epithelium, coloured by cell type.
# NOTE(review): no t-SNE is computed in this notebook; presumably 'X_tsne'
# was stored by the upstream script. Confirm.
sc.pl.tsne(IM_epithelium, color='cell_type')

In [None]:
# Saves to the same file the object was read from, replacing it on disk.
IM_epithelium.write_h5ad(path + 'external_IM_epithelium_proc.h5ad')

In [None]:
# Colour a stored embedding of the IM epithelium by five genes and by
# 'Detailed_Cell_Type'; use_raw=False takes expression values from .X, not .raw.
# NOTE(review): this basis key does not follow the
# 'X_umap_neighbors_<n>_min_dist_<d>md' naming used by the sweep in this
# notebook; presumably it was stored by an upstream script. Confirm it exists.
sc.pl.embedding(
    IM_epithelium,
    basis='X_umap_MinDist_0.3_N_Neighbors_15',
    color=['STMN1', 'LGR5', 'OLFM4', 'MKI67', 'SMOC2', 'Detailed_Cell_Type'],
    use_raw=False,
)