### Notebook for the of Fawkner-Corbett_2021 Visium data with `squidpy`

- **Developed by:** Anna Maguza
- **Affilation:** Faculty of Medicine, Würzburg University
- **Created date:** 22nd August 2024
- **Last modified date:** 13th October 2024

#### Load packages

In [66]:
import anndata as ad
import scanpy as sc
import squidpy as sq
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy.stats import rankdata
import os

#### Set up cells

In [67]:
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.settings.set_figure_params(dpi = 160, color_map = 'RdPu', dpi_save = 180, vector_friendly = True, format = 'svg')

-----
anndata     0.10.5.post1
scanpy      1.9.8
-----
PIL                         10.2.0
anyio                       NA
arrow                       1.3.0
asciitree                   NA
asttokens                   NA
attr                        23.2.0
attrs                       23.2.0
babel                       2.14.0
backcall                    0.2.0
beta_ufunc                  NA
binom_ufunc                 NA
brotli                      1.1.0
certifi                     2023.11.17
cffi                        1.16.0
charset_normalizer          3.3.2
cloudpickle                 3.0.0
comm                        0.2.1
cycler                      0.12.1
cython_runtime              NA
dask                        2024.1.1
dask_image                  2023.08.1
datashader                  0.16.0
datatree                    0.0.13
dateutil                    2.8.2
debugpy                     1.8.0
decorator                   5.1.1
defusedxml                  0.7.1
docrep                   

+ Define functions

In [68]:
def get_top_n_cell_types(row, n=1):
    sorted_types = row.sort_values(ascending=False)
    top_n = sorted_types.head(n)
    return ', '.join(top_n.index.str.replace('meanscell_abundance_w_sf_', ''))

In [69]:
def create_expanded_anndata(adata, n_cells_per_spot=10):
    # Get cell type abundances
    abundances = adata.obsm['means_cell_abundance_w_sf']
    
    # Create a list to store new observations
    new_obs = []
    new_obs_names = []
    
    for idx, row in abundances.iterrows():
        # Normalize abundances to sum to 1
        normalized_abundances = row / row.sum()
        
        # Calculate the number of cells for each type
        cell_counts = np.random.multinomial(n_cells_per_spot, normalized_abundances)
        
        for cell_type, count in zip(row.index, cell_counts):
            for i in range(count):
                new_obs.append({
                    'original_barcode': idx,
                    'cell_type': cell_type.replace('meanscell_abundance_w_sf_', '')
                })
                new_obs_names.append(f"{idx}-{str(i+1).zfill(2)}")
    
    # Create a new AnnData object
    new_adata = sc.AnnData(
        X=np.zeros((len(new_obs), adata.n_vars)),
        obs=pd.DataFrame(new_obs, index=new_obs_names)
    )
    
    # Add spatial coordinates
    original_coords = adata.obsm['spatial']
    new_coords = np.zeros((len(new_obs), original_coords.shape[1]))
    
    for i, obs in enumerate(new_obs):
        orig_idx = adata.obs_names.get_loc(obs['original_barcode'])
        new_coords[i] = original_coords[orig_idx]
    
    new_adata.obsm['spatial'] = new_coords
    
    return new_adata

## Adult colon samples

#### Sample A1

In [70]:
dir_path = 'data/Visium_fawkner_corbett'

In [71]:
samples = ['A1_sp', 'A2_sp', 'A3_sp', 'A4_sp', 'A6_sp', 'A7_sp', 'A8_sp', 'A9_sp']

In [72]:
anndata_objects = {}

# Loop through the samples and load each .h5ad file
for sample in samples:
    file_path = os.path.join(dir_path, f'{sample}.h5ad')
    if os.path.exists(file_path):
        anndata_objects[sample] = ad.read_h5ad(file_path)
    else:
        print(f'File {file_path} does not exist.')

In [73]:
for sample, ad_obj in anndata_objects.items():
    globals()[sample] = ad_obj

### Adult colon slides

In [74]:
A1_sp.obs['top_cell_types'] = A1_sp.obsm['means_cell_abundance_w_sf'].apply(get_top_n_cell_types, axis=1)
A1_sp.obs['top_cell_types'] = A1_sp.obs['top_cell_types'].astype('category')

A2_sp.obs['top_cell_types'] = A2_sp.obsm['means_cell_abundance_w_sf'].apply(get_top_n_cell_types, axis=1)
A2_sp.obs['top_cell_types'] = A2_sp.obs['top_cell_types'].astype('category')

In [75]:
adult_colon_adata = ad.concat([A1_sp, A2_sp])

In [76]:
sq.gr.spatial_neighbors(adult_colon_adata, n_rings=2, coord_type="grid", n_neighs=6)
sq.gr.nhood_enrichment(adult_colon_adata, cluster_key="top_cell_types") 

Creating graph using `grid` coordinates and `None` transform and `1` libraries.
Adding `adata.obsp['spatial_connectivities']`
       `adata.obsp['spatial_distances']`
       `adata.uns['spatial_neighbors']`
Finish (0:00:00)
Calculating neighborhood enrichment using `1` core(s)


100%|█████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 2005.01/s]


Adding `adata.uns['top_cell_types_nhood_enrichment']`
Finish (0:00:00)


  zscore = (count - perms.mean(axis=0)) / perms.std(axis=0)


In [78]:
with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(4.5, 5))
    sq.pl.nhood_enrichment(adult_colon_adata, cluster_key="top_cell_types", figsize=(10, 10), cmap="RdYlBu_r")
    plt.savefig( f"{dir_path}/adult_colon_adata_nhood_enrichment.png", bbox_inches="tight")
    plt.close()

  row_labels = adata.obs[key][row_order]


In [34]:
sq.gr.co_occurrence(adult_colon_adata, cluster_key="top_cell_types")
with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(4.5, 5))
    sq.pl.co_occurrence(adult_colon_adata, cluster_key="top_cell_types", clusters="Stem cells", figsize=(10, 10))
    plt.savefig( f"{dir_path}/adult_colon_adata_co_occurrence_Stem.png", bbox_inches="tight")
    plt.close() 

Calculating co-occurrence probabilities for `50` intervals `1` split combinations using `1` core(s)


100%|█████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.98s/]

Adding `adata.uns['top_cell_types_co_occurrence']`
Finish (0:00:02)
ERROR: Unable to fetch palette, reason: 'top_cell_types_colors'. Using `None`.





### 12 PCW whole colon slides (A3, A8, A9)

In [79]:
A3_sp.obs['top_cell_types'] = A3_sp.obsm['means_cell_abundance_w_sf'].apply(get_top_n_cell_types, axis=1)
A3_sp.obs['top_cell_types'] = A3_sp.obs['top_cell_types'].astype('category')

A8_sp.obs['top_cell_types'] = A8_sp.obsm['means_cell_abundance_w_sf'].apply(get_top_n_cell_types, axis=1)
A8_sp.obs['top_cell_types'] = A8_sp.obs['top_cell_types'].astype('category')

A9_sp.obs['top_cell_types'] = A9_sp.obsm['means_cell_abundance_w_sf'].apply(get_top_n_cell_types, axis=1)
A9_sp.obs['top_cell_types'] = A9_sp.obs['top_cell_types'].astype('category')

In [80]:
pcw12_colon_adata = ad.concat([A3_sp, A8_sp, A9_sp])

In [81]:
sq.gr.spatial_neighbors(pcw12_colon_adata, n_rings=2, coord_type="grid", n_neighs=6)
sq.gr.nhood_enrichment(pcw12_colon_adata, cluster_key="top_cell_types") 

Creating graph using `grid` coordinates and `None` transform and `1` libraries.
Adding `adata.obsp['spatial_connectivities']`
       `adata.obsp['spatial_distances']`
       `adata.uns['spatial_neighbors']`
Finish (0:00:00)
Calculating neighborhood enrichment using `1` core(s)


100%|█████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 1828.10/s]


Adding `adata.uns['top_cell_types_nhood_enrichment']`
Finish (0:00:00)


  zscore = (count - perms.mean(axis=0)) / perms.std(axis=0)


In [82]:
with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(4.5, 5))
    sq.pl.nhood_enrichment(pcw12_colon_adata, cluster_key="top_cell_types", figsize=(10, 10), cmap="RdYlBu_r")
    plt.savefig( f"{dir_path}/pcw12_colon_adata_nhood_enrichment.png", bbox_inches="tight")
    plt.close()

  row_labels = adata.obs[key][row_order]


In [43]:
sq.gr.co_occurrence(pcw12_colon_adata, cluster_key="top_cell_types")
with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(4.5, 5))
    sq.pl.co_occurrence(pcw12_colon_adata, cluster_key="top_cell_types", clusters="FXYD3+_CKB+_SC", figsize=(10, 10))
    plt.savefig( f"{dir_path}/pcw12_colon_adata_co_occurrence_FXYD3+_CKB+_SC.png", bbox_inches="tight")
    plt.close() 

Calculating co-occurrence probabilities for `50` intervals `1` split combinations using `1` core(s)


100%|█████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.05s/]

Adding `adata.uns['top_cell_types_co_occurrence']`
Finish (0:00:04)
ERROR: Unable to fetch palette, reason: 'top_cell_types_colors'. Using `None`.





### 12 PCW whole intestine slides (A6, A7)

In [83]:
A6_sp.obs['top_cell_types'] = A6_sp.obsm['means_cell_abundance_w_sf'].apply(get_top_n_cell_types, axis=1)
A6_sp.obs['top_cell_types'] = A6_sp.obs['top_cell_types'].astype('category')

A7_sp.obs['top_cell_types'] = A7_sp.obsm['means_cell_abundance_w_sf'].apply(get_top_n_cell_types, axis=1)
A7_sp.obs['top_cell_types'] = A7_sp.obs['top_cell_types'].astype('category')

In [84]:
pcw12_intestine_adata = ad.concat([A6_sp, A7_sp])

In [85]:
sq.gr.spatial_neighbors(pcw12_intestine_adata, n_rings=2, coord_type="grid", n_neighs=6)
sq.gr.nhood_enrichment(pcw12_intestine_adata, cluster_key="top_cell_types") 

Creating graph using `grid` coordinates and `None` transform and `1` libraries.
Adding `adata.obsp['spatial_connectivities']`
       `adata.obsp['spatial_distances']`
       `adata.uns['spatial_neighbors']`
Finish (0:00:00)
Calculating neighborhood enrichment using `1` core(s)


100%|█████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 2415.46/s]


Adding `adata.uns['top_cell_types_nhood_enrichment']`
Finish (0:00:00)


  zscore = (count - perms.mean(axis=0)) / perms.std(axis=0)


In [86]:
with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(4.5, 5))
    sq.pl.nhood_enrichment(pcw12_intestine_adata, cluster_key="top_cell_types", figsize=(10, 10), cmap="RdYlBu_r")
    plt.savefig( f"{dir_path}/pcw12_intestine_adata_nhood_enrichment.png", bbox_inches="tight")
    plt.close()

  row_labels = adata.obs[key][row_order]


In [48]:
sq.gr.co_occurrence(pcw12_intestine_adata, cluster_key="top_cell_types")
with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(4.5, 5))
    sq.pl.co_occurrence(pcw12_intestine_adata, cluster_key="top_cell_types", clusters="FXYD3+_CKB+_SC", figsize=(10, 10))
    plt.savefig( f"{dir_path}/pcw12_intestine_adata_co_occurrence_FXYD3+_CKB+_SC.png", bbox_inches="tight")
    plt.close() 

Calculating co-occurrence probabilities for `50` intervals `1` split combinations using `1` core(s)


100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.41/s]

Adding `adata.uns['top_cell_types_co_occurrence']`
Finish (0:00:00)
ERROR: Unable to fetch palette, reason: 'top_cell_types_colors'. Using `None`.





In [50]:
sq.gr.co_occurrence(pcw12_intestine_adata, cluster_key="top_cell_types")
with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(4.5, 5))
    sq.pl.co_occurrence(pcw12_intestine_adata, cluster_key="top_cell_types", clusters="MTRNR2L12+ASS1+_SC", figsize=(10, 10))
    plt.savefig( f"{dir_path}/pcw12_intestine_adata_co_occurrence_MTRNR2L12+ASS1+_SC.png", bbox_inches="tight")
    plt.close() 

Calculating co-occurrence probabilities for `50` intervals `1` split combinations using `1` core(s)


100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.43/s]

Adding `adata.uns['top_cell_types_co_occurrence']`
Finish (0:00:00)
ERROR: Unable to fetch palette, reason: 'top_cell_types_colors'. Using `None`.





In [51]:
sq.gr.co_occurrence(pcw12_intestine_adata, cluster_key="top_cell_types")
with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(4.5, 5))
    sq.pl.co_occurrence(pcw12_intestine_adata, cluster_key="top_cell_types", clusters="RPS10+_RPS17+_SC", figsize=(10, 10))
    plt.savefig( f"{dir_path}/pcw12_intestine_adata_co_occurrence_RPS10+_RPS17+_SC.png", bbox_inches="tight")
    plt.close() 

Calculating co-occurrence probabilities for `50` intervals `1` split combinations using `1` core(s)


100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.50/s]

Adding `adata.uns['top_cell_types_co_occurrence']`
Finish (0:00:00)
ERROR: Unable to fetch palette, reason: 'top_cell_types_colors'. Using `None`.





### 19 PCW whole colon slide (A4)

In [87]:
A4_sp.obs['top_cell_types'] = A4_sp.obsm['means_cell_abundance_w_sf'].apply(get_top_n_cell_types, axis=1)
A4_sp.obs['top_cell_types'] = A4_sp.obs['top_cell_types'].astype('category')

In [88]:
sq.gr.spatial_neighbors(A4_sp, n_rings=2, coord_type="grid", n_neighs=6)
sq.gr.nhood_enrichment(A4_sp, cluster_key="top_cell_types") 

Creating graph using `grid` coordinates and `None` transform and `1` libraries.
Adding `adata.obsp['spatial_connectivities']`
       `adata.obsp['spatial_distances']`
       `adata.uns['spatial_neighbors']`
Finish (0:00:00)
Calculating neighborhood enrichment using `1` core(s)


100%|█████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 3105.89/s]


Adding `adata.uns['top_cell_types_nhood_enrichment']`
Finish (0:00:00)


  zscore = (count - perms.mean(axis=0)) / perms.std(axis=0)


In [89]:
with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(4.5, 5))
    sq.pl.nhood_enrichment(A4_sp, cluster_key="top_cell_types", figsize=(10, 10), cmap="RdYlBu_r")
    plt.savefig( f"{dir_path}/pcw19_colon_adata_nhood_enrichment.png", bbox_inches="tight")
    plt.close()

  row_labels = adata.obs[key][row_order]


In [55]:
sq.gr.co_occurrence(A4_sp, cluster_key="top_cell_types")
with plt.rc_context():
    sc.set_figure_params(dpi=300, figsize=(4.5, 5))
    sq.pl.co_occurrence(A4_sp, cluster_key="top_cell_types", clusters="FXYD3+_CKB+_SC", figsize=(10, 10))
    plt.savefig( f"{dir_path}/pcw19_colon_co_occurrence_FXYD3+_CKB+_SC.png", bbox_inches="tight")
    plt.close() 

Calculating co-occurrence probabilities for `50` intervals `1` split combinations using `1` core(s)


100%|█████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.27/s]

Adding `adata.uns['top_cell_types_co_occurrence']`
Finish (0:00:00)
ERROR: Unable to fetch palette, reason: 'top_cell_types_colors'. Using `None`.



