### Notebook for the creation of the full anndata objects for each sample from Fawkner-Corbett_2021 Visium data 

- **Developed by:** Anna Maguza
- **Affilation:** Wuerzburg Institute for System Immunology
- **Date:** 27th January 2024
- **Last modified date:** 5th August 2024

#### Import packages

In [14]:
import anndata as ad
import scanpy as sc
import squidpy as sq
import pandas as pd
from scipy.io import mmread
import matplotlib.pyplot as plt
import numpy as np
import scipy as sci
from scipy.sparse import coo_matrix
from PIL import Image

#### Set up cells

In [15]:
sc.settings.verbosity = 3
sc.logging.print_versions()
sc.settings.set_figure_params(dpi = 160, color_map = 'RdPu', dpi_save = 180, vector_friendly = True, format = 'svg')

-----
anndata     0.10.5.post1
scanpy      1.9.8
-----
PIL                         10.2.0
anyio                       NA
arrow                       1.3.0
asciitree                   NA
asttokens                   NA
attr                        23.2.0
attrs                       23.2.0
babel                       2.14.0
backcall                    0.2.0
beta_ufunc                  NA
binom_ufunc                 NA
brotli                      1.1.0
certifi                     2023.11.17
cffi                        1.16.0
charset_normalizer          3.3.2
cloudpickle                 3.0.0
comm                        0.2.1
cycler                      0.12.1
cython_runtime              NA
dask                        2024.1.1
dask_image                  2023.08.1
datashader                  0.16.0
datatree                    0.0.13
dateutil                    2.8.2
debugpy                     1.8.0
decorator                   5.1.1
defusedxml                  0.7.1
docrep                   

#### Load anndata object

In [16]:
data_dir = '/../../../gut_project/raw_data/Fawkner-Corbett_2021/Visium_data/'
images_dir = '/../../../gut_project/raw_data/Fawkner-Corbett_2021/Visium_data/Downloaded_metadata_and_countmatrix'
results_dir = '/../../../gut_project/Processed_data/Gut_data/Visium_fawkner_corbett/slides_anndata_objects'

In [17]:
adata = sc.read_h5ad(f'{data_dir}/Fawkner_Corbett_2021_raw_all_samples_with_QC.h5ad')

#### Create full object for each sample

##### Sample A1

+ Filter sample

In [18]:
a1 = adata[adata.obs['Sample_ID'] == 'A1', :]

+ Upload images

In [19]:
hires = np.asarray(Image.open(f'{images_dir}/GSM4797916_A1/A1/spatial/tissue_hires_image.png'))
lowres = np.asarray(Image.open(f'{images_dir}//GSM4797916_A1/A1/spatial/tissue_lowres_image.png'))

+ Upload coordinates

In [20]:
coords = pd.read_csv(
        f'{images_dir}/GSM4797916_A1/A1/spatial/tissue_positions_list.csv',
        header=None)

coords.columns = ['barcode', "in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]

+ Adjust barcodes in coordinates

In [21]:
# split barcode column into two columns by '-' 
coords[['barcode', 'barcode2']] = coords.barcode.str.split("-",expand=True)

# delete barcode_2 column
coords = coords.drop(['barcode2'], axis=1)

# add '_A1' to the end of each barcode
coords['barcode'] = coords['barcode'] + '_A1'

+ Filter out cells that are not present in spatial_coords or anndata object

In [22]:
# leave only barcodes that are in adata
coords = coords[coords['barcode'].isin(a1.obs_names)]

# delete cells from a1 that are not in spatial_coords
a1 = a1[a1.obs_names.isin(coords['barcode'])]

# Set the index of spatial_coords to 'barcode'
coords.set_index('barcode', inplace=True)

In [23]:
# The reindex method aligns the DataFrame to the specified index
coords = coords.reindex(a1.obs.index)

# Now, ordered_spatial_coords has the same order as adata.obs
# You can then assign its values to adata.obsm
a1.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values

  a1.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values


In [24]:
spatial_key = "spatial"
library_id = "tissue42"
a1.uns[spatial_key] = {library_id: {}}
a1.uns[spatial_key][library_id]["images"] = {}
a1.uns[spatial_key][library_id]["images"] = {"hires": hires, "lowres": lowres}
a1.uns[spatial_key][library_id]["scalefactors"] = {"spot_diameter_fullres": 54.37959101469902, "tissue_hires_scalef": 0.28401023, "fiducial_diameter_fullres": 87.84395471605225, "tissue_lowres_scalef": 0.08520307}

+ Write anndata object

In [25]:
a1.write_h5ad(f'{results_dir}/A1_raw.h5ad')

##### Sample A2

+ Filter sample

In [26]:
a2 = adata[adata.obs['Sample_ID'] == 'A2', :]

+ Upload images

In [27]:
hires = np.asarray(Image.open(f'{images_dir}/GSM4797917_A2/A2/spatial/tissue_hires_image.png'))
lowres = np.asarray(Image.open(f'{images_dir}/GSM4797917_A2/A2/spatial/tissue_lowres_image.png'))

+ Upload coordinates

In [28]:
coords = pd.read_csv(
        f'{images_dir}/GSM4797917_A2/A2/spatial/tissue_positions_list.csv',
        header=None)

coords.columns = ['barcode', "in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]

+ Adjust barcodes in coordinates

In [29]:
# split barcode column into two columns by '-' 
coords[['barcode', 'barcode2']] = coords.barcode.str.split("-",expand=True)

# delete barcode_2 column
coords = coords.drop(['barcode2'], axis=1)

# add '_A2' to the end of each barcode
coords['barcode'] = coords['barcode'] + '_A2'

+ Filter out cells that are not present in spatial_coords or anndata object

In [30]:
# leave only barcodes that are in adata
coords = coords[coords['barcode'].isin(a2.obs_names)]

# delete cells from a1 that are not in spatial_coords
a2 = a2[a2.obs_names.isin(coords['barcode'])]

# Set the index of spatial_coords to 'barcode'
coords.set_index('barcode', inplace=True)

In [31]:
# The reindex method aligns the DataFrame to the specified index
coords = coords.reindex(a2.obs.index)

# Now, ordered_spatial_coords has the same order as adata.obs
# You can then assign its values to adata.obsm
a2.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values

  a2.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values


In [32]:
spatial_key = "spatial"
library_id = "tissue42"
a2.uns[spatial_key] = {library_id: {}}
a2.uns[spatial_key][library_id]["images"] = {}
a2.uns[spatial_key][library_id]["images"] = {"hires": hires, "lowres": lowres}
a2.uns[spatial_key][library_id]["scalefactors"] = {"spot_diameter_fullres": 54.039344274655456, "tissue_hires_scalef": 0.28236622, "fiducial_diameter_fullres": 87.29432536675112, "tissue_lowres_scalef": 0.08470987}

+ Write anndata object

In [33]:
a2.write_h5ad(f'{results_dir}/A2_raw.h5ad')

##### Sample A3

+ Filter sample

In [34]:
a3 = adata[adata.obs['Sample_ID'] == 'A3', :]

+ Upload images

In [38]:
hires = np.asarray(Image.open(f'{images_dir}/GSM4797918_A3/A3/spatial/tissue_hires_image.png'))
lowres = np.asarray(Image.open(f'{images_dir}/GSM4797918_A3/A3/spatial/tissue_lowres_image.png'))

+ Upload coordinates

In [39]:
coords = pd.read_csv(
        f'{images_dir}/GSM4797918_A3/A3/spatial/tissue_positions_list.csv',
        header=None)

coords.columns = ['barcode', "in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]

+ Adjust barcodes in coordinates

In [40]:
# split barcode column into two columns by '-' 
coords[['barcode', 'barcode2']] = coords.barcode.str.split("-",expand=True)

# delete barcode_2 column
coords = coords.drop(['barcode2'], axis=1)

# add '_A3' to the end of each barcode
coords['barcode'] = coords['barcode'] + '_A3'

+ Filter out cells that are not present in spatial_coords or anndata object

In [41]:
# leave only barcodes that are in adata
coords = coords[coords['barcode'].isin(a3.obs_names)]

# delete cells from a1 that are not in spatial_coords
a3 = a3[a3.obs_names.isin(coords['barcode'])]

# Set the index of spatial_coords to 'barcode'
coords.set_index('barcode', inplace=True)

In [42]:
# The reindex method aligns the DataFrame to the specified index
coords = coords.reindex(a3.obs.index)

# Now, ordered_spatial_coords has the same order as adata.obs
# You can then assign its values to adata.obsm
a3.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values

  a3.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values


In [43]:
spatial_key = "spatial"
library_id = "tissue42"
a3.uns[spatial_key] = {library_id: {}}
a3.uns[spatial_key][library_id]["images"] = {}
a3.uns[spatial_key][library_id]["images"] = {"hires": hires, "lowres": lowres}
a3.uns[spatial_key][library_id]["scalefactors"] = {"spot_diameter_fullres": 54.03811058939226, "tissue_hires_scalef": 0.28236622, "fiducial_diameter_fullres": 87.29233249055673, "tissue_lowres_scalef": 0.08470987}

+ Write anndata object

In [44]:
a3.write_h5ad(f'{results_dir}/A3_raw.h5ad')

##### Sample A4

+ Filter sample

In [45]:
a4 = adata[adata.obs['Sample_ID'] == 'A4', :]

+ Upload images

In [46]:
hires = np.asarray(Image.open(f'{images_dir}/GSM4797919_A4/A4/spatial/tissue_hires_image.png'))
lowres = np.asarray(Image.open(f'{images_dir}/GSM4797919_A4/A4/spatial/tissue_lowres_image.png'))

+ Upload coordinates

In [47]:
coords = pd.read_csv(
        f'{images_dir}/GSM4797919_A4/A4/spatial/tissue_positions_list.csv',
        header=None)

coords.columns = ['barcode', "in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]

+ Adjust barcodes in coordinates

In [48]:
# split barcode column into two columns by '-' 
coords[['barcode', 'barcode2']] = coords.barcode.str.split("-",expand=True)

# delete barcode_2 column
coords = coords.drop(['barcode2'], axis=1)

# add '_A3' to the end of each barcode
coords['barcode'] = coords['barcode'] + '_A4'

+ Filter out cells that are not present in spatial_coords or anndata object

In [49]:
# leave only barcodes that are in adata
coords = coords[coords['barcode'].isin(a4.obs_names)]

# delete cells from a1 that are not in spatial_coords
a4 = a4[a4.obs_names.isin(coords['barcode'])]

# Set the index of spatial_coords to 'barcode'
coords.set_index('barcode', inplace=True)

In [50]:
# The reindex method aligns the DataFrame to the specified index
coords = coords.reindex(a4.obs.index)

# Now, ordered_spatial_coords has the same order as adata.obs
# You can then assign its values to adata.obsm
a4.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values

  a4.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values


In [51]:
spatial_key = "spatial"
library_id = "tissue42"
a4.uns[spatial_key] = {library_id: {}}
a4.uns[spatial_key][library_id]["images"] = {}
a4.uns[spatial_key][library_id]["images"] = {"hires": hires, "lowres": lowres}
a4.uns[spatial_key][library_id]["scalefactors"] = {"spot_diameter_fullres": 54.380229959942206, "tissue_hires_scalef": 0.28058362, "fiducial_diameter_fullres": 87.84498685836817, "tissue_lowres_scalef": 0.08417509}

+ Write anndata object

In [52]:
a4.write_h5ad(f'{results_dir}/A4_raw.h5ad')

##### Sample A6

+ Filter sample

In [53]:
a5 = adata[adata.obs['Sample_ID'] == 'A6', :]

+ Upload images

In [54]:
hires = np.asarray(Image.open(f'{images_dir}/GSM4797920_A6/A6/spatial/tissue_hires_image.png'))
lowres = np.asarray(Image.open(f'{images_dir}/GSM4797920_A6/A6/spatial/tissue_lowres_image.png'))

+ Upload coordinates

In [55]:
coords = pd.read_csv(
        f'{images_dir}/GSM4797920_A6/A6/spatial/tissue_positions_list.csv',
        header=None)

coords.columns = ['barcode', "in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]

+ Adjust barcodes in coordinates

In [56]:
# split barcode column into two columns by '-' 
coords[['barcode', 'barcode2']] = coords.barcode.str.split("-",expand=True)

# delete barcode_2 column
coords = coords.drop(['barcode2'], axis=1)

# add '_A5' to the end of each barcode
coords['barcode'] = coords['barcode'] + '_A6'

+ Filter out cells that are not present in spatial_coords or anndata object

In [57]:
# leave only barcodes that are in adata
coords = coords[coords['barcode'].isin(a5.obs_names)]

# delete cells from a1 that are not in spatial_coords
a5 = a5[a5.obs_names.isin(coords['barcode'])]

# Set the index of spatial_coords to 'barcode'
coords.set_index('barcode', inplace=True)

In [58]:
# The reindex method aligns the DataFrame to the specified index
coords = coords.reindex(a5.obs.index)

# Now, ordered_spatial_coords has the same order as adata.obs
# You can then assign its values to adata.obsm
a5.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values

  a5.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values


In [59]:
spatial_key = "spatial"
library_id = "tissue42"
a5.uns[spatial_key] = {library_id: {}}
a5.uns[spatial_key][library_id]["images"] = {}
a5.uns[spatial_key][library_id]["images"] = {"hires": hires, "lowres": lowres}
a5.uns[spatial_key][library_id]["scalefactors"] = {"spot_diameter_fullres": 71.75987296531034, "tissue_hires_scalef": 0.20080322, "fiducial_diameter_fullres": 115.9197947901167, "tissue_lowres_scalef": 0.060240965}

+ Write anndata object

In [60]:
a5.write_h5ad(f'{results_dir}/A6_raw.h5ad')

##### Sample A7

+ Filter sample

In [61]:
a7 = adata[adata.obs['Sample_ID'] == 'A7', :]

+ Upload images

In [62]:
hires = np.asarray(Image.open(f'{images_dir}/GSM4797921_A7/A7/spatial/tissue_hires_image.png'))
lowres = np.asarray(Image.open(f'{images_dir}/GSM4797921_A7/A7/spatial/tissue_lowres_image.png'))

+ Upload coordinates

In [63]:
coords = pd.read_csv(
        f'{images_dir}/GSM4797921_A7/A7/spatial/tissue_positions_list.csv',
        header=None)

coords.columns = ['barcode', "in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]

+ Adjust barcodes in coordinates

In [64]:
# split barcode column into two columns by '-' 
coords[['barcode', 'barcode2']] = coords.barcode.str.split("-",expand=True)

# delete barcode_2 column
coords = coords.drop(['barcode2'], axis=1)

# add '_A5' to the end of each barcode
coords['barcode'] = coords['barcode'] + '_A7'

+ Filter out cells that are not present in spatial_coords or anndata object

In [65]:
# leave only barcodes that are in adata
coords = coords[coords['barcode'].isin(a7.obs_names)]

# delete cells from a1 that are not in spatial_coords
a7 = a7[a7.obs_names.isin(coords['barcode'])]

# Set the index of spatial_coords to 'barcode'
coords.set_index('barcode', inplace=True)

In [66]:
# The reindex method aligns the DataFrame to the specified index
coords = coords.reindex(a7.obs.index)

# Now, ordered_spatial_coords has the same order as adata.obs
# You can then assign its values to adata.obsm
a7.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values

  a7.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values


In [67]:
spatial_key = "spatial"
library_id = "tissue42"
a7.uns[spatial_key] = {library_id: {}}
a7.uns[spatial_key][library_id]["images"] = {}
a7.uns[spatial_key][library_id]["images"] = {"hires": hires, "lowres": lowres}
a7.uns[spatial_key][library_id]["scalefactors"] = {"spot_diameter_fullres": 73.76498465185034, "tissue_hires_scalef": 0.1996805, "fiducial_diameter_fullres": 119.15882136068132, "tissue_lowres_scalef": 0.059904154}

+ Write anndata object

In [68]:
a7.write_h5ad(f'{results_dir}/A7_raw.h5ad')

##### Sample A8

+ Filter sample

In [69]:
a8 = adata[adata.obs['Sample_ID'] == 'A8', :]

+ Upload images

In [70]:
hires = np.asarray(Image.open(f'{images_dir}/GSM4797922_A8/A8/spatial/tissue_hires_image.png'))
lowres = np.asarray(Image.open(f'{images_dir}/GSM4797922_A8/A8/spatial/tissue_lowres_image.png'))

+ Upload coordinates

In [71]:
coords = pd.read_csv(
        f'{images_dir}/GSM4797922_A8/A8/spatial/tissue_positions_list.csv',
        header=None)

coords.columns = ['barcode', "in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]

+ Adjust barcodes in coordinates

In [72]:
# split barcode column into two columns by '-' 
coords[['barcode', 'barcode2']] = coords.barcode.str.split("-",expand=True)

# delete barcode_2 column
coords = coords.drop(['barcode2'], axis=1)

# add '_A5' to the end of each barcode
coords['barcode'] = coords['barcode'] + '_A8'

+ Filter out cells that are not present in spatial_coords or anndata object

In [73]:
# leave only barcodes that are in adata
coords = coords[coords['barcode'].isin(a8.obs_names)]

# delete cells from a1 that are not in spatial_coords
a8 = a8[a8.obs_names.isin(coords['barcode'])]

# Set the index of spatial_coords to 'barcode'
coords.set_index('barcode', inplace=True)

In [74]:
# The reindex method aligns the DataFrame to the specified index
coords = coords.reindex(a8.obs.index)

# Now, ordered_spatial_coords has the same order as adata.obs
# You can then assign its values to adata.obsm
a8.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values

  a8.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values


In [75]:
spatial_key = "spatial"
library_id = "tissue42"
a8.uns[spatial_key] = {library_id: {}}
a8.uns[spatial_key][library_id]["images"] = {}
a8.uns[spatial_key][library_id]["images"] = {"hires": hires, "lowres": lowres}
a8.uns[spatial_key][library_id]["scalefactors"] = {"spot_diameter_fullres": 78.27009331746386, "tissue_hires_scalef": 0.19922303, "fiducial_diameter_fullres": 126.43630458974931, "tissue_lowres_scalef": 0.059766907}

+ Write anndata object

In [76]:
a8.write_h5ad(f'{results_dir}/A8_raw.h5ad')

##### Sample A9

+ Filter sample

In [77]:
a9 = adata[adata.obs['Sample_ID'] == 'A9', :]

+ Upload images

In [78]:
hires = np.asarray(Image.open(f'{images_dir}/GSM4797923_A9/A9/spatial/tissue_hires_image.png'))
lowres = np.asarray(Image.open(f'{images_dir}/GSM4797923_A9/A9/spatial/tissue_lowres_image.png'))

+ Upload coordinates

In [79]:
coords = pd.read_csv(
        f'{images_dir}/GSM4797923_A9/A9/spatial/tissue_positions_list.csv',
        header=None)

coords.columns = ['barcode', "in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]

+ Adjust barcodes in coordinates

In [80]:
# split barcode column into two columns by '-' 
coords[['barcode', 'barcode2']] = coords.barcode.str.split("-",expand=True)

# delete barcode_2 column
coords = coords.drop(['barcode2'], axis=1)

# merge '_A9' to the barcode
coords['barcode'] = coords['barcode'].astype(str) + '_A9'

+ Filter out cells that are not present in spatial_coords or anndata object

In [81]:
# leave only barcodes that are in adata
coords = coords[coords['barcode'].isin(a9.obs_names)]

# delete cells from a1 that are not in spatial_coords
a9 = a9[a9.obs_names.isin(coords['barcode'])]

# Set the index of spatial_coords to 'barcode'
coords.set_index('barcode', inplace=True)

In [82]:
# The reindex method aligns the DataFrame to the specified index
coords = coords.reindex(a9.obs.index)

# Now, ordered_spatial_coords has the same order as adata.obs
# You can then assign its values to adata.obsm
a9.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values

  a9.obsm['spatial'] = coords[["pxl_row_in_fullres", "pxl_col_in_fullres"]].values


In [83]:
spatial_key = "spatial"
library_id = "tissue42"
a9.uns[spatial_key] = {library_id: {}}
a9.uns[spatial_key][library_id]["images"] = {}
a9.uns[spatial_key][library_id]["images"] = {"hires": hires, "lowres": lowres}
a9.uns[spatial_key][library_id]["scalefactors"] = {"spot_diameter_fullres": 73.99311369743836, "tissue_hires_scalef": 0.20671834, "fiducial_diameter_fullres": 119.52733751124659, "tissue_lowres_scalef": 0.062015504}

+ Write anndata object

In [84]:
a9.write_h5ad(f'{results_dir}/A9_raw.h5ad')