## Notebook for processing the Peng-2022 spatial transcriptomics data
### Developed by: Anna Maguza

### Institute of Computational Biology - Computational Health Centre - Helmholtz Munich

### 11 November 2022

#### Import necessary packages

In [None]:
import anndata as ad
import scanpy as sc
import squidpy as sq
import pandas as pd
from scipy.io import mmread
import matplotlib.pyplot as plt
import numpy as np
import scipy as sci
from scipy.sparse import coo_matrix
from PIL import Image

#### Setup cells

In [None]:
# Log the scanpy environment header first, then the squidpy version in
# pip-style "name==version" form.
sc.logging.print_header()
squidpy_version = sq.__version__
print(f"squidpy=={squidpy_version}")

# Colorectal cancer picture 1

#### Data Upload 

In [None]:
# Directory holding every input file of the first colorectal cancer section.
sample_dir = '/Users/annamaguza/Desktop/Desktop-Anna/LMU/Master-Thesis/Spatial transcriptomics data/ST/ST-colon1'

# Spot barcodes: headerless TSV whose first column becomes the index.
barcodes = pd.read_csv(f'{sample_dir}/barcodes.tsv', sep='\t', header=None, index_col=0)

# Tissue images as NumPy arrays; the context managers close the image files
# as soon as the pixel data has been copied out.
with Image.open(f'{sample_dir}/tissue_hires_image.png') as hires_file:
    hires = np.asarray(hires_file)
with Image.open(f'{sample_dir}/tissue_lowres_image.png') as lowres_file:
    lowres = np.asarray(lowres_file)

# Whitespace-delimited coordinate table indexed by its first column.
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
coordinates = pd.read_csv(f'{sample_dir}/coordinates.tsv', sep=r'\s+', index_col=0)

# Gene table.
# NOTE(review): with header=None and no index_col the gene identifiers stay in
# integer-labelled columns and the index is a default RangeIndex — confirm
# that this is what downstream code expects for `.var`.
genes = pd.read_csv(f'{sample_dir}/genes.tsv', sep='\t', header=None)

# Count matrix in Matrix Market format, plus a CSR copy used to build AnnData.
matrix = mmread(f'{sample_dir}/matrix.mtx')
matrix1 = sci.sparse.csr_matrix(matrix)

# Headerless per-spot position table indexed by its first column; the five
# remaining columns are labelled explicitly.
coords = pd.read_csv(
    f"{sample_dir}/tissue_positions_list.csv",
    header=None,
    index_col=0,
)
coords.columns = ["in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]

In [None]:
# Assemble the AnnData object. The count matrix is transposed so that its rows
# line up with `barcodes` (obs) and its columns with `genes` (var). The
# "spatial" entry passed here is only provisional: it is replaced further down
# by the full-resolution pixel coordinates.
Peng = ad.AnnData(X=np.transpose(matrix1), var=genes, obs=barcodes, obsm={"spatial": coordinates})
Peng.obs.index.name = "index"

# Attach the per-spot position table to the observations by barcode.
Peng.obs = pd.merge(Peng.obs, coords, how="left", left_index=True, right_index=True)

pixel_columns = ["pxl_row_in_fullres", "pxl_col_in_fullres"]

# A left merge leaves NaN for barcodes that are absent from the position
# table; stop here instead of passing NaN coordinates on to later steps.
if Peng.obs[pixel_columns].isna().any().any():
    raise ValueError("Some barcodes have no entry in tissue_positions_list.csv")

# Move the pixel coordinates from .obs into .obsm["spatial"].
Peng.obsm["spatial"] = Peng.obs[pixel_columns].values
Peng.obs.drop(columns=pixel_columns, inplace=True)

# Register the tissue images and their scale factors under a single library id.
spatial_key = "spatial"
library_id = "tissue42"
Peng.uns[spatial_key] = {
    library_id: {
        "images": {"hires": hires, "lowres": lowres},
        "scalefactors": {
            "spot_diameter_fullres": 130.2320992627276,
            "tissue_hires_scalef": 0.12207031,
            "fiducial_diameter_fullres": 210.37492957825225,
            "tissue_lowres_scalef": 0.036621094,
        },
    }
}

In [None]:
# Run the scanpy steps on Peng one after another, each with its default
# settings: total-count normalisation, log1p, PCA, neighbour graph, UMAP and
# Leiden clustering. Order matters, since later steps consume earlier results.
for step in (
    sc.pp.normalize_total,
    sc.pp.log1p,
    sc.pp.pca,
    sc.pp.neighbors,
    sc.tl.umap,
    sc.tl.leiden,
):
    step(Peng)

In [None]:
# Observation column holding the Leiden cluster labels.
cluster_key = "leiden"

# Spatial view of the clusters: once with shape=None on a 10x10 figure, once
# with squidpy's default settings.
sq.pl.spatial_scatter(Peng, color=cluster_key, shape=None, figsize=(10, 10))
sq.pl.spatial_scatter(Peng, color=cluster_key)

# Build the spatial neighbour graph (coord_type="generic"), compute the
# neighbourhood enrichment between clusters, and plot it with method="ward".
sq.gr.spatial_neighbors(Peng, coord_type="generic")
sq.gr.nhood_enrichment(Peng, cluster_key=cluster_key)
sq.pl.nhood_enrichment(Peng, cluster_key=cluster_key, method="ward")

# Colorectal cancer picture 2

#### Data Upload 

In [None]:
# Directory holding every input file of the second colorectal cancer section.
sample_dir = '/Users/annamaguza/Desktop/Desktop-Anna/LMU/Master-Thesis/Spatial transcriptomics data/ST/ST-colon2'

# Spot barcodes: headerless TSV whose first column becomes the index.
barcodes2 = pd.read_csv(f'{sample_dir}/barcodes.tsv', sep='\t', header=None, index_col=0)

# Tissue images as NumPy arrays; the context managers close the image files
# as soon as the pixel data has been copied out.
with Image.open(f'{sample_dir}/tissue_hires_image.png') as hires_file:
    hires2 = np.asarray(hires_file)
with Image.open(f'{sample_dir}/tissue_lowres_image.png') as lowres_file:
    lowres2 = np.asarray(lowres_file)

# Whitespace-delimited coordinate table indexed by its first column.
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
coordinates2 = pd.read_csv(f'{sample_dir}/coordinates.tsv', sep=r'\s+', index_col=0)

# Gene table.
# NOTE(review): with header=None and no index_col the gene identifiers stay in
# integer-labelled columns and the index is a default RangeIndex — confirm
# that this is what downstream code expects for `.var`.
genes2 = pd.read_csv(f'{sample_dir}/genes.tsv', sep='\t', header=None)

# Count matrix in Matrix Market format, converted to CSR.
matrix2 = sci.sparse.csr_matrix(mmread(f'{sample_dir}/matrix.mtx'))

# Headerless per-spot position table indexed by its first column; the five
# remaining columns are labelled explicitly.
coords2 = pd.read_csv(
    f"{sample_dir}/tissue_positions_list.csv",
    header=None,
    index_col=0,
)
coords2.columns = ["in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]

In [None]:
# Assemble the AnnData object. The count matrix is transposed so that its rows
# line up with `barcodes2` (obs) and its columns with `genes2` (var). The
# "spatial" entry passed here is only provisional: it is replaced further down
# by the full-resolution pixel coordinates.
Peng2 = ad.AnnData(X=np.transpose(matrix2), var=genes2, obs=barcodes2, obsm={"spatial": coordinates2})
Peng2.obs.index.name = "index"

# Attach the per-spot position table to the observations by barcode.
Peng2.obs = pd.merge(Peng2.obs, coords2, how="left", left_index=True, right_index=True)

pixel_columns = ["pxl_row_in_fullres", "pxl_col_in_fullres"]

# A left merge leaves NaN for barcodes that are absent from the position
# table; stop here instead of passing NaN coordinates on to later steps.
if Peng2.obs[pixel_columns].isna().any().any():
    raise ValueError("Some barcodes have no entry in tissue_positions_list.csv")

# Move the pixel coordinates from .obs into .obsm["spatial"].
Peng2.obsm["spatial"] = Peng2.obs[pixel_columns].values
Peng2.obs.drop(columns=pixel_columns, inplace=True)

# Register the tissue images and their scale factors under a single library id.
spatial_key = "spatial"
library_id = "tissue42"
Peng2.uns[spatial_key] = {
    library_id: {
        "images": {"hires": hires2, "lowres": lowres2},
        "scalefactors": {
            "spot_diameter_fullres": 130.15841600880097,
            "tissue_hires_scalef": 0.12112403,
            "fiducial_diameter_fullres": 210.25590278344774,
            "tissue_lowres_scalef": 0.036337208,
        },
    }
}

In [None]:
# Run the scanpy steps on Peng2 one after another, each with its default
# settings: total-count normalisation, log1p, PCA, neighbour graph, UMAP and
# Leiden clustering. Order matters, since later steps consume earlier results.
for step in (
    sc.pp.normalize_total,
    sc.pp.log1p,
    sc.pp.pca,
    sc.pp.neighbors,
    sc.tl.umap,
    sc.tl.leiden,
):
    step(Peng2)

In [None]:
# Observation column holding the Leiden cluster labels.
cluster_key = "leiden"

# Spatial view of the clusters: once with shape=None on a 10x10 figure, once
# with squidpy's default settings.
sq.pl.spatial_scatter(Peng2, color=cluster_key, shape=None, figsize=(10, 10))
sq.pl.spatial_scatter(Peng2, color=cluster_key)

# Build the spatial neighbour graph (coord_type="generic"), compute the
# neighbourhood enrichment between clusters, and plot it with method="ward".
sq.gr.spatial_neighbors(Peng2, coord_type="generic")
sq.gr.nhood_enrichment(Peng2, cluster_key=cluster_key)
sq.pl.nhood_enrichment(Peng2, cluster_key=cluster_key, method="ward")

# Colorectal cancer picture 3

#### Data Upload 

In [None]:
# Directory holding every input file of the third colorectal cancer section.
sample_dir = '/Users/annamaguza/Desktop/Desktop-Anna/LMU/Master-Thesis/Spatial transcriptomics data/ST/ST-colon3'

# Spot barcodes: headerless TSV whose first column becomes the index.
barcodes3 = pd.read_csv(f'{sample_dir}/barcodes.tsv', sep='\t', header=None, index_col=0)

# Tissue images as NumPy arrays; the context managers close the image files
# as soon as the pixel data has been copied out.
with Image.open(f'{sample_dir}/tissue_hires_image.png') as hires_file:
    hires3 = np.asarray(hires_file)
with Image.open(f'{sample_dir}/tissue_lowres_image.png') as lowres_file:
    lowres3 = np.asarray(lowres_file)

# Whitespace-delimited coordinate table indexed by its first column.
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
coordinates3 = pd.read_csv(f'{sample_dir}/coordinates.tsv', sep=r'\s+', index_col=0)

# Gene table.
# NOTE(review): with header=None and no index_col the gene identifiers stay in
# integer-labelled columns and the index is a default RangeIndex — confirm
# that this is what downstream code expects for `.var`.
genes3 = pd.read_csv(f'{sample_dir}/genes.tsv', sep='\t', header=None)

# Count matrix in Matrix Market format, converted to CSR.
matrix3 = sci.sparse.csr_matrix(mmread(f'{sample_dir}/matrix.mtx'))

# Headerless per-spot position table indexed by its first column; the five
# remaining columns are labelled explicitly.
coords3 = pd.read_csv(
    f"{sample_dir}/tissue_positions_list.csv",
    header=None,
    index_col=0,
)
coords3.columns = ["in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]

In [None]:
# Assemble the AnnData object. The count matrix is transposed so that its rows
# line up with `barcodes3` (obs) and its columns with `genes3` (var). The
# "spatial" entry passed here is only provisional: it is replaced further down
# by the full-resolution pixel coordinates.
Peng3 = ad.AnnData(X=np.transpose(matrix3), var=genes3, obs=barcodes3, obsm={"spatial": coordinates3})
Peng3.obs.index.name = "index"

# Attach the per-spot position table to the observations by barcode.
Peng3.obs = pd.merge(Peng3.obs, coords3, how="left", left_index=True, right_index=True)

pixel_columns = ["pxl_row_in_fullres", "pxl_col_in_fullres"]

# A left merge leaves NaN for barcodes that are absent from the position
# table; stop here instead of passing NaN coordinates on to later steps.
if Peng3.obs[pixel_columns].isna().any().any():
    raise ValueError("Some barcodes have no entry in tissue_positions_list.csv")

# Move the pixel coordinates from .obs into .obsm["spatial"]. The columns are
# dropped from .obs afterwards so this sample's .obs has the same layout as
# the first two samples.
Peng3.obsm["spatial"] = Peng3.obs[pixel_columns].values
Peng3.obs.drop(columns=pixel_columns, inplace=True)

# Register the tissue images and their scale factors under a single library id.
spatial_key = "spatial"
library_id = "tissue42"
Peng3.uns[spatial_key] = {
    library_id: {
        "images": {"hires": hires3, "lowres": lowres3},
        "scalefactors": {
            "spot_diameter_fullres": 130.15207564120408,
            "tissue_hires_scalef": 0.12112403,
            "fiducial_diameter_fullres": 210.24566065117583,
            "tissue_lowres_scalef": 0.036337208,
        },
    }
}

In [None]:
# Run the scanpy steps on Peng3 one after another, each with its default
# settings: total-count normalisation, log1p, PCA, neighbour graph, UMAP and
# Leiden clustering. Order matters, since later steps consume earlier results.
for step in (
    sc.pp.normalize_total,
    sc.pp.log1p,
    sc.pp.pca,
    sc.pp.neighbors,
    sc.tl.umap,
    sc.tl.leiden,
):
    step(Peng3)

In [None]:
# Observation column holding the Leiden cluster labels.
cluster_key = "leiden"

# Spatial view of the clusters on 10x10 figures: once with shape=None, once
# with squidpy's default shape.
sq.pl.spatial_scatter(Peng3, color=cluster_key, shape=None, figsize=(10, 10))
sq.pl.spatial_scatter(Peng3, color=cluster_key, figsize=(10, 10))

# Build the spatial neighbour graph (coord_type="generic"), compute the
# neighbourhood enrichment between clusters, and plot it with method="ward".
sq.gr.spatial_neighbors(Peng3, coord_type="generic")
sq.gr.nhood_enrichment(Peng3, cluster_key=cluster_key)
sq.pl.nhood_enrichment(Peng3, cluster_key=cluster_key, method="ward")

# Colorectal cancer picture 4

#### Data Upload 

In [None]:
# Directory holding every input file of the fourth colorectal cancer section.
sample_dir = '/Users/annamaguza/Desktop/Desktop-Anna/LMU/Master-Thesis/Spatial transcriptomics data/ST/ST-colon4'

# Spot barcodes: headerless TSV whose first column becomes the index.
barcodes4 = pd.read_csv(f'{sample_dir}/barcodes.tsv', sep='\t', header=None, index_col=0)

# Tissue images as NumPy arrays; the context managers close the image files
# as soon as the pixel data has been copied out.
with Image.open(f'{sample_dir}/tissue_hires_image.png') as hires_file:
    hires4 = np.asarray(hires_file)
with Image.open(f'{sample_dir}/tissue_lowres_image.png') as lowres_file:
    lowres4 = np.asarray(lowres_file)

# Whitespace-delimited coordinate table indexed by its first column.
# sep=r'\s+' replaces delim_whitespace=True, which pandas deprecated in 2.2.
coordinates4 = pd.read_csv(f'{sample_dir}/coordinates.tsv', sep=r'\s+', index_col=0)

# Gene table.
# NOTE(review): with header=None and no index_col the gene identifiers stay in
# integer-labelled columns and the index is a default RangeIndex — confirm
# that this is what downstream code expects for `.var`.
genes4 = pd.read_csv(f'{sample_dir}/genes.tsv', sep='\t', header=None)

# Count matrix in Matrix Market format, converted to CSR.
matrix4 = sci.sparse.csr_matrix(mmread(f'{sample_dir}/matrix.mtx'))

# Headerless per-spot position table indexed by its first column; the five
# remaining columns are labelled explicitly.
coords4 = pd.read_csv(
    f"{sample_dir}/tissue_positions_list.csv",
    header=None,
    index_col=0,
)
coords4.columns = ["in_tissue", "array_row", "array_col", "pxl_col_in_fullres", "pxl_row_in_fullres"]

In [None]:
# Assemble the AnnData object. The count matrix is transposed so that its rows
# line up with `barcodes4` (obs) and its columns with `genes4` (var). The
# "spatial" entry passed here is only provisional: it is replaced further down
# by the full-resolution pixel coordinates.
Peng4 = ad.AnnData(X=np.transpose(matrix4), var=genes4, obs=barcodes4, obsm={"spatial": coordinates4})
Peng4.obs.index.name = "index"

# Attach the per-spot position table to the observations by barcode.
Peng4.obs = pd.merge(Peng4.obs, coords4, how="left", left_index=True, right_index=True)

pixel_columns = ["pxl_row_in_fullres", "pxl_col_in_fullres"]

# A left merge leaves NaN for barcodes that are absent from the position
# table; stop here instead of passing NaN coordinates on to later steps.
if Peng4.obs[pixel_columns].isna().any().any():
    raise ValueError("Some barcodes have no entry in tissue_positions_list.csv")

# Move the pixel coordinates from .obs into .obsm["spatial"]. The columns are
# dropped from .obs afterwards so this sample's .obs has the same layout as
# the first two samples.
Peng4.obsm["spatial"] = Peng4.obs[pixel_columns].values
Peng4.obs.drop(columns=pixel_columns, inplace=True)

# Register the tissue images and their scale factors under a single library id.
spatial_key = "spatial"
library_id = "tissue42"
Peng4.uns[spatial_key] = {
    library_id: {
        "images": {"hires": hires4, "lowres": lowres4},
        "scalefactors": {
            "spot_diameter_fullres": 130.0637145542045,
            "tissue_hires_scalef": 0.12207031,
            "fiducial_diameter_fullres": 210.10292351063802,
            "tissue_lowres_scalef": 0.036621094,
        },
    }
}

In [None]:
# Run the scanpy steps on Peng4 one after another, each with its default
# settings: total-count normalisation, log1p, PCA, neighbour graph, UMAP and
# Leiden clustering. Order matters, since later steps consume earlier results.
for step in (
    sc.pp.normalize_total,
    sc.pp.log1p,
    sc.pp.pca,
    sc.pp.neighbors,
    sc.tl.umap,
    sc.tl.leiden,
):
    step(Peng4)

In [None]:
# Observation column holding the Leiden cluster labels.
cluster_key = "leiden"

# Spatial view of the clusters on 10x10 figures: once with shape=None, once
# with squidpy's default shape.
sq.pl.spatial_scatter(Peng4, color=cluster_key, shape=None, figsize=(10, 10))
sq.pl.spatial_scatter(Peng4, color=cluster_key, figsize=(10, 10))

# Build the spatial neighbour graph (coord_type="generic"), compute the
# neighbourhood enrichment between clusters, and plot it with method="ward".
sq.gr.spatial_neighbors(Peng4, coord_type="generic")
sq.gr.nhood_enrichment(Peng4, cluster_key=cluster_key)
sq.pl.nhood_enrichment(Peng4, cluster_key=cluster_key, method="ward")