# stLearn - Integrate multiple spatial transcriptomics datasets

In this tutorial, we will integrate two related samples together.



In [None]:
import stlearn as st
import scanpy as sc
import numpy as np
import harmonypy as hm

# Set stLearn's figure parameters so plots are drawn at 150 dpi.
st.settings.set_figure_params(dpi=150)

## Read data

In this tutorial, we use the 10x Genomics human breast cancer datasets, which provide two sections of Block A.

Source:
 * https://www.10xgenomics.com/datasets/human-breast-cancer-block-a-section-1-1-standard-1-1-0 and
 * https://www.10xgenomics.com/datasets/human-breast-cancer-block-a-section-2-1-standard-1-1-0


In [None]:
# Load section 1 of block A through scanpy's Visium dataset helper and
# convert it directly into the object layout used by stLearn.
block1 = st.convert_scanpy(
    sc.datasets.visium_sge(sample_id="V1_Breast_Cancer_Block_A_Section_1")
)

In [None]:
# Load section 2 of block A through scanpy's Visium dataset helper and
# convert it directly into the object layout used by stLearn.
block2 = st.convert_scanpy(
    sc.datasets.visium_sge(sample_id="V1_Breast_Cancer_Block_A_Section_2")
)

## Processing data

In [None]:
# Concatenate the 2 samples into a single AnnData object.
# The later cells read an obs column "batch" with values "0" (section 1)
# and "1" (section 2) from the result to tell the two sections apart.
# NOTE(review): AnnData.concatenate is reportedly deprecated in favour of
# anndata.concat in recent anndata releases — confirm before upgrading.
adata_concat = block1.concatenate(block2)

In [None]:
# Preprocessing of the concatenated object (all steps modify adata_concat).
# Filter genes: keep genes detected in at least 3 spots
sc.pp.filter_genes(adata_concat, min_cells=3)
# Normalize every spot to a total of 10,000 counts
sc.pp.normalize_total(adata_concat, target_sum=1e4)
# Log transformation
sc.pp.log1p(adata_concat)
# Store the normalized, log-transformed data for all genes in .raw before
# the object is restricted to highly variable genes
adata_concat.raw = adata_concat
# Flag the highly variable genes
sc.pp.highly_variable_genes(adata_concat, min_mean=0.0125, max_mean=3, min_disp=0.5)
# Keep only the highly variable genes. Slicing an AnnData yields a view;
# the explicit copy makes it an independent object so that the in-place
# scaling below does not operate on a view (which triggers an implicit
# copy and a warning).
adata_concat = adata_concat[:, adata_concat.var.highly_variable].copy()
# Scale data, clipping values above 10
sc.pp.scale(adata_concat, max_value=10)

In [None]:
# Run dimensionality reduction: PCA with 30 components using the ARPACK
# solver. The embedding is read back from .obsm["X_pca"] in the next section.
sc.pp.pca(adata_concat, n_comps=30, svd_solver='arpack')

## Run integration with harmony

In [None]:
# Prepare the two inputs passed to harmony below:
#   meta_data - the per-spot annotation table (includes the "batch" column)
#   data_mat  - the PCA embedding stored in .obsm["X_pca"]
meta_data = adata_concat.obs
data_mat = adata_concat.obsm["X_pca"]

In [None]:
# Run harmony on the PCA embedding, using the "batch" column of meta_data
# as the variable to integrate over. The corrected embedding is read from
# ho.Z_corr in the next cell.
ho = hm.run_harmony(data_mat, meta_data, "batch")

In [None]:
# Map the harmony-corrected embedding back to the adata object, replacing
# the uncorrected PCA in .obsm["X_pca"].
# NOTE(review): the orientation of Z_corr appears to differ between
# harmonypy releases (components x spots vs. spots x components) — confirm
# against the installed version. The embedding is therefore oriented by the
# number of spots instead of being transposed unconditionally, so the first
# axis always matches adata_concat.n_obs as .obsm requires.
corrected_pcs = np.asarray(ho.Z_corr)
if corrected_pcs.shape[0] != adata_concat.n_obs:
    corrected_pcs = corrected_pcs.T
adata_concat.obsm["X_pca"] = corrected_pcs

## Perform clustering and visualize the results by UMAP

In [None]:
# Build the KNN graph using 30 principal components, then compute the UMAP
# embedding. .obsm["X_pca"] holds the harmony-corrected values at this point.
sc.pp.neighbors(adata_concat, n_pcs=30)
sc.tl.umap(adata_concat)

In [None]:
# Run clustering with leiden at resolution 0.4. The labels are read from
# .obs["leiden"] in the cells below.
sc.tl.leiden(adata_concat, resolution=0.4)

In [None]:
# Draw the UMAP twice: once coloured by section ("batch") and once by
# Leiden cluster.
sc.pl.umap(
    adata_concat,
    color=["batch", "leiden"],
)

## Map the result back to the original samples


In [None]:
# Set the figure resolution to 150 dpi for the per-section plots.
st.settings.set_figure_params(dpi=150)
# Map the leiden clustering result to block A section 1 (batch "0").
# The labels are handed over as a plain array, i.e. by position, so this
# relies on the batch-"0" spots of adata_concat being in block1's order.
block1.obs["leiden"] = adata_concat.obs.loc[
    adata_concat.obs["batch"] == "0", "leiden"
].values

In [None]:
# Plot the clustering result of section 1, using the "leiden" labels
# stored in block1.obs.
st.pl.cluster_plot(block1,use_label="leiden")

In [None]:
# Map the leiden clustering result to block A section 2 (batch "1").
# The labels are handed over as a plain array, i.e. by position, so this
# relies on the batch-"1" spots of adata_concat being in block2's order.
block2.obs["leiden"] = adata_concat.obs.loc[
    adata_concat.obs["batch"] == "1", "leiden"
].values

In [None]:
# Plot the clustering result of section 2, using the "leiden" labels
# stored in block2.obs.
st.pl.cluster_plot(block2,use_label="leiden")

## Manually combine the images and change the coordinates

This is one way to plot two samples from the same adata object.

In [None]:
# Initialize the spatial metadata of the combined object from section 1.
# A deep copy is taken so that replacing the "hires" image on adata_concat
# in a later cell does not also overwrite the image stored in block1, which
# would happen if both objects shared the same nested dictionary.
import copy

adata_concat.uns["spatial"] = copy.deepcopy(block1.uns["spatial"])

In [None]:
# Place the hires image of section 1 on the left and that of section 2 on
# the right, producing a single side-by-side image.
combined = np.hstack(
    [
        section.uns["spatial"][library_id]["images"]["hires"]
        for section, library_id in (
            (block1, "V1_Breast_Cancer_Block_A_Section_1"),
            (block2, "V1_Breast_Cancer_Block_A_Section_2"),
        )
    ]
)

In [None]:
# Map the image to the concatenated adata object: the stacked image replaces
# the "hires" image stored under the section 1 library id.
adata_concat.uns["spatial"]["V1_Breast_Cancer_Block_A_Section_1"]["images"]["hires"] = combined

In [None]:
# Move the section 2 spots (batch "1") to the right so they sit on the
# right-hand part of the stacked image. imagecol is later divided by the
# hires scale factor, i.e. it is expressed in hires-image pixels, so the
# required shift is the width of the left-hand (section 1) image. It is
# derived as the stacked width minus the section 2 width instead of being
# hard-coded to 2000 px, which only holds for 2000-px-wide images.
section2_hires = block2.uns["spatial"]["V1_Breast_Cancer_Block_A_Section_2"]["images"]["hires"]
col_offset = combined.shape[1] - section2_hires.shape[1]
adata_concat.obs.loc[adata_concat.obs.batch == "1", "imagecol"] = (
    adata_concat.obs.loc[adata_concat.obs.batch == "1", "imagecol"].values + col_offset
)

In [None]:
# Rebuild .obsm["spatial"] from the image coordinates in .obs: the
# (imagecol, imagerow) pairs are divided by the hires scale factor taken
# from the section 1 entry of the spatial metadata.
factor = (
    adata_concat.uns["spatial"]["V1_Breast_Cancer_Block_A_Section_1"]
    ["scalefactors"]["tissue_hires_scalef"]
)
adata_concat.obsm["spatial"] = (
    adata_concat.obs[["imagecol", "imagerow"]].to_numpy() / factor
)

In [None]:
# Raise the figure resolution to 200 dpi for the side-by-side plots below.
st.settings.set_figure_params(dpi=200)

In [None]:
# Show KRT5 expression for both sections on the stacked image, without
# cropping.
st.pl.gene_plot(
    adata_concat,
    gene_symbols="KRT5",
    crop=False,
    size=1.4,
    cell_alpha=1,
)

In [None]:
# Show the Leiden clusters of both sections on the stacked image, without
# cropping.
st.pl.cluster_plot(
    adata_concat,
    use_label="leiden",
    crop=False,
    size=1.4,
    cell_alpha=1,
)