# Spatial Statistics with Voyager

Based on the following tutorials:
* https://pachterlab.github.io/voyager/articles/visium_10x.html
* https://pachterlab.github.io/voyager/articles/vig1_visium_basic.html
* https://pachterlab.github.io/voyager/articles/vig2_visium.html
* https://pachterlab.github.io/voyager/articles/visium_10x_spatial.html
* https://pachterlab.github.io/voyager/articles/multispati.html

In [None]:
library(dplyr)
library(Voyager)
library(SpatialExperiment)
library(SpatialFeatureExperiment)
library(SingleCellExperiment)
library(ggplot2)
library(scater)
library(rlang)
library(scran)
library(scuttle)
library(terra)
library(sf)
library(rmapshaper)
library(scran)
library(stringr)
library(EBImage)
library(patchwork)
library(bluster)
library(rjson)
# Make the black-and-white ggplot2 theme the session-wide default.
ggplot2::theme_set(ggplot2::theme_bw())

In [None]:
# Layout ----

#' Shared plot theme for this notebook.
#'
#' Starts from `theme_bw()`, enlarges the legend and axis text, and places the
#' legend on the right-hand side of the plot.
#'
#' @return A ggplot2 theme object that can be added to a plot with `+` or `&`.
custom_theme <- function() {
  text_sizes <- theme(
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    legend.title = element_text(size = 16, face = "bold"),
    legend.text = element_text(size = 14)
  )
  legend_placement <- theme(
    legend.position = "right",
    legend.box.just = "right"
  )
  theme_bw() + text_sizes + legend_placement
}

# Default figure size used by the Jupyter R kernel when rendering plots.
options(repr.plot.height = 16, repr.plot.width = 20)

In [None]:
# Absolute path of the shared data directory, resolved from the notebook's
# working directory.
data_dir <- R.utils::getAbsolutePath("../../data")
# "outs" folder of the mouse olfactory bulb Visium sample.
mouse_dir <- glue::glue("{data_dir}/Visium_Mouse_Olfactory_Bulb/outs")

## Visium Files

### Scale Factors

The scalefactors_json.json file contains image metadata:
* **tissue_hires_scalef** and **tissue_lowres_scalef** are the ratio of the size of the high resolution (but not full resolution) and low resolution H&E image to the full resolution image.
* **fiducial_diameter_fullres** is the diameter of each fiducial spot used to align the spots to the H&E image in pixels in the full resolution image.
* **spot_diameter_fullres** is the diameter of each Visium spot in the full resolution H&E image in pixels. 

In [None]:
# Parse the image scale factors and display them as a named list.
scalefactors_path <- glue::glue("{mouse_dir}/spatial/scalefactors_json.json")
rjson::fromJSON(file = scalefactors_path)

### Tissue Metadata

The tissue_positions.csv file (named tissue_positions_list.csv in Space Ranger releases before 2.0) contains information about each spot/barcode:
* **in_tissue** indicates whether each spot is in tissue (1 means yes and 0 means no), as automatically detected by 
Space Ranger or manually annotated in the Loupe browser.
* **array_row** and **array_col** are the coordinates on the matrix of spots.
* **pxl_row_in_fullres** and **pxl_col_in_fullres** are the coordinates of the spots in the full resolution 
image.

In [None]:
# Preview the first rows of the per-spot position table.
tissue_positions_path <- glue::glue("{mouse_dir}/spatial/tissue_positions.csv")
tissue_positions <- read.csv(tissue_positions_path)
head(tissue_positions)

# Read Visium Data

In [None]:
#Original way to load Visium Data
#raw_sfe <- SpatialFeatureExperiment::read10xVisiumSFE(dirs = mouse_dir, samples = ".", type = "sparse", data = "raw")
#Voyager::plotImage(raw_sfe)
#transposed_raw_sfe <- SpatialFeatureExperiment::transpose(raw_sfe)

In [None]:
# Load the pre-processed object saved as an RDS file instead of re-reading the
# Space Ranger output.
rds_path <- glue::glue("{data_dir}/Visium_Mouse_Olfactory_Bulb.rds")
raw_sfe <- readRDS(rds_path)
# The stored object is used as-is; no transpose is applied in this cell.
transposed_raw_sfe <- raw_sfe

# Perform QC

In [None]:
# Flag mitochondrial genes by their symbol prefix ("mt-") and report how many
# were found.
is_mt <- str_detect(rowData(transposed_raw_sfe)$symbol, "^mt-")
sum(is_mt)
# Per-spot number of detected genes and total counts.
colData(transposed_raw_sfe)$nGenes <- colSums(counts(transposed_raw_sfe) > 0)
colData(transposed_raw_sfe)$nCounts <- colSums(counts(transposed_raw_sfe))
# Adds the scuttle QC columns ("sum", "detected", "subsets_mito_percent", ...).
qc_sfe <- scuttle::addPerCellQCMetrics(transposed_raw_sfe, subsets = list(mito = is_mt))
# Normally MITO % is set to 20 - see what effect this has compared to 30.
# Spots whose mitochondrial percentage is undefined (NA/NaN, e.g. zero total
# counts) are dropped rather than passed on as an NA subscript.
keep_spot <- !is.na(qc_sfe$subsets_mito_percent) & qc_sfe$subsets_mito_percent < 20
# Subset qc_sfe (not transposed_raw_sfe) so the filtered object keeps the
# scuttle QC columns that the later QC plots of processed_sfe rely on.
processed_sfe <- qc_sfe[, keep_spot]
# Drop genes that have no counts left after spot filtering.
processed_sfe <- processed_sfe[rowSums(counts(processed_sfe)) > 0,]
colData(processed_sfe)$nCounts <- colSums(counts(processed_sfe))

In [None]:
# QC metric distributions before filtering, split by tissue membership.
# "sum", "detected" and "subsets_mito_percent" are added by
# scuttle::addPerCellQCMetrics() and live on qc_sfe, so plot from that object.
p1 <- scater::plotColData(qc_sfe, "sum", x = "in_tissue", color_by = "in_tissue") + custom_theme()
p2 <- scater::plotColData(qc_sfe, "detected", x = "in_tissue", color_by = "in_tissue") + custom_theme()
p3 <- scater::plotColData(qc_sfe, "subsets_mito_percent", x = "in_tissue", color_by = "in_tissue") + custom_theme()
plot <- (p1 + p2 + p3) + patchwork::plot_layout(ncol = 3, guides = "collect")
# NOTE(review): show_plot() is not defined in the visible part of this notebook;
# presumably it renders the figure and saves it under the given name - confirm.
show_plot(plot, "qc_1.png")

In [None]:
# Binned scatter of library size against mitochondrial percentage before
# filtering. Both columns are scuttle QC metrics stored on qc_sfe.
plot <- scater::plotColData(qc_sfe, x = "sum", y = "subsets_mito_percent", bins = 100) + custom_theme()
# NOTE(review): show_plot() is not defined in the visible part of this notebook;
# presumably it renders the figure and saves it under the given name - confirm.
show_plot(plot, "qc_2.png")

In [None]:
# Same three QC distributions, now for the spots that passed filtering.
# NOTE(review): requires processed_sfe to carry the scuttle QC columns
# ("sum", "detected", "subsets_mito_percent") - verify.
p1 <- scater::plotColData(
    processed_sfe,
    y = "sum", x = "in_tissue", color_by = "in_tissue"
) + custom_theme()
p2 <- scater::plotColData(
    processed_sfe,
    y = "detected", x = "in_tissue", color_by = "in_tissue"
) + custom_theme()
p3 <- scater::plotColData(
    processed_sfe,
    y = "subsets_mito_percent", x = "in_tissue", color_by = "in_tissue"
) + custom_theme()
three_across <- patchwork::plot_layout(ncol = 3, guides = "collect")
plot <- (p1 + p2 + p3) + three_across
show_plot(plot, "qc_3.png")

In [None]:
# Binned scatter of library size against mitochondrial percentage after
# filtering.
plot <- scater::plotColData(
    processed_sfe,
    x = "sum",
    y = "subsets_mito_percent",
    bins = 100
) + custom_theme()
show_plot(plot, "qc_4.png")

# Before and after QC by Percentage

In [None]:
# Spatial maps of the three QC metrics before filtering (p1-p3, from qc_sfe)
# and after filtering (p4-p6, from processed_sfe).
p1 <- Voyager::plotSpatialFeature(qc_sfe, c("sum"), image_id = "lowres", maxcell = 5e4, ncol = 2) + custom_theme()
p2 <- Voyager::plotSpatialFeature(qc_sfe, c("detected"), image_id = "lowres", maxcell = 5e4, ncol = 2) + custom_theme()
p3 <- Voyager::plotSpatialFeature(qc_sfe, c("subsets_mito_percent"), image_id = "lowres", maxcell = 5e4, ncol = 2) + custom_theme()
p4 <- Voyager::plotSpatialFeature(processed_sfe, c("sum"), image_id = "lowres", maxcell = 5e4, ncol = 2) + custom_theme()
p5 <- Voyager::plotSpatialFeature(processed_sfe, c("detected"), image_id = "lowres", maxcell = 5e4, ncol = 2) + custom_theme()
p6 <- Voyager::plotSpatialFeature(processed_sfe, c("subsets_mito_percent"), image_id = "lowres", maxcell = 5e4, ncol = 2) + custom_theme()
# Fill the two columns column-wise so the left column is "before" (p1-p3) and
# the right column is "after" (p4-p6); row-wise filling would interleave them.
plot <- (p1 + p2 + p3 + p4 + p5 + p6) +
    patchwork::plot_layout(ncol = 2, byrow = FALSE, guides = "collect")
show_plot(plot, "qc_5.png", width = 1600, height = 800, res = 100)

# Plotting Metrics in Space

## Counts Per Spot in and out of Tissue

Plot the total unique molecular identifier (UMI) counts per spot.

In [None]:
# Total UMI counts per spot: violin split by tissue membership, next to the
# spatial map with the tissue boundary outlined in black.
# custom_theme() is a complete theme, so it must be added BEFORE the legend
# override; added afterwards it would reset legend.position to "right".
violin <- plotColData(processed_sfe, "nCounts", x = "in_tissue", colour_by = "in_tissue") +
    custom_theme() + theme(legend.position = "top")
# image_id is spelled out in full instead of relying on partial matching of
# `image`.
spatial <- plotSpatialFeature(processed_sfe, "nCounts", colGeometryName = "spotPoly",
                              annotGeometryName = "tissueBoundary",
                              image_id = "lowres", maxcell = 5e4,
                              annot_fixed = list(fill = NA, color = "black")) + custom_theme()
violin + spatial

In [None]:
# Number of genes with at least one count in each spot, recomputed on the
# filtered object.
processed_sfe$nGenes <- colSums(counts(processed_sfe) > 0)

In [None]:
# Genes detected per spot: violin split by tissue membership, next to the
# spatial map with the tissue boundary outlined in black.
# custom_theme() is a complete theme, so it must be added BEFORE the legend
# override; added afterwards it would reset legend.position to "right".
violin <- scater::plotColData(processed_sfe, "nGenes", x = "in_tissue", colour_by = "in_tissue") +
    custom_theme() + theme(legend.position = "top")
# image_id is spelled out in full instead of relying on partial matching of
# `image`.
spatial <- Voyager::plotSpatialFeature(processed_sfe, "nGenes", colGeometryName = "spotPoly",
                              annotGeometryName = "tissueBoundary",
                              image_id = "lowres", maxcell = 5e4,
                              annot_fixed = list(fill = NA, color = "black")) + custom_theme()
violin + spatial

In [None]:
# Total counts against genes detected, coloured by tissue membership.
counts_vs_genes <- scater::plotColData(
    processed_sfe,
    x = "nCounts",
    y = "nGenes",
    colour_by = "in_tissue"
)
counts_vs_genes + custom_theme()

In [None]:
# Proportion of each spot's counts that come from mitochondrial genes
# (symbols starting with "mt-").
mito_ind <- str_detect(rowData(processed_sfe)$symbol, "^mt-")
# drop = FALSE keeps the subset a matrix even if only one gene matches, so
# colSums() still receives a two-dimensional object.
colData(processed_sfe)$prop_mito <- colSums(counts(processed_sfe)[mito_ind, , drop = FALSE]) / colData(processed_sfe)$nCounts

In [None]:
# Mitochondrial proportion per spot: violin split by tissue membership, next
# to the spatial map with the tissue boundary outlined in black.
# custom_theme() is a complete theme, so it must be added BEFORE the legend
# override; added afterwards it would reset legend.position to "right".
violin <- scater::plotColData(processed_sfe, "prop_mito", x = "in_tissue", colour_by = "in_tissue") +
    custom_theme() + theme(legend.position = "top")
# image_id is spelled out in full instead of relying on partial matching of
# `image`.
spatial <- Voyager::plotSpatialFeature(processed_sfe, "prop_mito", colGeometryName = "spotPoly",
                              annotGeometryName = "tissueBoundary",
                              image_id = "lowres", maxcell = 5e4,
                              annot_fixed = list(fill = NA, color = "black")) + custom_theme()
violin + spatial

# Only use in_tissue spots

In [None]:
# Keep only the spots flagged as lying on tissue.
# NOTE(review): assumes in_tissue is a logical column - confirm.
on_tissue <- colData(processed_sfe)$in_tissue
sfe_tissue <- processed_sfe[, on_tissue]
# Then drop genes with no counts in the remaining spots.
has_counts <- rowSums(counts(sfe_tissue)) > 0
sfe_tissue <- sfe_tissue[has_counts, ]

In [None]:
# Per-gene summary statistics of the raw counts across tissue spots.
tissue_counts <- counts(sfe_tissue)
rowData(sfe_tissue)$means <- rowMeans(tissue_counts)
rowData(sfe_tissue)$vars <- rowVars(tissue_counts)
# Squared coefficient of variation: variance divided by the squared mean.
rowData(sfe_tissue)$cv2 <- rowData(sfe_tissue)$vars / rowData(sfe_tissue)$means^2

In [None]:
# Binned mean-variance plot of the genes on log-log axes. The red line marks
# where the variance equals the mean.
mean_var <- scater::plotRowData(sfe_tissue, x = "means", y = "vars", bins = 50)
identity_line <- ggplot2::geom_abline(slope = 1, intercept = 0, color = "red")
bin_fill <- ggplot2::scale_fill_distiller(palette = "Blues", direction = 1)
mean_var +
    identity_line +
    ggplot2::scale_x_log10() +
    ggplot2::scale_y_log10() +
    bin_fill +
    ggplot2::annotation_logticks() +
    ggplot2::coord_equal() +
    custom_theme()

In [None]:
# Compute log-normalised expression values for the tissue spots.
sfe_tissue <- scuttle::logNormCounts(sfe_tissue)

In [None]:
# Model per-gene variance and keep the 2000 most highly variable genes.
dec <- scran::modelGeneVar(
    sfe_tissue,
    lowess = FALSE
)
hvgs <- scran::getTopHVGs(
    dec,
    n = 2000
)

In [None]:
# PCA on the scaled highly variable genes, retaining 30 components.
sfe_tissue <- BiocSingular::runPCA(
    sfe_tissue,
    ncomponents = 30,
    subset_row = hvgs,
    scale = TRUE
)

In [None]:
# Elbow plot for the 30 principal components.
pca_elbow <- Voyager::ElbowPlot(sfe_tissue, ndims = 30)
pca_elbow + custom_theme()

In [None]:
# Gene loadings of the first five principal components, labelled by symbol.
loading_panels <- Voyager::plotDimLoadings(
    sfe_tissue,
    dims = 1:5,
    swap_rownames = "symbol",
    ncol = 3
)
# `&` applies the shared theme to every panel of the patchwork.
plots <- wrap_plots(loading_panels) & custom_theme()
plots

In [None]:
# Leiden clustering on a shared-nearest-neighbour graph built from the first
# three principal components. The seed is fixed for reproducibility.
set.seed(29)
pca_first3 <- reducedDim(sfe_tissue, "PCA")[, 1:3]
leiden_snn <- SNNGraphParam(
    cluster.fun = "leiden",
    cluster.args = list(
        resolution_parameter = 0.5,
        objective_function = "modularity"
    )
)
colData(sfe_tissue)$cluster <- bluster::clusterRows(pca_first3, BLUSPARAM = leiden_snn)

In [None]:
# Pairwise plots of the first five principal components, coloured by cluster.
pca_pairs <- scater::plotPCA(
    sfe_tissue,
    ncomponents = 5,
    colour_by = "cluster"
)
pca_pairs + custom_theme()

In [None]:
# Cluster labels drawn on the spot polygons over the low-resolution image.
cluster_map <- Voyager::plotSpatialFeature(
    sfe_tissue,
    features = "cluster",
    colGeometryName = "spotPoly",
    image_id = "lowres"
)
cluster_map + custom_theme()

In [None]:
# Spatial maps of the first five principal components on a diverging colour
# scale centred at zero.
pc_maps <- Voyager::spatialReducedDim(
    sfe_tissue, "PCA",
    ncomponents = 5,
    colGeometryName = "spotPoly",
    divergent = TRUE, diverge_center = 0,
    ncol = 2,
    image_id = "lowres", maxcell = 5e4
)
plots <- wrap_plots(pc_maps) & custom_theme()
plots

In [None]:
# UMAP computed from the first three principal components; seeded for
# reproducibility.
set.seed(29)
sfe_tissue <- scater::runUMAP(
    sfe_tissue,
    dimred = "PCA",
    n_dimred = 3
)

In [None]:
# UMAP embedding coloured by cluster.
umap_plot <- scater::plotUMAP(sfe_tissue, colour_by = "cluster")
umap_plot + custom_theme()

In [None]:
# Up-regulated marker genes per cluster from Wilcoxon tests, with
# pval.type = "all".
markers <- scran::findMarkers(
    sfe_tissue,
    groups = colData(sfe_tissue)$cluster,
    test.type = "wilcox",
    pval.type = "all",
    direction = "up"
)

In [None]:
# Take the first-listed marker gene of each cluster and plot its expression
# per cluster, labelled with gene symbols.
genes_use <- vapply(
    markers,
    function(cluster_markers) rownames(cluster_markers)[1],
    FUN.VALUE = character(1)
)
marker_symbols <- rowData(sfe_tissue)[genes_use, "symbol"]
plots <- scater::plotExpression(
    sfe_tissue, marker_symbols,
    x = "cluster",
    colour_by = "cluster",
    swap_rownames = "symbol"
)
plots <- wrap_plots(plots) & custom_theme()
plots

In [None]:
# Spatial expression maps of the marker genes in genes_use, labelled by symbol.
plots <- Voyager::plotSpatialFeature(sfe_tissue, genes_use, colGeometryName = "spotPoly", ncol = 2,
                   swap_rownames = "symbol", image_id = "lowres", maxcell = 5e4)
# The theme is applied once with `&`, which reaches every panel; an extra
# `+ custom_theme()` on the multi-panel result would only touch the last panel.
plots <- wrap_plots(plots) & custom_theme()
plots

In [None]:
# Extract the spot polygon geometry.
sp <- SpatialFeatureExperiment::spotPoly(
    sfe_tissue
)

In [None]:
# Write the same geometry back as the column (MARGIN = 2) geometry "spotPoly".
SpatialFeatureExperiment::dimGeometry(
    sfe_tissue, "spotPoly",
    MARGIN = 2
) <- sp

In [None]:
# Fetch and display the tissue boundary annotation geometry.
tb <- SpatialFeatureExperiment::annotGeometry(sfe_tissue, "tissueBoundary")
tb

In [None]:
# Base graphics: draw the tissue outline, then overlay the spots in gray.
tissue_outline <- sf::st_geometry(tb)
plot(tissue_outline)
plot(sp, add = TRUE, col = "gray")

In [None]:
# Total counts shown as coloured spot outlines (no fill), together with the
# tissue boundary.
ncounts_outline <- Voyager::plotSpatialFeature(
    sfe_tissue,
    features = "nCounts",
    colGeometryName = "spotPoly",
    annotGeometryName = "tissueBoundary",
    aes_use = "color",
    linewidth = 0.5,
    fill = NA
)
ncounts_outline + custom_theme()

# Spatial Statistics

The intuition behind spatial statistics is that nearer things are more closely related than more distant things. For example, the weather in Brisbane and the Sunshine Coast are more similar than the weather in Melbourne. Spatial autocorrelation has been used for decades in [geographical information systems (GIS)](https://dces.wisc.edu/wp-content/uploads/sites/128/2013/08/W5_Getis2008.pdf), applied to areas such as the analysis of air pollution, water quality, or soil properties. A good example of using geographical, [spatial exploratory and confirmatory analysis](https://en.wikipedia.org/wiki/Exploratory_data_analysis) to improve cancer screening can be found in "[Spatial evaluation of prevalence, pattern and predictors of cervical cancer screening in India](https://www.sciencedirect.com/science/article/pii/S003335061930294X#sec2.2)".

Univariate, bivariate and multivariate spatial correlation measures the degree of spatial dependence or clustering for a single variable, two variables or all variables across different locations. It quantifies whether values of a variable at nearby locations are more similar or dissimilar than expected by chance.

For example, Moran’s I is similar to the Pearson correlation between the value at each location and the average value at its neighbors. Like Pearson correlation, Moran’s I is generally bound between -1 and 1, where positive value indicates positive spatial autocorrelation and negative value indicates negative spatial autocorrelation.

To determine if the spatial autocorrelation is statistically significant, the [moran.test](https://r-spatial.github.io/spdep/reference/moran.test.html) function in [spdep](https://github.com/r-spatial/spdep) is used. It provides a p-value, but the p-value may not be accurate if the data is not normally distributed. Gene expression data is generally not normally distributed and data normalization doesn’t always work well. Instead, permutation testing is used to generate the significance of Moran’s I and Geary’s C.

Types of spatial correlation:
* Univariate
  * Global
      * Moran's I
      * Geary's C
      * Correlogram
      * Variogram
  * Local
      * Moran Scatter Plot
      * Local Moran's I
      * Local spatial heteroscedasticity
      * Getis-Ord Gi
* Bivariate
    *  Lee's L
    *  Cross Variogram
* Multivariate
    * MULTISPATI PCA
    * Multivariate local Geary's C

A list of spatial statistic functions available for use:

In [None]:
# List the univariate, global spatial statistics methods offered by Voyager.
Voyager::listSFEMethods(
    variate = "uni",
    scope = "global"
)

The "moran.test" and "geary.test" refer to autocorrelation functions that provide a p-value. The test statistic, known as the standard deviate of Moran's I, is assumed to follow a standard normal distribution (z-distribution) when the null hypothesis is true. However, the p-value may not be accurate if the data is not normally distributed, which is often the case with gene expression data. 

The "moran.mc" and "geary.mc" perform permutation testing using a Monte Carlo simulation to calculate the p-value. Permutation testing is a robust approach for assessing the significance of spatial correlation, especially when the data is not normally distributed. It provides a reliable way to determine if the observed spatial patterns are likely to have arisen by chance or if they reflect meaningful spatial relationships.

Testing is performed by randomly shuffling the values of the variable across the locations multiple times (e.g. 999 times) and recalculating Moran's I for each permutation. This creates a reference distribution of Moran's I values under the null hypothesis of no spatial correlation. The observed Moran's I value is then compared to this reference distribution. If the observed value falls in the extreme tails of the distribution (e.g., top 5% or bottom 5%), it suggests that the spatial correlation is statistically significant and unlikely to have occurred by chance.

## Generating a Spatial Neighbourhood Graph

A spatial neighbourhood graph must be generated before spatial correlation can be computed. For Visium, where the spots 
are in a hexagonal grid, the spatial neighbourhood graph is straightforward (we're using [tri2nb](https://r-spatial.github.io/spdep/reference/tri2nb.html)).

We'll compare generating a graph for the entire sample (with spots removed), the tissue of interest and the tissue of interest with spots removed.

In [None]:
# tri2nb neighbourhood graph over all QC-filtered spots; printed for
# inspection.
g_all <- SpatialFeatureExperiment::findSpatialNeighbors(
    processed_sfe,
    MARGIN = 2,
    method = "tri2nb"
)
g_all

In [None]:
# Same construction restricted to the in-tissue spots.
g_specific <- SpatialFeatureExperiment::findSpatialNeighbors(
    sfe_tissue,
    MARGIN = 2,
    method = "tri2nb"
)
g_specific

In [None]:
# Store the graph on the object as column graph "graph1".
SpatialFeatureExperiment::spatialGraph(processed_sfe, "graph1", MARGIN = 2) <- g_all

In [None]:
# Store the tissue-only graph under the same name on sfe_tissue.
SpatialFeatureExperiment::spatialGraph(sfe_tissue, "graph1", MARGIN = 2) <- g_specific

In [None]:
# Graph from findVisiumGraph(), stored as "visium"; zero.policy = TRUE is
# passed through.
visium_graph <- SpatialFeatureExperiment::findVisiumGraph(sfe_tissue, zero.policy = TRUE)
colGraph(sfe_tissue, "visium") <- visium_graph

## Visualising the Neighbourhood Graphs

The following shows the new neighbourhood graph generated from the original Visium spots and the Visium spots with QC applied to remove
some of the spots.

In [None]:
# "graph1" over all QC-filtered spots, drawn at the spot coordinates.
all_spot_coords <- spatialCoords(processed_sfe)
plot(colGraph(processed_sfe, "graph1"), coords = all_spot_coords)

In [None]:
# "graph1" over the in-tissue spots only.
tissue_coords <- spatialCoords(sfe_tissue)
plot(colGraph(sfe_tissue, "graph1"), coords = tissue_coords)

In [None]:
# The "visium" graph over the in-tissue spots.
tissue_coords <- spatialCoords(sfe_tissue)
plot(colGraph(sfe_tissue, "visium"), coords = tissue_coords)

## Global Spatial Correlation

Global spatial correlation looks at the entire dataset to assess whether it is clustered, dispersed, or randomly assigned.

### Using Moran's I

A global Moran's I value indicates whether values in the dataset cluster together (positive values), are dispersed (negative values), or are randomly arranged (values close to zero). As it is a global calculation, it does not identify individual clusters or outliers; it only characterises the dataset as a whole.

As an example, we'll calculate the top 2000 highly variable genes and see if they are spatially correlated.

In [None]:
# Re-model gene variance with scran's default settings and take the top 2000
# highly variable genes. This overwrites the earlier `dec` and `hvgs`.
dec <- scran::modelGeneVar(
    sfe_tissue
)
hvgs <- scran::getTopHVGs(
    dec,
    n = 2000
)

In [None]:
# Global Moran's I of two per-spot QC metrics using the "graph1" neighbourhood
# graph; results are stored on the object and displayed below.
# sfe_tissue <- Voyager::calculateUnivariate(t(colData(sfe_tissue)[,c("nCounts", "nGenes")]), type = "moran", listw = colGraph(sfe_tissue, "graph1"))
qc_features <- c("nCounts", "nGenes")
sfe_tissue <- Voyager::colDataUnivariate(
    sfe_tissue,
    features = qc_features,
    colGraphName = "graph1",
    type = "moran"
)
colFeatureData(sfe_tissue)[qc_features,]

What do these results mean? A positive "moran" value indicates positive spatial autocorrelation (similar values cluster together), a negative value indicates negative spatial autocorrelation (neighbouring values tend to differ), and a value close to zero indicates no spatial autocorrelation. The "K" value or [kurtosis](https://en.wikipedia.org/wiki/Kurtosis) is a measure of the "tailedness" of a distribution. Higher values indicate a distribution with a higher chance of producing outliers; conversely, lower values indicate a smaller chance of producing outliers. A value of 3 is typical for normally distributed data.

In this case, both "nCounts" and "nGenes" have high positive Moran's I values (0.761749 and 0.793523, respectively), suggesting strong positive spatial autocorrelation. The kurtosis values for "nCounts" and "nGenes" are 1.84931 and 1.96607, respectively, which are less than 3, suggesting that both variables have a distribution with a smaller chance of producing outliers compared to a normal distribution.

We can visualise it in a histogram as follows.

In [None]:
# Moran's I for every highly variable gene, then a histogram of the values
# stored in the "moran_sample01" row-data column.
sfe_tissue <- Voyager::runMoransI(
    sfe_tissue,
    features = hvgs
)
moran_hist <- Voyager::plotRowDataHistogram(sfe_tissue, "moran_sample01")
moran_hist + custom_theme()

In [None]:
# Row data (including the Moran's I results) for the highly variable genes.
df <- rowData(sfe_tissue)[hvgs,]

In [None]:
# Rank the genes from highest to lowest Moran's I. The statistic is stored in
# the "moran_sample01" column (the same column displayed below); ordering by a
# column that does not exist would yield an empty ranking.
ord <- order(df$moran_sample01, decreasing = TRUE)
df[ord, c("symbol", "moran_sample01")]

In [None]:
# First six rows and three columns of df in its stored (hvgs) order; this is
# not the Moran's I ranking, because df itself is not reordered.
df[1:6,1:3]

In [None]:
# Spatial expression maps for the first six genes of df.
# NOTE(review): df is in hvgs order, so these are the first six highly variable
# genes, not the six genes with the highest Moran's I - confirm this is intended.
# image_id is spelled out in full instead of relying on partial matching of
# `image`.
plots <- Voyager::plotSpatialFeature(sfe_tissue, rownames(df)[1:6], colGeometryName = "spotPoly",
                                     image_id = "lowres", maxcell = 5e4, swap_rownames = "symbol", ncol = 2)
plots <- wrap_plots(plots) & custom_theme()
plots

In [None]:
# The six genes with the lowest Moran's I. order() places NA values last, so
# genes without a computed statistic are not selected.
neg_moran <- rownames(sfe_tissue)[order(rowData(sfe_tissue)$moran_sample01, decreasing = FALSE)[1:6]]
# moran.mc is a permutation (Monte Carlo) test; fix the seed so the simulated
# p-values are reproducible, as done for the other stochastic steps.
set.seed(29)
# One-sided test (alternative = "less") with 200 permutations on "graph1".
sfe_tissue <- Voyager::runUnivariate(sfe_tissue, "moran.mc", neg_moran, colGraphName = "graph1", nsim = 200, alternative = "less")

In [None]:
# Spatial expression maps of the six lowest-Moran's-I genes.
neg_moran_maps <- Voyager::plotSpatialFeature(
    sfe_tissue, neg_moran,
    swap_rownames = "symbol",
    image_id = "lowres",
    maxcell = 5e4,
    ncol = 2
)
plots <- wrap_plots(neg_moran_maps) & custom_theme()
plots

## Local Spatial Correlation

Local spatial correlation looks at the neighbours of values. Instead of generating a single statistic characterising the dataset, local methods generate a value for each location. You can reuse methods such as Moran's I and Geary's C to process data locally - these will show clusters of similar values (low or high) as well as outliers. Getis-Ord Gi* is another method that generates clusters of cold (low) and hot (high) values and can be easier to interpret.

In [None]:
# Visium neighbourhood graph built with style = "B", stored as "visium_B".
colGraph(sfe_tissue, "visium_B") <- SpatialFeatureExperiment::findVisiumGraph(sfe_tissue, style = "B", zero.policy = TRUE)

In [None]:
# localG_perm is permutation-based; fix the seed so the simulated p-values
# are reproducible, as done for the other stochastic steps.
set.seed(29)
# Local G statistic for the gene with symbol "Ptgds" on the "visium_B" graph.
sfe_tissue <- Voyager::runUnivariate(sfe_tissue, type = "localG_perm", features = "Ptgds",
                            colGraphName = "visium_B", swap_rownames = "symbol")

In [None]:
# Map of the local G result for Ptgds on a diverging scale centred at zero,
# with thin black spot outlines.
local_g_map <- Voyager::plotLocalResult(
    sfe_tissue, "localG_perm",
    features = "Ptgds",
    colGeometryName = "spotPoly",
    divergent = TRUE, diverge_center = 0,
    image_id = "lowres",
    swap_rownames = "symbol",
    color = "black", linewidth = 0.1
)
local_g_map + custom_theme()

In [None]:
# List the attributes stored with the local G result for Ptgds.
SpatialFeatureExperiment::localResultAttrs(
    sfe_tissue, "localG_perm", "Ptgds",
    swap_rownames = "symbol"
)

In [None]:
# Map of the "-log10p_adj Sim" attribute, with the diverging scale centred at
# -log10(0.05).
p_threshold <- -log10(0.05)
local_g_pvalues <- Voyager::plotLocalResult(
    sfe_tissue, "localG_perm",
    features = "Ptgds",
    attribute = "-log10p_adj Sim",
    colGeometryName = "spotPoly",
    divergent = TRUE, diverge_center = p_threshold,
    swap_rownames = "symbol",
    image_id = "lowres"
)
local_g_pvalues + custom_theme()

## MULTISPATI PCA (Multivariate Spatial Correlation)

Due to the large number of genes quantified in single cell and spatial transcriptomics, dimension reduction is part of the standard workflow to analyze such data, to visualize, to help interpreting the data, to distill relevant information and reduce noise, to facilitate downstream analyses such as clustering and pseudotime, to project different samples into a shared latent space for data integration, and so on.

Spatially informed dimension reduction is actually not new, and dates back to at least 1985, with Wartenberg’s crossover of Moran’s I and PCA (Wartenberg 1985), which was generalized and further developed as MULTISPATI PCA (Dray, Saïd, and Débias 2008).

In short, while PCA tries to maximize the variance explained by each PC, MULTISPATI maximizes the product of Moran’s I and variance explained. Also, while all the eigenvalues from PCA are non-negative, because the covariance matrix is positive semidefinite, MULTISPATI can give negative eigenvalues, which represent negative spatial autocorrelation, which can be present and interesting but is not as common as positive spatial autocorrelation and is often masked by the latter (Griffith 2019).

In [None]:
# 5-nearest-neighbour graph with dist_type = "idw" and style = "W", stored as
# "knn5".
knn5_graph <- SpatialFeatureExperiment::findSpatialNeighbors(
    sfe_tissue,
    method = "knearneigh",
    dist_type = "idw",
    k = 5,
    style = "W"
)
colGraph(sfe_tissue, "knn5") <- knn5_graph

In [None]:
# MULTISPATI PCA on the "knn5" graph, requesting 20 positive (nfposi) and 20
# negative (nfnega) components.
sfe_tissue <- Voyager::runMultivariate(
    sfe_tissue, "multispati",
    colGraphName = "knn5",
    nfposi = 20,
    nfnega = 20
)

In [None]:
# Elbow plot of the MULTISPATI result.
multispati_elbow <- Voyager::ElbowPlot(sfe_tissue, nfnega = 20, reduction = "multispati")
multispati_elbow + custom_theme()

In [None]:
# Gene loadings of the first five MULTISPATI components, labelled by symbol.
multispati_loadings <- Voyager::plotDimLoadings(
    sfe_tissue,
    dims = c(1:5),
    swap_rownames = "symbol",
    reduction = "multispati"
)
plots <- wrap_plots(multispati_loadings) & custom_theme()
plots

In [None]:
# Spatial maps of the first five MULTISPATI components on a diverging scale
# centred at zero.
multispati_maps <- Voyager::spatialReducedDim(
    sfe_tissue, "multispati",
    ncomponents = 5,
    colGeometryName = "spotPoly",
    divergent = TRUE, diverge_center = 0,
    ncol = 2,
    image_id = "lowres", maxcell = 5e4
)
plots <- wrap_plots(multispati_maps) & custom_theme()
plots

## Comparing Spatial and Non-Spatial PCA

In the context of spatial transcriptomics, clusters are groups of spots that share similar characteristics within 
each group. 

Non-spatial dimension reduction methods, such as principal component analysis (PCA), identify genes that are effective in 
distinguishing different cell types based solely on their expression patterns without considering the spatial 
arrangement of the cells. However, gene expression may exhibit 
strong spatial structure, meaning that expression values are not randomly distributed across the tissue but rather form distinct 
spatial patterns.

MULTISPATI's components identify genes that define spatial regions in addition to differentiating between cell types. 
These genes not only distinguish cell types but also capture the spatial organization of cells within the tissue. 
The genes associated with each MULTISPATI component can provide valuable insights into the spatial patterns 
and the underlying biological processes.

Both analysis methods provide a different perspective and uncover different aspects of the data. Non-spatial 
clustering focuses on identifying cell types based on gene expression, while spatial clustering incorporates 
both gene expression and spatial information to identify spatially coherent regions and the genes that define 
them.

We will now perform clustering on both the original PCA components and the new MULTISPATI-generated components.

In [None]:
# Leiden clustering (modularity objective, resolution 1) on a nearest-neighbour
# graph built from the non-spatial PCA.
set.seed(29)
leiden_nn <- NNGraphParam(
    cluster.fun = "leiden",
    cluster.args = list(
        objective_function = "modularity",
        resolution_parameter = 1
    )
)
sfe_tissue$clusts_nonspatial <- scran::clusterCells(
    sfe_tissue,
    use.dimred = "PCA",
    BLUSPARAM = leiden_nn
)

In [None]:
# Same clustering settings applied to the first 20 MULTISPATI components.
set.seed(29)
multispati_first20 <- SingleCellExperiment::reducedDim(sfe_tissue, "multispati")[,1:20]
leiden_nn <- NNGraphParam(
    cluster.fun = "leiden",
    cluster.args = list(
        objective_function = "modularity",
        resolution_parameter = 1
    )
)
sfe_tissue$clusts_multispati <- bluster::clusterRows(
    multispati_first20,
    BLUSPARAM = leiden_nn
)

In [None]:
# Side-by-side spatial maps of the two cluster assignments, with a compact
# two-column legend applied to every panel via `&`.
compact_legend <- guides(
    colour = guide_legend(override.aes = list(size=2), ncol = 2)
)
cluster_maps <- Voyager::plotSpatialFeature(
    sfe_tissue,
    c("clusts_nonspatial", "clusts_multispati"),
    colGeometryName = "spotPoly",
    scattermore = TRUE,
    pointsize = 7
)
cluster_maps & compact_legend