From 6044f4190972451fc0673bc88f87c525ad8f4af4 Mon Sep 17 00:00:00 2001 From: Micha Sam Brickman Raredon <42100128+msraredon@users.noreply.github.com> Date: Sat, 4 Apr 2026 09:54:11 -0400 Subject: [PATCH 1/5] initial files, migrated from tissue field --- .Rbuildignore | 12 + DESCRIPTION | 37 + NAMESPACE | 25 + R/TissueMask-package.R | 28 + R/fit_spatial_mask.R | 399 +++ R/utils.R | 151 + demo_fit_spatial_mask.R | 217 ++ demo_holes_and_scale.R | 433 +++ fit_spatial_mask.R | 371 +++ vignette_fit_spatial_mask.Rmd | 750 +++++ vignette_fit_spatial_mask.html | 2799 +++++++++++++++++ .../figure-html/sec1-plot-1.png | Bin 0 -> 380736 bytes .../figure-html/sec2-plot-1.png | Bin 0 -> 188955 bytes .../figure-html/sec3-plot-1.png | Bin 0 -> 408573 bytes .../figure-html/sec4-plot-1.png | Bin 0 -> 453673 bytes .../figure-html/sec5-plot-1.png | Bin 0 -> 290186 bytes .../figure-html/sec6-plot-1.png | Bin 0 -> 359156 bytes .../figure-html/sec7a-plot-1.png | Bin 0 -> 492729 bytes .../figure-html/sec7b-plot-1.png | Bin 0 -> 706752 bytes .../figure-html/sec7c-plot-1.png | Bin 0 -> 371306 bytes .../figure-html/sec7d-plot-1.png | Bin 0 -> 800304 bytes .../figure-html/sec7e-plot-1.png | Bin 0 -> 791260 bytes .../figure-html/sec7f-plot-1.png | Bin 0 -> 495869 bytes 23 files changed, 5222 insertions(+) create mode 100644 .Rbuildignore create mode 100644 DESCRIPTION create mode 100644 NAMESPACE create mode 100644 R/TissueMask-package.R create mode 100644 R/fit_spatial_mask.R create mode 100644 R/utils.R create mode 100644 demo_fit_spatial_mask.R create mode 100644 demo_holes_and_scale.R create mode 100644 fit_spatial_mask.R create mode 100644 vignette_fit_spatial_mask.Rmd create mode 100644 vignette_fit_spatial_mask.html create mode 100644 vignette_fit_spatial_mask_files/figure-html/sec1-plot-1.png create mode 100644 vignette_fit_spatial_mask_files/figure-html/sec2-plot-1.png create mode 100644 vignette_fit_spatial_mask_files/figure-html/sec3-plot-1.png create mode 100644 vignette_fit_spatial_mask_files/figure-html/sec4-plot-1.png create mode 100644 vignette_fit_spatial_mask_files/figure-html/sec5-plot-1.png create mode 100644 vignette_fit_spatial_mask_files/figure-html/sec6-plot-1.png create mode 100644 vignette_fit_spatial_mask_files/figure-html/sec7a-plot-1.png create mode 100644 vignette_fit_spatial_mask_files/figure-html/sec7b-plot-1.png create mode 100644 vignette_fit_spatial_mask_files/figure-html/sec7c-plot-1.png create mode 100644 vignette_fit_spatial_mask_files/figure-html/sec7d-plot-1.png create mode 100644 vignette_fit_spatial_mask_files/figure-html/sec7e-plot-1.png create mode 100644 vignette_fit_spatial_mask_files/figure-html/sec7f-plot-1.png diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..6dc001b --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,12 @@ +^fit_spatial_mask\.R$ +^demo_fit_spatial_mask\.R$ +^demo_holes_and_scale\.R$ +^vignette_fit_spatial_mask\.Rmd$ +^vignette_fit_spatial_mask\.html$ +^vignette_fit_spatial_mask_cache$ +^_pkgdown\.yml$ +^pkgdown$ +^\.github$ +^.*\.Rproj$ +^\.Rproj\.user$ +^README\.Rmd$ diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..f2266b3 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,37 @@ +Package: TissueMask +Type: Package +Title: Spatial Boundary Mask Fitting for Point Coordinate Data +Version: 0.1.0 +Authors@R: person("Raredon Laboratory", role = c("aut", "cre"), + email = "raredonlab@yale.edu") +Description: Fits polygon spatial masks to XY point coordinates such as + cell centroids or transcript locations from spatial transcriptomics + and spatial proteomics experiments. Supports four methods of + increasing topological complexity: convex hull, concave hull, kernel + density estimation (KDE), and a raster-based Gaussian smoothing + approach that naturally handles internal voids (vessel lumens, + necrotic cores, tissue tears) and disconnected tissue islands via + GEOS polygon dissolve. +License: MIT + file LICENSE +Encoding: UTF-8 +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.3.2 +Depends: + R (>= 4.1.0) +Imports: + sf (>= 1.0-0), + stats, + parallel +Suggests: + concaveman, + MASS, + isoband, + ggplot2, + testthat (>= 3.0.0), + knitr, + rmarkdown, + patchwork +VignetteBuilder: knitr +URL: https://raredonlab.github.io/TissueMask/, https://github.com/RaredonLab/TissueMask +BugReports: https://github.com/RaredonLab/TissueMask/issues +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..c6cc1f7 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,25 @@ +# Generated by roxygen2: do not edit by hand + +export(fit_spatial_mask) +importFrom(parallel,mclapply) +importFrom(sf,NA_crs_) +importFrom(sf,st_area) +importFrom(sf,st_as_sf) +importFrom(sf,st_bbox) +importFrom(sf,st_buffer) +importFrom(sf,st_convex_hull) +importFrom(sf,st_crs<-) +importFrom(sf,st_difference) +importFrom(sf,st_distance) +importFrom(sf,st_geometry) +importFrom(sf,st_make_valid) +importFrom(sf,st_polygon) +importFrom(sf,st_sf) +importFrom(sf,st_sfc) +importFrom(sf,st_simplify) +importFrom(sf,st_union) +importFrom(sf,st_within) +importFrom(sf,st_as_sf) +importFrom(stats,complete.cases) +importFrom(stats,filter) +importFrom(stats,quantile) diff --git a/R/TissueMask-package.R b/R/TissueMask-package.R new file mode 100644 index 0000000..9ea3b00 --- /dev/null +++ b/R/TissueMask-package.R @@ -0,0 +1,28 @@ +#' TissueMask: Spatial Boundary Mask Fitting for Point Coordinate Data +#' +#' @description +#' TissueMask provides [`fit_spatial_mask()`], which fits polygon spatial masks +#' to XY point coordinate data such as cell centroids or transcript locations +#' from spatial transcriptomics and spatial proteomics experiments. +#' +#' Four masking methods are available, supporting geometries of increasing +#' topological complexity — from simple convex hulls to multi-polygon masks +#' with internal voids and disconnected islands: +#' +#' | Method | Holes/Islands | Speed | Recommended for | +#' |---|---|---|---| +#' | `"convex"` | No | Instant | Sanity checks | +#' | `"concave"` | No | Fast | Simple non-convex shapes | +#' | `"kde"` | Yes | Moderate | Dense, smooth datasets | +#' | `"raster"` | Yes | Fast | Most use cases (default) | +#' +#' @section Part of TissueSuite: +#' TissueMask is the first component of the **TissueSuite** package family +#' developed at the Raredon Laboratory, Yale School of Medicine. The companion +#' package **TissueField** uses TissueMask output to estimate steady-state +#' molecular concentration fields via a diffusion–clearance PDE. +#' +#' @seealso [`fit_spatial_mask()`] +#' +#' @keywords internal +"_PACKAGE" diff --git a/R/fit_spatial_mask.R b/R/fit_spatial_mask.R new file mode 100644 index 0000000..3269ffb --- /dev/null +++ b/R/fit_spatial_mask.R @@ -0,0 +1,399 @@ +#' Fit a Spatial Boundary Mask to Point Coordinate Data +#' +#' @description +#' Fits a polygon (or multi-polygon) spatial mask — an [sf::sfc] geometry +#' object — to a set of XY point coordinates. Suitable for cell centroids, +#' single-molecule transcript locations, or any spatial point process data. +#' The mask faithfully captures tissue shape including internal voids (vessel +#' lumens, necrotic cores, tissue tears) and disconnected fragments. +#' +#' Four algorithms are available: +#' +#' | `method` | Topology | Speed | Best for | +#' |---|---|---|---| +#' | `"raster"` | Holes + islands | Fast | Most use cases (default) | +#' | `"kde"` | Holes + islands | Moderate | Dense, smooth datasets | +#' | `"concave"` | No holes | Fast | Simple non-convex shapes | +#' | `"convex"` | No holes | Instant | Quick checks | +#' +#' **Raster method overview** (`method = "raster"`, v3): +#' 1. Bin point coordinates onto a regular grid. +#' 2. Convolve the occupancy grid with a 2-D Gaussian of width `raster_sigma` +#' (in coordinate units). +#' 3. Threshold at `raster_threshold × max`. Cells above threshold are "inside". +#' 4. Convert each "inside" cell to an axis-aligned rectangle and dissolve via +#' GEOS union. Holes and islands emerge from the geometry naturally — no +#' ring-winding logic required. +#' 5. Apply a morphological close (buffer out then in) to smooth staircase +#' boundaries. +#' +#' @param coords A data frame or matrix with columns `x` and `y`. If column +#' names differ, the first two columns are used and renamed. Rows containing +#' `NA` in `x` or `y` are silently dropped. +#' @param method Character scalar. Masking algorithm. One of: +#' * `"raster"` *(default)* — Gaussian smoothing on a rasterised grid, +#' binary threshold, then GEOS union of "on" cells. Supports holes and +#' islands. Recommended for most datasets. +#' * `"kde"` — 2-D kernel density estimate via [MASS::kde2d()] contoured +#' with [isoband::isobands()]. Supports holes and islands. Requires +#' packages **MASS** and **isoband**. +#' * `"concave"` — Concave hull via [concaveman::concaveman()]. No holes or +#' islands. Requires package **concaveman**. +#' * `"convex"` — Convex hull via [sf::st_convex_hull()]. No holes or +#' islands. No additional packages required. +#' @param concavity Numeric. Relative concavity for `method = "concave"`. +#' Lower values produce tighter, more concave boundaries; higher values +#' approach the convex hull. Default `2`. +#' @param length_threshold Numeric. Minimum edge length for +#' `method = "concave"`. Edges shorter than this are not included. +#' Default `0`. +#' @param kde_bandwidth Numeric scalar or length-2 vector giving the KDE +#' bandwidth(s) in the x and y directions. `NULL` uses +#' [MASS::bandwidth.nrd()] applied independently to each axis. Only used +#' when `method = "kde"`. Default `NULL`. +#' @param kde_threshold Numeric in `(0, 1)`. KDE probability quantile used +#' as the lower contour level. Smaller values produce larger masks. +#' Default `0.05`. +#' @param kde_resolution Integer. Grid resolution for the KDE density +#' estimate. Larger values give finer boundaries at the cost of memory. +#' Default `256L`. +#' @param raster_resolution Integer. Number of grid cells along each axis for +#' `method = "raster"`. Default `256L`. +#' @param raster_sigma Numeric. Standard deviation of the Gaussian kernel in +#' **coordinate units** (the same units as `x` and `y`). This is the key +#' tuning parameter: +#' * Larger `raster_sigma` → holes fill in, islands merge, boundary smooths. +#' * Smaller `raster_sigma` → holes and fine structure are preserved. +#' * `NULL` (default) → auto-set to 3% of the domain width. +#' @param raster_threshold Numeric in `(0, 1)`. A grid cell is "inside" when +#' its smoothed value exceeds `raster_threshold × max(smoothed_grid)`. +#' Higher values shrink the mask; lower values grow it. Default `0.15`. +#' @param raster_min_pts Integer. Minimum number of points that must fall in +#' a grid cell before it is eligible for the convolution. Cells with fewer +#' points are treated as empty. Default `1L`. +#' @param buffer_dist Numeric. Distance to expand the mask after fitting via +#' [sf::st_buffer()]. Zero means no expansion. Default `0`. +#' @param smooth_mask Logical. If `TRUE`, the mask boundary is simplified with +#' [sf::st_simplify()] after fitting. Default `FALSE`. +#' @param smooth_tolerance Numeric. Douglas–Peucker tolerance passed to +#' [sf::st_simplify()]. `NULL` → auto-set to 1% of the bounding-box +#' diagonal. Default `NULL`. +#' @param n_cores Integer. Number of cores for parallel row-smoothing in +#' [parallel::mclapply()] (Unix/macOS only; ignored on Windows). Default +#' `1L`. +#' @param crs CRS specification accepted by [sf::st_crs()]: an EPSG integer, +#' a WKT string, a PROJ string, or `NA` for no CRS. Default `NA`. +#' @param plot Logical. If `TRUE`, a quick [ggplot2] scatter + polygon plot is +#' printed. Requires **ggplot2** to be installed. Default `FALSE`. +#' @param verbose Logical. If `TRUE`, prints a summary table and intermediate +#' messages (e.g., `raster_sigma` in grid-cell units). Default `TRUE`. +#' +#' @return An [sf::sfc] geometry collection of class `sfc_POLYGON` or +#' `sfc_MULTIPOLYGON`. The CRS is set to `crs`. All input points (after NA +#' removal) are guaranteed to lie inside or on the boundary of the returned +#' mask. If any points fall outside the raw fitted mask a corrective buffer +#' is applied automatically and a warning is issued. +#' +#' @examples +#' set.seed(42) +#' # Two-cluster point cloud +#' coords <- data.frame( +#' x = c(rnorm(200, 0, 3), rnorm(200, 10, 3)), +#' y = c(rnorm(200, 0, 3), rnorm(200, 10, 3)) +#' ) +#' +#' # Raster method (recommended default) +#' mask <- fit_spatial_mask(coords, verbose = FALSE) +#' +#' # Convex hull — instant, no additional packages +#' mask_cv <- fit_spatial_mask(coords, method = "convex", verbose = FALSE) +#' +#' # Concave hull — requires 'concaveman' +#' \donttest{ +#' mask_cc <- fit_spatial_mask(coords, method = "concave", +#' concavity = 1.5, verbose = FALSE) +#' } +#' +#' # Verify all points are inside the mask +#' library(sf) +#' pts <- sf::st_as_sf(coords, coords = c("x", "y"), crs = sf::NA_crs_) +#' all(sf::st_within(pts, sf::st_union(mask), sparse = FALSE)[, 1]) +#' +#' @seealso [sf::st_sfc()], [sf::st_union()], [sf::st_buffer()] +#' +#' @export +#' @importFrom sf st_as_sf st_convex_hull st_union st_sfc st_polygon +#' @importFrom sf st_buffer st_distance st_simplify st_bbox st_area +#' @importFrom sf st_crs<- st_make_valid st_geometry st_sf st_within +#' @importFrom sf st_difference NA_crs_ +fit_spatial_mask <- function( + coords, + + # Method + method = "raster", # "convex" | "concave" | "kde" | "raster" + + # Concave-hull parameters + concavity = 2, + length_threshold = 0, + + # KDE parameters + kde_bandwidth = NULL, + kde_threshold = 0.05, + kde_resolution = 256L, + + # Raster parameters + raster_resolution = 256L, + raster_sigma = NULL, + raster_threshold = 0.15, + raster_min_pts = 1L, + + # Post-processing + buffer_dist = 0, + smooth_mask = FALSE, + smooth_tolerance = NULL, + + # Misc + n_cores = 1L, + crs = NA, + plot = FALSE, + verbose = TRUE +) { + + # ---- 0. Normalise CRS (sf::st_sfc rejects bare NA in sf >= 1.0) ----------- + crs <- if (length(crs) == 1L && is.na(crs)) sf::NA_crs_ else crs + + # ---- 1. Parse and validate coordinates ------------------------------------- + if (is.matrix(coords)) coords <- as.data.frame(coords) + if (!is.data.frame(coords)) stop("`coords` must be a data.frame or matrix.") + if (!all(c("x", "y") %in% names(coords))) { + if (ncol(coords) >= 2L) { + coords <- coords[, 1:2] + names(coords) <- c("x", "y") + } else { + stop("`coords` needs at least two columns, or columns named 'x' and 'y'.") + } + } + coords <- coords[stats::complete.cases(coords[, c("x", "y")]), c("x", "y")] + n_pts <- nrow(coords) + if (n_pts < 3L) + stop("Need at least 3 coordinate pairs to fit a mask (got ", n_pts, ").") + if (n_pts > 50000L && method == "kde" && verbose) + message("Large n = ", n_pts, " with method='kde' — consider method='raster'.") + + # ---- 2. Fit mask ----------------------------------------------------------- + mask_poly <- switch(method, + + # -- Convex hull ------------------------------------------------------------ + "convex" = { + pts <- sf::st_as_sf(coords, coords = c("x", "y"), crs = crs) + sf::st_convex_hull(sf::st_union(pts)) + }, + + # -- Concave hull ----------------------------------------------------------- + "concave" = { + if (!requireNamespace("concaveman", quietly = TRUE)) + stop("Package 'concaveman' is required for method='concave'.\n", + "Install it with: install.packages('concaveman')") + hp <- concaveman::concaveman( + as.matrix(coords), + concavity = concavity, + length_threshold = length_threshold + ) + sf::st_sfc(sf::st_polygon(list(hp)), crs = crs) + }, + + # -- KDE contour (supports holes, slower for large n) ---------------------- + "kde" = { + if (!requireNamespace("MASS", quietly = TRUE)) + stop("Package 'MASS' is required for method='kde'.\n", + "Install it with: install.packages('MASS')") + if (!requireNamespace("isoband", quietly = TRUE)) + stop("Package 'isoband' is required for method='kde'.\n", + "Install it with: install.packages('isoband')") + bw_x <- if (!is.null(kde_bandwidth)) kde_bandwidth[1L] else + MASS::bandwidth.nrd(coords$x) + bw_y <- if (!is.null(kde_bandwidth)) { + if (length(kde_bandwidth) == 2L) kde_bandwidth[2L] else kde_bandwidth[1L] + } else { + MASS::bandwidth.nrd(coords$y) + } + xp <- diff(range(coords$x)) * 0.1 + yp <- diff(range(coords$y)) * 0.1 + kf <- MASS::kde2d( + coords$x, coords$y, + h = c(bw_x, bw_y), + n = kde_resolution, + lims = c(range(coords$x) + c(-xp, xp), + range(coords$y) + c(-yp, yp)) + ) + dv <- as.vector(kf$z) + bands <- isoband::isobands( + kf$x, kf$y, kf$z, + levels_low = stats::quantile(dv, kde_threshold), + levels_high = max(dv) + 1 + ) + .build_sf_with_holes(bands, crs) + }, + + # -- Raster: Gaussian sum → threshold → cell union ------------------------- + # + # Algorithm: + # 1. Bin points onto a regular grid (raster_resolution × raster_resolution). + # 2. Convolve the binary occupancy grid with a 2-D Gaussian of width + # raster_sigma (coordinate units). Equivalent to placing an isotropic + # Gaussian at every point and summing them all. + # 3. Threshold at raster_threshold × max(field). Cells above threshold + # are "inside"; cells below are "outside". + # 4. Convert each "inside" cell to an axis-aligned rectangle and dissolve + # via GEOS union. Holes and islands emerge from the geometry naturally + # — no ring-winding logic required. + # 5. Smooth the staircase boundary with a morphological close + # (buffer out then in by ~0.6 × cell diagonal). + "raster" = { + + # Grid cell edges + xpad <- diff(range(coords$x)) * 0.05 + ypad <- diff(range(coords$y)) * 0.05 + xb <- seq(min(coords$x) - xpad, max(coords$x) + xpad, + length.out = raster_resolution + 1L) + yb <- seq(min(coords$y) - ypad, max(coords$y) + ypad, + length.out = raster_resolution + 1L) + hx <- xb[2L] - xb[1L] # cell width (coordinate units) + hy <- yb[2L] - yb[1L] # cell height (coordinate units) + + # Bin points: cm[yi, xi] = point count at cell (xi, yi) + xi <- pmax(1L, pmin(raster_resolution, + findInterval(coords$x, xb, rightmost.closed = TRUE))) + yi <- pmax(1L, pmin(raster_resolution, + findInterval(coords$y, yb, rightmost.closed = TRUE))) + cv <- tabulate(yi + (xi - 1L) * raster_resolution, + nbins = raster_resolution^2L) + cm <- matrix(cv, nrow = raster_resolution, ncol = raster_resolution) + + # Convert raster_sigma from coordinate units to grid-cell units + domain_w <- diff(range(coords$x)) + 2 * xpad + sigma_cu <- if (is.null(raster_sigma)) 0.03 * domain_w else raster_sigma + sigma_gc <- sigma_cu / hx # grid-cell units for convolution kernel + + if (verbose) + message(" raster_sigma = ", round(sigma_cu, 4), + " coord units (", round(sigma_gc, 1), " grid cells)") + + # Convolve binary occupancy with Gaussian — sum of point Gaussians + indicator <- (cm >= raster_min_pts) * 1.0 + sm <- .gaussian_smooth_2d(indicator, sigma = sigma_gc, + n_cores = n_cores) + + # Threshold → binary + thresh <- raster_threshold * max(sm, na.rm = TRUE) + binary <- sm >= thresh + on_cells <- which(binary, arr.ind = TRUE) # [row = yi, col = xi] + + if (nrow(on_cells) == 0L) + stop("No grid cells above threshold. Try: lower raster_threshold, ", + "increase raster_sigma, or increase raster_resolution.") + + # Build one closed rectangle per on-cell + ri <- on_cells[, 1L] # y indices + ci <- on_cells[, 2L] # x indices + rects <- lapply(seq_len(nrow(on_cells)), function(k) { + sf::st_polygon(list(matrix( + c(xb[ci[k]], yb[ri[k]], + xb[ci[k] + 1L], yb[ri[k]], + xb[ci[k] + 1L], yb[ri[k] + 1L], + xb[ci[k]], yb[ri[k] + 1L], + xb[ci[k]], yb[ri[k]]), # close ring + ncol = 2L, byrow = TRUE + ))) + }) + + # GEOS dissolve — topology (holes, islands) emerges automatically + raw <- sf::st_union(sf::st_sfc(rects, crs = crs)) + + # Morphological close: smooth staircase boundary. + # Buffer out then in by ~0.6 × cell diagonal. + cell_diag <- sqrt(hx^2 + hy^2) + raw <- sf::st_buffer(raw, dist = cell_diag * 0.6) + raw <- sf::st_buffer(raw, dist = -cell_diag * 0.6) + + sf::st_make_valid(raw) + }, + + stop("Unknown method '", method, "'. Choose: convex, concave, kde, raster.") + ) + + if (!inherits(mask_poly, "sfc")) mask_poly <- sf::st_geometry(mask_poly) + + # ---- 3. Optional buffer ---------------------------------------------------- + if (buffer_dist > 0) + mask_poly <- sf::st_buffer(mask_poly, dist = buffer_dist) + + # ---- 4. Containment guarantee ---------------------------------------------- + pts_sfc <- sf::st_geometry( + sf::st_as_sf(coords, coords = c("x", "y"), crs = crs)) + contained <- sf::st_within(pts_sfc, sf::st_union(mask_poly), sparse = FALSE) + n_out <- sum(!contained[, 1L]) + if (n_out > 0L) { + warning(n_out, " point(s) outside mask — applying corrective buffer.") + dists <- sf::st_distance(pts_sfc[!contained[, 1L]], + sf::st_union(mask_poly)) + mask_poly <- sf::st_buffer(mask_poly, + dist = max(as.numeric(dists)) * 1.05) + } + + # ---- 5. Optional boundary smoothing ---------------------------------------- + if (smooth_mask) { + if (is.null(smooth_tolerance)) { + bb <- sf::st_bbox(mask_poly) + smooth_tolerance <- sqrt((bb["xmax"] - bb["xmin"])^2 + + (bb["ymax"] - bb["ymin"])^2) * 0.01 + } + mask_poly <- sf::st_simplify(mask_poly, dTolerance = smooth_tolerance) + } + + sf::st_crs(mask_poly) <- crs + + # ---- 6. Verbose summary ---------------------------------------------------- + if (verbose) { + bb <- sf::st_bbox(mask_poly) + area <- suppressWarnings(as.numeric(sf::st_area(mask_poly))) + cat("--------------------------------------------------\n") + cat(" TissueMask: spatial mask fitted\n") + cat(" Method :", method, "\n") + cat(" Points :", n_pts, "\n") + cat(" Sub-geometries :", length(mask_poly), "\n") + cat(" Bounding box : x [", round(bb["xmin"], 3), ",", + round(bb["xmax"], 3), "] y [", + round(bb["ymin"], 3), ",", round(bb["ymax"], 3), "]\n") + cat(" Area :", round(area, 4), "\n") + cat(" Buffer applied :", buffer_dist, "\n") + cat("--------------------------------------------------\n") + } + + # ---- 7. Optional quick plot ------------------------------------------------ + if (plot) { + if (!requireNamespace("ggplot2", quietly = TRUE)) + message("Package 'ggplot2' not installed; skipping plot.") + else { + mask_sf <- sf::st_sf(geometry = mask_poly) + print(ggplot2::ggplot() + + ggplot2::geom_sf( + data = mask_sf, + fill = "#4c9be8", alpha = 0.2, + color = "#1a5fa8", linewidth = 0.55) + + ggplot2::geom_point( + data = coords, ggplot2::aes(x = x, y = y), + color = "#c0392b", size = 0.7, alpha = 0.5) + + ggplot2::coord_sf(datum = NA) + + ggplot2::labs( + title = paste0("TissueMask | method = '", method, "'"), + subtitle = paste0(n_pts, " points \u00b7 area = ", + round(suppressWarnings( + as.numeric(sf::st_area(mask_poly))), 2)), + x = "X", y = "Y") + + ggplot2::theme_minimal(base_size = 11)) + } + } + + return(mask_poly) +} diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..edb8acc --- /dev/null +++ b/R/utils.R @@ -0,0 +1,151 @@ +# Internal helper functions for TissueMask +# These are not exported and not part of the public API. + +# ---- Geometry helpers -------------------------------------------------------- + +#' Signed area of a ring matrix +#' +#' Computes the signed area of a closed ring via the shoelace formula. +#' Positive = counter-clockwise (exterior), negative = clockwise (hole). +#' +#' @param ring_mat Numeric matrix with columns for x and y coordinates. +#' Must have at least 3 rows. +#' @return Numeric scalar (signed area). +#' @keywords internal +.signed_area <- function(ring_mat) { + n <- nrow(ring_mat) + if (n < 3L) return(0) + x <- ring_mat[, 1L] + y <- ring_mat[, 2L] + 0.5 * sum(x[seq_len(n - 1L)] * y[seq_len(n - 1L) + 1L] - + x[seq_len(n - 1L) + 1L] * y[seq_len(n - 1L)]) +} + +# ---- Gaussian convolution helpers -------------------------------------------- + +#' Normalised 1-D Gaussian kernel +#' +#' @param sigma Standard deviation in grid-cell units. +#' @return Numeric vector of kernel weights (sums to 1). +#' @keywords internal +.gaussian_kernel_1d <- function(sigma) { + r <- ceiling(3 * sigma) + k <- exp(-((-r):r)^2 / (2 * sigma^2)) + k / sum(k) +} + +#' Row-wise 1-D convolution +#' +#' @param mat Numeric matrix. +#' @param k Filter kernel vector. +#' @return Matrix with same dimensions as `mat`, edge NA values set to 0. +#' @importFrom stats filter +#' @keywords internal +.smooth_rows <- function(mat, k) { + t(apply(mat, 1L, function(row) { + s <- stats::filter(row, k, method = "convolution", sides = 2L) + s[is.na(s)] <- 0 + as.numeric(s) + })) +} + +#' Column-wise 1-D convolution +#' +#' @param mat Numeric matrix. +#' @param k Filter kernel vector. +#' @return Matrix with same dimensions as `mat`, edge NA values set to 0. +#' @importFrom stats filter +#' @keywords internal +.smooth_cols <- function(mat, k) { + apply(mat, 2L, function(col) { + s <- stats::filter(col, k, method = "convolution", sides = 2L) + s[is.na(s)] <- 0 + as.numeric(s) + }) +} + +#' Separable 2-D Gaussian convolution +#' +#' Applies a separable Gaussian blur to a matrix by convolving rows then +#' columns with a 1-D Gaussian kernel. `sigma` is in **grid-cell units**. +#' For parallel row processing on Unix, set `n_cores > 1`. +#' +#' @param mat Numeric matrix to smooth. +#' @param sigma Gaussian standard deviation in grid-cell units. +#' @param n_cores Integer. Number of cores for parallel row smoothing +#' (Unix only). Default `1L`. +#' @return Smoothed numeric matrix. +#' @importFrom parallel mclapply +#' @keywords internal +.gaussian_smooth_2d <- function(mat, sigma, n_cores = 1L) { + k <- .gaussian_kernel_1d(sigma) + if (n_cores > 1L && .Platform$OS.type == "unix") { + row_list <- parallel::mclapply(seq_len(nrow(mat)), function(i) { + s <- stats::filter(mat[i, ], k, method = "convolution", sides = 2L) + s[is.na(s)] <- 0 + as.numeric(s) + }, mc.cores = n_cores) + mat <- do.call(rbind, row_list) + } else { + mat <- .smooth_rows(mat, k) + } + .smooth_cols(mat, k) +} + +# ---- KDE polygon assembly ---------------------------------------------------- + +#' Assemble sf geometry with holes from isoband output +#' +#' Used only by `method = "kde"`. Takes a list of isoband band objects, +#' extracts rings, sorts by area, and assembles exterior polygons with +#' interior holes using GEOS set operations. +#' +#' @param bands List of band objects from [isoband::isobands()], each with +#' `$x`, `$y`, and optionally `$id` components. +#' @param crs CRS specification for the output [sf::sfc] object. +#' @return An [sf::sfc] geometry (single or multi-polygon with holes). +#' @keywords internal +.build_sf_with_holes <- function(bands, crs) { + rings <- list() + for (band in bands) { + if (length(band$x) == 0L) next + ids <- if (!is.null(band$id)) band$id else rep(1L, length(band$x)) + for (uid in unique(ids[!is.na(ids)])) { + sel <- !is.na(ids) & ids == uid + rx <- band$x[sel] + ry <- band$y[sel] + if (length(rx) < 3L) next + if (rx[1L] != rx[length(rx)] || ry[1L] != ry[length(ry)]) { + rx <- c(rx, rx[1L]) + ry <- c(ry, ry[1L]) + } + mat <- cbind(rx, ry) + area <- abs(.signed_area(mat)) + rings <- c(rings, list(list(mat = mat, area = area))) + } + } + if (length(rings) == 0L) + stop("No rings extracted. Adjust kde_threshold / kde_bandwidth parameters.") + + ord <- order(sapply(rings, `[[`, "area"), decreasing = TRUE) + rings <- rings[ord] + + polys <- lapply(rings, function(r) { + mat <- rbind(r$mat, r$mat[1L, , drop = FALSE]) + dupl <- c(FALSE, rowSums(abs(diff(mat))) == 0) + sf::st_sfc(sf::st_polygon(list(mat[!dupl, , drop = FALSE])), crs = crs) + }) + + result <- polys[[1L]] + if (length(polys) > 1L) { + for (i in seq(2L, length(polys))) { + inside <- tryCatch( + isTRUE(sf::st_within(polys[[i]], result, sparse = FALSE)[1L, 1L]), + error = function(e) FALSE + ) + result <- if (inside) sf::st_difference(result, polys[[i]]) + else sf::st_union(result, polys[[i]]) + } + } + sf::st_make_valid(result) +} diff --git a/demo_fit_spatial_mask.R b/demo_fit_spatial_mask.R new file mode 100644 index 0000000..d9a52f2 --- /dev/null +++ b/demo_fit_spatial_mask.R @@ -0,0 +1,217 @@ +# ============================================================ +# demo_fit_spatial_mask.R +# +# Basic demonstrations of fit_spatial_mask() across all +# three methods and their core tunable parameters. +# +# Run after sourcing: source("fit_spatial_mask.R") +# +# Required packages: +# install.packages(c("sf","concaveman","MASS","isoband", +# "ggplot2","patchwork")) +# +# Sections: +# 1 — Circular cloud (sanity check) +# 2 — Elongated cloud (anisotropy) +# 3 — L-shaped cloud (non-convex; key diagnostic) +# 4 — Multi-cluster blobs +# 5 — Concavity parameter sweep +# 6 — Buffer and smoothing effects +# 7 — Validation: point containment table +# ============================================================ + +library(sf) +library(ggplot2) +library(patchwork) +source("fit_spatial_mask.R") +set.seed(42) + +plot_mask <- function(mask, coords, title="", subtitle="", + fill="#4c9be8", border="#1a5fa8", + pt_col="#c0392b", pt_size=1.2, pt_alpha=0.55) { + mask_sf <- sf::st_sf(geometry = mask) + ggplot() + + geom_sf(data=mask_sf, + fill=fill, alpha=0.2, color=border, linewidth=0.55) + + geom_point(data=coords, aes(x=x,y=y), color=pt_col, + size=pt_size, alpha=pt_alpha) + + coord_sf(datum=NA) + + labs(title=title, subtitle=subtitle, x="X", y="Y") + + theme_minimal(base_size=9.5) + + theme(plot.title=element_text(face="bold",size=9.5), + plot.subtitle=element_text(size=8,color="grey40"), + panel.grid=element_line(color="grey93")) +} + +# ============================================================ +# SECTION 1 — Circular cloud +# ============================================================ +n <- 400 +coords_circle <- data.frame(x=rnorm(n,0,1), y=rnorm(n,0,1)) + +m1a <- fit_spatial_mask(coords_circle, method="convex", verbose=FALSE) +m1b <- fit_spatial_mask(coords_circle, method="concave", concavity=2, verbose=FALSE) +m1c <- fit_spatial_mask(coords_circle, method="kde", kde_threshold=0.02, + buffer_dist=0.1, verbose=FALSE) + +print( + plot_mask(m1a, coords_circle, title="1a. Circular — Convex", subtitle="method='convex'") + + plot_mask(m1b, coords_circle, title="1b. Circular — Concave", subtitle="concavity=2") + + plot_mask(m1c, coords_circle, title="1c. Circular — KDE", + subtitle="threshold=0.02", fill="#27ae60", border="#1e8449") + + plot_annotation(title="Section 1: Circular Cloud — Sanity Check", + theme=theme(plot.title=element_text(face="bold",size=13))) +) + +# ============================================================ +# SECTION 2 — Elongated cloud +# ============================================================ +coords_elong <- data.frame(x=rnorm(n,0,3), y=rnorm(n,0,0.6)) + +m2a <- fit_spatial_mask(coords_elong, method="convex", verbose=FALSE) +m2b <- fit_spatial_mask(coords_elong, method="concave", verbose=FALSE) +m2c <- fit_spatial_mask(coords_elong, method="kde", kde_threshold=0.02, + buffer_dist=0.1, verbose=FALSE) + +print( + plot_mask(m2a, coords_elong, title="2a. Elongated — Convex") + + plot_mask(m2b, coords_elong, title="2b. Elongated — Concave") + + plot_mask(m2c, coords_elong, title="2c. Elongated — KDE", + fill="#27ae60", border="#1e8449") + + plot_annotation(title="Section 2: Elongated Cloud", + theme=theme(plot.title=element_text(face="bold",size=13))) +) + +# ============================================================ +# SECTION 3 — L-shaped cloud (KEY test) +# ============================================================ +coords_L <- rbind( + data.frame(x=runif(200,0,5)+rnorm(200,0,0.05), y=runif(200,0,1)+rnorm(200,0,0.05)), + data.frame(x=runif(200,0,1)+rnorm(200,0,0.05), y=runif(200,0,5)+rnorm(200,0,0.05))) + +m3a <- fit_spatial_mask(coords_L, method="convex", verbose=FALSE) +m3b <- fit_spatial_mask(coords_L, method="concave", concavity=1.5, verbose=FALSE) +m3c <- fit_spatial_mask(coords_L, method="concave", concavity=3.0, verbose=FALSE) + +print( + plot_mask(m3a, coords_L, title="3a. L-shape — Convex", + subtitle="[includes empty corner]", fill="#e74c3c", border="#922b21") + + plot_mask(m3b, coords_L, title="3b. Concave tight (1.5)", + subtitle="[correctly excludes corner]") + + plot_mask(m3c, coords_L, title="3c. Concave relaxed (3.0)", + subtitle="[partially includes corner]", fill="#8e44ad", border="#6c3483") + + plot_annotation(title="Section 3: L-Shaped Cloud — Non-Convex", + subtitle="Lower concavity wraps more tightly; convex hull fails", + theme=theme(plot.title=element_text(face="bold",size=13), + plot.subtitle=element_text(size=9,color="grey40"))) +) + +# ============================================================ +# SECTION 4 — Multi-cluster +# ============================================================ +mk_cluster <- function(n,cx,cy,sd=0.4) data.frame(x=rnorm(n,cx,sd),y=rnorm(n,cy,sd)) +coords_multi <- rbind(mk_cluster(150,0,0), mk_cluster(150,4,1), mk_cluster(150,2,4)) + +m4a <- fit_spatial_mask(coords_multi, method="convex", verbose=FALSE) +m4b <- fit_spatial_mask(coords_multi, method="concave", concavity=2, verbose=FALSE) +m4c <- fit_spatial_mask(coords_multi, method="kde", kde_threshold=0.01, + buffer_dist=0.15, verbose=FALSE) +m4d <- fit_spatial_mask(coords_multi, method="kde", kde_threshold=0.25, + buffer_dist=0.15, verbose=FALSE) + +print( + (plot_mask(m4a,coords_multi,title="4a. Multi — Convex") + + plot_mask(m4b,coords_multi,title="4b. Multi — Concave")) / + (plot_mask(m4c,coords_multi,title="4c. Multi — KDE inclusive", + subtitle="threshold=0.01", fill="#27ae60", border="#1e8449") + + plot_mask(m4d,coords_multi,title="4d. Multi — KDE tight", + subtitle="threshold=0.25: separate masks", fill="#8e44ad", border="#6c3483")) + + plot_annotation(title="Section 4: Multi-Cluster", + theme=theme(plot.title=element_text(face="bold",size=13))) +) + +# ============================================================ +# SECTION 5 — Concavity sweep +# ============================================================ +theta <- seq(0.2, pi-0.2, length.out=300) +coords_cres <- data.frame( + x=cos(theta)*(1+rnorm(300,0,0.08))*3, + y=sin(theta)*(1+rnorm(300,0,0.08))*3 - cos(theta)*1.5) + +pal <- c("#e74c3c","#e67e22","#27ae60","#2980b9") +sw <- mapply(function(cv,col) { + m <- fit_spatial_mask(coords_cres, method="concave", concavity=cv, verbose=FALSE) + plot_mask(m, coords_cres, title=paste0("concavity = ",cv), fill=col, border=col) +}, c(1.2,2,3,5), pal, SIMPLIFY=FALSE) + +print(wrap_plots(sw, nrow=1) + + plot_annotation( + title="Section 5: Concavity Sweep — Crescent", + subtitle="Lower = tighter | Higher = approaches convex", + theme=theme(plot.title=element_text(face="bold",size=13), + plot.subtitle=element_text(size=9,color="grey40"))) +) + +# ============================================================ +# SECTION 6 — Buffer and smoothing +# ============================================================ +r <- sqrt(runif(250,0.4^2,1^2)); ang <- runif(250,0,2*pi) +coords_ring <- data.frame(x=r*cos(ang)*5, y=r*sin(ang)*5) + +m6a <- fit_spatial_mask(coords_ring, method="concave", concavity=2, + buffer_dist=0, smooth_mask=FALSE, verbose=FALSE) +m6b <- fit_spatial_mask(coords_ring, method="concave", concavity=2, + buffer_dist=0.3, smooth_mask=FALSE, verbose=FALSE) +m6c <- fit_spatial_mask(coords_ring, method="concave", concavity=2, + buffer_dist=0.3, smooth_mask=TRUE, + smooth_tolerance=0.15, verbose=FALSE) + +print( + plot_mask(m6a, coords_ring, title="6a. No buffer / no smooth") + + plot_mask(m6b, coords_ring, title="6b. Buffer=0.3", + fill="#27ae60", border="#1e8449") + + plot_mask(m6c, coords_ring, title="6c. Buffer + smooth", + fill="#8e44ad", border="#6c3483") + + plot_annotation(title="Section 6: Buffer and Smoothing", + theme=theme(plot.title=element_text(face="bold",size=13))) +) + +# ============================================================ +# SECTION 7 — Validation table +# ============================================================ +cases <- list( + list(label="Circle/convex", mask=m1a, coords=coords_circle), + list(label="Circle/concave", mask=m1b, coords=coords_circle), + list(label="Circle/kde", mask=m1c, coords=coords_circle), + list(label="Elongated/convex",mask=m2a, coords=coords_elong), + list(label="Elongated/concave",mask=m2b,coords=coords_elong), + list(label="Elongated/kde", mask=m2c, coords=coords_elong), + list(label="L-shape/convex", mask=m3a, coords=coords_L), + list(label="L-shape/conc1.5", mask=m3b, coords=coords_L), + list(label="L-shape/conc3", mask=m3c, coords=coords_L), + list(label="Multi/convex", mask=m4a, coords=coords_multi), + list(label="Multi/concave", mask=m4b, coords=coords_multi), + list(label="Multi/kde-lo", mask=m4c, coords=coords_multi), + list(label="Multi/kde-hi", mask=m4d, coords=coords_multi), + list(label="Ring/raw", mask=m6a, coords=coords_ring), + list(label="Ring/buffer", mask=m6b, coords=coords_ring), + list(label="Ring/smooth", mask=m6c, coords=coords_ring) +) + +results <- do.call(rbind, lapply(cases, function(vc) { + pts <- sf::st_geometry(sf::st_as_sf(vc$coords, coords=c("x","y"))) + ok <- sf::st_within(pts, sf::st_union(vc$mask), sparse=FALSE)[,1L] + n <- nrow(vc$coords) + data.frame(Case=vc$label, N=n, Enclosed=sum(ok), + Pct=round(100*sum(ok)/n,2), + Result=ifelse(sum(ok)==n,"\u2713 PASS","\u2717 FAIL"), + stringsAsFactors=FALSE) +})) + +cat("\n============================================================\n") +cat(" VALIDATION: Point Containment\n") +cat("============================================================\n") +print(results, row.names=FALSE) +cat("============================================================\n") +cat("demo_fit_spatial_mask.R complete.\n") + diff --git a/demo_holes_and_scale.R b/demo_holes_and_scale.R new file mode 100644 index 0000000..dbf511b --- /dev/null +++ b/demo_holes_and_scale.R @@ -0,0 +1,433 @@ +# ============================================================ +# demo_holes_and_scale.R (v3) +# +# Demonstrates fit_spatial_mask() on topologically complex +# cases: interior holes, irregular boundaries, archipelagos, +# and 100k-cell scale tests. +# +# Run after sourcing: source("fit_spatial_mask.R") +# +# Fixes in v3: +# 1. raster_sigma now in COORDINATE UNITS (not grid cells). +# Old calls used sigma in grid-cell units; values have been +# converted: sigma_cu = sigma_gc * (domain_width / resolution). +# 2. All hand-built circle/ellipse polygons now explicitly close +# their rings with rbind(mat, mat[1L,]). sf::st_polygon +# requires first == last row; seq(0, 2*pi, length.out=N) +# does NOT guarantee this due to floating-point rounding. +# +# Required packages: +# install.packages(c("sf","concaveman","MASS","isoband", +# "ggplot2","patchwork")) +# +# Sections: +# 1 — Donut: single interior void +# 2 — Swiss cheese: multiple holes +# 3 — Fractured map: irregular boundary + lakes +# 4 — Archipelago: disconnected islands +# 5 — Nested crescent voids +# 6 — sigma sensitivity sweep (coordinate units) +# 7 — Scale test: 100,000 cells +# 8 — Validation table +# ============================================================ + +library(sf) +library(ggplot2) +library(patchwork) +source("fit_spatial_mask.R") +set.seed(123) + + +# ---- Shared helpers ---------------------------------------------------------- + +plot_mask <- function(mask, coords, title = "", subtitle = "", + fill = "#4c9be8", border = "#1a5fa8", + pt_col = "#c0392b", pt_size = 0.9, pt_alpha = 0.5) { + # geom_sf is required for correct hole rendering. + # geom_polygon draws a line between consecutive rings in the same group, + # producing diagonal artefacts across hole interiors. + mask_sf <- sf::st_sf(geometry = mask) + ggplot() + + geom_sf(data = mask_sf, + fill = fill, alpha = 0.2, color = border, linewidth = 0.55) + + geom_point(data = coords, aes(x = x, y = y), + color = pt_col, size = pt_size, alpha = pt_alpha) + + coord_sf(datum = NA) + # datum=NA: suppress geographic graticule + labs(title = title, subtitle = subtitle, x = "X", y = "Y") + + theme_minimal(base_size = 9.5) + + theme(plot.title = element_text(face = "bold", size = 9.5), + plot.subtitle = element_text(size = 8, color = "grey40"), + panel.grid = element_line(color = "grey93")) +} + +# Sample n points uniformly from inside a polygon +sample_in_polygon <- function(poly_sf, n, max_tries = 20) { + bb <- sf::st_bbox(poly_sf); mu <- sf::st_union(poly_sf) + pts <- data.frame(x = numeric(0), y = numeric(0)) + for (i in seq_len(max_tries)) { + if (nrow(pts) >= n) break + cnd <- data.frame(x = runif(n * 8, bb["xmin"], bb["xmax"]), + y = runif(n * 8, bb["ymin"], bb["ymax"])) + ok <- as.logical( + sf::st_within(sf::st_as_sf(cnd, coords = c("x","y")), mu, + sparse = FALSE)[, 1L]) + pts <- rbind(pts, cnd[ok, ]) + } + pts[seq_len(min(n, nrow(pts))), ] +} + +# Build a closed ellipse polygon. +# IMPORTANT: seq(0, 2*pi, length.out=n) does NOT produce a closed ring +# due to float rounding (cos(2*pi) != cos(0) exactly). We always +# append the first row explicitly. +make_ellipse_poly <- function(cx, cy, a, b, angle_deg, n = 80) { + th <- seq(0, 2 * pi, length.out = n) + ang <- angle_deg * pi / 180 + xe <- a * cos(th); ye <- b * sin(th) + mat <- cbind(cx + xe * cos(ang) - ye * sin(ang), + cy + xe * sin(ang) + ye * cos(ang)) + mat <- rbind(mat, mat[1L, ]) # explicit ring closure + sf::st_polygon(list(mat)) +} + +# Helper: closed circle matrix +.circle_mat <- function(cx, cy, r, n = 100) { + th <- seq(0, 2 * pi, length.out = n) + mat <- cbind(cx + r * cos(th), cy + r * sin(th)) + rbind(mat, mat[1L, ]) # explicit ring closure +} + + +# ============================================================ +# SECTION 1 — Donut +# ============================================================ +cat("Section 1: Donut...\n") + +r_d <- sqrt(runif(800, 2^2, 5^2)) +th_d <- runif(800, 0, 2 * pi) +coords_donut <- data.frame(x = r_d * cos(th_d), y = r_d * sin(th_d)) +# Domain: x,y in [-5, 5], domain width ~ 10 coord units. +# raster_sigma = 0.25 coordinate units +# (previously 8 grid cells at res=400 over domain=11 → 8*(11/400) = 0.22) + +m1_r <- fit_spatial_mask(coords_donut, method = "raster", + raster_resolution = 256L, raster_sigma = 0.25, raster_threshold = 0.2, + verbose = FALSE) +m1_c <- fit_spatial_mask(coords_donut, method = "convex", verbose = FALSE) + +print( + plot_mask(m1_r, coords_donut, + title = "1a. Donut \u2014 Raster (hole \u2713)", + subtitle = "\u03c3 = 0.25 coord units, threshold = 0.2") + + plot_mask(m1_c, coords_donut, + title = "1b. Donut \u2014 Convex", + subtitle = "[hole filled]", + fill = "#e74c3c", border = "#922b21") + + plot_annotation(title = "Section 1: Donut", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +) + + +# ============================================================ +# SECTION 2 — Swiss cheese +# ============================================================ +cat("Section 2: Swiss cheese...\n") + +# Four circular holes in a 10x10 square. +# Domain width = 10 coord units. +# raster_sigma = 0.25 coord units +# (previously 7 grid cells at res=450 over domain=11 → 7*(11/450) = 0.17) +hdefs <- list( + list(cx = 2.5, cy = 7.5, r = 1.1), + list(cx = 7.0, cy = 6.5, r = 1.4), + list(cx = 4.0, cy = 2.5, r = 0.9), + list(cx = 8.0, cy = 2.0, r = 1.2) +) +outer_sq <- sf::st_sfc(sf::st_polygon(list( + rbind(c(0,0), c(10,0), c(10,10), c(0,10), c(0,0))))) +hcircles <- lapply(hdefs, function(h) { + sf::st_polygon(list(.circle_mat(h$cx, h$cy, h$r))) +}) +swiss_region <- sf::st_difference(outer_sq, + sf::st_sfc(sf::st_union(sf::st_sfc(hcircles)))) +coords_swiss <- sample_in_polygon(swiss_region, n = 1500) + +m2_r <- fit_spatial_mask(coords_swiss, method = "raster", + raster_resolution = 256L, raster_sigma = 0.25, raster_threshold = 0.2, + verbose = FALSE) +m2_c <- fit_spatial_mask(coords_swiss, method = "concave", + concavity = 1.8, verbose = FALSE) + +print( + plot_mask(m2_r, coords_swiss, + title = "2a. Swiss \u2014 Raster (holes \u2713)", + subtitle = "\u03c3 = 0.25 coord units, threshold = 0.2") + + plot_mask(m2_c, coords_swiss, + title = "2b. Swiss \u2014 Concave", + subtitle = "[holes filled]", + fill = "#e74c3c", border = "#922b21") + + plot_annotation(title = "Section 2: Swiss Cheese", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +) + + +# ============================================================ +# SECTION 3 — Fractured map +# ============================================================ +cat("Section 3: Fractured map...\n") + +# Irregular outer coast (radius ~9-12), five elliptic lakes. +# Domain width ~ 24 coord units. +# raster_sigma = 0.5 coord units +# (previously 9 grid cells at res=500 over domain=26.4 → 9*(26.4/500) = 0.48) +th_f <- seq(0, 2 * pi, length.out = 600) +r_c <- 9 + 2*sin(3*th_f) + sin(7*th_f) + 0.5*cos(13*th_f) + 0.3*sin(19*th_f) +mat_f <- cbind(r_c * cos(th_f), r_c * sin(th_f)) +mat_f <- rbind(mat_f, mat_f[1L, ]) # explicit ring closure +outer_coast <- sf::st_sfc(sf::st_polygon(list(mat_f))) + +lakes <- sf::st_sfc(list( + make_ellipse_poly(-3.0, 3.5, 2.2, 1.0, 30), + make_ellipse_poly( 4.5, 2.0, 1.8, 1.3, -20), + make_ellipse_poly( 1.0, -4.5, 2.5, 0.8, 60), + make_ellipse_poly(-5.0, -2.0, 1.2, 1.2, 0), + make_ellipse_poly( 3.5, -0.5, 1.0, 0.6, 15) +)) +frac_region <- sf::st_difference(outer_coast, sf::st_union(lakes)) +coords_frac <- sample_in_polygon(frac_region, n = 2000) + +m3_r <- fit_spatial_mask(coords_frac, method = "raster", + raster_resolution = 256L, raster_sigma = 0.5, raster_threshold = 0.18, + verbose = FALSE) +m3_k <- fit_spatial_mask(coords_frac, method = "kde", + kde_resolution = 300L, kde_threshold = 0.01, buffer_dist = 0.1, + verbose = FALSE) + +print( + plot_mask(m3_r, coords_frac, + title = "3a. Fractured \u2014 Raster", + subtitle = "Coast + 5 lakes | \u03c3 = 0.5") + + plot_mask(m3_k, coords_frac, + title = "3b. Fractured \u2014 KDE", + subtitle = "Density-based boundary", + fill = "#27ae60", border = "#1e8449") + + plot_annotation(title = "Section 3: Fractured Map", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +) + + +# ============================================================ +# SECTION 4 — Archipelago +# ============================================================ +cat("Section 4: Archipelago...\n") + +# Three disconnected islands; domain width ~ 16 coord units. +# raster_sigma = 0.4 coord units +# (previously 7 grid cells at res=450 over domain=17.6 → 7*(17.6/450) = 0.27) +mk_isl <- function(n, cx, cy, rx, ry, ns = 0.15) { + th <- runif(n, 0, 2 * pi); r <- sqrt(runif(n)) + data.frame(x = cx + r * rx * cos(th) + rnorm(n, 0, ns), + y = cy + r * ry * sin(th) + rnorm(n, 0, ns)) +} +coords_arch <- rbind(mk_isl(300, 0, 0, 3, 2), + mk_isl(300, 9, 4, 2, 3), + mk_isl(300, 5, -7, 2.5, 1.5)) + +m4_r <- fit_spatial_mask(coords_arch, method = "raster", + raster_resolution = 256L, raster_sigma = 0.4, raster_threshold = 0.18, + verbose = FALSE) +m4_c <- fit_spatial_mask(coords_arch, method = "convex", verbose = FALSE) + +n_geom <- length(sf::st_cast(m4_r, "POLYGON")) +print( + plot_mask(m4_r, coords_arch, + title = "4a. Archipelago \u2014 Raster", + subtitle = sprintf("%d disconnected polygons \u2713", n_geom)) + + plot_mask(m4_c, coords_arch, + title = "4b. Archipelago \u2014 Convex", + subtitle = "[bridges islands]", + fill = "#e74c3c", border = "#922b21") + + plot_annotation(title = "Section 4: Archipelago", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +) + + +# ============================================================ +# SECTION 5 — Nested crescent voids +# ============================================================ +cat("Section 5: Nested voids...\n") + +# Two crescent-shaped voids inside a circle of radius 10. +# Domain width ~ 20 coord units. +# fine sigma = 0.4 (previously 8 gc at res=500 over domain=22 → 0.35) +# coarse sigma = 1.0 (previously 20 gc → 0.88) +mk_cres <- function(cx, cy, ro, ri, dx, dy, n = 100) { + moc <- .circle_mat(cx, cy, ro, n) + mic <- .circle_mat(cx+dx, cy+dy, ri, n) + oc <- sf::st_polygon(list(moc)) + ic <- sf::st_polygon(list(mic)) + sf::st_sfc(sf::st_difference(sf::st_sfc(oc), sf::st_sfc(ic))) +} + +th_od <- seq(0, 2 * pi, length.out = 200) +mat_od <- cbind(10 * cos(th_od), 10 * sin(th_od)) +mat_od <- rbind(mat_od, mat_od[1L, ]) # explicit ring closure +outer_d <- sf::st_sfc(sf::st_polygon(list(mat_od))) +cres1 <- mk_cres(-2, 2, 2.5, 2, 0.9, -0.2) +cres2 <- mk_cres( 3, -3, 2.2, 1.8, -0.7, 0.4) +nest_region <- sf::st_difference(outer_d, sf::st_union(c(cres1, cres2))) +coords_nested <- sample_in_polygon(nest_region, n = 2500) + +m5_fine <- fit_spatial_mask(coords_nested, method = "raster", + raster_resolution = 256L, raster_sigma = 0.4, raster_threshold = 0.18, + verbose = FALSE) +m5_coarse <- fit_spatial_mask(coords_nested, method = "raster", + raster_resolution = 256L, raster_sigma = 1.0, raster_threshold = 0.18, + verbose = FALSE) + +print( + plot_mask(m5_fine, coords_nested, + title = "5a. Nested \u2014 fine \u03c3 = 0.4", + subtitle = "Crescent voids preserved") + + plot_mask(m5_coarse, coords_nested, + title = "5b. Nested \u2014 coarse \u03c3 = 1.0", + subtitle = "Narrow voids filled", + fill = "#8e44ad", border = "#6c3483") + + plot_annotation( + title = "Section 5: Nested Crescent Voids", + subtitle = "Smaller \u03c3 \u2192 finer void detection", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +) + + +# ============================================================ +# SECTION 6 — Sigma sweep (coordinate units) +# ============================================================ +cat("Section 6: Sigma sweep...\n") + +# Swiss cheese domain ~ 10 coord units. +# Sweep spans: tight (holes preserved) → loose (holes filled). +# Values in COORDINATE UNITS: c(0.1, 0.2, 0.4, 0.7) +# Equivalent old grid-cell values at res=400, domain=11: +# c(3.6, 7.3, 14.5, 25.5) +sig_vals <- c(0.1, 0.2, 0.4, 0.7) +sig_cols <- c("#e74c3c", "#e67e22", "#27ae60", "#2980b9") + +sig_pls <- mapply(function(sv, col) { + m <- fit_spatial_mask(coords_swiss, method = "raster", + raster_resolution = 256L, raster_sigma = sv, raster_threshold = 0.2, + verbose = FALSE) + nh <- max(0L, length(sf::st_cast(m, "POLYGON")) - 1L) + plot_mask(m, coords_swiss, + title = paste0("\u03c3 = ", sv, " cu"), + subtitle = paste0(nh, " hole(s) detected"), + fill = col, border = col, pt_size = 0.5) +}, sig_vals, sig_cols, SIMPLIFY = FALSE) + +print(wrap_plots(sig_pls, nrow = 1) + + plot_annotation( + title = "Section 6: raster_sigma Sensitivity \u2014 Swiss Cheese", + subtitle = "All values in coordinate units | Small \u03c3 \u2192 holes preserved | Large \u03c3 \u2192 holes filled", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +) + + +# ============================================================ +# SECTION 7 — 100k scale test +# ============================================================ +cat("Section 7: 100k scale test...\n") + +# Irregular outer shape radius ~12-14, three elliptic holes. +# Domain width ~ 28 coord units. +# raster_sigma = 0.6 coord units +# (previously 10 grid cells at res=512 over domain=30.8 → 10*(30.8/512) = 0.60) +th100 <- seq(0, 2 * pi, length.out = 400) +r100 <- 12 + sin(5 * th100) + 0.4 * cos(11 * th100) +mat100 <- cbind(r100 * cos(th100), r100 * sin(th100)) +mat100 <- rbind(mat100, mat100[1L, ]) # explicit ring closure +outer100 <- sf::st_sfc(sf::st_polygon(list(mat100))) + +holes100 <- sf::st_sfc(list( + make_ellipse_poly( 3, 5, 3, 1.5, 10), + make_ellipse_poly(-5, 2, 2.5, 2, -30), + make_ellipse_poly( 1, -6, 2, 3, 50) +)) +reg100 <- sf::st_difference(outer100, sf::st_union(holes100)) +cat(" Sampling 100,000 points...\n") +coords_100k <- sample_in_polygon(reg100, n = 100000) + +t_r <- system.time( + m7_r <- fit_spatial_mask(coords_100k, method = "raster", + raster_resolution = 256L, raster_sigma = 0.6, raster_threshold = 0.18, + verbose = TRUE) +) +t_k <- system.time( + m7_k <- fit_spatial_mask(coords_100k, method = "kde", + kde_resolution = 200L, kde_threshold = 0.005, buffer_dist = 0.1, + verbose = TRUE) +) +cat(sprintf("\n Raster (256\u00d7256): %.2f s\n KDE (200\u00d7200): %.2f s\n", + t_r["elapsed"], t_k["elapsed"])) + +sub <- coords_100k[sample(nrow(coords_100k), 5000), ] +print( + plot_mask(m7_r, sub, + title = sprintf("7a. 100k \u2014 Raster (%.1f s)", t_r["elapsed"]), + subtitle = "3 holes detected \u2713", + pt_size = 0.25, pt_alpha = 0.25) + + plot_mask(m7_k, sub, + title = sprintf("7b. 100k \u2014 KDE (%.1f s)", t_k["elapsed"]), + subtitle = "Density boundary", + fill = "#27ae60", border = "#1e8449", + pt_size = 0.25, pt_alpha = 0.25) + + plot_annotation( + title = "Section 7: 100,000-Cell Scale Test", + subtitle = "5,000 of 100,000 points shown", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +) + + +# ============================================================ +# SECTION 8 — Validation table +# ============================================================ +cat("\nSection 8: Validation...\n") + +vcases <- list( + list(label = "Donut/raster", mask = m1_r, coords = coords_donut), + list(label = "Donut/convex", mask = m1_c, coords = coords_donut), + list(label = "Swiss/raster", mask = m2_r, coords = coords_swiss), + list(label = "Swiss/concave", mask = m2_c, coords = coords_swiss), + list(label = "Fractured/raster", mask = m3_r, coords = coords_frac), + list(label = "Fractured/kde", mask = m3_k, coords = coords_frac), + list(label = "Archipelago/raster", mask = m4_r, coords = coords_arch), + list(label = "Archipelago/convex", mask = m4_c, coords = coords_arch), + list(label = "Nested/fine", mask = m5_fine, coords = coords_nested), + list(label = "Nested/coarse", mask = m5_coarse, coords = coords_nested), + list(label = "100k/raster", mask = m7_r, coords = coords_100k), + list(label = "100k/kde", mask = m7_k, coords = coords_100k) +) + +res <- do.call(rbind, lapply(vcases, function(vc) { + pts <- sf::st_geometry(sf::st_as_sf(vc$coords, coords = c("x", "y"))) + ok <- sf::st_within(pts, sf::st_union(vc$mask), sparse = FALSE)[, 1L] + n <- nrow(vc$coords) + data.frame( + Case = vc$label, + N = n, + Enclosed = sum(ok), + Pct = round(100 * sum(ok) / n, 3), + Result = ifelse(sum(ok) == n, "\u2713 PASS", "\u2717 FAIL"), + stringsAsFactors = FALSE + ) +})) + +cat("\n============================================================\n") +cat(" VALIDATION: Point Containment (all must be PASS)\n") +cat("============================================================\n") +print(res, row.names = FALSE) +cat("============================================================\n") +cat("demo_holes_and_scale.R complete.\n") + diff --git a/fit_spatial_mask.R b/fit_spatial_mask.R new file mode 100644 index 0000000..a12bff7 --- /dev/null +++ b/fit_spatial_mask.R @@ -0,0 +1,371 @@ +# ============================================================ +# fit_spatial_mask.R (v3) +# +# Fits a spatial boundary mask to XY point coordinates. +# Methods: "convex" | "concave" | "kde" | "raster" +# +# RASTER METHOD (v3 rewrite) +# --------------------------- +# Previously: Gaussian smooth → isoband contour → manual hole assembly. +# Problem: isoband winding is inconsistent; manual hole assembly +# produced bowtie artefacts and diagonal fill lines. +# +# Now: Gaussian smooth → binary threshold → union of "on" grid cells. +# Key insight: unioning axis-aligned rectangles via GEOS dissolve always +# produces valid topology. Holes and islands emerge naturally — +# no ring-winding logic needed at all. +# +# KEY PARAMETER CHANGE (v3): +# raster_sigma is now in COORDINATE UNITS (same units as x/y), +# not grid-cell units. This makes it directly interpretable: +# "how wide is the Gaussian around each point?" +# NULL → auto = 3 % of domain width. +# +# install.packages(c("sf", "concaveman", "MASS", "isoband", "ggplot2")) +# ============================================================ + + +# ---- Internal helpers -------------------------------------------------------- + +.signed_area <- function(ring_mat) { + n <- nrow(ring_mat); if (n < 3L) return(0) + x <- ring_mat[, 1L]; y <- ring_mat[, 2L] + 0.5 * sum(x[seq_len(n - 1L)] * y[seq_len(n - 1L) + 1L] - + x[seq_len(n - 1L) + 1L] * y[seq_len(n - 1L)]) +} + +.gaussian_kernel_1d <- function(sigma) { + r <- ceiling(3 * sigma) + k <- exp(-((-r):r)^2 / (2 * sigma^2)) + k / sum(k) +} + +.smooth_rows <- function(mat, k) { + t(apply(mat, 1L, function(row) { + s <- stats::filter(row, k, method = "convolution", sides = 2L) + s[is.na(s)] <- 0; as.numeric(s) + })) +} + +.smooth_cols <- function(mat, k) { + apply(mat, 2L, function(col) { + s <- stats::filter(col, k, method = "convolution", sides = 2L) + s[is.na(s)] <- 0; as.numeric(s) + }) +} + +# Separable 2-D Gaussian convolution. sigma is in GRID-CELL units. +.gaussian_smooth_2d <- function(mat, sigma, n_cores = 1L) { + k <- .gaussian_kernel_1d(sigma) + if (n_cores > 1L && .Platform$OS.type == "unix") { + row_list <- parallel::mclapply(seq_len(nrow(mat)), function(i) { + s <- stats::filter(mat[i, ], k, method = "convolution", sides = 2L) + s[is.na(s)] <- 0; as.numeric(s) + }, mc.cores = n_cores) + mat <- do.call(rbind, row_list) + } else { + mat <- .smooth_rows(mat, k) + } + .smooth_cols(mat, k) +} + +# Used only by the "kde" method (isoband-based). +.build_sf_with_holes <- function(bands, crs) { + rings <- list() + for (band in bands) { + if (length(band$x) == 0L) next + ids <- if (!is.null(band$id)) band$id else rep(1L, length(band$x)) + for (uid in unique(ids[!is.na(ids)])) { + sel <- !is.na(ids) & ids == uid + rx <- band$x[sel]; ry <- band$y[sel] + if (length(rx) < 3L) next + if (rx[1L] != rx[length(rx)] || ry[1L] != ry[length(ry)]) { + rx <- c(rx, rx[1L]); ry <- c(ry, ry[1L]) + } + mat <- cbind(rx, ry) + area <- abs(.signed_area(mat)) + rings <- c(rings, list(list(mat = mat, area = area))) + } + } + if (length(rings) == 0L) + stop("No rings extracted. Adjust kde_threshold / kde_bandwidth parameters.") + ord <- order(sapply(rings, `[[`, "area"), decreasing = TRUE) + rings <- rings[ord] + polys <- lapply(rings, function(r) { + mat <- rbind(r$mat, r$mat[1L, , drop = FALSE]) + dupl <- c(FALSE, rowSums(abs(diff(mat))) == 0) + sf::st_sfc(sf::st_polygon(list(mat[!dupl, , drop = FALSE])), crs = crs) + }) + result <- polys[[1L]] + if (length(polys) > 1L) { + for (i in seq(2L, length(polys))) { + inside <- tryCatch( + isTRUE(sf::st_within(polys[[i]], result, sparse = FALSE)[1L, 1L]), + error = function(e) FALSE + ) + result <- if (inside) sf::st_difference(result, polys[[i]]) + else sf::st_union(result, polys[[i]]) + } + } + sf::st_make_valid(result) +} + + +# ---- Main function ----------------------------------------------------------- + +fit_spatial_mask <- function( + coords, + + # Method + method = "raster", # "convex" | "concave" | "kde" | "raster" + + # Concave-hull parameters + concavity = 2, + length_threshold = 0, + + # KDE parameters + kde_bandwidth = NULL, + kde_threshold = 0.05, + kde_resolution = 256L, + + # Raster parameters + # raster_sigma: Gaussian spread in COORDINATE UNITS (same as x/y). + # NULL → auto = 3 % of domain width. + # Larger sigma → holes fill in, islands merge, boundary smooths. + # Smaller sigma → holes and fine gaps are preserved. + raster_resolution = 256L, + raster_sigma = NULL, + raster_threshold = 0.15, + raster_min_pts = 1L, + + # Post-processing + buffer_dist = 0, + smooth_mask = FALSE, + smooth_tolerance = NULL, + + # Misc + n_cores = 1L, + crs = NA, + plot = FALSE, + verbose = TRUE +) { + + # ---- 0. Package checks ----------------------------------------------------- + req <- c("sf", "concaveman", "MASS", "isoband") + miss <- req[!sapply(req, requireNamespace, quietly = TRUE)] + if (length(miss) > 0L) + stop("Install missing packages: install.packages(c(", + paste0('"', miss, '"', collapse = ", "), "))") + + # ---- 0b. Normalise CRS (sf::st_sfc rejects bare NA in sf >= 1.0) ---------- + crs <- if (length(crs) == 1L && is.na(crs)) sf::NA_crs_ else crs + + # ---- 1. Parse and validate coordinates ------------------------------------ + if (is.matrix(coords)) coords <- as.data.frame(coords) + if (!is.data.frame(coords)) stop("`coords` must be a data.frame or matrix.") + if (!all(c("x", "y") %in% names(coords))) { + if (ncol(coords) >= 2L) { + coords <- coords[, 1:2]; names(coords) <- c("x", "y") + } else stop("Need columns 'x' and 'y'.") + } + coords <- coords[complete.cases(coords[, c("x", "y")]), c("x", "y")] + n_pts <- nrow(coords) + if (n_pts < 3L) stop("Need at least 3 coordinate pairs.") + if (n_pts > 50000L && method == "kde" && verbose) + message("Large n = ", n_pts, " — consider method = 'raster'.") + + # ---- 2. Fit mask ----------------------------------------------------------- + mask_poly <- switch(method, + + # -- Convex hull ------------------------------------------------------------ + "convex" = { + pts <- sf::st_as_sf(coords, coords = c("x", "y"), crs = crs) + sf::st_convex_hull(sf::st_union(pts)) + }, + + # -- Concave hull ----------------------------------------------------------- + "concave" = { + hp <- concaveman::concaveman(as.matrix(coords), + concavity = concavity, + length_threshold = length_threshold) + sf::st_sfc(sf::st_polygon(list(hp)), crs = crs) + }, + + # -- KDE contour (supports holes, slower for large n) ---------------------- + "kde" = { + bw_x <- if (!is.null(kde_bandwidth)) kde_bandwidth[1L] else + MASS::bandwidth.nrd(coords$x) + bw_y <- if (!is.null(kde_bandwidth)) { + if (length(kde_bandwidth) == 2L) kde_bandwidth[2L] else kde_bandwidth[1L] + } else MASS::bandwidth.nrd(coords$y) + xp <- diff(range(coords$x)) * 0.1 + yp <- diff(range(coords$y)) * 0.1 + kf <- MASS::kde2d(coords$x, coords$y, h = c(bw_x, bw_y), + n = kde_resolution, + lims = c(range(coords$x) + c(-xp, xp), + range(coords$y) + c(-yp, yp))) + dv <- as.vector(kf$z) + bands <- isoband::isobands(kf$x, kf$y, kf$z, + levels_low = quantile(dv, kde_threshold), + levels_high = max(dv) + 1) + .build_sf_with_holes(bands, crs) + }, + + # -- Raster: Gaussian sum → threshold → cell union ------------------------- + # + # Algorithm: + # 1. Bin points onto a regular grid (raster_resolution x raster_resolution). + # 2. Convolve the binary occupancy grid with a 2-D Gaussian of width + # raster_sigma (coordinate units). Equivalent to placing an isotropic + # Gaussian at every point and summing them all. + # 3. Threshold at raster_threshold x max(field). Cells above threshold + # are "inside"; cells below are "outside". + # 4. Convert each "inside" cell to an axis-aligned rectangle and dissolve + # via GEOS union. Holes and islands emerge from the geometry naturally + # — no ring-winding logic required. + # 5. Smooth the staircase boundary with a morphological close + # (buffer out then in by ~half the cell diagonal). + # + # Tuning guide: + # raster_sigma up → holes fill in, islands merge, edges smoother + # raster_sigma down → holes preserved, fine structure retained + # raster_threshold up → mask shrinks (requires denser coverage) + # raster_threshold down → mask grows (accepts sparse regions) + "raster" = { + + # Grid cell edges + xpad <- diff(range(coords$x)) * 0.05 + ypad <- diff(range(coords$y)) * 0.05 + xb <- seq(min(coords$x) - xpad, max(coords$x) + xpad, + length.out = raster_resolution + 1L) + yb <- seq(min(coords$y) - ypad, max(coords$y) + ypad, + length.out = raster_resolution + 1L) + hx <- xb[2L] - xb[1L] # cell width (coordinate units) + hy <- yb[2L] - yb[1L] # cell height (coordinate units) + + # Bin points: cm[yi, xi] = point count at cell (xi, yi) + xi <- pmax(1L, pmin(raster_resolution, + findInterval(coords$x, xb, rightmost.closed = TRUE))) + yi <- pmax(1L, pmin(raster_resolution, + findInterval(coords$y, yb, rightmost.closed = TRUE))) + cv <- tabulate(yi + (xi - 1L) * raster_resolution, + nbins = raster_resolution^2L) + cm <- matrix(cv, nrow = raster_resolution, ncol = raster_resolution) + + # Convert raster_sigma from coordinate units to grid-cell units + domain_w <- diff(range(coords$x)) + 2 * xpad + sigma_cu <- if (is.null(raster_sigma)) 0.03 * domain_w else raster_sigma + sigma_gc <- sigma_cu / hx # grid-cell units for the convolution kernel + + if (verbose) + message(" raster_sigma = ", round(sigma_cu, 4), + " coord units (", round(sigma_gc, 1), " grid cells)") + + # Convolve binary occupancy with Gaussian — sum of point Gaussians + indicator <- (cm >= raster_min_pts) * 1.0 + sm <- .gaussian_smooth_2d(indicator, sigma = sigma_gc, + n_cores = n_cores) + + # Threshold → binary + thresh <- raster_threshold * max(sm, na.rm = TRUE) + binary <- sm >= thresh + on_cells <- which(binary, arr.ind = TRUE) # [row = yi, col = xi] + + if (nrow(on_cells) == 0L) + stop("No grid cells above threshold. Try: lower raster_threshold, ", + "increase raster_sigma, or increase raster_resolution.") + + # Build one closed rectangle per on-cell + ri <- on_cells[, 1L] # y indices + ci <- on_cells[, 2L] # x indices + rects <- lapply(seq_len(nrow(on_cells)), function(k) { + sf::st_polygon(list(matrix( + c(xb[ci[k]], yb[ri[k]], + xb[ci[k] + 1L], yb[ri[k]], + xb[ci[k] + 1L], yb[ri[k] + 1L], + xb[ci[k]], yb[ri[k] + 1L], + xb[ci[k]], yb[ri[k]]), # close ring + ncol = 2L, byrow = TRUE + ))) + }) + + # GEOS dissolve — topology (holes, islands) emerges automatically + raw <- sf::st_union(sf::st_sfc(rects, crs = crs)) + + # Morphological close: smooth staircase boundary. + # Buffer out then in by ~half the cell diagonal. + cell_diag <- sqrt(hx^2 + hy^2) + raw <- sf::st_buffer(raw, dist = cell_diag * 0.6) + raw <- sf::st_buffer(raw, dist = -cell_diag * 0.6) + + sf::st_make_valid(raw) + }, + + stop("Unknown method '", method, "'. Choose: convex, concave, kde, raster.") + ) + + if (!inherits(mask_poly, "sfc")) mask_poly <- sf::st_geometry(mask_poly) + + # ---- 3. Optional buffer ---------------------------------------------------- + if (buffer_dist > 0) + mask_poly <- sf::st_buffer(mask_poly, dist = buffer_dist) + + # ---- 4. Containment guarantee ---------------------------------------------- + pts_sfc <- sf::st_geometry( + sf::st_as_sf(coords, coords = c("x", "y"), crs = crs)) + contained <- sf::st_within(pts_sfc, sf::st_union(mask_poly), sparse = FALSE) + n_out <- sum(!contained[, 1L]) + if (n_out > 0L) { + warning(n_out, " point(s) outside mask — applying corrective buffer.") + dists <- sf::st_distance(pts_sfc[!contained[, 1L]], + sf::st_union(mask_poly)) + mask_poly <- sf::st_buffer(mask_poly, + dist = max(as.numeric(dists)) * 1.05) + } + + # ---- 5. Optional boundary smoothing ---------------------------------------- + if (smooth_mask) { + if (is.null(smooth_tolerance)) { + bb <- sf::st_bbox(mask_poly) + smooth_tolerance <- sqrt((bb["xmax"] - bb["xmin"])^2 + + (bb["ymax"] - bb["ymin"])^2) * 0.01 + } + mask_poly <- sf::st_simplify(mask_poly, dTolerance = smooth_tolerance) + } + sf::st_crs(mask_poly) <- crs + + # ---- 6. Verbose summary ---------------------------------------------------- + if (verbose) { + bb <- sf::st_bbox(mask_poly) + area <- suppressWarnings(as.numeric(sf::st_area(mask_poly))) + cat("--------------------------------------------------\n") + cat(" Spatial mask fitted (v3)\n") + cat(" Method :", method, "\n") + cat(" Points :", n_pts, "\n") + cat(" Sub-geometries :", length(mask_poly), "\n") + cat(" Bounding box : x [", round(bb["xmin"], 3), ",", + round(bb["xmax"], 3), "] y [", + round(bb["ymin"], 3), ",", round(bb["ymax"], 3), "]\n") + cat(" Area :", round(area, 4), "\n") + cat(" Buffer applied :", buffer_dist, "\n") + cat("--------------------------------------------------\n") + } + + # ---- 7. Optional quick plot ------------------------------------------------ + if (plot && requireNamespace("ggplot2", quietly = TRUE)) { + mask_sf <- sf::st_sf(geometry = mask_poly) + print(ggplot2::ggplot() + + ggplot2::geom_sf( + data = mask_sf, + fill = "#4c9be8", alpha = 0.2, color = "#1a5fa8", linewidth = 0.55) + + ggplot2::geom_point( + data = coords, ggplot2::aes(x = x, y = y), + color = "#c0392b", size = 0.7, alpha = 0.5) + + ggplot2::coord_sf(datum = NA) + + ggplot2::labs(title = paste0("Mask | method='", method, "'"), + x = "X", y = "Y") + + ggplot2::theme_minimal(base_size = 11)) + } + + return(mask_poly) +} diff --git a/vignette_fit_spatial_mask.Rmd b/vignette_fit_spatial_mask.Rmd new file mode 100644 index 0000000..aac1ac2 --- /dev/null +++ b/vignette_fit_spatial_mask.Rmd @@ -0,0 +1,750 @@ +--- +title: "Fitting Spatial Masks to Point Coordinate Data" +subtitle: "A guide to `fit_spatial_mask()`: methods, parameters, and tuning" +author: "Raredon Laboratory · Yale School of Medicine" +date: "`r Sys.Date()`" +output: + html_document: + toc: true + toc_float: + collapsed: false + smooth_scroll: true + toc_depth: 3 + theme: flatly + highlight: kate + code_folding: show + fig_width: 10 + fig_height: 5 + df_print: kable +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + echo = TRUE, + warning = FALSE, + message = FALSE, + fig.align = "center", + cache = FALSE +) +``` + +--- + +# Overview + +In spatial transcriptomics and spatial proteomics experiments, the tissue of interest occupies some irregular region of a slide. Before computing any spatially-resolved quantity (cell-type composition, signaling gradients, niche structure), you need a reliable **spatial mask**: a polygon or multi-polygon that faithfully represents the shape of the tissue, including internal voids (vessel lumens, necrotic cores, tissue tears) and disconnected fragments. + +`fit_spatial_mask()` takes a set of XY point coordinates — typically cell centroids or transcript locations — and fits a mask polygon to them. It supports four methods of increasing complexity: + +| Method | Topology | Speed | Best for | +|---|---|---|---| +| `"convex"` | No holes, no islands | Instant | Quick sanity checks | +| `"concave"` | No holes, no islands | Fast | Simple non-convex shapes | +| `"kde"` | Holes and islands | Moderate | Dense, smooth datasets | +| `"raster"` | Holes and islands | Fast at scale | Most use cases; recommended default | + +--- + +# Setup + +Place `fit_spatial_mask.R` in your working directory, then source it. The packages below are required. + +```{r libraries} +library(sf) +library(ggplot2) +library(patchwork) +source("fit_spatial_mask.R") + +set.seed(42) +``` + +--- + +# Shared plotting helper + +Throughout this vignette we use a single `plot_mask()` helper. The key implementation note is that it uses `geom_sf()` rather than `geom_polygon()`. This is essential: `geom_polygon()` draws a line between the last vertex of the exterior ring and the first vertex of each interior (hole) ring, producing diagonal artefacts. `geom_sf()` delegates rendering to the sf/GEOS layer which handles holes correctly. + +```{r plot-helper} +plot_mask <- function(mask, coords, title = "", subtitle = "", + fill = "#4c9be8", border = "#1a5fa8", + pt_col = "#c0392b", pt_size = 1.2, pt_alpha = 0.55) { + mask_sf <- sf::st_sf(geometry = mask) + ggplot() + + geom_sf(data = mask_sf, fill = fill, alpha = 0.2, + color = border, linewidth = 0.55) + + geom_point(data = coords, aes(x = x, y = y), + color = pt_col, size = pt_size, alpha = pt_alpha) + + coord_sf(datum = NA) + # suppress geographic graticule + labs(title = title, subtitle = subtitle, x = "X", y = "Y") + + theme_minimal(base_size = 9.5) + + theme(plot.title = element_text(face = "bold", size = 9.5), + plot.subtitle = element_text(size = 8, color = "grey40"), + panel.grid = element_line(color = "grey93")) +} +``` + +--- + +# Function reference: `fit_spatial_mask()` + +```r +fit_spatial_mask( + coords, + + # Method selection + method = "raster", # "convex" | "concave" | "kde" | "raster" + + # Concave hull + concavity = 2, # lower = tighter; higher = approaches convex + length_threshold = 0, + + # KDE + kde_bandwidth = NULL, # NULL = auto (bandwidth.nrd) + kde_threshold = 0.05, # quantile of density to threshold at + kde_resolution = 256L, + + # Raster (recommended) + raster_resolution = 256L, # grid cells per axis + raster_sigma = NULL, # Gaussian spread in COORDINATE UNITS; NULL = 3% of domain + raster_threshold = 0.15, # fraction of peak Gaussian field to threshold at + raster_min_pts = 1L, # min points per cell to count as occupied + + # Post-processing + buffer_dist = 0, # expand mask outward by this distance + smooth_mask = FALSE, # simplify boundary (st_simplify) + smooth_tolerance = NULL, # NULL = 1% of bounding box diagonal + + # Misc + n_cores = 1L, + crs = NA, + plot = FALSE, + verbose = TRUE +) +``` + +## Return value + +`fit_spatial_mask()` returns an `sfc` geometry object (the `sf` package's geometry column class). You can: + +- Pass it directly to `estimate_concentration_field()` as the `mask` argument +- Plot it with `geom_sf()` +- Compute area with `sf::st_area()` +- Test point containment with `sf::st_within()` +- Convert to a data frame with `as.data.frame(sf::st_coordinates(mask))` + +--- + +# Section 1 — Circular cloud: sanity check + +The simplest possible test: a Gaussian cloud with no special topology. All three non-trivial methods should return similar results here. This section establishes that the function runs and that the three main methods agree on simple inputs. + +```{r sec1-data} +n <- 400 +coords_circle <- data.frame(x = rnorm(n, 0, 1), + y = rnorm(n, 0, 1)) +``` + +```{r sec1-fit} +m1a <- fit_spatial_mask(coords_circle, method = "convex", verbose = FALSE) +m1b <- fit_spatial_mask(coords_circle, method = "concave", concavity = 2, + verbose = FALSE) +m1c <- fit_spatial_mask(coords_circle, method = "kde", + kde_threshold = 0.02, buffer_dist = 0.1, + verbose = FALSE) +``` + +```{r sec1-plot, fig.height=4} +plot_mask(m1a, coords_circle, title = "1a. Convex hull", + subtitle = "method='convex'") + +plot_mask(m1b, coords_circle, title = "1b. Concave hull", + subtitle = "concavity=2") + +plot_mask(m1c, coords_circle, title = "1c. KDE contour", + subtitle = "kde_threshold=0.02, buffer=0.1", + fill = "#27ae60", border = "#1e8449") + +plot_annotation( + title = "Section 1: Circular Cloud — Sanity Check", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +**What to look for:** All three masks should closely envelope the point cloud. Small differences near the boundary are expected — convex gives a polygon, KDE gives a smoother contour. Any large discrepancy here indicates a parameter problem. + +--- + +# Section 2 — Elongated cloud: anisotropy + +Elongated tissue (e.g. a muscle section, a linear airway epithelium) has very different extents along its two axes. All methods should handle this naturally; this section confirms that no method is implicitly assuming isotropy. + +```{r sec2} +coords_elong <- data.frame(x = rnorm(n, 0, 3), + y = rnorm(n, 0, 0.6)) + +m2a <- fit_spatial_mask(coords_elong, method = "convex", verbose = FALSE) +m2b <- fit_spatial_mask(coords_elong, method = "concave", verbose = FALSE) +m2c <- fit_spatial_mask(coords_elong, method = "kde", + kde_threshold = 0.02, buffer_dist = 0.1, + verbose = FALSE) +``` + +```{r sec2-plot, fig.height=4} +plot_mask(m2a, coords_elong, title = "2a. Convex") + +plot_mask(m2b, coords_elong, title = "2b. Concave") + +plot_mask(m2c, coords_elong, title = "2c. KDE", + fill = "#27ae60", border = "#1e8449") + +plot_annotation( + title = "Section 2: Elongated Cloud", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +**What to look for:** The mask should track the long axis of the cloud, not default to a circle. The KDE bandwidth is computed separately for x and y (`MASS::bandwidth.nrd`), so it adapts to anisotropy automatically. + +--- + +# Section 3 — L-shaped cloud: the non-convex diagnostic + +This is the most important basic test. The L-shape is the canonical non-convex case: the convex hull incorrectly fills the concave corner of the L, including empty space that contains no tissue. A good mask must track the actual boundary and exclude the corner. + +> **Rule of thumb:** If your tissue has any re-entrant angles (C-shapes, crescents, airways, arcs), do not use `method = "convex"`. Use `method = "concave"` or `method = "raster"`. + +```{r sec3-data} +coords_L <- rbind( + data.frame(x = runif(200, 0, 5) + rnorm(200, 0, 0.05), + y = runif(200, 0, 1) + rnorm(200, 0, 0.05)), # horizontal bar + data.frame(x = runif(200, 0, 1) + rnorm(200, 0, 0.05), + y = runif(200, 0, 5) + rnorm(200, 0, 0.05))) # vertical bar +``` + +```{r sec3-fit} +m3a <- fit_spatial_mask(coords_L, method = "convex", verbose = FALSE) +m3b <- fit_spatial_mask(coords_L, method = "concave", concavity = 1.5, verbose = FALSE) +m3c <- fit_spatial_mask(coords_L, method = "concave", concavity = 3.0, verbose = FALSE) +``` + +```{r sec3-plot, fig.height=4} +plot_mask(m3a, coords_L, + title = "3a. Convex hull", + subtitle = "Incorrectly includes empty corner", + fill = "#e74c3c", border = "#922b21") + +plot_mask(m3b, coords_L, + title = "3b. Concave, tight (1.5)", + subtitle = "Correctly excludes corner") + +plot_mask(m3c, coords_L, + title = "3c. Concave, relaxed (3.0)", + subtitle = "Partially includes corner", + fill = "#8e44ad", border = "#6c3483") + +plot_annotation( + title = "Section 3: L-Shaped Cloud — Non-Convex", + subtitle = "Lower concavity wraps more tightly; convex hull fails", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +### The `concavity` parameter + +`concavity` controls how aggressively the hull "cuts in" to follow the data boundary. It is the key tuning parameter for `method = "concave"`. + +| Value | Behaviour | +|---|---| +| 1.0–1.5 | Very tight — tracks the data boundary closely | +| 2.0 | Default — good balance for most tissue shapes | +| 3.0–5.0 | Relaxed — approaches the convex hull | + +**When to use lower values:** C-shapes, crescents, U-shapes, any tissue with a notch or bay. **When to use higher values:** Dense blobs where you want a smooth, padded boundary. If you see the mask cutting through the interior of your tissue, increase `concavity`. If it fills in empty space at concavities, decrease it. + +--- + +# Section 4 — Multi-cluster blobs: connected vs. isolated masks + +When tissue fragments are well-separated, a low `kde_threshold` will produce a single mask that bridges them (because the KDE density never drops to zero between clusters at low thresholds). A higher threshold separates them into distinct polygons. + +The raster method's `raster_sigma` plays the analogous role: a large sigma bridges clusters; a small sigma isolates them. + +```{r sec4} +mk_cluster <- function(n, cx, cy, sd = 0.4) + data.frame(x = rnorm(n, cx, sd), y = rnorm(n, cy, sd)) + +coords_multi <- rbind(mk_cluster(150, 0, 0), + mk_cluster(150, 4, 1), + mk_cluster(150, 2, 4)) + +m4a <- fit_spatial_mask(coords_multi, method = "convex", verbose = FALSE) +m4b <- fit_spatial_mask(coords_multi, method = "concave", concavity = 2, + verbose = FALSE) +m4c <- fit_spatial_mask(coords_multi, method = "kde", + kde_threshold = 0.01, buffer_dist = 0.15, + verbose = FALSE) +m4d <- fit_spatial_mask(coords_multi, method = "kde", + kde_threshold = 0.25, buffer_dist = 0.15, + verbose = FALSE) +``` + +```{r sec4-plot, fig.height=7} +(plot_mask(m4a, coords_multi, title = "4a. Convex") + + plot_mask(m4b, coords_multi, title = "4b. Concave")) / +(plot_mask(m4c, coords_multi, + title = "4c. KDE — inclusive", + subtitle = "threshold=0.01: merged", + fill = "#27ae60", border = "#1e8449") + + plot_mask(m4d, coords_multi, + title = "4d. KDE — tight", + subtitle = "threshold=0.25: separated", + fill = "#8e44ad", border = "#6c3483")) + +plot_annotation( + title = "Section 4: Multi-Cluster", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +### Choosing `kde_threshold` + +`kde_threshold` is expressed as a quantile of the KDE density values across the grid. + +- Low values (0.01–0.05): the contour is drawn far out in the low-density tails. Clusters merge; mask is generous. +- High values (0.15–0.30): only the dense core is enclosed. Clusters separate; mask is tight. + +**Practical advice:** Start at 0.05 and increase until the mask matches your visual inspection. If you expect biologically separated regions (e.g. two lobes), a threshold that separates them is usually correct. + +--- + +# Section 5 — Concavity sweep on a crescent + +A crescent is a severe test of the concave hull: it has a large, deep concavity spanning most of the shape. This sweep lets you calibrate the `concavity` parameter on a shape where the expected correct answer is obvious. + +```{r sec5} +theta <- seq(0.2, pi - 0.2, length.out = 300) +coords_cres <- data.frame( + x = cos(theta) * (1 + rnorm(300, 0, 0.08)) * 3, + y = sin(theta) * (1 + rnorm(300, 0, 0.08)) * 3 - cos(theta) * 1.5) + +pal <- c("#e74c3c", "#e67e22", "#27ae60", "#2980b9") +sw <- mapply(function(cv, col) { + m <- fit_spatial_mask(coords_cres, method = "concave", + concavity = cv, verbose = FALSE) + plot_mask(m, coords_cres, title = paste0("concavity = ", cv), + fill = col, border = col) +}, c(1.2, 2, 3, 5), pal, SIMPLIFY = FALSE) +``` + +```{r sec5-plot, fig.height=4} +wrap_plots(sw, nrow = 1) + + plot_annotation( + title = "Section 5: Concavity Sweep — Crescent", + subtitle = "Lower = tighter | Higher = approaches convex hull", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +**What to look for:** At `concavity = 1.2`, the hull cuts tightly around the arc. At `concavity = 5`, it is nearly convex and fills the interior. The "correct" value for real tissue is the one that matches a human drawing of the tissue boundary. For a crescent-shaped airway section, `1.2–1.5` is usually appropriate. + +--- + +# Section 6 — Buffer and smoothing + +After fitting, two post-processing options are available: + +- **`buffer_dist`**: expands the mask outward by this distance in coordinate units using `sf::st_buffer()`. Useful when you want a small safety margin to ensure all cells fall inside the mask, or when points are sparse near the boundary. +- **`smooth_mask`**: applies `sf::st_simplify()` to reduce the vertex count and round jagged boundary segments. Controlled by `smooth_tolerance` (default: 1% of bounding box diagonal). + +```{r sec6} +r <- sqrt(runif(250, 0.4^2, 1^2)) +ang <- runif(250, 0, 2 * pi) +coords_ring <- data.frame(x = r * cos(ang) * 5, + y = r * sin(ang) * 5) + +m6a <- fit_spatial_mask(coords_ring, method = "concave", concavity = 2, + buffer_dist = 0, smooth_mask = FALSE, verbose = FALSE) +m6b <- fit_spatial_mask(coords_ring, method = "concave", concavity = 2, + buffer_dist = 0.3, smooth_mask = FALSE, verbose = FALSE) +m6c <- fit_spatial_mask(coords_ring, method = "concave", concavity = 2, + buffer_dist = 0.3, smooth_mask = TRUE, + smooth_tolerance = 0.15, verbose = FALSE) +``` + +```{r sec6-plot, fig.height=4} +plot_mask(m6a, coords_ring, + title = "6a. Raw — no buffer, no smooth") + +plot_mask(m6b, coords_ring, + title = "6b. Buffer = 0.3", + fill = "#27ae60", border = "#1e8449") + +plot_mask(m6c, coords_ring, + title = "6c. Buffer + smooth", + subtitle = "smooth_tolerance = 0.15", + fill = "#8e44ad", border = "#6c3483") + +plot_annotation( + title = "Section 6: Buffer and Smoothing", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +**When to use each:** + +- `buffer_dist`: use when downstream point-in-mask tests are failing for points you know should be inside (e.g. points right on the boundary). A value of 1–5% of the domain width is usually safe. +- `smooth_mask`: use when you want a visually clean boundary for publication figures, or when a jagged boundary is causing downstream geometric operations to be slow or numerically unstable. Be careful: large `smooth_tolerance` values can erase real tissue features. + +--- + +# Section 7 — Topologically complex cases: holes and islands + +The `"raster"` method is the recommended approach for masks that need to represent **holes** (vessel lumens, necrotic cores, tissue tears) or **disconnected islands**. It is also the fastest method for large point clouds (100k+ cells). + +## How the raster method works + +1. **Bin** all points onto a regular `raster_resolution × raster_resolution` grid. +2. **Convolve** the binary occupancy grid with a 2D isotropic Gaussian of width `raster_sigma` (in coordinate units). This is equivalent to placing a Gaussian "blob" at every point and summing — the value at each cell represents how much point density is nearby. +3. **Threshold** at `raster_threshold × max(field)`. Cells above threshold are "inside"; cells below are "outside". +4. **Build** one axis-aligned rectangle per inside cell, then **dissolve** all rectangles with `sf::st_union()`. GEOS handles the topology automatically — holes and islands emerge from the geometry without any ring-winding logic. +5. **Smooth** the staircase boundary with a morphological close (buffer out then in by ~half the cell diagonal). + +## Key tuning parameters + +| Parameter | Effect | +|---|---| +| `raster_sigma` ↑ | Holes fill in, islands merge, boundary smooths | +| `raster_sigma` ↓ | Fine holes and gaps preserved | +| `raster_threshold` ↑ | Mask shrinks; requires denser local coverage | +| `raster_threshold` ↓ | Mask grows; accepts sparse regions | +| `raster_resolution` ↑ | Finer grid; more detail; slower union step | + +> **Units note:** `raster_sigma` is in **coordinate units** (same as x and y). If your tissue coordinates span 0–10, a `raster_sigma` of 0.25 means each Gaussian blob has a standard deviation of 0.25 tissue units. `NULL` sets sigma automatically to 3% of the domain width. + +--- + +## 7a — Donut: single interior void + +```{r sec7a} +r_d <- sqrt(runif(800, 2^2, 5^2)) +th_d <- runif(800, 0, 2 * pi) +coords_donut <- data.frame(x = r_d * cos(th_d), + y = r_d * sin(th_d)) + +m7a_r <- fit_spatial_mask(coords_donut, method = "raster", + raster_resolution = 256L, raster_sigma = 0.25, raster_threshold = 0.2, + verbose = FALSE) +m7a_c <- fit_spatial_mask(coords_donut, method = "convex", verbose = FALSE) +``` + +```{r sec7a-plot, fig.height=4} +plot_mask(m7a_r, coords_donut, + title = "7a-i. Raster — hole detected ✓", + subtitle = "σ = 0.25 coord units, threshold = 0.2") + +plot_mask(m7a_c, coords_donut, + title = "7a-ii. Convex — hole filled ✗", + subtitle = "Convex hull cannot represent holes", + fill = "#e74c3c", border = "#922b21") + +plot_annotation( + title = "Donut: Single Interior Void", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +The raster method naturally detects the empty centre as a region of low convolved density, below the threshold. The convex hull cannot ever represent a hole — it is by definition the smallest convex polygon containing all the points. + +--- + +## 7b — Swiss cheese: multiple holes + +```{r sec7b} +.circle_mat <- function(cx, cy, r, n = 100) { + th <- seq(0, 2 * pi, length.out = n) + mat <- cbind(cx + r * cos(th), cy + r * sin(th)) + rbind(mat, mat[1L, ]) +} + +hdefs <- list( + list(cx = 2.5, cy = 7.5, r = 1.1), + list(cx = 7.0, cy = 6.5, r = 1.4), + list(cx = 4.0, cy = 2.5, r = 0.9), + list(cx = 8.0, cy = 2.0, r = 1.2) +) + +sample_in_polygon <- function(poly_sf, n, max_tries = 20) { + bb <- sf::st_bbox(poly_sf); mu <- sf::st_union(poly_sf) + pts <- data.frame(x = numeric(0), y = numeric(0)) + for (i in seq_len(max_tries)) { + if (nrow(pts) >= n) break + cnd <- data.frame(x = runif(n * 8, bb["xmin"], bb["xmax"]), + y = runif(n * 8, bb["ymin"], bb["ymax"])) + ok <- as.logical(sf::st_within( + sf::st_as_sf(cnd, coords = c("x","y")), mu, sparse = FALSE)[, 1L]) + pts <- rbind(pts, cnd[ok, ]) + } + pts[seq_len(min(n, nrow(pts))), ] +} + +outer_sq <- sf::st_sfc(sf::st_polygon(list( + rbind(c(0,0), c(10,0), c(10,10), c(0,10), c(0,0))))) +hcircles <- lapply(hdefs, function(h) + sf::st_polygon(list(.circle_mat(h$cx, h$cy, h$r)))) +swiss_region <- sf::st_difference(outer_sq, + sf::st_sfc(sf::st_union(sf::st_sfc(hcircles)))) +coords_swiss <- sample_in_polygon(swiss_region, n = 1500) + +m7b_r <- fit_spatial_mask(coords_swiss, method = "raster", + raster_resolution = 256L, raster_sigma = 0.25, raster_threshold = 0.2, + verbose = FALSE) +m7b_c <- fit_spatial_mask(coords_swiss, method = "concave", + concavity = 1.8, verbose = FALSE) +``` + +```{r sec7b-plot, fig.height=4} +plot_mask(m7b_r, coords_swiss, + title = "7b-i. Raster — all 4 holes detected ✓", + subtitle = "σ = 0.25, threshold = 0.2") + +plot_mask(m7b_c, coords_swiss, + title = "7b-ii. Concave — holes filled ✗", + subtitle = "Concave hull also cannot represent holes", + fill = "#e74c3c", border = "#922b21") + +plot_annotation( + title = "Swiss Cheese: Multiple Interior Holes", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +--- + +## 7c — Archipelago: disconnected islands + +```{r sec7c} +mk_isl <- function(n, cx, cy, rx, ry, ns = 0.15) { + th <- runif(n, 0, 2 * pi); r <- sqrt(runif(n)) + data.frame(x = cx + r * rx * cos(th) + rnorm(n, 0, ns), + y = cy + r * ry * sin(th) + rnorm(n, 0, ns)) +} +coords_arch <- rbind(mk_isl(300, 0, 0, 3, 2), + mk_isl(300, 9, 4, 2, 3), + mk_isl(300, 5, -7, 2.5, 1.5)) + +m7c_r <- fit_spatial_mask(coords_arch, method = "raster", + raster_resolution = 256L, raster_sigma = 0.4, raster_threshold = 0.18, + verbose = FALSE) +m7c_c <- fit_spatial_mask(coords_arch, method = "convex", verbose = FALSE) + +n_geom <- length(sf::st_cast(m7c_r, "POLYGON")) +``` + +```{r sec7c-plot, fig.height=4} +plot_mask(m7c_r, coords_arch, + title = sprintf("7c-i. Raster — %d separate polygons ✓", n_geom), + subtitle = "Each island detected independently") + +plot_mask(m7c_c, coords_arch, + title = "7c-ii. Convex — islands bridged ✗", + subtitle = "Convex hull merges all islands into one", + fill = "#e74c3c", border = "#922b21") + +plot_annotation( + title = "Archipelago: Disconnected Islands", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +Disconnected islands emerge automatically from the GEOS union step: if two groups of "on" cells are not touching, they become separate polygons within the same `sfc` object. The number of distinct polygons is accessible via `length(sf::st_cast(mask, "POLYGON"))`. + +--- + +## 7d — Sigma sensitivity sweep + +This is the most important tuning exercise. Run it on your own data before finalising `raster_sigma`. + +```{r sec7d} +sig_vals <- c(0.1, 0.2, 0.4, 0.7) +sig_cols <- c("#e74c3c", "#e67e22", "#27ae60", "#2980b9") + +sig_pls <- mapply(function(sv, col) { + m <- fit_spatial_mask(coords_swiss, method = "raster", + raster_resolution = 256L, raster_sigma = sv, + raster_threshold = 0.2, verbose = FALSE) + nh <- max(0L, length(sf::st_cast(m, "POLYGON")) - 1L) + plot_mask(m, coords_swiss, + title = paste0("σ = ", sv, " coord units"), + subtitle = paste0(nh, " hole(s) detected"), + fill = col, border = col, pt_size = 0.5) +}, sig_vals, sig_cols, SIMPLIFY = FALSE) +``` + +```{r sec7d-plot, fig.height=4} +wrap_plots(sig_pls, nrow = 1) + + plot_annotation( + title = "7d: raster_sigma Sensitivity — Swiss Cheese", + subtitle = "Small σ → holes preserved | Large σ → holes filled", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +**Tuning strategy:** + +1. Start with `raster_sigma = NULL` (auto = 3% of domain width) +2. Inspect the result; note whether real voids are detected +3. Decrease sigma if holes are being filled that you expect to be present +4. Increase sigma if the mask is too jagged or breaks into spurious fragments where there is actually continuous tissue +5. Adjust `raster_threshold` to expand or contract the mask after sigma is set + +--- + +## 7e — Nested crescent voids + +```{r sec7e} +mk_cres <- function(cx, cy, ro, ri, dx, dy, n = 100) { + moc <- .circle_mat(cx, cy, ro, n) + mic <- .circle_mat(cx+dx, cy+dy, ri, n) + oc <- sf::st_polygon(list(moc)) + ic <- sf::st_polygon(list(mic)) + sf::st_sfc(sf::st_difference(sf::st_sfc(oc), sf::st_sfc(ic))) +} + +th_od <- seq(0, 2 * pi, length.out = 200) +mat_od <- cbind(10 * cos(th_od), 10 * sin(th_od)) +mat_od <- rbind(mat_od, mat_od[1L, ]) +outer_d <- sf::st_sfc(sf::st_polygon(list(mat_od))) +cres1 <- mk_cres(-2, 2, 2.5, 2, 0.9, -0.2) +cres2 <- mk_cres( 3, -3, 2.2, 1.8, -0.7, 0.4) +nest_region <- sf::st_difference(outer_d, sf::st_union(c(cres1, cres2))) +coords_nested <- sample_in_polygon(nest_region, n = 2500) + +m7e_fine <- fit_spatial_mask(coords_nested, method = "raster", + raster_resolution = 256L, raster_sigma = 0.4, raster_threshold = 0.18, + verbose = FALSE) +m7e_coarse <- fit_spatial_mask(coords_nested, method = "raster", + raster_resolution = 256L, raster_sigma = 1.0, raster_threshold = 0.18, + verbose = FALSE) +``` + +```{r sec7e-plot, fig.height=4} +plot_mask(m7e_fine, coords_nested, + title = "7e-i. Fine σ = 0.4", + subtitle = "Crescent voids preserved") + +plot_mask(m7e_coarse, coords_nested, + title = "7e-ii. Coarse σ = 1.0", + subtitle = "Narrow voids filled in", + fill = "#8e44ad", border = "#6c3483") + +plot_annotation( + title = "Nested Crescent Voids: sigma controls void resolution", + subtitle = "The minimum detectable void width is approximately 2 × raster_sigma", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +> **Key principle:** The minimum resolvable void has a width of approximately `2 × raster_sigma`. Voids narrower than this will be filled in by the Gaussian blur. If you need to resolve fine voids (e.g. individual capillary lumens), you must use a small sigma — but this may also make the mask jagged elsewhere. + +--- + +## 7f — Scale test: 100,000 cells + +The raster method is `O(N²)` in the grid size but `O(n)` in the number of input points (binning is a single `tabulate()` call). This makes it practical for large datasets. + +```{r sec7f, cache=TRUE} +th100 <- seq(0, 2 * pi, length.out = 400) +r100 <- 12 + sin(5 * th100) + 0.4 * cos(11 * th100) +mat100 <- cbind(r100 * cos(th100), r100 * sin(th100)) +mat100 <- rbind(mat100, mat100[1L, ]) +outer100 <- sf::st_sfc(sf::st_polygon(list(mat100))) + +make_ellipse_poly <- function(cx, cy, a, b, angle_deg, n = 80) { + th <- seq(0, 2 * pi, length.out = n) + ang <- angle_deg * pi / 180 + xe <- a * cos(th); ye <- b * sin(th) + mat <- cbind(cx + xe * cos(ang) - ye * sin(ang), + cy + xe * sin(ang) + ye * cos(ang)) + mat <- rbind(mat, mat[1L, ]) + sf::st_polygon(list(mat)) +} + +holes100 <- sf::st_sfc(list( + make_ellipse_poly( 3, 5, 3, 1.5, 10), + make_ellipse_poly(-5, 2, 2.5, 2, -30), + make_ellipse_poly( 1, -6, 2, 3, 50) +)) +reg100 <- sf::st_difference(outer100, sf::st_union(holes100)) +coords_100k <- sample_in_polygon(reg100, n = 100000) + +t_raster <- system.time( + m_100k <- fit_spatial_mask(coords_100k, method = "raster", + raster_resolution = 256L, raster_sigma = 0.6, + raster_threshold = 0.18, verbose = TRUE) +) +``` + +```{r sec7f-plot, fig.height=5} +sub <- coords_100k[sample(nrow(coords_100k), 5000), ] +plot_mask(m_100k, sub, + title = sprintf("7f. 100k cells — raster (%.1f s)", t_raster["elapsed"]), + subtitle = "3 elliptic holes detected | 5,000 of 100,000 points shown", + pt_size = 0.3, pt_alpha = 0.2) +``` + +--- + +# Section 8 — Validation: point containment + +A properly fitted mask must contain all its input points. This table checks every mask produced in this vignette. Any FAIL indicates that the mask needs a larger `buffer_dist` or a lower `raster_threshold`. + +```{r sec8-validation} +cases <- list( + list(label = "Circle/convex", mask = m1a, coords = coords_circle), + list(label = "Circle/concave", mask = m1b, coords = coords_circle), + list(label = "Circle/kde", mask = m1c, coords = coords_circle), + list(label = "Elongated/convex", mask = m2a, coords = coords_elong), + list(label = "Elongated/concave", mask = m2b, coords = coords_elong), + list(label = "Elongated/kde", mask = m2c, coords = coords_elong), + list(label = "L-shape/convex", mask = m3a, coords = coords_L), + list(label = "L-shape/conc1.5", mask = m3b, coords = coords_L), + list(label = "L-shape/conc3", mask = m3c, coords = coords_L), + list(label = "Multi/convex", mask = m4a, coords = coords_multi), + list(label = "Multi/concave", mask = m4b, coords = coords_multi), + list(label = "Multi/kde-lo", mask = m4c, coords = coords_multi), + list(label = "Multi/kde-hi", mask = m4d, coords = coords_multi), + list(label = "Ring/raw", mask = m6a, coords = coords_ring), + list(label = "Ring/buffer", mask = m6b, coords = coords_ring), + list(label = "Ring/smooth", mask = m6c, coords = coords_ring), + list(label = "Donut/raster", mask = m7a_r, coords = coords_donut), + list(label = "Swiss/raster", mask = m7b_r, coords = coords_swiss), + list(label = "Archipelago/raster",mask = m7c_r, coords = coords_arch), + list(label = "Nested/fine", mask = m7e_fine, coords = coords_nested), + list(label = "Nested/coarse", mask = m7e_coarse, coords = coords_nested), + list(label = "100k/raster", mask = m_100k, coords = coords_100k) +) + +results <- do.call(rbind, lapply(cases, function(vc) { + pts <- sf::st_geometry(sf::st_as_sf(vc$coords, coords = c("x","y"))) + ok <- sf::st_within(pts, sf::st_union(vc$mask), sparse = FALSE)[, 1L] + n <- nrow(vc$coords) + data.frame( + Case = vc$label, + N = n, + Enclosed = sum(ok), + Pct = round(100 * sum(ok) / n, 2), + Result = ifelse(sum(ok) == n, "✓ PASS", "✗ FAIL"), + stringsAsFactors = FALSE + ) +})) + +results +``` + +--- + +# Quick-start decision guide + +``` +Your data + │ + ├─ Simple convex blob? ──────────────────────► method = "convex" + │ + ├─ Non-convex (C-shape, arc, L-shape)? + │ └─ No holes needed? ───────────────────► method = "concave" + │ concavity: start at 2, lower to tighten + │ + ├─ Holes or islands expected? + │ ├─ n < 50,000? ─────────────────────────► method = "raster" + │ └─ n > 50,000? ─────────────────────────► method = "raster" (still fine) + │ + └─ Dense, smooth, no need for holes? + └─ KDE is an alternative ───────────────► method = "kde" + kde_threshold: start at 0.05 + +For method = "raster": + raster_sigma = NULL (auto) for a first pass + raster_sigma ↓ to preserve fine voids + raster_sigma ↑ to fill gaps and smooth edges + raster_threshold = 0.15 is a reliable default +``` + +--- + +# Session information + +```{r session-info} +sessionInfo() +``` diff --git a/vignette_fit_spatial_mask.html b/vignette_fit_spatial_mask.html new file mode 100644 index 0000000..6e6e8f6 --- /dev/null +++ b/vignette_fit_spatial_mask.html @@ -0,0 +1,2799 @@ + + + + +
+ + + + + + + + + + +fit_spatial_mask():
+methods, parameters, and tuningIn spatial transcriptomics and spatial proteomics experiments, the +tissue of interest occupies some irregular region of a slide. Before +computing any spatially-resolved quantity (cell-type composition, +signaling gradients, niche structure), you need a reliable +spatial mask: a polygon or multi-polygon that +faithfully represents the shape of the tissue, including internal voids +(vessel lumens, necrotic cores, tissue tears) and disconnected +fragments.
+fit_spatial_mask() takes a set of XY point coordinates —
+typically cell centroids or transcript locations — and fits a mask
+polygon to them. It supports four methods of increasing complexity:
| Method | +Topology | +Speed | +Best for | +
|---|---|---|---|
"convex" |
+No holes, no islands | +Instant | +Quick sanity checks | +
"concave" |
+No holes, no islands | +Fast | +Simple non-convex shapes | +
"kde" |
+Holes and islands | +Moderate | +Dense, smooth datasets | +
"raster" |
+Holes and islands | +Fast at scale | +Most use cases; recommended default | +
Place fit_spatial_mask.R in your working directory, then
+source it. The packages below are required.
fit_spatial_mask()fit_spatial_mask(
+ coords,
+
+ # Method selection
+ method = "raster", # "convex" | "concave" | "kde" | "raster"
+
+ # Concave hull
+ concavity = 2, # lower = tighter; higher = approaches convex
+ length_threshold = 0,
+
+ # KDE
+ kde_bandwidth = NULL, # NULL = auto (bandwidth.nrd)
+ kde_threshold = 0.05, # quantile of density to threshold at
+ kde_resolution = 256L,
+
+ # Raster (recommended)
+ raster_resolution = 256L, # grid cells per axis
+ raster_sigma = NULL, # Gaussian spread in COORDINATE UNITS; NULL = 3% of domain
+ raster_threshold = 0.15, # fraction of peak Gaussian field to threshold at
+ raster_min_pts = 1L, # min points per cell to count as occupied
+
+ # Post-processing
+ buffer_dist = 0, # expand mask outward by this distance
+ smooth_mask = FALSE, # simplify boundary (st_simplify)
+ smooth_tolerance = NULL, # NULL = 1% of bounding box diagonal
+
+ # Misc
+ n_cores = 1L,
+ crs = NA,
+ plot = FALSE,
+ verbose = TRUE
+)fit_spatial_mask() returns an sfc geometry
+object (the sf package’s geometry column class). You
+can:
estimate_concentration_field() as
+the mask argumentgeom_sf()sf::st_area()sf::st_within()as.data.frame(sf::st_coordinates(mask))The simplest possible test: a Gaussian cloud with no special +topology. All three non-trivial methods should return similar results +here. This section establishes that the function runs and that the three +main methods agree on simple inputs.
+ +m1a <- fit_spatial_mask(coords_circle, method = "convex", verbose = FALSE)
+m1b <- fit_spatial_mask(coords_circle, method = "concave", concavity = 2,
+ verbose = FALSE)
+m1c <- fit_spatial_mask(coords_circle, method = "kde",
+ kde_threshold = 0.02, buffer_dist = 0.1,
+ verbose = FALSE)plot_mask(m1a, coords_circle, title = "1a. Convex hull",
+ subtitle = "method='convex'") +
+plot_mask(m1b, coords_circle, title = "1b. Concave hull",
+ subtitle = "concavity=2") +
+plot_mask(m1c, coords_circle, title = "1c. KDE contour",
+ subtitle = "kde_threshold=0.02, buffer=0.1",
+ fill = "#27ae60", border = "#1e8449") +
+plot_annotation(
+ title = "Section 1: Circular Cloud — Sanity Check",
+ theme = theme(plot.title = element_text(face = "bold", size = 13)))What to look for: All three masks should closely +envelope the point cloud. Small differences near the boundary are +expected — convex gives a polygon, KDE gives a smoother contour. Any +large discrepancy here indicates a parameter problem.
+Elongated tissue (e.g. a muscle section, a linear airway epithelium) +has very different extents along its two axes. All methods should handle +this naturally; this section confirms that no method is implicitly +assuming isotropy.
+coords_elong <- data.frame(x = rnorm(n, 0, 3),
+ y = rnorm(n, 0, 0.6))
+
+m2a <- fit_spatial_mask(coords_elong, method = "convex", verbose = FALSE)
+m2b <- fit_spatial_mask(coords_elong, method = "concave", verbose = FALSE)
+m2c <- fit_spatial_mask(coords_elong, method = "kde",
+ kde_threshold = 0.02, buffer_dist = 0.1,
+ verbose = FALSE)plot_mask(m2a, coords_elong, title = "2a. Convex") +
+plot_mask(m2b, coords_elong, title = "2b. Concave") +
+plot_mask(m2c, coords_elong, title = "2c. KDE",
+ fill = "#27ae60", border = "#1e8449") +
+plot_annotation(
+ title = "Section 2: Elongated Cloud",
+ theme = theme(plot.title = element_text(face = "bold", size = 13)))What to look for: The mask should track the long
+axis of the cloud, not default to a circle. The KDE bandwidth is
+computed separately for x and y (MASS::bandwidth.nrd), so
+it adapts to anisotropy automatically.
This is the most important basic test. The L-shape is the canonical +non-convex case: the convex hull incorrectly fills the concave corner of +the L, including empty space that contains no tissue. A good mask must +track the actual boundary and exclude the corner.
+++Rule of thumb: If your tissue has any re-entrant +angles (C-shapes, crescents, airways, arcs), do not use +
+method = "convex". Usemethod = "concave"or +method = "raster".
coords_L <- rbind(
+ data.frame(x = runif(200, 0, 5) + rnorm(200, 0, 0.05),
+ y = runif(200, 0, 1) + rnorm(200, 0, 0.05)), # horizontal bar
+ data.frame(x = runif(200, 0, 1) + rnorm(200, 0, 0.05),
+ y = runif(200, 0, 5) + rnorm(200, 0, 0.05))) # vertical barm3a <- fit_spatial_mask(coords_L, method = "convex", verbose = FALSE)
+m3b <- fit_spatial_mask(coords_L, method = "concave", concavity = 1.5, verbose = FALSE)
+m3c <- fit_spatial_mask(coords_L, method = "concave", concavity = 3.0, verbose = FALSE)plot_mask(m3a, coords_L,
+ title = "3a. Convex hull",
+ subtitle = "Incorrectly includes empty corner",
+ fill = "#e74c3c", border = "#922b21") +
+plot_mask(m3b, coords_L,
+ title = "3b. Concave, tight (1.5)",
+ subtitle = "Correctly excludes corner") +
+plot_mask(m3c, coords_L,
+ title = "3c. Concave, relaxed (3.0)",
+ subtitle = "Partially includes corner",
+ fill = "#8e44ad", border = "#6c3483") +
+plot_annotation(
+ title = "Section 3: L-Shaped Cloud — Non-Convex",
+ subtitle = "Lower concavity wraps more tightly; convex hull fails",
+ theme = theme(plot.title = element_text(face = "bold", size = 13),
+ plot.subtitle = element_text(size = 9, color = "grey40")))concavity parameterconcavity controls how aggressively the hull “cuts in”
+to follow the data boundary. It is the key tuning parameter for
+method = "concave".
| Value | +Behaviour | +
|---|---|
| 1.0–1.5 | +Very tight — tracks the data boundary closely | +
| 2.0 | +Default — good balance for most tissue shapes | +
| 3.0–5.0 | +Relaxed — approaches the convex hull | +
When to use lower values: C-shapes, crescents,
+U-shapes, any tissue with a notch or bay. When to use higher
+values: Dense blobs where you want a smooth, padded boundary.
+If you see the mask cutting through the interior of your tissue,
+increase concavity. If it fills in empty space at
+concavities, decrease it.
When tissue fragments are well-separated, a low
+kde_threshold will produce a single mask that bridges them
+(because the KDE density never drops to zero between clusters at low
+thresholds). A higher threshold separates them into distinct
+polygons.
The raster method’s raster_sigma plays the analogous
+role: a large sigma bridges clusters; a small sigma isolates them.
mk_cluster <- function(n, cx, cy, sd = 0.4)
+ data.frame(x = rnorm(n, cx, sd), y = rnorm(n, cy, sd))
+
+coords_multi <- rbind(mk_cluster(150, 0, 0),
+ mk_cluster(150, 4, 1),
+ mk_cluster(150, 2, 4))
+
+m4a <- fit_spatial_mask(coords_multi, method = "convex", verbose = FALSE)
+m4b <- fit_spatial_mask(coords_multi, method = "concave", concavity = 2,
+ verbose = FALSE)
+m4c <- fit_spatial_mask(coords_multi, method = "kde",
+ kde_threshold = 0.01, buffer_dist = 0.15,
+ verbose = FALSE)
+m4d <- fit_spatial_mask(coords_multi, method = "kde",
+ kde_threshold = 0.25, buffer_dist = 0.15,
+ verbose = FALSE)(plot_mask(m4a, coords_multi, title = "4a. Convex") +
+ plot_mask(m4b, coords_multi, title = "4b. Concave")) /
+(plot_mask(m4c, coords_multi,
+ title = "4c. KDE — inclusive",
+ subtitle = "threshold=0.01: merged",
+ fill = "#27ae60", border = "#1e8449") +
+ plot_mask(m4d, coords_multi,
+ title = "4d. KDE — tight",
+ subtitle = "threshold=0.25: separated",
+ fill = "#8e44ad", border = "#6c3483")) +
+plot_annotation(
+ title = "Section 4: Multi-Cluster",
+ theme = theme(plot.title = element_text(face = "bold", size = 13)))kde_thresholdkde_threshold is expressed as a quantile of the KDE
+density values across the grid.
Practical advice: Start at 0.05 and increase until +the mask matches your visual inspection. If you expect biologically +separated regions (e.g. two lobes), a threshold that separates them is +usually correct.
+A crescent is a severe test of the concave hull: it has a large, deep
+concavity spanning most of the shape. This sweep lets you calibrate the
+concavity parameter on a shape where the expected correct
+answer is obvious.
theta <- seq(0.2, pi - 0.2, length.out = 300)
+coords_cres <- data.frame(
+ x = cos(theta) * (1 + rnorm(300, 0, 0.08)) * 3,
+ y = sin(theta) * (1 + rnorm(300, 0, 0.08)) * 3 - cos(theta) * 1.5)
+
+pal <- c("#e74c3c", "#e67e22", "#27ae60", "#2980b9")
+sw <- mapply(function(cv, col) {
+ m <- fit_spatial_mask(coords_cres, method = "concave",
+ concavity = cv, verbose = FALSE)
+ plot_mask(m, coords_cres, title = paste0("concavity = ", cv),
+ fill = col, border = col)
+}, c(1.2, 2, 3, 5), pal, SIMPLIFY = FALSE)wrap_plots(sw, nrow = 1) +
+ plot_annotation(
+ title = "Section 5: Concavity Sweep — Crescent",
+ subtitle = "Lower = tighter | Higher = approaches convex hull",
+ theme = theme(plot.title = element_text(face = "bold", size = 13),
+ plot.subtitle = element_text(size = 9, color = "grey40")))What to look for: At concavity = 1.2,
+the hull cuts tightly around the arc. At concavity = 5, it
+is nearly convex and fills the interior. The “correct” value for real
+tissue is the one that matches a human drawing of the tissue boundary.
+For a crescent-shaped airway section, 1.2–1.5 is usually
+appropriate.
After fitting, two post-processing options are available:
+buffer_dist: expands the mask outward
+by this distance in coordinate units using sf::st_buffer().
+Useful when you want a small safety margin to ensure all cells fall
+inside the mask, or when points are sparse near the boundary.smooth_mask: applies
+sf::st_simplify() to reduce the vertex count and round
+jagged boundary segments. Controlled by smooth_tolerance
+(default: 1% of bounding box diagonal).r <- sqrt(runif(250, 0.4^2, 1^2))
+ang <- runif(250, 0, 2 * pi)
+coords_ring <- data.frame(x = r * cos(ang) * 5,
+ y = r * sin(ang) * 5)
+
+m6a <- fit_spatial_mask(coords_ring, method = "concave", concavity = 2,
+ buffer_dist = 0, smooth_mask = FALSE, verbose = FALSE)
+m6b <- fit_spatial_mask(coords_ring, method = "concave", concavity = 2,
+ buffer_dist = 0.3, smooth_mask = FALSE, verbose = FALSE)
+m6c <- fit_spatial_mask(coords_ring, method = "concave", concavity = 2,
+ buffer_dist = 0.3, smooth_mask = TRUE,
+ smooth_tolerance = 0.15, verbose = FALSE)plot_mask(m6a, coords_ring,
+ title = "6a. Raw — no buffer, no smooth") +
+plot_mask(m6b, coords_ring,
+ title = "6b. Buffer = 0.3",
+ fill = "#27ae60", border = "#1e8449") +
+plot_mask(m6c, coords_ring,
+ title = "6c. Buffer + smooth",
+ subtitle = "smooth_tolerance = 0.15",
+ fill = "#8e44ad", border = "#6c3483") +
+plot_annotation(
+ title = "Section 6: Buffer and Smoothing",
+ theme = theme(plot.title = element_text(face = "bold", size = 13)))When to use each:
+buffer_dist: use when downstream point-in-mask tests
+are failing for points you know should be inside (e.g. points right on
+the boundary). A value of 1–5% of the domain width is usually safe.smooth_mask: use when you want a visually clean
+boundary for publication figures, or when a jagged boundary is causing
+downstream geometric operations to be slow or numerically unstable. Be
+careful: large smooth_tolerance values can erase real
+tissue features.The "raster" method is the recommended approach for
+masks that need to represent holes (vessel lumens,
+necrotic cores, tissue tears) or disconnected islands.
+It is also the fastest method for large point clouds (100k+ cells).
raster_resolution × raster_resolution grid.raster_sigma (in coordinate
+units). This is equivalent to placing a Gaussian “blob” at every point
+and summing — the value at each cell represents how much point density
+is nearby.raster_threshold × max(field). Cells above threshold are
+“inside”; cells below are “outside”.sf::st_union(). GEOS handles the topology automatically —
+holes and islands emerge from the geometry without any ring-winding
+logic.| Parameter | +Effect | +
|---|---|
raster_sigma ↑ |
+Holes fill in, islands merge, boundary smooths | +
raster_sigma ↓ |
+Fine holes and gaps preserved | +
raster_threshold ↑ |
+Mask shrinks; requires denser local coverage | +
raster_threshold ↓ |
+Mask grows; accepts sparse regions | +
raster_resolution ↑ |
+Finer grid; more detail; slower union step | +
++Units note:
+raster_sigmais in +coordinate units (same as x and y). If your tissue +coordinates span 0–10, araster_sigmaof 0.25 means each +Gaussian blob has a standard deviation of 0.25 tissue units. +NULLsets sigma automatically to 3% of the domain +width.
r_d <- sqrt(runif(800, 2^2, 5^2))
+th_d <- runif(800, 0, 2 * pi)
+coords_donut <- data.frame(x = r_d * cos(th_d),
+ y = r_d * sin(th_d))
+
+m7a_r <- fit_spatial_mask(coords_donut, method = "raster",
+ raster_resolution = 256L, raster_sigma = 0.25, raster_threshold = 0.2,
+ verbose = FALSE)
+m7a_c <- fit_spatial_mask(coords_donut, method = "convex", verbose = FALSE)plot_mask(m7a_r, coords_donut,
+ title = "7a-i. Raster — hole detected ✓",
+ subtitle = "σ = 0.25 coord units, threshold = 0.2") +
+plot_mask(m7a_c, coords_donut,
+ title = "7a-ii. Convex — hole filled ✗",
+ subtitle = "Convex hull cannot represent holes",
+ fill = "#e74c3c", border = "#922b21") +
+plot_annotation(
+ title = "Donut: Single Interior Void",
+ theme = theme(plot.title = element_text(face = "bold", size = 13)))The raster method naturally detects the empty centre as a region of +low convolved density, below the threshold. The convex hull cannot ever +represent a hole — it is by definition the smallest convex polygon +containing all the points.
+.circle_mat <- function(cx, cy, r, n = 100) {
+ th <- seq(0, 2 * pi, length.out = n)
+ mat <- cbind(cx + r * cos(th), cy + r * sin(th))
+ rbind(mat, mat[1L, ])
+}
+
+hdefs <- list(
+ list(cx = 2.5, cy = 7.5, r = 1.1),
+ list(cx = 7.0, cy = 6.5, r = 1.4),
+ list(cx = 4.0, cy = 2.5, r = 0.9),
+ list(cx = 8.0, cy = 2.0, r = 1.2)
+)
+
+sample_in_polygon <- function(poly_sf, n, max_tries = 20) {
+ bb <- sf::st_bbox(poly_sf); mu <- sf::st_union(poly_sf)
+ pts <- data.frame(x = numeric(0), y = numeric(0))
+ for (i in seq_len(max_tries)) {
+ if (nrow(pts) >= n) break
+ cnd <- data.frame(x = runif(n * 8, bb["xmin"], bb["xmax"]),
+ y = runif(n * 8, bb["ymin"], bb["ymax"]))
+ ok <- as.logical(sf::st_within(
+ sf::st_as_sf(cnd, coords = c("x","y")), mu, sparse = FALSE)[, 1L])
+ pts <- rbind(pts, cnd[ok, ])
+ }
+ pts[seq_len(min(n, nrow(pts))), ]
+}
+
+outer_sq <- sf::st_sfc(sf::st_polygon(list(
+ rbind(c(0,0), c(10,0), c(10,10), c(0,10), c(0,0)))))
+hcircles <- lapply(hdefs, function(h)
+ sf::st_polygon(list(.circle_mat(h$cx, h$cy, h$r))))
+swiss_region <- sf::st_difference(outer_sq,
+ sf::st_sfc(sf::st_union(sf::st_sfc(hcircles))))
+coords_swiss <- sample_in_polygon(swiss_region, n = 1500)
+
+m7b_r <- fit_spatial_mask(coords_swiss, method = "raster",
+ raster_resolution = 256L, raster_sigma = 0.25, raster_threshold = 0.2,
+ verbose = FALSE)
+m7b_c <- fit_spatial_mask(coords_swiss, method = "concave",
+ concavity = 1.8, verbose = FALSE)plot_mask(m7b_r, coords_swiss,
+ title = "7b-i. Raster — all 4 holes detected ✓",
+ subtitle = "σ = 0.25, threshold = 0.2") +
+plot_mask(m7b_c, coords_swiss,
+ title = "7b-ii. Concave — holes filled ✗",
+ subtitle = "Concave hull also cannot represent holes",
+ fill = "#e74c3c", border = "#922b21") +
+plot_annotation(
+ title = "Swiss Cheese: Multiple Interior Holes",
+ theme = theme(plot.title = element_text(face = "bold", size = 13)))mk_isl <- function(n, cx, cy, rx, ry, ns = 0.15) {
+ th <- runif(n, 0, 2 * pi); r <- sqrt(runif(n))
+ data.frame(x = cx + r * rx * cos(th) + rnorm(n, 0, ns),
+ y = cy + r * ry * sin(th) + rnorm(n, 0, ns))
+}
+coords_arch <- rbind(mk_isl(300, 0, 0, 3, 2),
+ mk_isl(300, 9, 4, 2, 3),
+ mk_isl(300, 5, -7, 2.5, 1.5))
+
+m7c_r <- fit_spatial_mask(coords_arch, method = "raster",
+ raster_resolution = 256L, raster_sigma = 0.4, raster_threshold = 0.18,
+ verbose = FALSE)
+m7c_c <- fit_spatial_mask(coords_arch, method = "convex", verbose = FALSE)
+
+n_geom <- length(sf::st_cast(m7c_r, "POLYGON"))plot_mask(m7c_r, coords_arch,
+ title = sprintf("7c-i. Raster — %d separate polygons ✓", n_geom),
+ subtitle = "Each island detected independently") +
+plot_mask(m7c_c, coords_arch,
+ title = "7c-ii. Convex — islands bridged ✗",
+ subtitle = "Convex hull merges all islands into one",
+ fill = "#e74c3c", border = "#922b21") +
+plot_annotation(
+ title = "Archipelago: Disconnected Islands",
+ theme = theme(plot.title = element_text(face = "bold", size = 13)))Disconnected islands emerge automatically from the GEOS union step:
+if two groups of “on” cells are not touching, they become separate
+polygons within the same sfc object. The number of distinct
+polygons is accessible via
+length(sf::st_cast(mask, "POLYGON")).
This is the most important tuning exercise. Run it on your own data
+before finalising raster_sigma.
sig_vals <- c(0.1, 0.2, 0.4, 0.7)
+sig_cols <- c("#e74c3c", "#e67e22", "#27ae60", "#2980b9")
+
+sig_pls <- mapply(function(sv, col) {
+ m <- fit_spatial_mask(coords_swiss, method = "raster",
+ raster_resolution = 256L, raster_sigma = sv,
+ raster_threshold = 0.2, verbose = FALSE)
+ nh <- max(0L, length(sf::st_cast(m, "POLYGON")) - 1L)
+ plot_mask(m, coords_swiss,
+ title = paste0("σ = ", sv, " coord units"),
+ subtitle = paste0(nh, " hole(s) detected"),
+ fill = col, border = col, pt_size = 0.5)
+}, sig_vals, sig_cols, SIMPLIFY = FALSE)wrap_plots(sig_pls, nrow = 1) +
+ plot_annotation(
+ title = "7d: raster_sigma Sensitivity — Swiss Cheese",
+ subtitle = "Small σ → holes preserved | Large σ → holes filled",
+ theme = theme(plot.title = element_text(face = "bold", size = 13),
+ plot.subtitle = element_text(size = 9, color = "grey40")))Tuning strategy:
+raster_sigma = NULL (auto = 3% of domain
+width)raster_threshold to expand or contract the mask
+after sigma is setmk_cres <- function(cx, cy, ro, ri, dx, dy, n = 100) {
+ moc <- .circle_mat(cx, cy, ro, n)
+ mic <- .circle_mat(cx+dx, cy+dy, ri, n)
+ oc <- sf::st_polygon(list(moc))
+ ic <- sf::st_polygon(list(mic))
+ sf::st_sfc(sf::st_difference(sf::st_sfc(oc), sf::st_sfc(ic)))
+}
+
+th_od <- seq(0, 2 * pi, length.out = 200)
+mat_od <- cbind(10 * cos(th_od), 10 * sin(th_od))
+mat_od <- rbind(mat_od, mat_od[1L, ])
+outer_d <- sf::st_sfc(sf::st_polygon(list(mat_od)))
+cres1 <- mk_cres(-2, 2, 2.5, 2, 0.9, -0.2)
+cres2 <- mk_cres( 3, -3, 2.2, 1.8, -0.7, 0.4)
+nest_region <- sf::st_difference(outer_d, sf::st_union(c(cres1, cres2)))
+coords_nested <- sample_in_polygon(nest_region, n = 2500)
+
+m7e_fine <- fit_spatial_mask(coords_nested, method = "raster",
+ raster_resolution = 256L, raster_sigma = 0.4, raster_threshold = 0.18,
+ verbose = FALSE)
+m7e_coarse <- fit_spatial_mask(coords_nested, method = "raster",
+ raster_resolution = 256L, raster_sigma = 1.0, raster_threshold = 0.18,
+ verbose = FALSE)plot_mask(m7e_fine, coords_nested,
+ title = "7e-i. Fine σ = 0.4",
+ subtitle = "Crescent voids preserved") +
+plot_mask(m7e_coarse, coords_nested,
+ title = "7e-ii. Coarse σ = 1.0",
+ subtitle = "Narrow voids filled in",
+ fill = "#8e44ad", border = "#6c3483") +
+plot_annotation(
+ title = "Nested Crescent Voids: sigma controls void resolution",
+ subtitle = "The minimum detectable void width is approximately 2 × raster_sigma",
+ theme = theme(plot.title = element_text(face = "bold", size = 13),
+ plot.subtitle = element_text(size = 9, color = "grey40")))++Key principle: The minimum resolvable void has a +width of approximately
+2 × raster_sigma. Voids narrower +than this will be filled in by the Gaussian blur. If you need to resolve +fine voids (e.g. individual capillary lumens), you must use a small +sigma — but this may also make the mask jagged elsewhere.
The raster method is O(N²) in the grid size but
+O(n) in the number of input points (binning is a single
+tabulate() call). This makes it practical for large
+datasets.
th100 <- seq(0, 2 * pi, length.out = 400)
+r100 <- 12 + sin(5 * th100) + 0.4 * cos(11 * th100)
+mat100 <- cbind(r100 * cos(th100), r100 * sin(th100))
+mat100 <- rbind(mat100, mat100[1L, ])
+outer100 <- sf::st_sfc(sf::st_polygon(list(mat100)))
+
+make_ellipse_poly <- function(cx, cy, a, b, angle_deg, n = 80) {
+ th <- seq(0, 2 * pi, length.out = n)
+ ang <- angle_deg * pi / 180
+ xe <- a * cos(th); ye <- b * sin(th)
+ mat <- cbind(cx + xe * cos(ang) - ye * sin(ang),
+ cy + xe * sin(ang) + ye * cos(ang))
+ mat <- rbind(mat, mat[1L, ])
+ sf::st_polygon(list(mat))
+}
+
+holes100 <- sf::st_sfc(list(
+ make_ellipse_poly( 3, 5, 3, 1.5, 10),
+ make_ellipse_poly(-5, 2, 2.5, 2, -30),
+ make_ellipse_poly( 1, -6, 2, 3, 50)
+))
+reg100 <- sf::st_difference(outer100, sf::st_union(holes100))
+coords_100k <- sample_in_polygon(reg100, n = 100000)
+
+t_raster <- system.time(
+ m_100k <- fit_spatial_mask(coords_100k, method = "raster",
+ raster_resolution = 256L, raster_sigma = 0.6,
+ raster_threshold = 0.18, verbose = TRUE)
+)## --------------------------------------------------
+## Spatial mask fitted (v3)
+## Method : raster
+## Points : 100000
+## Sub-geometries : 1
+## Bounding box : x [ -12.93 , 12.629 ] y [ -11.827 , 13.227 ]
+## Area : 523.5807
+## Buffer applied : 0
+## --------------------------------------------------
+sub <- coords_100k[sample(nrow(coords_100k), 5000), ]
+plot_mask(m_100k, sub,
+ title = sprintf("7f. 100k cells — raster (%.1f s)", t_raster["elapsed"]),
+ subtitle = "3 elliptic holes detected | 5,000 of 100,000 points shown",
+ pt_size = 0.3, pt_alpha = 0.2)A properly fitted mask must contain all its input points. This table
+checks every mask produced in this vignette. Any FAIL indicates that the
+mask needs a larger buffer_dist or a lower
+raster_threshold.
cases <- list(
+ list(label = "Circle/convex", mask = m1a, coords = coords_circle),
+ list(label = "Circle/concave", mask = m1b, coords = coords_circle),
+ list(label = "Circle/kde", mask = m1c, coords = coords_circle),
+ list(label = "Elongated/convex", mask = m2a, coords = coords_elong),
+ list(label = "Elongated/concave", mask = m2b, coords = coords_elong),
+ list(label = "Elongated/kde", mask = m2c, coords = coords_elong),
+ list(label = "L-shape/convex", mask = m3a, coords = coords_L),
+ list(label = "L-shape/conc1.5", mask = m3b, coords = coords_L),
+ list(label = "L-shape/conc3", mask = m3c, coords = coords_L),
+ list(label = "Multi/convex", mask = m4a, coords = coords_multi),
+ list(label = "Multi/concave", mask = m4b, coords = coords_multi),
+ list(label = "Multi/kde-lo", mask = m4c, coords = coords_multi),
+ list(label = "Multi/kde-hi", mask = m4d, coords = coords_multi),
+ list(label = "Ring/raw", mask = m6a, coords = coords_ring),
+ list(label = "Ring/buffer", mask = m6b, coords = coords_ring),
+ list(label = "Ring/smooth", mask = m6c, coords = coords_ring),
+ list(label = "Donut/raster", mask = m7a_r, coords = coords_donut),
+ list(label = "Swiss/raster", mask = m7b_r, coords = coords_swiss),
+ list(label = "Archipelago/raster",mask = m7c_r, coords = coords_arch),
+ list(label = "Nested/fine", mask = m7e_fine, coords = coords_nested),
+ list(label = "Nested/coarse", mask = m7e_coarse, coords = coords_nested),
+ list(label = "100k/raster", mask = m_100k, coords = coords_100k)
+)
+
+results <- do.call(rbind, lapply(cases, function(vc) {
+ pts <- sf::st_geometry(sf::st_as_sf(vc$coords, coords = c("x","y")))
+ ok <- sf::st_within(pts, sf::st_union(vc$mask), sparse = FALSE)[, 1L]
+ n <- nrow(vc$coords)
+ data.frame(
+ Case = vc$label,
+ N = n,
+ Enclosed = sum(ok),
+ Pct = round(100 * sum(ok) / n, 2),
+ Result = ifelse(sum(ok) == n, "✓ PASS", "✗ FAIL"),
+ stringsAsFactors = FALSE
+ )
+}))
+
+results| Case | +N | +Enclosed | +Pct | +Result | +
|---|---|---|---|---|
| Circle/convex | +400 | +392 | +98.00 | +✗ FAIL | +
| Circle/concave | +400 | +400 | +100.00 | +✓ PASS | +
| Circle/kde | +400 | +400 | +100.00 | +✓ PASS | +
| Elongated/convex | +400 | +390 | +97.50 | +✗ FAIL | +
| Elongated/concave | +400 | +400 | +100.00 | +✓ PASS | +
| Elongated/kde | +400 | +400 | +100.00 | +✓ PASS | +
| L-shape/convex | +400 | +387 | +96.75 | +✗ FAIL | +
| L-shape/conc1.5 | +400 | +400 | +100.00 | +✓ PASS | +
| L-shape/conc3 | +400 | +400 | +100.00 | +✓ PASS | +
| Multi/convex | +450 | +439 | +97.56 | +✗ FAIL | +
| Multi/concave | +450 | +450 | +100.00 | +✓ PASS | +
| Multi/kde-lo | +450 | +450 | +100.00 | +✓ PASS | +
| Multi/kde-hi | +450 | +450 | +100.00 | +✓ PASS | +
| Ring/raw | +250 | +250 | +100.00 | +✓ PASS | +
| Ring/buffer | +250 | +250 | +100.00 | +✓ PASS | +
| Ring/smooth | +250 | +250 | +100.00 | +✓ PASS | +
| Donut/raster | +800 | +800 | +100.00 | +✓ PASS | +
| Swiss/raster | +1500 | +1500 | +100.00 | +✓ PASS | +
| Archipelago/raster | +900 | +900 | +100.00 | +✓ PASS | +
| Nested/fine | +2500 | +2500 | +100.00 | +✓ PASS | +
| Nested/coarse | +2500 | +2500 | +100.00 | +✓ PASS | +
| 100k/raster | +100000 | +100000 | +100.00 | +✓ PASS | +
Your data
+ │
+ ├─ Simple convex blob? ──────────────────────► method = "convex"
+ │
+ ├─ Non-convex (C-shape, arc, L-shape)?
+ │ └─ No holes needed? ───────────────────► method = "concave"
+ │ concavity: start at 2, lower to tighten
+ │
+ ├─ Holes or islands expected?
+ │ ├─ n < 50,000? ─────────────────────────► method = "raster"
+ │ └─ n > 50,000? ─────────────────────────► method = "raster" (still fine)
+ │
+ └─ Dense, smooth, no need for holes?
+ └─ KDE is an alternative ───────────────► method = "kde"
+ kde_threshold: start at 0.05
+
+For method = "raster":
+ raster_sigma = NULL (auto) for a first pass
+ raster_sigma ↓ to preserve fine voids
+ raster_sigma ↑ to fill gaps and smooth edges
+ raster_threshold = 0.15 is a reliable default
+## R version 4.5.2 (2025-10-31)
+## Platform: aarch64-apple-darwin20
+## Running under: macOS Sonoma 14.6.1
+##
+## Matrix products: default
+## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
+## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
+##
+## locale:
+## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+##
+## time zone: America/New_York
+## tzcode source: internal
+##
+## attached base packages:
+## [1] stats graphics grDevices utils datasets methods base
+##
+## other attached packages:
+## [1] patchwork_1.3.2 ggplot2_4.0.1 sf_1.0-21
+##
+## loaded via a namespace (and not attached):
+## [1] sass_0.4.10 generics_0.1.4 class_7.3-23 KernSmooth_2.23-26
+## [5] digest_0.6.39 magrittr_2.0.4 evaluate_1.0.5 grid_4.5.2
+## [9] RColorBrewer_1.1-3 fastmap_1.2.0 jsonlite_2.0.0 e1071_1.7-16
+## [13] DBI_1.2.3 scales_1.4.0 codetools_0.2-20 isoband_0.3.0
+## [17] jquerylib_0.1.4 cli_3.6.5 rlang_1.1.7 units_0.8-7
+## [21] withr_3.0.2 cachem_1.1.0 yaml_2.3.12 otel_0.2.0
+## [25] tools_4.5.2 concaveman_1.2.0 dplyr_1.1.4 curl_7.0.0
+## [29] vctrs_0.7.1 R6_2.6.1 proxy_0.4-27 lifecycle_1.0.5
+## [33] classInt_0.4-11 V8_8.0.1 MASS_7.3-65 pkgconfig_2.0.3
+## [37] pillar_1.11.1 bslib_0.10.0 gtable_0.3.6 glue_1.8.0
+## [41] Rcpp_1.1.1 xfun_0.56 tibble_3.3.1 tidyselect_1.2.1
+## [45] rstudioapi_0.17.1 knitr_1.51 dichromat_2.0-0.1 farver_2.1.2
+## [49] htmltools_0.5.9 rmarkdown_2.30 compiler_4.5.2 S7_0.2.1
+7)ldj|L1Rmce8@ORt+b#YW%$b(aY!q<(S!{
z|2~GumpMN}cOm8;{Bfg$+y9Kw-cP%qT>RH9BxAo*#GPG=(&~kMm^KU%U@ZG;@JG}L
zW+#~xdx0QN*`8|U?^B*k{toc5oq-_iR_#BLn6Q=h@WjDmzh3)KchUa@BUZFUMg4PG
z|2DJ#*GsNQEQaJb|F{6*|2Q%KG8f!7BL|ZlWGLPB_euKO=l}jPRtnf_lx*$U|I>*7
z?I(k%fz!>_5=!!~+55N8|LrHADP0F<5=Lv5{}@*PHix#r5yl(L%={l>=ih(Fj1de{
z=6lcgKX04=$Gcp?V9{HPVaET}AZ2L4AhWBg@BOFM`@2H%t`*ZsJgF%AvzP1dmtw*v
z8#72kUg8h`*CPLYE`E>o|8HXmiDGrkp#97KK=4GCfD@sudC_Lc?wb+cq$Hfp?m91urVcu8^^<-;1
zMfLGp#Fszh28;!L5ga@^tTK_X_0xbl(+s~tc=w$Pz$*K=Dp`GYa3dX*rBptR-;v63
zij5wS`<)K<)4fHsl;CKmh;rnF&J`jr^1PS9-)wN%FjZLsesZ`Ke)p-#z|^glOslat
zS8G5(bf1(GVQq14DPu5};y&1K-u4$pR;LAgsLSiMb9-{9fi|?9>#e-1=3#=p2V>yH
zQa(eE!bH4DPWP*SSS!I$uBYIYgk^^pH~C&~2(;FUcoABO#!%4fp#>)zd#;=bi+J9fFM{MfE;)F|-y8+~mz_g?
z1!;)x?wzF6 -<81
ztN@o5*Uy1P2^O{f2fnaqFa=#h*OY`MBY(hamKv7tMx_@$a64H!ewc{JZoBWy`=^sR^nzgcuxwI!K(<7wB#P8*yU!Y=YeXeVLiL
z!K2OHfB;XszfC-ux3y5AiujJ08t{BPb`bR5O7H7Db