diff --git a/spatial-modeling-with-claude/application-to-xenium-msbr-2026-04-03.R b/spatial-modeling-with-claude/application-to-xenium-msbr-2026-04-03.R new file mode 100644 index 0000000..c6b6ed5 --- /dev/null +++ b/spatial-modeling-with-claude/application-to-xenium-msbr-2026-04-03.R @@ -0,0 +1,200 @@ +# ---- set wd ---- +setwd("~/Desktop/spatial-modeling-with-claude") + +# ---- set up ---- +require(Seurat) +require(SeuratObject) +library(sf) +library(ggplot2) +library(patchwork) +source("fit_spatial_mask.R") +source("estimate_concentration_field.R") +set.seed(2024) + +# ---- shared theme ---- +theme_demo <- function(bs=9.5) + theme_minimal(base_size=bs) + + theme(plot.title=element_text(face="bold",size=bs+0.5), + plot.subtitle=element_text(size=bs-1,color="grey40",margin=margin(b=4)), + panel.grid=element_line(color="grey94"), + legend.key.width=unit(0.35,"cm"), + legend.title=element_text(size=bs-1), + legend.text=element_text(size=bs-1.5), + axis.text=element_text(size=bs-2)) + +plot_field <- function(result, transcripts=NULL, title="", subtitle="", + palette="magma", log_scale=FALSE, + show_pts=TRUE, show_contours=TRUE, n_contours=6L, + pt_size=0.35, pt_alpha=0.45, pt_color="#00e5ff", + fill_label="Conc.", symmetric=FALSE) { + df <- field_to_df(result, inside_only=TRUE) + fill_col <- if (log_scale) { df$fv <- log1p(df$field); "fv" } else "field" + fill_lbl <- if (log_scale) paste0("log\u2081\u208a(",fill_label,")") else fill_label + p <- ggplot(df, aes(x=x,y=y,fill=.data[[fill_col]])) + + geom_raster(interpolate=TRUE) + coord_equal() + + labs(title=title, subtitle=subtitle, x="X", y="Y") + theme_demo() + if (symmetric) { + lim <- max(abs(df[[fill_col]]),na.rm=TRUE) + p <- p + scale_fill_distiller(palette="RdBu",limits=c(-lim,lim), + name=fill_lbl,na.value="transparent") + } else { + p <- p + scale_fill_viridis_c(option=palette,name=fill_lbl,na.value="transparent") + } + if (show_contours && any(!is.na(df$field))) + p <- p + geom_contour(aes(z=field),color="white", + alpha=0.35,linewidth=0.25,bins=n_contours) + if (show_pts && !is.null(transcripts)) + p <- p + geom_point(data=transcripts,aes(x=x,y=y),inherit.aes=FALSE, + color=pt_color,size=pt_size,alpha=pt_alpha) + p +} + +sample_in_mask <- function(mask_poly, n, max_tries=20) { + bb <- sf::st_bbox(mask_poly); mu <- sf::st_union(mask_poly) + pts <- data.frame(x=numeric(0),y=numeric(0)) + for (i in seq_len(max_tries)) { + if (nrow(pts)>=n) break + cnd <- data.frame(x=runif(n*8,bb["xmin"],bb["xmax"]), + y=runif(n*8,bb["ymin"],bb["ymax"])) + ok <- as.logical(sf::st_within( + sf::st_as_sf(cnd,coords=c("x","y")),mu,sparse=FALSE)[,1L]) + pts <- rbind(pts,cnd[ok,]) + } + pts[seq_len(min(n,nrow(pts))),] +} + +# ---- Shared helpers ---------------------------------------------------------- + +plot_mask <- function(mask, coords, title = "", subtitle = "", + fill = "#4c9be8", border = "#1a5fa8", + pt_col = "#c0392b", pt_size = 0.9, pt_alpha = 0.5) { + # geom_sf is required for correct hole rendering. + # geom_polygon draws a line between consecutive rings in the same group, + # producing diagonal artefacts across hole interiors. + mask_sf <- sf::st_sf(geometry = mask) + ggplot() + + geom_sf(data = mask_sf, + fill = fill, alpha = 0.2, color = border, linewidth = 0.55) + + geom_point(data = coords, aes(x = x, y = y), + color = pt_col, size = pt_size, alpha = pt_alpha) + + coord_sf(datum = NA) + # datum=NA: suppress geographic graticule + labs(title = title, subtitle = subtitle, x = "X", y = "Y") + + theme_minimal(base_size = 9.5) + + theme(plot.title = element_text(face = "bold", size = 9.5), + plot.subtitle = element_text(size = 8, color = "grey40"), + panel.grid = element_line(color = "grey93")) +} + +# ---- apply to real world data ---- +# load xenium object +load("~/Large Files/Pneumonectomy_Project_transfer/MSBR_polishing_2025-11-01/outputs/PPLR.polished.with.territory.and.neighborhood.2025-11-17.Robj") + +# subset out just one TMA puck +PPLR.polished <- UpdateSeuratObject(PPLR.polished) +Idents(PPLR.polished) <- PPLR.polished$TMA +table(Idents(PPLR.polished)) +temp <- subset(PPLR.polished, + idents = 'PNX7_M1_Sp') +temp + +# get centroid xy coordinates +tissue_pts <- data.frame(x = temp$x, + y = temp$y) +# make tissue mask +tissue_mask <- fit_spatial_mask(tissue_pts, + method="raster", + raster_resolution=256L, + raster_sigma=1, + raster_threshold=0.75, + verbose=TRUE, + buffer_dist = 3, + smooth_mask = T, + smooth_tolerance = 3) +# plot mask +plot_mask(tissue_mask, + tissue_pts, + title = "PNX7_M1_Sp", + subtitle = "raster_resolution = 300, raster_sigma = 1, raster_threshold = 0.75, smooth = T, smooth_tolerance = 3, buffer = 3") + +# get transcript xy coordinates +molecules <- temp[['PNX7_M1_Sp']][['molecules']] +tc_Areg <- molecules$Areg@coords +tc_Vegfa <- molecules$Vegfa@coords + +# inspect +tissue_sf <- sf::st_sf(geometry = tissue_mask) +print(ggplot() + + geom_sf(data=tissue_sf, fill="grey92", color="grey60", linewidth=0.4) + + geom_point(data=tc_Areg,aes(x=x,y=y,color="Areg"),alpha=0.6,size=0.1) + + geom_point(data=tc_Vegfa,aes(x=x,y=y,color="Vegfa"),alpha=0.5,size=0.1) + + scale_color_manual(values=c("Areg"="#e74c3c","Vegfa"="#2980b9"),name=NULL) + + #scale_size_continuous(range=c(0.3,2.5),name="log(UMI+1)") + + coord_sf(datum=NA) + theme_demo() + + labs(title="PNX7_M1_Sp", + subtitle="Areg | Vegfa",x="X",y="Y")) + +print(ggplot() + + geom_sf(data=tissue_sf, fill="grey92", color="grey60", linewidth=0.4) + + geom_point(data=tc_Areg,aes(x=x,y=y,color="Areg"),alpha=0.6,size=0.1) + + geom_point(data=tc_Vegfa,aes(x=x,y=y,color="Vegfa"),alpha=0.5,size=0.1) + + scale_color_manual(values=c("Areg"="#e74c3c","Vegfa"="#2980b9"),name=NULL) + + #scale_size_continuous(range=c(0.3,2.5),name="log(UMI+1)") + + coord_sf(datum=NA) + theme_demo() + + labs(title="PNX7_M1_Sp", + subtitle="Areg | Vegfa",x="X",y="Y"))+xlim(6000,6500)+ylim(11500,12000) + +# ---- Section 2: Method comparison ---- + +cat("Section 2: Method comparison...\n") +pb <- list(D=1, + diffusion_length = 10, + lambda=0.3, + production_rate=1, + grid_resolution=256L, + verbose=FALSE) + +res_fd <- do.call(estimate_concentration_field, + c(list(mask=tissue_mask, + transcript_coords=tc_Vegfa, + method="fd", + fd_solver="direct", + boundary_condition="dirichlet"),pb)) +res_green <- do.call(estimate_concentration_field, + c(list(mask=tissue_mask, + transcript_coords=tc_Vegfa, + method="green"),pb)) +res_kde <- do.call(estimate_concentration_field, + c(list(mask=tissue_mask, + transcript_coords=tc_Vegfa, + method="kde", + kde_bandwidth=sqrt(1/0.3)),pb)) +res_diff <- res_fd; +res_diff$field <- res_fd$field - res_green$field + +print((plot_field(res_fd,tc_Vegfa,title="2a. fd (Dirichlet)", + subtitle="Exact BCs; recommended", + show_pts = F)+ + xlim(6000,6500)+ + ylim(11500,12000) + + plot_field(res_green,tc_Vegfa,title="2b. green (FFT)", + subtitle="Infinite domain; no BCs", + show_pts = F)+ + xlim(6000,6500)+ + ylim(11500,12000)) / + (plot_field(res_kde,tc_Vegfa,title="2c. kde", + subtitle="Bandwidth=L; phenomenological", + show_pts = F)+ + xlim(6000,6500)+ + ylim(11500,12000) + + plot_field(res_diff,title="2d. fd \u2212 green", + subtitle="BCs suppress near-edge conc.", + symmetric=TRUE, + show_contours=FALSE, + show_pts=FALSE) + + xlim(6000,6500)+ + ylim(11500,12000) + + plot_annotation(title="Section 2: Method Comparison (Gene A, D=1, \u03bb=0.3)", + theme=theme(plot.title=element_text(face="bold",size=13))))) + + + diff --git a/spatial-modeling-with-claude/demo_concentration_field.R b/spatial-modeling-with-claude/demo_concentration_field.R new file mode 100644 index 0000000..f332d92 --- /dev/null +++ b/spatial-modeling-with-claude/demo_concentration_field.R @@ -0,0 +1,486 @@ +# ============================================================ +# demo_concentration_field.R +# +# Comprehensive demonstration of estimate_concentration_field(). +# +# Run after sourcing: +# source("fit_spatial_mask.R") +# source("estimate_concentration_field.R") +# +# Required packages: +# install.packages(c("sf","concaveman","MASS","isoband", +# "Matrix","ggplot2","patchwork")) +# +# Sections: +# 1 — Synthetic tissue + raw transcript layout +# 2 — Method comparison: fd / green / kde +# 3 — Diffusion length L sweep (most important parameter) +# 4 — Fixed L, varying D and lambda +# 5 — Dirichlet vs. Neumann boundary conditions +# 6 — UMI weighting vs. equal weighting +# 7 — External transcripts (include_external) +# 8 — Holey mask: vascular clearance topology +# 9 — Wide dynamic range + log1p visualisation +# 10 — Two-gene log2(A/B) ratio map +# 11 — 100k transcripts + solver timing comparison +# 12 — Quantitative field extraction +# ============================================================ + +library(sf) +library(ggplot2) +library(patchwork) +source("fit_spatial_mask.R") +source("estimate_concentration_field.R") +set.seed(2024) + +# ---- shared theme ---- +theme_demo <- function(bs=9.5) + theme_minimal(base_size=bs) + + theme(plot.title=element_text(face="bold",size=bs+0.5), + plot.subtitle=element_text(size=bs-1,color="grey40",margin=margin(b=4)), + panel.grid=element_line(color="grey94"), + legend.key.width=unit(0.35,"cm"), + legend.title=element_text(size=bs-1), + legend.text=element_text(size=bs-1.5), + axis.text=element_text(size=bs-2)) + +plot_field <- function(result, transcripts=NULL, title="", subtitle="", + palette="magma", log_scale=FALSE, + show_pts=TRUE, show_contours=TRUE, n_contours=6L, + pt_size=0.35, pt_alpha=0.45, pt_color="#00e5ff", + fill_label="Conc.", symmetric=FALSE) { + df <- field_to_df(result, inside_only=TRUE) + fill_col <- if (log_scale) { df$fv <- log1p(df$field); "fv" } else "field" + fill_lbl <- if (log_scale) paste0("log\u2081\u208a(",fill_label,")") else fill_label + p <- ggplot(df, aes(x=x,y=y,fill=.data[[fill_col]])) + + geom_raster(interpolate=TRUE) + coord_equal() + + labs(title=title, subtitle=subtitle, x="X", y="Y") + theme_demo() + if (symmetric) { + lim <- max(abs(df[[fill_col]]),na.rm=TRUE) + p <- p + scale_fill_distiller(palette="RdBu",limits=c(-lim,lim), + name=fill_lbl,na.value="transparent") + } else { + p <- p + scale_fill_viridis_c(option=palette,name=fill_lbl,na.value="transparent") + } + if (show_contours && any(!is.na(df$field))) + p <- p + geom_contour(aes(z=field),color="white", + alpha=0.35,linewidth=0.25,bins=n_contours) + if (show_pts && !is.null(transcripts)) + p <- p + geom_point(data=transcripts,aes(x=x,y=y),inherit.aes=FALSE, + color=pt_color,size=pt_size,alpha=pt_alpha) + p +} + +sample_in_mask <- function(mask_poly, n, max_tries=20) { + bb <- sf::st_bbox(mask_poly); mu <- sf::st_union(mask_poly) + pts <- data.frame(x=numeric(0),y=numeric(0)) + for (i in seq_len(max_tries)) { + if (nrow(pts)>=n) break + cnd <- data.frame(x=runif(n*8,bb["xmin"],bb["xmax"]), + y=runif(n*8,bb["ymin"],bb["ymax"])) + ok <- as.logical(sf::st_within( + sf::st_as_sf(cnd,coords=c("x","y")),mu,sparse=FALSE)[,1L]) + pts <- rbind(pts,cnd[ok,]) + } + pts[seq_len(min(n,nrow(pts))),] +} + + +# ============================================================ +# SECTION 1 — Synthetic tissue +# ============================================================ +cat("Section 1: Building tissue...\n") + +in_kidney <- function(x,y,oa=9,ob=7,ncx=5,nr=3.8) + (x/oa)^2+(y/ob)^2<1 & sqrt((x-ncx)^2+y^2)>=nr + +cands <- data.frame(x=runif(18000,-11,11),y=runif(18000,-9,9)) +tissue_pts <- cands[in_kidney(cands$x,cands$y),][1:3000,] +tissue_mask <- fit_spatial_mask(tissue_pts, method="raster", + raster_resolution=256L, raster_sigma=0.7, raster_threshold=0.15, verbose=FALSE) + +mu_t <- sf::st_union(tissue_mask) + +tc_A_cl <- data.frame(x=rnorm(350,-4.5,1.2), y=rnorm(350,3.8,0.9)) +A_in <- as.logical(sf::st_within( + sf::st_as_sf(tc_A_cl,coords=c("x","y")),mu_t,sparse=FALSE)[,1L]) +tc_A_cl <- tc_A_cl[A_in,] +tc_A_sc <- sample_in_mask(tissue_mask,50) +tc_A <- rbind(tc_A_cl,tc_A_sc) +tc_A$umi <- c(rpois(nrow(tc_A_cl),15), rpois(nrow(tc_A_sc),2)) + +tc_B_all <- sample_in_mask(tissue_mask,2000) +prob_B <- plogis(tc_B_all$x/3) +tc_B <- tc_B_all[runif(nrow(tc_B_all))=length(xg)||yi<1||yi>=length(yg)) return(NA_real_) + wx<-(qx-xg[xi])/(xg[xi+1]-xg[xi]); wy<-(qy-yg[yi])/(yg[yi+1]-yg[yi]) + (1-wx)*(1-wy)*f[xi,yi]+wx*(1-wy)*f[xi+1,yi]+(1-wx)*wy*f[xi,yi+1]+wx*wy*f[xi+1,yi+1] + }, numeric(1)) +} + +queries <- data.frame( + label=c("Cluster core","Cluster edge","Tissue centre","Far right","Near notch"), + x=c(-4.5,-3.0,0.0,5.0,3.0), y=c(3.8,2.5,0.0,0.0,0.0)) +queries$concentration <- interpolate_field(res_q, queries) + +cat("\n 12a. Concentration at query points:\n") +print(queries[,c("label","x","y","concentration")], row.names=FALSE) + +df_q <- field_to_df(res_q) +cat(sprintf("\n 12b. Mean conc. by half:\n Left : %.5f\n Right: %.5f\n", + mean(df_q$field[df_q$x< 0 & !is.na(df_q$field)]), + mean(df_q$field[df_q$x>=0 & !is.na(df_q$field)]))) + +pk <- which.max(res_q$field); N <- length(res_q$x) +cat(sprintf("\n 12c. Peak: x=%.2f, y=%.2f, value=%.6f\n", + res_q$x[((pk-1L)%%N)+1L], res_q$y[((pk-1L)%/%N)+1L], res_q$field[pk])) + +total <- sum(res_q$field[res_q$mask],na.rm=TRUE)*res_q$hx*res_q$hy +theory <- nrow(tc_A)*1.0/0.3 +cat(sprintf("\n 12d. Field integral = %.4f\n", total)) +cat(sprintf(" Theory (n*r/lam) = %.4f\n", theory)) +cat(sprintf(" Agreement = %.1f%%\n", 100*(1-abs(total-theory)/theory))) + +prof <- df_q[abs(df_q$y) 400.\n") +cat(strrep("=",60),"\n") + + + + + + + diff --git a/spatial-modeling-with-claude/estimate_concentration_field.R b/spatial-modeling-with-claude/estimate_concentration_field.R new file mode 100644 index 0000000..ef29a9a --- /dev/null +++ b/spatial-modeling-with-claude/estimate_concentration_field.R @@ -0,0 +1,323 @@ +# ============================================================ +# estimate_concentration_field.R +# +# Models steady-state continuous molecular concentration over a +# spatial mask from individual mRNA transcript point sources. +# +# Physical model: D * nabla^2 C(x) - lambda * C(x) + s(x) = 0 +# +# Three solvers: +# "fd" — finite-difference sparse linear system (recommended) +# "green" — FFT convolution with 2D Green's function K0(kappa*r) +# "kde" — Gaussian kernel smoothing (phenomenological baseline) +# +# install.packages(c("sf","Matrix","ggplot2")) +# ============================================================ + +.gauss_kernel_1d <- function(sigma) { + r <- max(1L,ceiling(3*sigma)); k <- exp(-(seq(-r,r))^2/(2*sigma^2)); k/sum(k) +} + +.sep_gauss_smooth_2d <- function(mat, sigma_x, sigma_y) { + kx <- .gauss_kernel_1d(sigma_x); ky <- .gauss_kernel_1d(sigma_y) + out <- apply(mat,2L,function(col){s<-stats::filter(col,kx,method="convolution",sides=2L);s[is.na(s)]<-0;as.numeric(s)}) + t(apply(out,1L,function(row){s<-stats::filter(row,ky,method="convolution",sides=2L);s[is.na(s)]<-0;as.numeric(s)})) +} + +.rasterize_mask <- function(gc_sf, mask_union, N, n_cores) { + n_cells <- N*N + if (n_cores>1L && .Platform$OS.type=="unix") { + chunk_size <- ceiling(n_cells/n_cores) + chunks <- split(seq_len(n_cells),ceiling(seq_len(n_cells)/chunk_size)) + res_list <- parallel::mclapply(chunks,function(idx) + as.logical(sf::st_within(gc_sf[idx,],mask_union,sparse=FALSE)[,1L]),mc.cores=n_cores) + in_msk <- unlist(res_list) + } else { + in_msk <- as.logical(sf::st_within(gc_sf,mask_union,sparse=FALSE)[,1L]) + } + matrix(in_msk,nrow=N,ncol=N) +} + +estimate_concentration_field <- function( + mask, + transcript_coords, + D = 1.0, + lambda = 0.1, + production_rate = 1.0, + diffusion_length = NULL, + weight_col = NULL, + method = "fd", + grid_resolution = 256L, + boundary_condition = "dirichlet", + fd_solver = "direct", + sor_omega = 1.7, + sor_tol = 1e-6, + sor_max_iter = 1500L, + green_r_min_factor = 0.5, + kde_bandwidth = NULL, + include_external = FALSE, + normalize = FALSE, + log_transform = FALSE, + clip_negative = TRUE, + n_cores = 1L, + return_sources = TRUE, + plot = FALSE, + verbose = TRUE +) { + t0 <- proc.time()["elapsed"] + + # 0. Packages + req <- "sf"; if (method=="fd"&&fd_solver=="direct") req <- c(req,"Matrix") + miss <- req[!sapply(req,requireNamespace,quietly=TRUE)] + if (length(miss)>0L) stop("Install: install.packages(c(",paste0('"',miss,'"',collapse=","),"))") + + # 1. Physics + if (!is.null(diffusion_length)) { + L <- as.numeric(diffusion_length); kappa2 <- 1/L^2; kappa <- 1/L + } else { + if (lambda<=0 && method!="kde") + stop("`lambda` must be > 0 for a finite steady-state field.") + if (lambda>0) { L <- sqrt(D/lambda); kappa2 <- lambda/D; kappa <- sqrt(kappa2) + } else { L <- Inf; kappa2 <- 0; kappa <- 0 } + } + N <- as.integer(grid_resolution); nx_c <- as.integer(n_cores) + + if (verbose) { + cat("============================================================\n") + cat(" estimate_concentration_field\n") + cat("============================================================\n") + cat(sprintf(" Method: %s | D: %g | lambda: %g | L: %g | N: %d x %d\n", + method,D,lambda,L,N,N)) + if (method=="fd") cat(sprintf(" BC: %s | solver: %s\n",boundary_condition,fd_solver)) + cat("------------------------------------------------------------\n") + } + + # 2. Parse transcripts + if (is.matrix(transcript_coords)) transcript_coords <- as.data.frame(transcript_coords) + if (!all(c("x","y") %in% names(transcript_coords))) names(transcript_coords)[1:2] <- c("x","y") + tc <- transcript_coords[complete.cases(transcript_coords[,c("x","y")]),] + weights <- if (!is.null(weight_col)) tc[[weight_col]]*production_rate else + rep(production_rate,nrow(tc)) + + # 3. Filter by mask + mu_sf <- sf::st_union(mask) + tc_sf <- sf::st_as_sf(tc[,c("x","y")],coords=c("x","y"),crs=sf::st_crs(mask)) + in_msk <- as.logical(sf::st_within(tc_sf,mu_sf,sparse=FALSE)[,1L]) + n_tot <- nrow(tc); n_ext <- sum(!in_msk) + if (!include_external) { tc <- tc[in_msk,]; weights <- weights[in_msk] } + n_src <- nrow(tc) + if (n_src==0L) stop("No transcripts available. Check coords or set include_external=TRUE.") + if (verbose) cat(sprintf(" Transcripts: %d used (%d external %s)\n", + n_src, n_ext, if(include_external)"included" else "excluded")) + + # 4. Grid + bb <- sf::st_bbox(mu_sf); pad <- 0.03 + xpad <- (bb["xmax"]-bb["xmin"])*pad; ypad <- (bb["ymax"]-bb["ymin"])*pad + x_breaks <- seq(bb["xmin"]-xpad,bb["xmax"]+xpad,length.out=N+1L) + y_breaks <- seq(bb["ymin"]-ypad,bb["ymax"]+ypad,length.out=N+1L) + x_centers <- (x_breaks[-1L]+x_breaks[-(N+1L)])*0.5 + y_centers <- (y_breaks[-1L]+y_breaks[-(N+1L)])*0.5 + hx <- x_centers[2L]-x_centers[1L]; hy <- y_centers[2L]-y_centers[1L]; h <- (hx+hy)*0.5 + + # 5. Rasterize mask + if (verbose) cat(sprintf(" Rasterizing %dx%d grid...\n",N,N)) + gdf <- expand.grid(x=x_centers,y=y_centers) + gc_sf <- sf::st_as_sf(gdf,coords=c("x","y"),crs=sf::st_crs(mask)) + mask_mat <- .rasterize_mask(gc_sf,mu_sf,N,nx_c) + n_inside <- sum(mask_mat) + if (verbose) cat(sprintf(" Inside-mask cells: %d (%.1f%%)\n",n_inside,100*n_inside/N^2)) + + # 6. Bin sources + xi_s <- pmax(1L,pmin(N,findInterval(tc$x,x_breaks,rightmost.closed=TRUE))) + yi_s <- pmax(1L,pmin(N,findInterval(tc$y,y_breaks,rightmost.closed=TRUE))) + lin_s <- xi_s+(yi_s-1L)*N + src_vec <- numeric(N*N) + wt_agg <- rowsum(matrix(weights,ncol=1L),lin_s) + src_vec[as.integer(rownames(wt_agg))] <- wt_agg[,1L] + source_mat <- matrix(src_vec,nrow=N,ncol=N) + + # 7. Solve + t_solve <- proc.time()["elapsed"] + + field_mat <- switch(method, + + "fd" = { + inside_lin <- which(mask_mat); K <- length(inside_lin) + cell2sys <- integer(N*N); cell2sys[inside_lin] <- seq_len(K) + xi_in <- ((inside_lin-1L)%%N)+1L; yi_in <- ((inside_lin-1L)%/%N)+1L + nbr_xm <- ifelse(xi_in>1L,inside_lin-1L,0L) + nbr_xp <- ifelse(xi_in1L,inside_lin-N, 0L) + nbr_yp <- ifelse(yi_in0L&nb<=N*N;res<-logical(length(nb));res[ok]<-mask_mat[nb[ok]];res} + nbr_list <- list(nbr_xm,nbr_xp,nbr_ym,nbr_yp); wt_list <- c(nx_w,nx_w,ny_w,ny_w) + + if (fd_solver=="direct") { + diag_vals <- rep(-(2*nx_w+2*ny_w+kappa2),K) + off_rows <- integer(0); off_cols <- integer(0); off_vals <- numeric(0) + for (d in seq_along(nbr_list)) { + nb <- nbr_list[[d]]; wt <- wt_list[[d]] + is_in <- is_in_mask(nb); is_out <- (nb>0L)&!is_in; is_oob <- (nb==0L) + sel <- which(is_in) + if (length(sel)>0L){off_rows<-c(off_rows,sel);off_cols<-c(off_cols,cell2sys[nb[sel]]);off_vals<-c(off_vals,rep(wt,length(sel)))} + if (boundary_condition=="neumann") { + nc2 <- which(is_out|is_oob); diag_vals[nc2] <- diag_vals[nc2]+wt + } + } + A <- Matrix::sparseMatrix(i=c(seq_len(K),off_rows),j=c(seq_len(K),off_cols), + x=c(diag_vals,off_vals),dims=c(K,K)) + b_vec <- -source_mat[inside_lin]/(D*hx*hy) + if (verbose) cat(sprintf(" Sparse system: %dx%d, %d nnz\n",K,K,length(A@x))) + c_vec <- as.numeric(Matrix::solve(A,b_vec)) + out <- matrix(NA_real_,N,N); out[inside_lin] <- c_vec; out + + } else { + if (boundary_condition=="neumann") message("SOR uses Dirichlet BC only.") + xi_grid <- matrix(rep(seq_len(N),N),N,N) + yi_grid <- matrix(rep(seq_len(N),each=N),N,N) + is_red <- ((xi_grid+yi_grid)%%2L)==0L + ins_red <- mask_mat& is_red; ins_blk <- mask_mat&!is_red + C <- matrix(0,N,N); denom <- 2*nx_w+2*ny_w+kappa2 + src_dens <- source_mat/(D*hx*hy); converged <- FALSE + for (iter in seq_len(sor_max_iter)) { + C_prev <- C + Cxm <- rbind(matrix(0,1,N),C[1:(N-1),]); Cxp <- rbind(C[2:N,],matrix(0,1,N)) + Cym <- cbind(matrix(0,N,1),C[,1:(N-1)]); Cyp <- cbind(C[,2:N],matrix(0,N,1)) + Cgs <- (nx_w*(Cxm+Cxp)+ny_w*(Cym+Cyp)+src_dens)/denom + C[ins_red] <- (1-sor_omega)*C[ins_red]+sor_omega*Cgs[ins_red]; C[!mask_mat]<-0 + Cxm <- rbind(matrix(0,1,N),C[1:(N-1),]); Cxp <- rbind(C[2:N,],matrix(0,1,N)) + Cym <- cbind(matrix(0,N,1),C[,1:(N-1)]); Cyp <- cbind(C[,2:N],matrix(0,N,1)) + Cgs <- (nx_w*(Cxm+Cxp)+ny_w*(Cym+Cyp)+src_dens)/denom + C[ins_blk] <- (1-sor_omega)*C[ins_blk]+sor_omega*Cgs[ins_blk]; C[!mask_mat]<-0 + delta <- max(abs(C[mask_mat]-C_prev[mask_mat])) + if (is.finite(delta)&&delta0.") + Np <- 2L*N + xi_k <- c(0L:(N-1L),(-N):(-1L)); yi_k <- c(0L:(N-1L),(-N):(-1L)) + dx_mat <- outer(xi_k,rep(1L,Np))*hx; dy_mat <- outer(rep(1L,Np),yi_k)*hy + r_mat <- sqrt(dx_mat^2+dy_mat^2); r_min <- green_r_min_factor*h + r_mat[r_matfmn) field_mat <- (field_mat-fmn)/(fmx-fmn) + } + + t_total <- proc.time()["elapsed"]-t0 + if (verbose) { + cat(sprintf(" Solve: %.2fs | Total: %.2fs | Range: [%.4g, %.4g]\n", + t_solve_end-t_solve, t_total, + min(field_mat,na.rm=TRUE),max(field_mat,na.rm=TRUE))) + cat("============================================================\n") + } + + # 9. Plot + if (plot && requireNamespace("ggplot2",quietly=TRUE)) { + gdf <- expand.grid(x=x_centers,y=y_centers); gdf$field <- as.vector(field_mat) + gdf <- gdf[!is.na(gdf$field),] + print(ggplot2::ggplot(gdf,ggplot2::aes(x=x,y=y,fill=field))+ + ggplot2::geom_raster(interpolate=TRUE)+ + ggplot2::scale_fill_viridis_c(option="magma",name="Conc.",na.value="transparent")+ + ggplot2::coord_equal()+ggplot2::theme_minimal(base_size=11)+ + ggplot2::labs(title=paste0("Concentration | method='",method,"'"),x="X",y="Y")) + } + + # 10. Return + list( + field = field_mat, + x = x_centers, y = y_centers, hx = hx, hy = hy, + mask = mask_mat, + sources = if (return_sources) source_mat else NULL, + params = list(D=D,lambda=lambda,kappa2=kappa2,diffusion_length=L, + production_rate=production_rate,method=method, + grid_resolution=N,boundary_condition=if(method=="fd")boundary_condition else NA, + fd_solver=if(method=="fd")fd_solver else NA, + include_external=include_external,log_transform=log_transform,normalize=normalize), + diagnostics = list(n_transcripts_total=n_tot,n_transcripts_used=n_src, + n_transcripts_external=n_ext,n_cells_inside_mask=n_inside, + elapsed_total_s=t_total,elapsed_solve_s=t_solve_end-t_solve) + ) +} + +# ============================================================ +# UTILITY FUNCTIONS +# ============================================================ + +field_to_df <- function(result, inside_only=TRUE) { + df <- expand.grid(x=result$x,y=result$y) + df$field <- as.vector(result$field) + df$inside <- as.vector(result$mask) + if (!is.null(result$sources)) df$source <- as.vector(result$sources) + if (inside_only) df <- df[df$inside,] + df +} + +plot_concentration_field <- function( + result, transcript_coords=NULL, + show_sources=TRUE, show_contours=FALSE, n_contours=8L, + palette="magma", interpolate=TRUE, log_scale=FALSE, + pt_size=0.4, pt_alpha=0.4, pt_color="#00e5ff", title=NULL) { + if (!requireNamespace("ggplot2",quietly=TRUE)) + stop("Requires ggplot2.") + df <- field_to_df(result,inside_only=TRUE) + p <- result$params; L_str <- if(is.finite(p$diffusion_length)) round(p$diffusion_length,3) else "Inf" + tit <- if(is.null(title)) sprintf("Concentration | %s | D=%g, lambda=%g, L=%s", + p$method,p$D,p$lambda,L_str) else title + fc <- if (log_scale){df$lf<-log1p(df$field);"lf"} else "field" + g <- ggplot2::ggplot(df,ggplot2::aes(x=x,y=y,fill=.data[[fc]]))+ + ggplot2::geom_raster(interpolate=interpolate)+ + ggplot2::scale_fill_viridis_c(option=palette, + name=if(log_scale)"log1p(C)" else "Conc.", + na.value="transparent")+ + ggplot2::coord_equal()+ + ggplot2::labs(title=tit,x="X",y="Y")+ + ggplot2::theme_minimal(base_size=11)+ + ggplot2::theme(plot.title=ggplot2::element_text(face="bold")) + if (show_contours) + g <- g+ggplot2::geom_contour(data=df,ggplot2::aes(z=field), + color="white",alpha=0.4,linewidth=0.3,bins=n_contours) + if (show_sources && !is.null(transcript_coords)) + g <- g+ggplot2::geom_point(data=transcript_coords,ggplot2::aes(x=x,y=y), + inherit.aes=FALSE,color=pt_color,size=pt_size,alpha=pt_alpha) + g +} + +sweep_diffusion_length <- function(L_values, mask, transcript_coords, ...) { + do.call(rbind, lapply(L_values, function(Lv) { + cat(sprintf("\n--- Sweeping L = %g ---\n",Lv)) + res <- estimate_concentration_field(mask=mask,transcript_coords=transcript_coords, + diffusion_length=Lv,verbose=FALSE,...) + df <- field_to_df(res,inside_only=TRUE); df$L <- Lv; df + })) +} diff --git a/spatial-modeling-with-claude/vignette_concentration_field.Rmd b/spatial-modeling-with-claude/vignette_concentration_field.Rmd new file mode 100644 index 0000000..009a8f8 --- /dev/null +++ b/spatial-modeling-with-claude/vignette_concentration_field.Rmd @@ -0,0 +1,908 @@ +--- +title: "Estimating Steady-State Molecular Concentration Fields" +subtitle: "A guide to `estimate_concentration_field()`: physics, solvers, and tuning" +author: "Raredon Laboratory · Yale School of Medicine" +date: "`r Sys.Date()`" +output: + html_document: + toc: true + toc_float: + collapsed: false + smooth_scroll: true + toc_depth: 3 + theme: flatly + highlight: kate + code_folding: show + fig_width: 10 + fig_height: 5 + df_print: kable +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + echo = TRUE, + warning = FALSE, + message = FALSE, + fig.align = "center", + cache = FALSE +) +``` + +--- + +# Overview + +Spatial transcriptomics data gives us discrete transcript or cell locations, but many biological processes — ligand gradients, cytokine fields, metabolite concentrations — are continuous. `estimate_concentration_field()` bridges this gap by computing the **steady-state concentration field** that a set of point sources (mRNA transcripts, detected proteins) would produce under a physically motivated diffusion-clearance model. + +The result is a dense concentration map over the tissue, interpolated from point source locations. This can be used to: + +- Identify where a signalling molecule is most concentrated +- Compute the concentration seen by each cell +- Create two-gene ratio maps (spatial domains of ligand/receptor balance) +- Visualise how diffusion length affects signal range + +--- + +# The physical model + +The function solves the **screened Poisson equation** (also called the steady-state diffusion-clearance PDE): + +$$D \nabla^2 C(\mathbf{x}) - \lambda C(\mathbf{x}) + s(\mathbf{x}) = 0$$ + +where: + +| Symbol | Meaning | Parameter | +|---|---|---| +| $C(\mathbf{x})$ | Steady-state concentration at position **x** | — (what we solve for) | +| $D$ | Diffusion coefficient (spatial spread rate) | `D` | +| $\lambda$ | First-order clearance rate | `lambda` | +| $s(\mathbf{x})$ | Source density (production rate per unit area) | `production_rate` | + +The key derived quantity is the **diffusion length**: + +$$L = \sqrt{D / \lambda}$$ + +$L$ sets the characteristic distance over which concentration decays from a point source. It is the single most important parameter to tune. The shape of the field depends only on $L$; the amplitude scales as $1/D$. + +> **Practical interpretation:** $L$ is roughly the radius of influence of a single transcript. At distance $r \gg L$ from a source, the concentration is negligible. At $r \ll L$, concentration is near its maximum. + +--- + +# Setup + +```{r libraries} +library(sf) +library(ggplot2) +library(patchwork) +source("fit_spatial_mask.R") +source("estimate_concentration_field.R") + +set.seed(2024) +``` + +--- + +# Shared helpers + +```{r helpers} +# ---- Plot theme ---- +theme_demo <- function(bs = 9.5) + theme_minimal(base_size = bs) + + theme(plot.title = element_text(face = "bold", size = bs + 0.5), + plot.subtitle = element_text(size = bs - 1, color = "grey40", + margin = margin(b = 4)), + panel.grid = element_line(color = "grey94"), + legend.key.width = unit(0.35, "cm"), + legend.title = element_text(size = bs - 1), + legend.text = element_text(size = bs - 1.5), + axis.text = element_text(size = bs - 2)) + +# ---- Field visualiser ---- +# Uses geom_raster (correct for continuous fields on regular grids). +# show_contours overlays iso-concentration lines. +plot_field <- function(result, transcripts = NULL, + title = "", subtitle = "", + palette = "magma", log_scale = FALSE, + show_pts = TRUE, show_contours = TRUE, n_contours = 6L, + pt_size = 0.35, pt_alpha = 0.45, pt_color = "#00e5ff", + fill_label = "Conc.", symmetric = FALSE) { + df <- field_to_df(result, inside_only = TRUE) + fill_col <- if (log_scale) { df$fv <- log1p(df$field); "fv" } else "field" + fill_lbl <- if (log_scale) paste0("log\u2081\u208a(", fill_label, ")") else fill_label + p <- ggplot(df, aes(x = x, y = y, fill = .data[[fill_col]])) + + geom_raster(interpolate = TRUE) + coord_equal() + + labs(title = title, subtitle = subtitle, x = "X", y = "Y") + + theme_demo() + if (symmetric) { + lim <- max(abs(df[[fill_col]]), na.rm = TRUE) + p <- p + scale_fill_distiller(palette = "RdBu", limits = c(-lim, lim), + name = fill_lbl, na.value = "transparent") + } else { + p <- p + scale_fill_viridis_c(option = palette, name = fill_lbl, + na.value = "transparent") + } + if (show_contours && any(!is.na(df$field))) + p <- p + geom_contour(aes(z = field), color = "white", + alpha = 0.35, linewidth = 0.25, bins = n_contours) + if (show_pts && !is.null(transcripts)) + p <- p + geom_point(data = transcripts, aes(x = x, y = y), + inherit.aes = FALSE, color = pt_color, + size = pt_size, alpha = pt_alpha) + p +} + +# ---- Sample points uniformly from inside a mask ---- +sample_in_mask <- function(mask_poly, n, max_tries = 20) { + bb <- sf::st_bbox(mask_poly); mu <- sf::st_union(mask_poly) + pts <- data.frame(x = numeric(0), y = numeric(0)) + for (i in seq_len(max_tries)) { + if (nrow(pts) >= n) break + cnd <- data.frame(x = runif(n * 8, bb["xmin"], bb["xmax"]), + y = runif(n * 8, bb["ymin"], bb["ymax"])) + ok <- as.logical(sf::st_within( + sf::st_as_sf(cnd, coords = c("x","y")), mu, sparse = FALSE)[, 1L]) + pts <- rbind(pts, cnd[ok, ]) + } + pts[seq_len(min(n, nrow(pts))), ] +} +``` + +--- + +# Function reference: `estimate_concentration_field()` + +```r +estimate_concentration_field( + mask, # sfc polygon — output of fit_spatial_mask() + transcript_coords, # data.frame with columns x and y (plus optional weight_col) + + # Physics + D = 1.0, # diffusion coefficient (arbitrary units) + lambda = 0.1, # first-order clearance rate + diffusion_length = NULL, # override: set L directly; NULL uses sqrt(D/lambda) + production_rate = 1.0, # multiplied by weights if weight_col is set + + # Solver + method = "fd", # "fd" | "green" | "kde" + grid_resolution = 256L, # grid cells per axis (N × N) + boundary_condition = "dirichlet", # "dirichlet" | "neumann" (fd only) + fd_solver = "direct", # "direct" | "iterative" (fd only) + + # Weighting + weight_col = NULL, # column name for per-transcript weights (e.g. "umi") + + # Optional + include_external = FALSE, # include transcripts outside mask? + normalize = FALSE, # rescale field to [0, 1]? + log_transform = FALSE, # apply log1p() after solving? + clip_negative = TRUE, # set small negatives to 0? + n_cores = 1L, + plot = FALSE, + verbose = TRUE +) +``` + +## Return value + +A named list containing: + +| Element | Contents | +|---|---| +| `field` | N×N matrix; `NA` outside the mask | +| `x`, `y` | Grid centre coordinates (length N each) | +| `hx`, `hy` | Grid cell width and height | +| `mask` | N×N logical matrix (TRUE = inside mask) | +| `sources` | N×N matrix of binned source density | +| `params` | List of all input parameters | +| `diagnostics` | Timing, counts, etc. | + +Use `field_to_df(result)` to convert to a tidy data frame for ggplot. + +--- + +# Section 1 — Synthetic tissue and transcript layout + +We build a kidney-bean-shaped tissue with two synthetic genes: + +- **Gene A**: focal cluster at upper-left, plus sparse background. High UMI count (focal source). +- **Gene B**: 500 transcripts with a right-biased spatial distribution. Low UMI count (diffuse source). + +```{r sec1-tissue} +in_kidney <- function(x, y, oa = 9, ob = 7, ncx = 5, nr = 3.8) + (x/oa)^2 + (y/ob)^2 < 1 & sqrt((x - ncx)^2 + y^2) >= nr + +cands <- data.frame(x = runif(18000, -11, 11), y = runif(18000, -9, 9)) +tissue_pts <- cands[in_kidney(cands$x, cands$y), ][1:3000, ] + +tissue_mask <- fit_spatial_mask(tissue_pts, method = "raster", + raster_resolution = 256L, raster_sigma = 0.7, raster_threshold = 0.15, + verbose = FALSE) + +mu_t <- sf::st_union(tissue_mask) +``` + +```{r sec1-transcripts} +# Gene A: focal cluster at (-4.5, 3.8) + sparse background +tc_A_cl <- data.frame(x = rnorm(350, -4.5, 1.2), y = rnorm(350, 3.8, 0.9)) +A_in <- as.logical(sf::st_within( + sf::st_as_sf(tc_A_cl, coords = c("x","y")), mu_t, sparse = FALSE)[, 1L]) +tc_A_cl <- tc_A_cl[A_in, ] +tc_A_sc <- sample_in_mask(tissue_mask, 50) +tc_A <- rbind(tc_A_cl, tc_A_sc) +tc_A$umi <- c(rpois(nrow(tc_A_cl), 15), rpois(nrow(tc_A_sc), 2)) + +# Gene B: right-biased random distribution +tc_B_all <- sample_in_mask(tissue_mask, 2000) +prob_B <- plogis(tc_B_all$x / 3) +tc_B <- tc_B_all[runif(nrow(tc_B_all)) < prob_B, ][1:500, ] +tc_B$umi <- rpois(nrow(tc_B), 5) +``` + +```{r sec1-plot, fig.height=5} +tissue_sf <- sf::st_sf(geometry = tissue_mask) + +ggplot() + + geom_sf(data = tissue_sf, fill = "grey92", color = "grey60", linewidth = 0.4) + + geom_point(data = tc_A, aes(x = x, y = y, color = "Gene A", size = log1p(umi)), + alpha = 0.6) + + geom_point(data = tc_B, aes(x = x, y = y, color = "Gene B", size = log1p(umi)), + alpha = 0.5) + + scale_color_manual(values = c("Gene A" = "#e74c3c", "Gene B" = "#2980b9"), + name = NULL) + + scale_size_continuous(range = c(0.3, 2.5), name = "log(UMI+1)") + + coord_sf(datum = NA) + theme_demo() + + labs(title = "Section 1: Synthetic kidney-bean tissue", + subtitle = "Gene A: focal upper-left | Gene B: broad right-biased", + x = "X", y = "Y") +``` + +--- + +# Section 2 — Method comparison: fd, green, kde + +Three solvers are available. Each makes different assumptions about the domain: + +| Method | How it works | Boundary handling | Best for | +|---|---|---|---| +| `"fd"` | Sparse finite-difference linear system | **Exact** Dirichlet or Neumann BCs | **Default; recommended** | +| `"green"` | FFT convolution with analytic Green's function $K_0(\kappa r)$ | **None** — infinite domain | Fast interior estimates; quick sweeps | +| `"kde"` | Gaussian kernel smoothing | None | Phenomenological baseline | + +```{r sec2, cache=TRUE} +pb <- list(D = 1, lambda = 0.3, production_rate = 1, + grid_resolution = 256L, verbose = FALSE) + +res_fd <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_A, + method = "fd", fd_solver = "direct", + boundary_condition = "dirichlet"), pb)) + +res_green <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_A, + method = "green"), pb)) + +res_kde <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_A, + method = "kde", kde_bandwidth = sqrt(1/0.3)), pb)) + +res_diff <- res_fd +res_diff$field <- res_fd$field - res_green$field +``` + +```{r sec2-plot, fig.height=8} +(plot_field(res_fd, tc_A, title = "2a. fd (Dirichlet)", + subtitle = "Exact BCs; recommended") + + plot_field(res_green, tc_A, title = "2b. green (FFT)", + subtitle = "Infinite domain; no boundary effects")) / +(plot_field(res_kde, tc_A, title = "2c. kde", + subtitle = "Bandwidth = L; phenomenological") + + plot_field(res_diff, title = "2d. fd − green", + subtitle = "fd correctly suppresses near-edge concentration", + symmetric = TRUE, show_contours = FALSE, show_pts = FALSE)) + +plot_annotation( + title = "Section 2: Solver Comparison (Gene A, D=1, λ=0.3, L≈1.83)", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +**Key observations:** + +- The `fd` and `green` maps look very similar in the interior (panel 2d shows only small differences away from the edges). This confirms that the physics is consistent between methods. +- The **difference map** (2d) shows that `fd` correctly drives concentration to zero at the tissue boundary (Dirichlet BC), while `green` ignores the boundary entirely. This matters most when sources are near the edge. +- `kde` produces a visually similar pattern to `fd` because the bandwidth was set equal to $L$, but it has no physical interpretation and no BCs. + +**When to use `method = "green"`:** When you are doing a rapid parameter sweep over many $L$ values and don't need exact boundary conditions. It runs faster than `fd` at high resolution. + +--- + +# Section 3 — Diffusion length sweep: the most important parameter + +The diffusion length $L = \sqrt{D/\lambda}$ is the primary parameter to tune. It determines how far each transcript's signal spreads. Small $L$: fields are tight and peaked around clusters. Large $L$: fields fill the entire tissue. + +```{r sec3, cache=TRUE} +L_vals <- c(0.5, 1.5, 4.0, 10.0) + +sw3 <- lapply(L_vals, function(Lv) { + res <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, diffusion_length = Lv, production_rate = 1, + method = "fd", fd_solver = "direct", grid_resolution = 256L, + verbose = FALSE) + # Normalise each panel independently for visual comparison + f <- res$field + res$field <- (f - min(f, na.rm = TRUE)) / diff(range(f, na.rm = TRUE)) + plot_field(res, tc_A, + title = sprintf("L = %.1f", Lv), + subtitle = sprintf("λ ≈ %.3f", 1/Lv^2), + fill_label = "Norm. Conc.", pt_size = 0.4) + + scale_fill_viridis_c(option = "magma", name = "Norm.\nConc.", + limits = c(0, 1), na.value = "transparent") +}) +``` + +```{r sec3-plot, fig.height=4} +wrap_plots(sw3, nrow = 1) + + plot_annotation( + title = "Section 3: Diffusion Length Sweep (each panel normalised independently)", + subtitle = "Small L → tight peaks | Large L → tissue-wide field", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +```{r sec3-peaks} +peaks <- sapply(L_vals, function(Lv) { + res <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, diffusion_length = Lv, + method = "fd", fd_solver = "direct", grid_resolution = 256L, + verbose = FALSE) + max(res$field, na.rm = TRUE) +}) +data.frame(L = L_vals, peak_concentration = round(peaks, 5)) +``` + +### How to choose L in practice + +There is no single correct answer — the right $L$ is biologically motivated. Ask: + +1. **What is the typical inter-cluster distance?** $L$ should be 10–20% of this distance if you want fields that peak at sources and drop between them. Larger values produce smoother, more "territorial" fields. +2. **Are you modelling a small diffusing molecule or a large one?** Small molecules (e.g. metabolites, small cytokines) have higher $D$ and thus larger $L$. Large molecules (e.g. ECM-bound ligands) have small $D$. +3. **Does your field look biologically reasonable?** Inspect the output and ask whether the gradients match known biology. + +> **Shortcut:** set `diffusion_length = L` directly to bypass the `D`/`lambda` decomposition. The shape depends only on $L$; $D$ only affects amplitude. + +--- + +# Section 4 — Fixed L, varying D and lambda + +This section makes the key mathematical point: **field shape is determined entirely by $L$**. Changing $D$ and $\lambda$ while keeping $L = \sqrt{D/\lambda}$ constant produces identical spatial patterns, only scaling the amplitude by $1/D$. + +```{r sec4, cache=TRUE} +DL_cases <- list( + list(D = 0.1, lam = 0.025, label = "D=0.1, λ=0.025"), + list(D = 1, lam = 0.25, label = "D=1, λ=0.25"), + list(D = 5, lam = 1.25, label = "D=5, λ=1.25"), + list(D = 20, lam = 5, label = "D=20, λ=5") +) + +dl_pl <- lapply(DL_cases, function(co) { + res <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = co$D, lambda = co$lam, production_rate = 1, + method = "fd", fd_solver = "direct", grid_resolution = 256L, + verbose = FALSE) + pk <- round(max(res$field, na.rm = TRUE), 5) + plot_field(res, tc_A, + title = co$label, + subtitle = sprintf("L=2 fixed | peak=%.5f", pk), + pt_size = 0.3) +}) +``` + +```{r sec4-plot, fig.height=4} +wrap_plots(dl_pl, nrow = 1) + + plot_annotation( + title = "Section 4: Fixed L=2, Varying D and λ", + subtitle = "Shape IDENTICAL (same L). Amplitude ∝ 1/D.", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +**Implication for parameter setting:** When fitting to data, first determine the spatial extent of influence ($L$), then fix either $D$ or $\lambda$ based on biophysical priors. The third parameter is then determined. + +--- + +# Section 5 — Boundary conditions + +The `fd` method supports two boundary conditions: + +- **Dirichlet** (`boundary_condition = "dirichlet"`): $C = 0$ at the tissue boundary. Physically: molecules are absorbed or degrade instantly at the boundary. This is appropriate for most spatial transcriptomics sections where the tissue edge is a hard barrier. +- **Neumann** (`boundary_condition = "neumann"`): $\partial C / \partial n = 0$ at the boundary (zero normal flux). Physically: no molecules cross the boundary — they are reflected back in. This is appropriate when modelling a sealed container. + +```{r sec5, cache=TRUE} +res_dir <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, lambda = 0.2, method = "fd", fd_solver = "direct", + boundary_condition = "dirichlet", grid_resolution = 256L, verbose = FALSE) + +res_neu <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, lambda = 0.2, method = "fd", fd_solver = "direct", + boundary_condition = "neumann", grid_resolution = 256L, verbose = FALSE) + +res_bcd <- res_dir +res_bcd$field <- res_neu$field - res_dir$field +``` + +```{r sec5-crosssection} +sl <- function(res, lbl) { + df <- field_to_df(res) + s <- df[abs(df$y) < res$hy * 1.5 & !is.na(df$field), ] + s <- s[order(s$x), ]; s$BC <- lbl; s +} +sl_all <- rbind(sl(res_dir, "Dirichlet"), sl(res_neu, "Neumann")) + +p5d <- ggplot(sl_all, aes(x = x, y = field, color = BC)) + + geom_line(linewidth = 0.8) + + scale_color_manual(values = c("Dirichlet" = "#e74c3c", + "Neumann" = "#2980b9")) + + labs(title = "5d. Cross-section at y ≈ 0", + subtitle = "Dirichlet drops to 0; Neumann stays elevated", + x = "X", y = "Conc.", color = "BC") + + theme_demo() +``` + +```{r sec5-plot, fig.height=8} +(plot_field(res_dir, tc_A, + title = "5a. Dirichlet (C=0 at boundary)", + subtitle = "D=1, λ=0.2, L≈2.24") + + plot_field(res_neu, tc_A, + title = "5b. Neumann (∂C/∂n=0)", + subtitle = "Molecules reflected; higher near edges", + palette = "plasma")) / +(plot_field(res_bcd, + title = "5c. Neumann − Dirichlet", + subtitle = "Neumann retains more concentration near boundary", + symmetric = TRUE, show_contours = FALSE, show_pts = FALSE) + + p5d) + +plot_annotation( + title = "Section 5: Boundary Condition Comparison", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +**Recommendation:** Use `"dirichlet"` for tissue sections. Neumann BCs are mainly useful when comparing to analytic solutions or modelling sealed compartments. + +--- + +# Section 6 — UMI weighting vs. equal weighting + +Each transcript can carry a weight (e.g. its UMI count) via the `weight_col` argument. This means a cell with 50 detected transcripts of a gene contributes 50× more source density than a cell with 1. + +```{r sec6, cache=TRUE} +res_uw <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, lambda = 0.3, method = "fd", fd_solver = "direct", + grid_resolution = 256L, weight_col = NULL, verbose = FALSE) + +res_wt <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_A, + D = 1, lambda = 0.3, method = "fd", fd_solver = "direct", + grid_resolution = 256L, weight_col = "umi", verbose = FALSE) + +# Normalise both for visual comparison +nrm <- function(r) { + f <- r$field + r$field <- (f - min(f, na.rm=TRUE)) / diff(range(f, na.rm=TRUE)) + r +} +``` + +```{r sec6-plot, fig.height=4} +plot_field(nrm(res_uw), tc_A, + title = "6a. Unweighted", + subtitle = "All transcripts equal weight", + fill_label = "Norm. Conc.") + +plot_field(nrm(res_wt), tc_A, + title = "6b. UMI-weighted", + subtitle = "High-UMI cluster source amplified", + palette = "plasma", + fill_label = "Norm. Conc.") + +plot_annotation( + title = "Section 6: UMI Weighting", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +**When to use UMI weighting:** When transcript counts meaningfully reflect expression level differences. Note that this amplifies both true signal and noise — noisy high-UMI outliers can dominate the field. Inspect your UMI distribution before applying weights. + +--- + +# Section 7 — External transcripts + +`include_external = TRUE` bins transcripts that fall outside the mask into the nearest inside-edge grid cell, allowing them to contribute to the field. This is useful when you have transcripts just outside the mask boundary (e.g. due to mask fitting inaccuracy) that you still want to include. + +```{r sec7, cache=TRUE} +tc_ext <- data.frame(x = rnorm(80, 6.5, 0.4), + y = rnorm(80, 0, 0.5), + umi = rpois(80, 10)) +ext_in <- as.logical(sf::st_within( + sf::st_as_sf(tc_ext, coords = c("x","y")), mu_t, sparse = FALSE)[, 1L]) +tc_ext <- tc_ext[!ext_in, ] # keep only those outside the mask +tc_Ax <- rbind(tc_A, tc_ext) + +res_ne <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_Ax, + D = 1, lambda = 0.15, method = "fd", fd_solver = "direct", + grid_resolution = 256L, include_external = FALSE, verbose = FALSE) + +res_we <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_Ax, + D = 1, lambda = 0.15, method = "fd", fd_solver = "direct", + grid_resolution = 256L, include_external = TRUE, verbose = FALSE) + +ext_pt_layer <- geom_point(data = tc_ext, aes(x = x, y = y), + color = "orange", size = 0.9, alpha = 0.8, + inherit.aes = FALSE) +``` + +```{r sec7-plot, fig.height=4} +(plot_field(res_ne, tc_A, title = "7a. include_external = FALSE") + + ext_pt_layer) + +(plot_field(res_we, tc_A, title = "7b. include_external = TRUE", + palette = "plasma") + + ext_pt_layer) + +plot_annotation( + title = "Section 7: External Transcripts (orange = outside mask)", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +--- + +# Section 8 — Holey mask: vascular clearance + +This section demonstrates a key strength of `method = "fd"`: it correctly handles holes (e.g. vessel lumens) within the tissue mask. The `fd` solver zeroes the concentration inside each hole, modelling vessels as clearance sinks. The `green` FFT method ignores topology entirely — it treats the holes as normal tissue. + +```{r sec8-mask, cache=TRUE} +vd <- list(list(cx = -2, cy = 1.5, r = 1.2), + list(cx = 2.5, cy = -2, r = 1.0), + list(cx = -4.5, cy = -2.5, r = 0.8)) +in_v <- function(x, y) Reduce(`|`, lapply(vd, function(v) + sqrt((x - v$cx)^2 + (y - v$cy)^2) < v$r)) + +n_h <- 3000 +cnd_h <- data.frame(x = runif(n_h * 8, -11, 11), + y = runif(n_h * 8, -9, 9)) +kp_h <- in_kidney(cnd_h$x, cnd_h$y) & !in_v(cnd_h$x, cnd_h$y) +pts_h <- cnd_h[kp_h, ][1:n_h, ] + +holey_mask <- fit_spatial_mask(pts_h, method = "raster", + raster_resolution = 256L, raster_sigma = 0.4, + raster_threshold = 0.2, verbose = FALSE) + +tc_h <- sample_in_mask(holey_mask, 400) +``` + +```{r sec8-solve, cache=TRUE} +res_hfd <- estimate_concentration_field( + mask = holey_mask, transcript_coords = tc_h, + D = 1, lambda = 0.15, method = "fd", fd_solver = "direct", + grid_resolution = 300L, verbose = FALSE) + +res_htight <- estimate_concentration_field( + mask = holey_mask, transcript_coords = tc_h, + D = 1, lambda = 1.0, method = "fd", fd_solver = "direct", + grid_resolution = 300L, verbose = FALSE) + +res_hgrn <- estimate_concentration_field( + mask = holey_mask, transcript_coords = tc_h, + D = 1, lambda = 0.15, method = "green", + grid_resolution = 300L, verbose = FALSE) + +res_hdiff <- res_hfd +res_hdiff$field <- res_hfd$field - res_hgrn$field +``` + +```{r sec8-plot, fig.height=8} +(plot_field(res_hfd, tc_h, + title = "8a. fd | L ≈ 2.58", + subtitle = "Vessel lumens correctly zeroed", + pt_size = 0.3) + + plot_field(res_htight, tc_h, + title = "8b. fd | L = 1 (tight)", + subtitle = "Steeper depletion near vessels", + palette = "inferno", pt_size = 0.3)) / +(plot_field(res_hgrn, tc_h, + title = "8c. green — topology blind", + subtitle = "FFT ignores holes entirely", + palette = "plasma", pt_size = 0.3) + + plot_field(res_hdiff, + title = "8d. fd − green", + subtitle = "fd zeros lumens; green does not", + symmetric = TRUE, show_contours = FALSE, + show_pts = FALSE)) + +plot_annotation( + title = "Section 8: Holey Mask — Vascular Clearance", + subtitle = "Use method='fd' whenever the mask has holes", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +> **Important:** If your tissue mask has holes (vessel lumens, necrotic cores), always use `method = "fd"`. The `green` and `kde` methods are unaware of domain topology. + +--- + +# Section 9 — Wide dynamic range and log-scale visualisation + +When transcript counts span several orders of magnitude (e.g. a dense focal cluster alongside sparse background), the linear concentration scale will be dominated by the peak and the background will appear flat. The `log1p` transform reveals structure at both ends of the dynamic range. + +```{r sec9, cache=TRUE} +tc_w <- rbind( + data.frame(x = rnorm(20, -5, 0.3), y = rnorm(20, 4, 0.3), umi = rpois(20, 200)), + data.frame(x = rnorm(300, 0, 4), y = rnorm(300, 0, 3), umi = rpois(300, 1))) +tc_w_in <- as.logical(sf::st_within( + sf::st_as_sf(tc_w[, c("x","y")], coords = c("x","y")), + mu_t, sparse = FALSE)[, 1L]) +tc_w <- tc_w[tc_w_in, ] + +res_w <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_w, + D = 1, lambda = 0.5, method = "fd", fd_solver = "direct", + grid_resolution = 256L, weight_col = "umi", verbose = FALSE) +``` + +```{r sec9-plot, fig.height=4} +plot_field(res_w, tc_w, + title = "9a. Linear scale", + subtitle = "Background invisible; cluster dominates", + pt_color = "white") + +plot_field(res_w, tc_w, + log_scale = TRUE, + title = "9b. log₁₊ scale", + subtitle = "Background and cluster both visible", + palette = "viridis", pt_color = "white") + +plot_annotation( + title = "Section 9: Wide Dynamic Range", + subtitle = "Use log_scale=TRUE when max/min UMI ratio > ~20", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +You can also apply the log transform inside the solver with `log_transform = TRUE`, which is equivalent but applies the transform before returning the field matrix. + +--- + +# Section 10 — Two-gene ratio map + +A **log₂(A/B) ratio map** shows where Gene A has higher concentration relative to Gene B, and vice versa. This is analogous to a log-fold-change map, but in space rather than between conditions. Blue regions favour B; yellow/red regions favour A; white is balanced. + +```{r sec10, cache=TRUE} +ph <- list(D = 1, lambda = 0.25, production_rate = 1, + method = "fd", fd_solver = "direct", + grid_resolution = 256L, verbose = FALSE) + +res_gA <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_A), ph)) +res_gB <- do.call(estimate_concentration_field, + c(list(mask = tissue_mask, transcript_coords = tc_B), ph)) + +res_rt <- res_gA +res_rt$field <- log2((res_gA$field + 1e-6) / (res_gB$field + 1e-6)) +``` + +```{r sec10-plot, fig.height=8} +p_ratio <- plot_field(res_rt, + title = "10c. log₂(A/B) ratio", + subtitle = "Red=A territory | Blue=B territory | White=balanced", + symmetric = TRUE, show_contours = TRUE, n_contours = 5, + show_pts = FALSE, fill_label = "log₂(A/B)") + + geom_point(data = tc_A, aes(x = x, y = y), + color = "#e74c3c", size = 0.3, alpha = 0.4, + inherit.aes = FALSE) + + geom_point(data = tc_B, aes(x = x, y = y), + color = "#27ae60", size = 0.3, alpha = 0.4, + inherit.aes = FALSE) + +p_dens <- ggplot(field_to_df(res_rt), aes(x = field, + fill = ifelse(field < 0, "B dominant", "A dominant"))) + + geom_density(alpha = 0.55) + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") + + scale_fill_manual(values = c("B dominant" = "#2980b9", + "A dominant" = "#c0392b"), + name = NULL) + + labs(title = "10d. Distribution of log₂(A/B)", + subtitle = "Fraction of tissue in each territory", + x = "log₂(A/B)", y = "Density") + + theme_demo() + +(plot_field(res_gA, tc_A, title = "10a. Gene A field") + + plot_field(res_gB, tc_B, title = "10b. Gene B field", palette = "viridis")) / +(p_ratio + p_dens) + +plot_annotation( + title = "Section 10: Two-Gene Ratio Map", + theme = theme(plot.title = element_text(face = "bold", size = 13))) +``` + +The `1e-6` pseudocount in the ratio calculation prevents division-by-zero and log(0) in regions where one gene has near-zero concentration. + +--- + +# Section 11 — 100k transcripts and solver timing + +This section benchmarks the three solvers on 100,000 transcripts at two grid resolutions. + +```{r sec11, cache=TRUE} +tc_100k <- sample_in_mask(tissue_mask, 100000) +cat(sprintf("Sampled %d transcripts.\n", nrow(tc_100k))) + +t1 <- system.time(r1 <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_100k, + D = 1, lambda = 0.3, method = "fd", fd_solver = "direct", + grid_resolution = 256L, verbose = TRUE)) + +t2 <- system.time(r2 <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_100k, + D = 1, lambda = 0.3, method = "fd", fd_solver = "iterative", + sor_omega = 1.75, grid_resolution = 512L, verbose = TRUE)) + +t3 <- system.time(r3 <- estimate_concentration_field( + mask = tissue_mask, transcript_coords = tc_100k, + D = 1, lambda = 0.3, method = "green", + grid_resolution = 512L, verbose = TRUE)) +``` + +```{r sec11-table} +data.frame( + Solver = c("fd/direct (N=256)", "fd/SOR (N=512)", "green/FFT (N=512)"), + Time_s = round(c(t1["elapsed"], t2["elapsed"], t3["elapsed"]), 2), + Has_BCs = c("Yes", "Yes", "No"), + Handles_holes = c("Yes", "Yes", "No"), + Notes = c("Sparse LU; recommended up to N=400", + "Iterative; use for N > 400", + "Fastest; infinite domain only") +) +``` + +```{r sec11-plot, fig.height=4} +sub <- tc_100k[sample(nrow(tc_100k), 2000), ] +plot_field(r1, sub, + title = sprintf("11a. fd/direct (%.1f s)", t1["elapsed"]), + subtitle = "N=256 | sparse LU", + pt_size = 0.15, pt_alpha = 0.2) + +plot_field(r2, sub, + title = sprintf("11b. fd/SOR (%.1f s)", t2["elapsed"]), + subtitle = "N=512 | iterative", + palette = "plasma", pt_size = 0.15, pt_alpha = 0.2) + +plot_field(r3, sub, + title = sprintf("11c. green/FFT (%.1f s)", t3["elapsed"]), + subtitle = "N=512 | FFT", + palette = "viridis", pt_size = 0.15, pt_alpha = 0.2) + +plot_annotation( + title = "Section 11: 100k Transcripts — Solver Comparison", + subtitle = "2,000 of 100,000 points shown", + theme = theme(plot.title = element_text(face = "bold", size = 13), + plot.subtitle = element_text(size = 9, color = "grey40"))) +``` + +### Solver selection guide + +| Situation | Recommended solver | +|---|---| +| N ≤ 400, holey mask, or exact BCs needed | `fd`, `fd_solver = "direct"` | +| N > 400, high-res output needed | `fd`, `fd_solver = "iterative"` (SOR) | +| Fast sweep over L values, no BCs | `"green"` | +| Quick visual check | `"kde"` | + +--- + +# Section 12 — Quantitative field extraction + +Beyond visualisation, the field output can be queried numerically. Here we demonstrate the main extraction operations. + +```{r sec12} +res_q <- res_fd # reuse the Gene A result from Section 2 + +# ---- Bilinear interpolation at arbitrary query points ---- +interpolate_field <- function(result, query_pts) { + xg <- result$x; yg <- result$y; f <- result$field + vapply(seq_len(nrow(query_pts)), function(i) { + qx <- query_pts$x[i]; qy <- query_pts$y[i] + xi <- findInterval(qx, xg); yi <- findInterval(qy, yg) + if (xi < 1 || xi >= length(xg) || yi < 1 || yi >= length(yg)) + return(NA_real_) + wx <- (qx - xg[xi]) / (xg[xi+1] - xg[xi]) + wy <- (qy - yg[yi]) / (yg[yi+1] - yg[yi]) + (1-wx)*(1-wy)*f[xi,yi] + wx*(1-wy)*f[xi+1,yi] + + (1-wx)*wy *f[xi,yi+1] + wx*wy *f[xi+1,yi+1] + }, numeric(1)) +} +``` + +```{r sec12-queries} +queries <- data.frame( + label = c("Cluster core", "Cluster edge", "Tissue centre", + "Far right", "Near notch"), + x = c(-4.5, -3.0, 0.0, 5.0, 3.0), + y = c( 3.8, 2.5, 0.0, 0.0, 0.0)) +queries$concentration <- interpolate_field(res_q, queries) +queries +``` + +```{r sec12-stats} +df_q <- field_to_df(res_q) + +# 12b: Mean concentration by tissue half +cat(sprintf("Mean concentration:\n Left half (x < 0): %.5f\n Right half (x ≥ 0): %.5f\n", + mean(df_q$field[df_q$x < 0 & !is.na(df_q$field)]), + mean(df_q$field[df_q$x >= 0 & !is.na(df_q$field)]))) + +# 12c: Peak location +pk <- which.max(res_q$field) +N <- length(res_q$x) +cat(sprintf("\nPeak: x=%.2f, y=%.2f, value=%.6f\n", + res_q$x[((pk-1L) %% N) + 1L], + res_q$y[((pk-1L) %/% N) + 1L], + res_q$field[pk])) + +# 12d: Field integral +total <- sum(res_q$field[res_q$mask], na.rm = TRUE) * res_q$hx * res_q$hy +theory <- nrow(tc_A) * 1.0 / 0.3 # n * production_rate / lambda +cat(sprintf("\nField integral: %.4f\nTheory (n·r/λ): %.4f\nAgreement: %.1f%%\n", + total, theory, 100 * (1 - abs(total - theory) / theory))) +``` + +```{r sec12-profile, fig.height=3.5} +prof <- df_q[abs(df_q$y) < res_q$hy * 1.5 & !is.na(df_q$field), ] +ggplot(prof[order(prof$x), ], aes(x = x, y = field)) + + geom_line(color = "#e74c3c", linewidth = 0.8) + + geom_area(fill = "#e74c3c", alpha = 0.15) + + geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") + + labs(title = "12e. Concentration profile along y ≈ 0", + subtitle = "Cluster peak at x ≈ −4.5; near-zero at right edge", + x = "X", y = "Concentration") + + theme_demo() +``` + +--- + +# Parameter quick-reference + +``` +estimate_concentration_field() — parameter guide +───────────────────────────────────────────────────────────────────── + +PHYSICS + diffusion_length Primary tuning knob. Set directly: L = 0.5 to 10 + Rule: L ~ 10–20% of inter-cluster distance + D Affects amplitude only (amplitude ∝ 1/D) + lambda Affects amplitude only; lambda = D / L^2 + production_rate Global amplitude scaling; use weight_col for per-cell + +SOLVER + method = "fd" Default. Use whenever you need accurate BCs or holes. + method = "green" Faster for sweeps; ignores domain shape. + method = "kde" Quick baseline; no physics. + + fd_solver = "direct" N ≤ 400 (sparse LU decomposition) + fd_solver = "iterative" N > 400 (red-black SOR) + + boundary_condition = "dirichlet" C=0 at boundary. Default. Tissue sections. + boundary_condition = "neumann" dC/dn=0. Sealed system. + + grid_resolution = 256 Explore. 512 for publication quality. + +WEIGHTS + weight_col = "umi" Use UMI counts as source strength. + Careful: amplifies outliers. + +POST-PROCESSING + log_transform = TRUE Apply log1p() after solving. Useful for wide dynamic range. + normalize = TRUE Rescale to [0,1] for comparison across conditions. + clip_negative = TRUE Remove small numerical negatives. Default on. +───────────────────────────────────────────────────────────────────── +``` + +--- + +# Session information + +```{r session-info} +sessionInfo() +``` diff --git a/spatial-modeling-with-claude/vignette_concentration_field.html b/spatial-modeling-with-claude/vignette_concentration_field.html new file mode 100644 index 0000000..83e4d19 --- /dev/null +++ b/spatial-modeling-with-claude/vignette_concentration_field.html @@ -0,0 +1,2928 @@ + + + + + + + + + + + + + + + +Estimating Steady-State Molecular Concentration Fields + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+
+
+
+ +
+ + + + + + + +
+
+

Overview

+

Spatial transcriptomics data gives us discrete transcript or cell +locations, but many biological processes — ligand gradients, cytokine +fields, metabolite concentrations — are continuous. +estimate_concentration_field() bridges this gap by +computing the steady-state concentration field that a +set of point sources (mRNA transcripts, detected proteins) would produce +under a physically motivated diffusion-clearance model.

+

The result is a dense concentration map over the tissue, interpolated +from point source locations. This can be used to:

+
    +
  • Identify where a signalling molecule is most concentrated
  • +
  • Compute the concentration seen by each cell
  • +
  • Create two-gene ratio maps (spatial domains of ligand/receptor +balance)
  • +
  • Visualise how diffusion length affects signal range
  • +
+
+
+
+

The physical model

+

The function solves the screened Poisson equation +(also called the steady-state diffusion-clearance PDE):

+

\[D \nabla^2 C(\mathbf{x}) - \lambda +C(\mathbf{x}) + s(\mathbf{x}) = 0\]

+

where:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SymbolMeaningParameter
\(C(\mathbf{x})\)Steady-state concentration at position x— (what we solve for)
\(D\)Diffusion coefficient (spatial spread rate)D
\(\lambda\)First-order clearance ratelambda
\(s(\mathbf{x})\)Source density (production rate per unit area)production_rate
+

The key derived quantity is the diffusion +length:

+

\[L = \sqrt{D / \lambda}\]

+

\(L\) sets the characteristic +distance over which concentration decays from a point source. It is the +single most important parameter to tune. The shape of the field depends +only on \(L\); the amplitude scales as +\(1/D\).

+
+

Practical interpretation: \(L\) is roughly the radius of influence of a +single transcript. At distance \(r \gg +L\) from a source, the concentration is negligible. At \(r \ll L\), concentration is near its +maximum.

+
+
+
+
+

Setup

+
library(sf)
+library(ggplot2)
+library(patchwork)
+source("fit_spatial_mask.R")
+source("estimate_concentration_field.R")
+
+set.seed(2024)
+
+
+
+

Shared helpers

+
# ---- Plot theme ----
+theme_demo <- function(bs = 9.5)
+  theme_minimal(base_size = bs) +
+  theme(plot.title    = element_text(face = "bold", size = bs + 0.5),
+        plot.subtitle = element_text(size = bs - 1, color = "grey40",
+                                     margin = margin(b = 4)),
+        panel.grid    = element_line(color = "grey94"),
+        legend.key.width  = unit(0.35, "cm"),
+        legend.title      = element_text(size = bs - 1),
+        legend.text       = element_text(size = bs - 1.5),
+        axis.text         = element_text(size = bs - 2))
+
+# ---- Field visualiser ----
+# Uses geom_raster (correct for continuous fields on regular grids).
+# show_contours overlays iso-concentration lines.
+plot_field <- function(result, transcripts = NULL,
+                       title = "", subtitle = "",
+                       palette = "magma", log_scale = FALSE,
+                       show_pts = TRUE, show_contours = TRUE, n_contours = 6L,
+                       pt_size = 0.35, pt_alpha = 0.45, pt_color = "#00e5ff",
+                       fill_label = "Conc.", symmetric = FALSE) {
+  df       <- field_to_df(result, inside_only = TRUE)
+  fill_col <- if (log_scale) { df$fv <- log1p(df$field); "fv" } else "field"
+  fill_lbl <- if (log_scale) paste0("log\u2081\u208a(", fill_label, ")") else fill_label
+  p <- ggplot(df, aes(x = x, y = y, fill = .data[[fill_col]])) +
+    geom_raster(interpolate = TRUE) + coord_equal() +
+    labs(title = title, subtitle = subtitle, x = "X", y = "Y") +
+    theme_demo()
+  if (symmetric) {
+    lim <- max(abs(df[[fill_col]]), na.rm = TRUE)
+    p   <- p + scale_fill_distiller(palette = "RdBu", limits = c(-lim, lim),
+                                     name = fill_lbl, na.value = "transparent")
+  } else {
+    p <- p + scale_fill_viridis_c(option = palette, name = fill_lbl,
+                                   na.value = "transparent")
+  }
+  if (show_contours && any(!is.na(df$field)))
+    p <- p + geom_contour(aes(z = field), color = "white",
+                          alpha = 0.35, linewidth = 0.25, bins = n_contours)
+  if (show_pts && !is.null(transcripts))
+    p <- p + geom_point(data = transcripts, aes(x = x, y = y),
+                        inherit.aes = FALSE, color = pt_color,
+                        size = pt_size, alpha = pt_alpha)
+  p
+}
+
+# ---- Sample points uniformly from inside a mask ----
+sample_in_mask <- function(mask_poly, n, max_tries = 20) {
+  bb  <- sf::st_bbox(mask_poly); mu <- sf::st_union(mask_poly)
+  pts <- data.frame(x = numeric(0), y = numeric(0))
+  for (i in seq_len(max_tries)) {
+    if (nrow(pts) >= n) break
+    cnd <- data.frame(x = runif(n * 8, bb["xmin"], bb["xmax"]),
+                      y = runif(n * 8, bb["ymin"], bb["ymax"]))
+    ok  <- as.logical(sf::st_within(
+      sf::st_as_sf(cnd, coords = c("x","y")), mu, sparse = FALSE)[, 1L])
+    pts <- rbind(pts, cnd[ok, ])
+  }
+  pts[seq_len(min(n, nrow(pts))), ]
+}
+
+
+
+

Function reference: estimate_concentration_field()

+
estimate_concentration_field(
+  mask,               # sfc polygon — output of fit_spatial_mask()
+  transcript_coords,  # data.frame with columns x and y (plus optional weight_col)
+
+  # Physics
+  D                  = 1.0,      # diffusion coefficient (arbitrary units)
+  lambda             = 0.1,      # first-order clearance rate
+  diffusion_length   = NULL,     # override: set L directly; NULL uses sqrt(D/lambda)
+  production_rate    = 1.0,      # multiplied by weights if weight_col is set
+
+  # Solver
+  method             = "fd",     # "fd" | "green" | "kde"
+  grid_resolution    = 256L,     # grid cells per axis (N × N)
+  boundary_condition = "dirichlet",  # "dirichlet" | "neumann"  (fd only)
+  fd_solver          = "direct", # "direct" | "iterative"  (fd only)
+
+  # Weighting
+  weight_col         = NULL,     # column name for per-transcript weights (e.g. "umi")
+
+  # Optional
+  include_external   = FALSE,    # include transcripts outside mask?
+  normalize          = FALSE,    # rescale field to [0, 1]?
+  log_transform      = FALSE,    # apply log1p() after solving?
+  clip_negative      = TRUE,     # set small negatives to 0?
+  n_cores            = 1L,
+  plot               = FALSE,
+  verbose            = TRUE
+)
+
+

Return value

+

A named list containing:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ElementContents
fieldN×N matrix; NA outside the mask
x, yGrid centre coordinates (length N each)
hx, hyGrid cell width and height
maskN×N logical matrix (TRUE = inside mask)
sourcesN×N matrix of binned source density
paramsList of all input parameters
diagnosticsTiming, counts, etc.
+

Use field_to_df(result) to convert to a tidy data frame +for ggplot.

+
+
+
+
+

Section 1 — Synthetic tissue and transcript layout

+

We build a kidney-bean-shaped tissue with two synthetic genes:

+
    +
  • Gene A: focal cluster at upper-left, plus sparse +background. High UMI count (focal source).
  • +
  • Gene B: 500 transcripts with a right-biased spatial +distribution. Low UMI count (diffuse source).
  • +
+
in_kidney <- function(x, y, oa = 9, ob = 7, ncx = 5, nr = 3.8)
+  (x/oa)^2 + (y/ob)^2 < 1 & sqrt((x - ncx)^2 + y^2) >= nr
+
+cands      <- data.frame(x = runif(18000, -11, 11), y = runif(18000, -9, 9))
+tissue_pts <- cands[in_kidney(cands$x, cands$y), ][1:3000, ]
+
+tissue_mask <- fit_spatial_mask(tissue_pts, method = "raster",
+  raster_resolution = 256L, raster_sigma = 0.7, raster_threshold = 0.15,
+  verbose = FALSE)
+
+mu_t <- sf::st_union(tissue_mask)
+
# Gene A: focal cluster at (-4.5, 3.8) + sparse background
+tc_A_cl <- data.frame(x = rnorm(350, -4.5, 1.2), y = rnorm(350, 3.8, 0.9))
+A_in    <- as.logical(sf::st_within(
+  sf::st_as_sf(tc_A_cl, coords = c("x","y")), mu_t, sparse = FALSE)[, 1L])
+tc_A_cl <- tc_A_cl[A_in, ]
+tc_A_sc <- sample_in_mask(tissue_mask, 50)
+tc_A    <- rbind(tc_A_cl, tc_A_sc)
+tc_A$umi <- c(rpois(nrow(tc_A_cl), 15), rpois(nrow(tc_A_sc), 2))
+
+# Gene B: right-biased random distribution
+tc_B_all <- sample_in_mask(tissue_mask, 2000)
+prob_B   <- plogis(tc_B_all$x / 3)
+tc_B     <- tc_B_all[runif(nrow(tc_B_all)) < prob_B, ][1:500, ]
+tc_B$umi <- rpois(nrow(tc_B), 5)
+
tissue_sf <- sf::st_sf(geometry = tissue_mask)
+
+ggplot() +
+  geom_sf(data = tissue_sf, fill = "grey92", color = "grey60", linewidth = 0.4) +
+  geom_point(data = tc_A, aes(x = x, y = y, color = "Gene A", size = log1p(umi)),
+             alpha = 0.6) +
+  geom_point(data = tc_B, aes(x = x, y = y, color = "Gene B", size = log1p(umi)),
+             alpha = 0.5) +
+  scale_color_manual(values = c("Gene A" = "#e74c3c", "Gene B" = "#2980b9"),
+                     name = NULL) +
+  scale_size_continuous(range = c(0.3, 2.5), name = "log(UMI+1)") +
+  coord_sf(datum = NA) + theme_demo() +
+  labs(title    = "Section 1: Synthetic kidney-bean tissue",
+       subtitle = "Gene A: focal upper-left | Gene B: broad right-biased",
+       x = "X", y = "Y")
+

+
+
+
+

Section 2 — Method comparison: fd, green, kde

+

Three solvers are available. Each makes different assumptions about +the domain:

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MethodHow it worksBoundary handlingBest for
"fd"Sparse finite-difference linear systemExact Dirichlet or Neumann BCsDefault; recommended
"green"FFT convolution with analytic Green’s function \(K_0(\kappa r)\)None — infinite domainFast interior estimates; quick sweeps
"kde"Gaussian kernel smoothingNonePhenomenological baseline
+
pb <- list(D = 1, lambda = 0.3, production_rate = 1,
+           grid_resolution = 256L, verbose = FALSE)
+
+res_fd    <- do.call(estimate_concentration_field,
+  c(list(mask = tissue_mask, transcript_coords = tc_A,
+         method = "fd", fd_solver = "direct",
+         boundary_condition = "dirichlet"), pb))
+
+res_green <- do.call(estimate_concentration_field,
+  c(list(mask = tissue_mask, transcript_coords = tc_A,
+         method = "green"), pb))
+
+res_kde   <- do.call(estimate_concentration_field,
+  c(list(mask = tissue_mask, transcript_coords = tc_A,
+         method = "kde", kde_bandwidth = sqrt(1/0.3)), pb))
+
+res_diff  <- res_fd
+res_diff$field <- res_fd$field - res_green$field
+
(plot_field(res_fd,    tc_A, title = "2a. fd (Dirichlet)",
+            subtitle = "Exact BCs; recommended") +
+ plot_field(res_green, tc_A, title = "2b. green (FFT)",
+            subtitle = "Infinite domain; no boundary effects")) /
+(plot_field(res_kde,   tc_A, title = "2c. kde",
+            subtitle = "Bandwidth = L; phenomenological") +
+ plot_field(res_diff, title = "2d. fd − green",
+            subtitle = "fd correctly suppresses near-edge concentration",
+            symmetric = TRUE, show_contours = FALSE, show_pts = FALSE)) +
+plot_annotation(
+  title = "Section 2: Solver Comparison (Gene A, D=1, λ=0.3, L≈1.83)",
+  theme = theme(plot.title = element_text(face = "bold", size = 13)))
+

+

Key observations:

+
    +
  • The fd and green maps look very similar in +the interior (panel 2d shows only small differences away from the +edges). This confirms that the physics is consistent between +methods.
  • +
  • The difference map (2d) shows that fd +correctly drives concentration to zero at the tissue boundary (Dirichlet +BC), while green ignores the boundary entirely. This +matters most when sources are near the edge.
  • +
  • kde produces a visually similar pattern to +fd because the bandwidth was set equal to \(L\), but it has no physical interpretation +and no BCs.
  • +
+

When to use method = "green": When you +are doing a rapid parameter sweep over many \(L\) values and don’t need exact boundary +conditions. It runs faster than fd at high resolution.

+
+
+
+

Section 3 — Diffusion length sweep: the most important +parameter

+

The diffusion length \(L = +\sqrt{D/\lambda}\) is the primary parameter to tune. It +determines how far each transcript’s signal spreads. Small \(L\): fields are tight and peaked around +clusters. Large \(L\): fields fill the +entire tissue.

+
L_vals <- c(0.5, 1.5, 4.0, 10.0)
+
+sw3 <- lapply(L_vals, function(Lv) {
+  res <- estimate_concentration_field(
+    mask = tissue_mask, transcript_coords = tc_A,
+    D = 1, diffusion_length = Lv, production_rate = 1,
+    method = "fd", fd_solver = "direct", grid_resolution = 256L,
+    verbose = FALSE)
+  # Normalise each panel independently for visual comparison
+  f <- res$field
+  res$field <- (f - min(f, na.rm = TRUE)) / diff(range(f, na.rm = TRUE))
+  plot_field(res, tc_A,
+             title      = sprintf("L = %.1f", Lv),
+             subtitle   = sprintf("λ ≈ %.3f", 1/Lv^2),
+             fill_label = "Norm. Conc.", pt_size = 0.4) +
+  scale_fill_viridis_c(option = "magma", name = "Norm.\nConc.",
+                        limits = c(0, 1), na.value = "transparent")
+})
+
wrap_plots(sw3, nrow = 1) +
+  plot_annotation(
+    title    = "Section 3: Diffusion Length Sweep (each panel normalised independently)",
+    subtitle = "Small L → tight peaks | Large L → tissue-wide field",
+    theme    = theme(plot.title    = element_text(face = "bold", size = 13),
+                     plot.subtitle = element_text(size = 9, color = "grey40")))
+

+
peaks <- sapply(L_vals, function(Lv) {
+  res <- estimate_concentration_field(
+    mask = tissue_mask, transcript_coords = tc_A,
+    D = 1, diffusion_length = Lv,
+    method = "fd", fd_solver = "direct", grid_resolution = 256L,
+    verbose = FALSE)
+  max(res$field, na.rm = TRUE)
+})
+data.frame(L = L_vals, peak_concentration = round(peaks, 5))
+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
Lpeak_concentration
0.510.96477
1.539.55711
4.078.01534
10.098.86167
+
+
+

How to choose L in practice

+

There is no single correct answer — the right \(L\) is biologically motivated. Ask:

+
    +
  1. What is the typical inter-cluster distance? \(L\) should be 10–20% of this distance if +you want fields that peak at sources and drop between them. Larger +values produce smoother, more “territorial” fields.
  2. +
  3. Are you modelling a small diffusing molecule or a large +one? Small molecules (e.g. metabolites, small cytokines) have +higher \(D\) and thus larger \(L\). Large molecules (e.g. ECM-bound +ligands) have small \(D\).
  4. +
  5. Does your field look biologically reasonable? +Inspect the output and ask whether the gradients match known +biology.
  6. +
+
+

Shortcut: set diffusion_length = L +directly to bypass the D/lambda decomposition. +The shape depends only on \(L\); \(D\) only affects amplitude.

+
+
+
+
+
+

Section 4 — Fixed L, varying D and lambda

+

This section makes the key mathematical point: field shape is +determined entirely by \(L\). +Changing \(D\) and \(\lambda\) while keeping \(L = \sqrt{D/\lambda}\) constant produces +identical spatial patterns, only scaling the amplitude by \(1/D\).

+
DL_cases <- list(
+  list(D = 0.1, lam = 0.025, label = "D=0.1, λ=0.025"),
+  list(D = 1,   lam = 0.25,  label = "D=1,   λ=0.25"),
+  list(D = 5,   lam = 1.25,  label = "D=5,   λ=1.25"),
+  list(D = 20,  lam = 5,     label = "D=20,  λ=5")
+)
+
+dl_pl <- lapply(DL_cases, function(co) {
+  res <- estimate_concentration_field(
+    mask = tissue_mask, transcript_coords = tc_A,
+    D = co$D, lambda = co$lam, production_rate = 1,
+    method = "fd", fd_solver = "direct", grid_resolution = 256L,
+    verbose = FALSE)
+  pk <- round(max(res$field, na.rm = TRUE), 5)
+  plot_field(res, tc_A,
+             title    = co$label,
+             subtitle = sprintf("L=2 fixed | peak=%.5f", pk),
+             pt_size  = 0.3)
+})
+
wrap_plots(dl_pl, nrow = 1) +
+  plot_annotation(
+    title    = "Section 4: Fixed L=2, Varying D and λ",
+    subtitle = "Shape IDENTICAL (same L). Amplitude ∝ 1/D.",
+    theme    = theme(plot.title    = element_text(face = "bold", size = 13),
+                     plot.subtitle = element_text(size = 9, color = "grey40")))
+

+

Implication for parameter setting: When fitting to +data, first determine the spatial extent of influence (\(L\)), then fix either \(D\) or \(\lambda\) based on biophysical priors. The +third parameter is then determined.

+
+
+
+

Section 5 — Boundary conditions

+

The fd method supports two boundary conditions:

+
    +
  • Dirichlet +(boundary_condition = "dirichlet"): \(C = 0\) at the tissue boundary. Physically: +molecules are absorbed or degrade instantly at the boundary. This is +appropriate for most spatial transcriptomics sections where the tissue +edge is a hard barrier.
  • +
  • Neumann +(boundary_condition = "neumann"): \(\partial C / \partial n = 0\) at the +boundary (zero normal flux). Physically: no molecules cross the boundary +— they are reflected back in. This is appropriate when modelling a +sealed container.
  • +
+
res_dir <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_A,
+  D = 1, lambda = 0.2, method = "fd", fd_solver = "direct",
+  boundary_condition = "dirichlet", grid_resolution = 256L, verbose = FALSE)
+
+res_neu <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_A,
+  D = 1, lambda = 0.2, method = "fd", fd_solver = "direct",
+  boundary_condition = "neumann", grid_resolution = 256L, verbose = FALSE)
+
+res_bcd <- res_dir
+res_bcd$field <- res_neu$field - res_dir$field
+
sl <- function(res, lbl) {
+  df <- field_to_df(res)
+  s  <- df[abs(df$y) < res$hy * 1.5 & !is.na(df$field), ]
+  s  <- s[order(s$x), ]; s$BC <- lbl; s
+}
+sl_all <- rbind(sl(res_dir, "Dirichlet"), sl(res_neu, "Neumann"))
+
+p5d <- ggplot(sl_all, aes(x = x, y = field, color = BC)) +
+  geom_line(linewidth = 0.8) +
+  scale_color_manual(values = c("Dirichlet" = "#e74c3c",
+                                 "Neumann"   = "#2980b9")) +
+  labs(title    = "5d. Cross-section at y ≈ 0",
+       subtitle = "Dirichlet drops to 0; Neumann stays elevated",
+       x = "X", y = "Conc.", color = "BC") +
+  theme_demo()
+
(plot_field(res_dir, tc_A,
+            title    = "5a. Dirichlet (C=0 at boundary)",
+            subtitle = "D=1, λ=0.2, L≈2.24") +
+ plot_field(res_neu, tc_A,
+            title    = "5b. Neumann (∂C/∂n=0)",
+            subtitle = "Molecules reflected; higher near edges",
+            palette  = "plasma")) /
+(plot_field(res_bcd,
+            title    = "5c. Neumann − Dirichlet",
+            subtitle = "Neumann retains more concentration near boundary",
+            symmetric = TRUE, show_contours = FALSE, show_pts = FALSE) +
+ p5d) +
+plot_annotation(
+  title = "Section 5: Boundary Condition Comparison",
+  theme = theme(plot.title = element_text(face = "bold", size = 13)))
+

+

Recommendation: Use "dirichlet" for +tissue sections. Neumann BCs are mainly useful when comparing to +analytic solutions or modelling sealed compartments.

+
+
+
+

Section 6 — UMI weighting vs. equal weighting

+

Each transcript can carry a weight (e.g. its UMI count) via the +weight_col argument. This means a cell with 50 detected +transcripts of a gene contributes 50× more source density than a cell +with 1.

+
res_uw <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_A,
+  D = 1, lambda = 0.3, method = "fd", fd_solver = "direct",
+  grid_resolution = 256L, weight_col = NULL, verbose = FALSE)
+
+res_wt <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_A,
+  D = 1, lambda = 0.3, method = "fd", fd_solver = "direct",
+  grid_resolution = 256L, weight_col = "umi", verbose = FALSE)
+
+# Normalise both for visual comparison
+nrm <- function(r) {
+  f <- r$field
+  r$field <- (f - min(f, na.rm=TRUE)) / diff(range(f, na.rm=TRUE))
+  r
+}
+
plot_field(nrm(res_uw), tc_A,
+           title      = "6a. Unweighted",
+           subtitle   = "All transcripts equal weight",
+           fill_label = "Norm. Conc.") +
+plot_field(nrm(res_wt), tc_A,
+           title      = "6b. UMI-weighted",
+           subtitle   = "High-UMI cluster source amplified",
+           palette    = "plasma",
+           fill_label = "Norm. Conc.") +
+plot_annotation(
+  title = "Section 6: UMI Weighting",
+  theme = theme(plot.title = element_text(face = "bold", size = 13)))
+

+

When to use UMI weighting: When transcript counts +meaningfully reflect expression level differences. Note that this +amplifies both true signal and noise — noisy high-UMI outliers can +dominate the field. Inspect your UMI distribution before applying +weights.

+
+
+
+

Section 7 — External transcripts

+

include_external = TRUE bins transcripts that fall +outside the mask into the nearest inside-edge grid cell, allowing them +to contribute to the field. This is useful when you have transcripts +just outside the mask boundary (e.g. due to mask fitting inaccuracy) +that you still want to include.

+
tc_ext <- data.frame(x = rnorm(80, 6.5, 0.4),
+                      y = rnorm(80, 0, 0.5),
+                      umi = rpois(80, 10))
+ext_in <- as.logical(sf::st_within(
+  sf::st_as_sf(tc_ext, coords = c("x","y")), mu_t, sparse = FALSE)[, 1L])
+tc_ext  <- tc_ext[!ext_in, ]   # keep only those outside the mask
+tc_Ax   <- rbind(tc_A, tc_ext)
+
+res_ne <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_Ax,
+  D = 1, lambda = 0.15, method = "fd", fd_solver = "direct",
+  grid_resolution = 256L, include_external = FALSE, verbose = FALSE)
+
+res_we <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_Ax,
+  D = 1, lambda = 0.15, method = "fd", fd_solver = "direct",
+  grid_resolution = 256L, include_external = TRUE, verbose = FALSE)
+
+ext_pt_layer <- geom_point(data = tc_ext, aes(x = x, y = y),
+                             color = "orange", size = 0.9, alpha = 0.8,
+                             inherit.aes = FALSE)
+
(plot_field(res_ne, tc_A, title = "7a. include_external = FALSE") +
+ ext_pt_layer) +
+(plot_field(res_we, tc_A, title = "7b. include_external = TRUE",
+            palette = "plasma") +
+ ext_pt_layer) +
+plot_annotation(
+  title = "Section 7: External Transcripts (orange = outside mask)",
+  theme = theme(plot.title = element_text(face = "bold", size = 13)))
+

+
+
+
+

Section 8 — Holey mask: vascular clearance

+

This section demonstrates a key strength of +method = "fd": it correctly handles holes (e.g. vessel +lumens) within the tissue mask. The fd solver zeroes the +concentration inside each hole, modelling vessels as clearance sinks. +The green FFT method ignores topology entirely — it treats +the holes as normal tissue.

+
vd    <- list(list(cx = -2,  cy =  1.5, r = 1.2),
+               list(cx =  2.5, cy = -2,   r = 1.0),
+               list(cx = -4.5, cy = -2.5, r = 0.8))
+in_v  <- function(x, y) Reduce(`|`, lapply(vd, function(v)
+  sqrt((x - v$cx)^2 + (y - v$cy)^2) < v$r))
+
+n_h   <- 3000
+cnd_h <- data.frame(x = runif(n_h * 8, -11, 11),
+                     y = runif(n_h * 8, -9,   9))
+kp_h  <- in_kidney(cnd_h$x, cnd_h$y) & !in_v(cnd_h$x, cnd_h$y)
+pts_h <- cnd_h[kp_h, ][1:n_h, ]
+
+holey_mask <- fit_spatial_mask(pts_h, method = "raster",
+  raster_resolution = 256L, raster_sigma = 0.4,
+  raster_threshold  = 0.2,  verbose = FALSE)
+
+tc_h <- sample_in_mask(holey_mask, 400)
+
res_hfd    <- estimate_concentration_field(
+  mask = holey_mask, transcript_coords = tc_h,
+  D = 1, lambda = 0.15, method = "fd", fd_solver = "direct",
+  grid_resolution = 300L, verbose = FALSE)
+
+res_htight <- estimate_concentration_field(
+  mask = holey_mask, transcript_coords = tc_h,
+  D = 1, lambda = 1.0, method = "fd", fd_solver = "direct",
+  grid_resolution = 300L, verbose = FALSE)
+
+res_hgrn   <- estimate_concentration_field(
+  mask = holey_mask, transcript_coords = tc_h,
+  D = 1, lambda = 0.15, method = "green",
+  grid_resolution = 300L, verbose = FALSE)
+
+res_hdiff  <- res_hfd
+res_hdiff$field <- res_hfd$field - res_hgrn$field
+
(plot_field(res_hfd,    tc_h,
+            title    = "8a. fd | L ≈ 2.58",
+            subtitle = "Vessel lumens correctly zeroed",
+            pt_size  = 0.3) +
+ plot_field(res_htight, tc_h,
+            title    = "8b. fd | L = 1 (tight)",
+            subtitle = "Steeper depletion near vessels",
+            palette  = "inferno", pt_size = 0.3)) /
+(plot_field(res_hgrn,  tc_h,
+            title    = "8c. green — topology blind",
+            subtitle = "FFT ignores holes entirely",
+            palette  = "plasma", pt_size = 0.3) +
+ plot_field(res_hdiff,
+            title    = "8d. fd − green",
+            subtitle = "fd zeros lumens; green does not",
+            symmetric = TRUE, show_contours = FALSE,
+            show_pts  = FALSE)) +
+plot_annotation(
+  title    = "Section 8: Holey Mask — Vascular Clearance",
+  subtitle = "Use method='fd' whenever the mask has holes",
+  theme    = theme(plot.title    = element_text(face = "bold", size = 13),
+                   plot.subtitle = element_text(size = 9, color = "grey40")))
+

+
+

Important: If your tissue mask has holes (vessel +lumens, necrotic cores), always use method = "fd". The +green and kde methods are unaware of domain +topology.

+
+
+
+
+

Section 9 — Wide dynamic range and log-scale visualisation

+

When transcript counts span several orders of magnitude (e.g. a dense +focal cluster alongside sparse background), the linear concentration +scale will be dominated by the peak and the background will appear flat. +The log1p transform reveals structure at both ends of the +dynamic range.

+
tc_w <- rbind(
+  data.frame(x = rnorm(20, -5, 0.3),  y = rnorm(20, 4, 0.3),  umi = rpois(20, 200)),
+  data.frame(x = rnorm(300,  0, 4),   y = rnorm(300, 0, 3),   umi = rpois(300, 1)))
+tc_w_in <- as.logical(sf::st_within(
+  sf::st_as_sf(tc_w[, c("x","y")], coords = c("x","y")),
+  mu_t, sparse = FALSE)[, 1L])
+tc_w <- tc_w[tc_w_in, ]
+
+res_w <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_w,
+  D = 1, lambda = 0.5, method = "fd", fd_solver = "direct",
+  grid_resolution = 256L, weight_col = "umi", verbose = FALSE)
+
plot_field(res_w, tc_w,
+           title    = "9a. Linear scale",
+           subtitle = "Background invisible; cluster dominates",
+           pt_color = "white") +
+plot_field(res_w, tc_w,
+           log_scale = TRUE,
+           title    = "9b. log₁₊ scale",
+           subtitle = "Background and cluster both visible",
+           palette  = "viridis", pt_color = "white") +
+plot_annotation(
+  title    = "Section 9: Wide Dynamic Range",
+  subtitle = "Use log_scale=TRUE when max/min UMI ratio > ~20",
+  theme    = theme(plot.title    = element_text(face = "bold", size = 13),
+                   plot.subtitle = element_text(size = 9, color = "grey40")))
+

+

You can also apply the log transform inside the solver with +log_transform = TRUE, which is equivalent but applies the +transform before returning the field matrix.

+
+
+
+

Section 10 — Two-gene ratio map

+

A log₂(A/B) ratio map shows where Gene A has higher +concentration relative to Gene B, and vice versa. This is analogous to a +log-fold-change map, but in space rather than between conditions. Blue +regions favour B; yellow/red regions favour A; white is balanced.

+
ph <- list(D = 1, lambda = 0.25, production_rate = 1,
+           method = "fd", fd_solver = "direct",
+           grid_resolution = 256L, verbose = FALSE)
+
+res_gA <- do.call(estimate_concentration_field,
+  c(list(mask = tissue_mask, transcript_coords = tc_A), ph))
+res_gB <- do.call(estimate_concentration_field,
+  c(list(mask = tissue_mask, transcript_coords = tc_B), ph))
+
+res_rt        <- res_gA
+res_rt$field  <- log2((res_gA$field + 1e-6) / (res_gB$field + 1e-6))
+
p_ratio <- plot_field(res_rt,
+  title    = "10c. log₂(A/B) ratio",
+  subtitle = "Red=A territory | Blue=B territory | White=balanced",
+  symmetric = TRUE, show_contours = TRUE, n_contours = 5,
+  show_pts = FALSE, fill_label = "log₂(A/B)") +
+  geom_point(data = tc_A, aes(x = x, y = y),
+             color = "#e74c3c", size = 0.3, alpha = 0.4,
+             inherit.aes = FALSE) +
+  geom_point(data = tc_B, aes(x = x, y = y),
+             color = "#27ae60", size = 0.3, alpha = 0.4,
+             inherit.aes = FALSE)
+
+p_dens <- ggplot(field_to_df(res_rt), aes(x = field,
+    fill = ifelse(field < 0, "B dominant", "A dominant"))) +
+  geom_density(alpha = 0.55) +
+  geom_vline(xintercept = 0, linetype = "dashed", color = "grey40") +
+  scale_fill_manual(values = c("B dominant" = "#2980b9",
+                                "A dominant" = "#c0392b"),
+                    name = NULL) +
+  labs(title    = "10d. Distribution of log₂(A/B)",
+       subtitle = "Fraction of tissue in each territory",
+       x = "log₂(A/B)", y = "Density") +
+  theme_demo()
+
+(plot_field(res_gA, tc_A, title = "10a. Gene A field") +
+ plot_field(res_gB, tc_B, title = "10b. Gene B field", palette = "viridis")) /
+(p_ratio + p_dens) +
+plot_annotation(
+  title = "Section 10: Two-Gene Ratio Map",
+  theme = theme(plot.title = element_text(face = "bold", size = 13)))
+

+

The 1e-6 pseudocount in the ratio calculation prevents +division-by-zero and log(0) in regions where one gene has near-zero +concentration.

+
+
+
+

Section 11 — 100k transcripts and solver timing

+

This section benchmarks the three solvers on 100,000 transcripts at +two grid resolutions.

+
tc_100k <- sample_in_mask(tissue_mask, 100000)
+cat(sprintf("Sampled %d transcripts.\n", nrow(tc_100k)))
+
## Sampled 100000 transcripts.
+
t1 <- system.time(r1 <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_100k,
+  D = 1, lambda = 0.3, method = "fd", fd_solver = "direct",
+  grid_resolution = 256L, verbose = TRUE))
+
## ============================================================
+##   estimate_concentration_field
+## ============================================================
+##   Method: fd | D: 1 | lambda: 0.3 | L: 1.82574 | N: 256 x 256
+##   BC: dirichlet | solver: direct
+## ------------------------------------------------------------
+##   Transcripts: 100000 used (0 external excluded)
+##   Rasterizing 256x256 grid...
+##   Inside-mask cells: 54302 (82.9%)
+##   Sparse system: 54302x54302, 270424 nnz
+##   Solve: 0.70s | Total: 1.32s | Range: [16.38, 986.3]
+## ============================================================
+
t2 <- system.time(r2 <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_100k,
+  D = 1, lambda = 0.3, method = "fd", fd_solver = "iterative",
+  sor_omega = 1.75, grid_resolution = 512L, verbose = TRUE))
+
## ============================================================
+##   estimate_concentration_field
+## ============================================================
+##   Method: fd | D: 1 | lambda: 0.3 | L: 1.82574 | N: 512 x 512
+##   BC: dirichlet | solver: iterative
+## ------------------------------------------------------------
+##   Transcripts: 100000 used (0 external excluded)
+##   Rasterizing 512x512 grid...
+##   Inside-mask cells: 217162 (82.8%)
+
##   Solve: 32.44s | Total: 33.85s | Range: [7.691, 916.7]
+## ============================================================
+
t3 <- system.time(r3 <- estimate_concentration_field(
+  mask = tissue_mask, transcript_coords = tc_100k,
+  D = 1, lambda = 0.3, method = "green",
+  grid_resolution = 512L, verbose = TRUE))
+
## ============================================================
+##   estimate_concentration_field
+## ============================================================
+##   Method: green | D: 1 | lambda: 0.3 | L: 1.82574 | N: 512 x 512
+## ------------------------------------------------------------
+##   Transcripts: 100000 used (0 external excluded)
+##   Rasterizing 512x512 grid...
+##   Inside-mask cells: 217162 (82.8%)
+##   Solve: 0.36s | Total: 1.73s | Range: [399.2, 1016]
+## ============================================================
+
data.frame(
+  Solver          = c("fd/direct (N=256)", "fd/SOR (N=512)", "green/FFT (N=512)"),
+  Time_s          = round(c(t1["elapsed"], t2["elapsed"], t3["elapsed"]), 2),
+  Has_BCs         = c("Yes", "Yes", "No"),
+  Handles_holes   = c("Yes", "Yes", "No"),
+  Notes           = c("Sparse LU; recommended up to N=400",
+                       "Iterative; use for N > 400",
+                       "Fastest; infinite domain only")
+)
+
+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SolverTime_sHas_BCsHandles_holesNotes
fd/direct (N=256)1.32YesYesSparse LU; recommended up to N=400
fd/SOR (N=512)33.85YesYesIterative; use for N > 400
green/FFT (N=512)1.73NoNoFastest; infinite domain only
+
+
sub <- tc_100k[sample(nrow(tc_100k), 2000), ]
+plot_field(r1, sub,
+           title    = sprintf("11a. fd/direct (%.1f s)", t1["elapsed"]),
+           subtitle = "N=256 | sparse LU",
+           pt_size = 0.15, pt_alpha = 0.2) +
+plot_field(r2, sub,
+           title    = sprintf("11b. fd/SOR (%.1f s)", t2["elapsed"]),
+           subtitle = "N=512 | iterative",
+           palette  = "plasma", pt_size = 0.15, pt_alpha = 0.2) +
+plot_field(r3, sub,
+           title    = sprintf("11c. green/FFT (%.1f s)", t3["elapsed"]),
+           subtitle = "N=512 | FFT",
+           palette  = "viridis", pt_size = 0.15, pt_alpha = 0.2) +
+plot_annotation(
+  title    = "Section 11: 100k Transcripts — Solver Comparison",
+  subtitle = "2,000 of 100,000 points shown",
+  theme    = theme(plot.title    = element_text(face = "bold", size = 13),
+                   plot.subtitle = element_text(size = 9, color = "grey40")))
+

+
+

Solver selection guide

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + +
SituationRecommended solver
N ≤ 400, holey mask, or exact BCs neededfd, fd_solver = "direct"
N > 400, high-res output neededfd, fd_solver = "iterative" (SOR)
Fast sweep over L values, no BCs"green"
Quick visual check"kde"
+
+
+
+
+

Section 12 — Quantitative field extraction

+

Beyond visualisation, the field output can be queried numerically. +Here we demonstrate the main extraction operations.

+
res_q <- res_fd   # reuse the Gene A result from Section 2
+
+# ---- Bilinear interpolation at arbitrary query points ----
+interpolate_field <- function(result, query_pts) {
+  xg <- result$x; yg <- result$y; f <- result$field
+  vapply(seq_len(nrow(query_pts)), function(i) {
+    qx <- query_pts$x[i]; qy <- query_pts$y[i]
+    xi <- findInterval(qx, xg); yi <- findInterval(qy, yg)
+    if (xi < 1 || xi >= length(xg) || yi < 1 || yi >= length(yg))
+      return(NA_real_)
+    wx <- (qx - xg[xi]) / (xg[xi+1] - xg[xi])
+    wy <- (qy - yg[yi]) / (yg[yi+1] - yg[yi])
+    (1-wx)*(1-wy)*f[xi,yi]   + wx*(1-wy)*f[xi+1,yi] +
+    (1-wx)*wy   *f[xi,yi+1] + wx*wy    *f[xi+1,yi+1]
+  }, numeric(1))
+}
+
queries <- data.frame(
+  label = c("Cluster core", "Cluster edge", "Tissue centre",
+             "Far right", "Near notch"),
+  x = c(-4.5, -3.0, 0.0,  5.0, 3.0),
+  y = c( 3.8,  2.5, 0.0,  0.0, 0.0))
+queries$concentration <- interpolate_field(res_q, queries)
+queries
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
labelxyconcentration
Cluster core-4.53.846.4145453
Cluster edge-3.02.522.7933802
Tissue centre0.00.02.1617289
Far right5.00.0NA
Near notch3.00.00.7897652
+
+
df_q <- field_to_df(res_q)
+
+# 12b: Mean concentration by tissue half
+cat(sprintf("Mean concentration:\n  Left half  (x < 0): %.5f\n  Right half (x ≥ 0): %.5f\n",
+    mean(df_q$field[df_q$x <  0 & !is.na(df_q$field)]),
+    mean(df_q$field[df_q$x >= 0 & !is.na(df_q$field)])))
+
## Mean concentration:
+##   Left half  (x < 0): 6.15321
+##   Right half (x ≥ 0): 0.53608
+
# 12c: Peak location
+pk <- which.max(res_q$field)
+N  <- length(res_q$x)
+cat(sprintf("\nPeak: x=%.2f, y=%.2f, value=%.6f\n",
+    res_q$x[((pk-1L) %% N) + 1L],
+    res_q$y[((pk-1L) %/% N) + 1L],
+    res_q$field[pk]))
+
## 
+## Peak: x=-4.27, y=3.88, value=47.085967
+
# 12d: Field integral
+total  <- sum(res_q$field[res_q$mask], na.rm = TRUE) * res_q$hx * res_q$hy
+theory <- nrow(tc_A) * 1.0 / 0.3   # n * production_rate / lambda
+cat(sprintf("\nField integral: %.4f\nTheory (n·r/λ): %.4f\nAgreement: %.1f%%\n",
+    total, theory, 100 * (1 - abs(total - theory) / theory)))
+
## 
+## Field integral: 1103.1321
+## Theory (n·r/λ): 1333.3333
+## Agreement: 82.7%
+
prof <- df_q[abs(df_q$y) < res_q$hy * 1.5 & !is.na(df_q$field), ]
+ggplot(prof[order(prof$x), ], aes(x = x, y = field)) +
+  geom_line(color = "#e74c3c", linewidth = 0.8) +
+  geom_area(fill = "#e74c3c", alpha = 0.15) +
+  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
+  labs(title    = "12e. Concentration profile along y ≈ 0",
+       subtitle = "Cluster peak at x ≈ −4.5; near-zero at right edge",
+       x = "X", y = "Concentration") +
+  theme_demo()
+

+
+
+
+

Parameter quick-reference

+
estimate_concentration_field() — parameter guide
+─────────────────────────────────────────────────────────────────────
+
+PHYSICS
+  diffusion_length  Primary tuning knob. Set directly: L = 0.5 to 10
+                    Rule: L ~ 10–20% of inter-cluster distance
+  D                 Affects amplitude only (amplitude ∝ 1/D)
+  lambda            Affects amplitude only; lambda = D / L^2
+  production_rate   Global amplitude scaling; use weight_col for per-cell
+
+SOLVER
+  method = "fd"     Default. Use whenever you need accurate BCs or holes.
+  method = "green"  Faster for sweeps; ignores domain shape.
+  method = "kde"    Quick baseline; no physics.
+
+  fd_solver = "direct"     N ≤ 400  (sparse LU decomposition)
+  fd_solver = "iterative"  N > 400  (red-black SOR)
+
+  boundary_condition = "dirichlet"  C=0 at boundary. Default. Tissue sections.
+  boundary_condition = "neumann"    dC/dn=0. Sealed system.
+
+  grid_resolution = 256   Explore. 512 for publication quality.
+
+WEIGHTS
+  weight_col = "umi"   Use UMI counts as source strength.
+                        Careful: amplifies outliers.
+
+POST-PROCESSING
+  log_transform = TRUE    Apply log1p() after solving. Useful for wide dynamic range.
+  normalize = TRUE        Rescale to [0,1] for comparison across conditions.
+  clip_negative = TRUE    Remove small numerical negatives. Default on.
+─────────────────────────────────────────────────────────────────────
+
+
+
+

Session information

+
sessionInfo()
+
## R version 4.5.2 (2025-10-31)
+## Platform: aarch64-apple-darwin20
+## Running under: macOS Sonoma 14.6.1
+## 
+## Matrix products: default
+## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
+## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
+## 
+## locale:
+## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+## 
+## time zone: America/New_York
+## tzcode source: internal
+## 
+## attached base packages:
+## [1] stats     graphics  grDevices utils     datasets  methods   base     
+## 
+## other attached packages:
+## [1] patchwork_1.3.2 ggplot2_4.0.1   sf_1.0-21      
+## 
+## loaded via a namespace (and not attached):
+##  [1] sass_0.4.10        generics_0.1.4     class_7.3-23       KernSmooth_2.23-26
+##  [5] lattice_0.22-7     digest_0.6.39      magrittr_2.0.4     evaluate_1.0.5    
+##  [9] grid_4.5.2         RColorBrewer_1.1-3 fastmap_1.2.0      Matrix_1.7-4      
+## [13] jsonlite_2.0.0     e1071_1.7-16       DBI_1.2.3          viridisLite_0.4.2 
+## [17] scales_1.4.0       codetools_0.2-20   isoband_0.3.0      jquerylib_0.1.4   
+## [21] cli_3.6.5          rlang_1.1.7        units_0.8-7        withr_3.0.2       
+## [25] cachem_1.1.0       yaml_2.3.12        otel_0.2.0         tools_4.5.2       
+## [29] concaveman_1.2.0   dplyr_1.1.4        vctrs_0.7.1        R6_2.6.1          
+## [33] proxy_0.4-27       lifecycle_1.0.5    classInt_0.4-11    MASS_7.3-65       
+## [37] pkgconfig_2.0.3    pillar_1.11.1      bslib_0.10.0       gtable_0.3.6      
+## [41] glue_1.8.0         Rcpp_1.1.1         xfun_0.56          tibble_3.3.1      
+## [45] tidyselect_1.2.1   rstudioapi_0.17.1  knitr_1.51         dichromat_2.0-0.1 
+## [49] farver_2.1.2       htmltools_0.5.9    rmarkdown_2.30     labeling_0.4.3    
+## [53] compiler_4.5.2     S7_0.2.1
+
+ + + +
+
+ +
+ + + + + + + + + + + + + + + + + diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec1-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec1-plot-1.png new file mode 100644 index 0000000..0408397 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec1-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec10-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec10-plot-1.png new file mode 100644 index 0000000..4d75c93 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec10-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec11-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec11-plot-1.png new file mode 100644 index 0000000..0f0e21e Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec11-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec12-profile-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec12-profile-1.png new file mode 100644 index 0000000..b52cce3 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec12-profile-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec2-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec2-plot-1.png new file mode 100644 index 0000000..d7799ec Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec2-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec3-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec3-plot-1.png new file mode 100644 index 0000000..15407c8 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec3-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec4-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec4-plot-1.png new file mode 100644 index 0000000..0e4fcfe Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec4-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec5-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec5-plot-1.png new file mode 100644 index 0000000..f9ddb61 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec5-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec6-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec6-plot-1.png new file mode 100644 index 0000000..f71ca08 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec6-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec7-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec7-plot-1.png new file mode 100644 index 0000000..065b7af Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec7-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec8-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec8-plot-1.png new file mode 100644 index 0000000..9428ce0 Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec8-plot-1.png differ diff --git a/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec9-plot-1.png b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec9-plot-1.png new file mode 100644 index 0000000..9c1083a Binary files /dev/null and b/spatial-modeling-with-claude/vignette_concentration_field_files/figure-html/sec9-plot-1.png differ