diff --git a/DESCRIPTION b/DESCRIPTION index 5b05a50..e3ddc0e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: fresh Title: Freshwater Referenced Spatial Hydrology -Version: 0.21.0 +Version: 0.22.0 Authors@R: c( person("Allan", "Irvine", , "al@newgraphenvironment.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-3495-2128")), diff --git a/NEWS.md b/NEWS.md index e63b8db..0a86f3f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,16 @@ +# fresh 0.22.0 + +`frs_habitat_overlay()` simplified — drop `format` and `long_value_col` parameters; accept only the canonical source-table shape ([#177](https://github.com/NewGraphEnvironment/fresh/issues/177)). + +- **Canonical shape**: one row per (segment × species), with join keys in `by`, the species code in `species_col` (default `"species_code"`), and one indicator column per habitat type. Indicator coercion accepts integer 1, text `'true'`/`'t'`/`'1'` (case + whitespace insensitive), boolean. +- **Dropped paths** (breaking, pre-1.0): `format = "wide"` per-species-suffix layout (`spawning_sk`, `rearing_sk`) and `format = "long"` (`habitat_type` rows + `habitat_ind` indicator). Neither had current production consumers — the wide-suffix layout was scoped for direct reads of `bcfishpass.streams_habitat_known` (never integrated); the long format was link's read of bcfishpass's pre-2026-04-26 CSV (bcfishpass moved to a different shape on 2026-04-26). +- **New parameter**: `species_col` (default `"species_code"`). Was added in PR #176's first attempt as an additive bolt-on; this release lands it as the only path. +- **Non-canonical sources**: transform first via a SQL view, R pivot, or upstream adapter (e.g., link's forthcoming `lnk_ingest_bcfishpass()`), then call overlay against the canonical-shape view. Shape-translation lives with the consumer; fresh stays a thin SQL adapter. +- **Bridge mode** (`bridge = NULL`) unchanged — orthogonal to source shape. +- Tests: dropped wide-suffix and long-format paths; canonical-shape integration tests exercise integer + text + boolean indicators, additive guard, custom `species_col`, custom `by`, and bridge mode. + +Coordinated link release (0.12.0) updates the call site in `lnk_pipeline_classify`. + # fresh 0.21.0 `frs_habitat_overlay()` rename + 3-way bridge join. Pre-1.0 cleanup driven by review of v0.20.0; no deprecation alias. diff --git a/R/frs_habitat_overlay.R b/R/frs_habitat_overlay.R index 9a545d4..7badbdd 100644 --- a/R/frs_habitat_overlay.R +++ b/R/frs_habitat_overlay.R @@ -8,26 +8,35 @@ #' the mechanism is generic: any boolean-flagged source over any #' boolean-flagged target. #' -#' Two source-table shapes (`format`): +#' ## Source-table shape #' -#' - **`"wide"`** — one row per segment, columns named -#' `{habitat_type}_{species_lower}` (e.g. `spawning_sk`). Boolean. -#' Matches the bcfishpass `streams_habitat_known` convention. -#' - **`"long"`** — one row per (segment × species × habitat_type), -#' with `species_code`, `habitat_type`, and an indicator column -#' (`long_value_col`, default `habitat_ind`). Indicator can be -#' boolean or text (`'TRUE'`/`'true'`/`'t'` case + whitespace -#' insensitive). Matches link's `user_habitat_classification` -#' table. +#' One row per (segment × species). Each row has: #' -#' Two join modes (`bridge`): +#' - the join keys named in `by` (default `c("blue_line_key", +#' "downstream_route_measure")`) +#' - a column carrying the species code (named in `species_col`, +#' default `"species_code"`) +#' - one column per habitat type (named in `habitat_types`, default +#' `c("spawning", "rearing", "lake_rearing", "wetland_rearing")`) +#' +#' Indicator columns can be integer (`1` truthy, `0`/`NULL` falsy), +#' text (`'true'`/`'t'`/`'1'` truthy, anything else falsy, case + +#' whitespace insensitive), or boolean. +#' +#' Sources in other shapes — bcfishpass's pre-2026-04-26 long format +#' (`habitat_type` rows + `habitat_ind` indicator), or the +#' per-species-suffixed wide layout (`spawning_sk`, `rearing_sk`) — +#' transform first via a SQL view or `data-raw/` script, then call +#' overlay. Shape-translation lives with the consumer. +#' +#' ## Two join modes (`bridge`) #' #' - **Direct (`bridge = NULL`)** — the `to` table has the join keys #' directly. SQL does `to. = from.` (point match). #' - **Bridged (`bridge = ""`)** — the `to` table is #' keyed by `id_segment` (e.g. `fresh.streams_habitat`) and lacks #' the geographic keys in `by`. The bridge table provides the -#' link, with id_segment + range columns. SQL does a 3-way join: +#' link, with `id_segment` + range columns. SQL does a 3-way join: #' #' ``` #' to.id_segment = bridge.id_segment @@ -47,7 +56,8 @@ #' #' @param conn A [DBI::DBIConnection-class] object. #' @param from Character. Schema-qualified source table providing -#' the flags to overlay. Wide- or long-format per `format`. +#' the flags to overlay. Must follow the canonical shape — see +#' "Source-table shape" above. #' @param to Character. Schema-qualified destination table to UPDATE #' in place. Must have boolean columns named in `habitat_types` #' plus a `species_code` column. Either has the join keys (`by`) @@ -65,16 +75,14 @@ #' (default) processes every species code present in `to`. #' @param habitat_types Character vector. Habitat-type columns to OR #' in. Defaults to the four standard ones: `c("spawning", -#' "rearing", "lake_rearing", "wetland_rearing")`. Must be a -#' subset of the columns present in `to`. +#' "rearing", "lake_rearing", "wetland_rearing")`. Each must be +#' present in both `to` (as a boolean column) and `from` (as a +#' per-row indicator column). #' @param by Character vector. Columns used to match `from` to either #' `to` (when `bridge = NULL`) or to `bridge` (when bridge supplied). #' Default `c("blue_line_key", "downstream_route_measure")`. -#' @param format Character. `"wide"` (default) or `"long"`. -#' @param long_value_col Character. For `format = "long"`, the column -#' name in `from` that holds the indicator. Default -#' `"habitat_ind"`. Accepts boolean or `'true'`/`'t'` text -#' (case + whitespace insensitive). +#' @param species_col Character. Name of the column in `from` carrying +#' the species code per row. Default `"species_code"`. #' @param verbose Logical. Print per-species per-habitat summary. #' Default `TRUE`. #' @@ -88,15 +96,30 @@ #' # Direct join (target has the keys): #' frs_habitat_overlay(conn, #' from = "ws.user_habitat_classification", -#' to = "ws.streams_habitat_keyed", -#' format = "long") +#' to = "ws.streams_habitat_keyed") #' #' # Bridged join (target is fresh.streams_habitat, keyed by id_segment): #' frs_habitat_overlay(conn, #' from = "ws.user_habitat_classification", #' to = "fresh.streams_habitat", -#' bridge = "fresh.streams", -#' format = "long") +#' bridge = "fresh.streams") +#' +#' # Source uses a non-canonical shape (e.g. legacy long format): +#' # transform first via a SQL view, then overlay against the view. +#' DBI::dbExecute(conn, " +#' CREATE OR REPLACE VIEW ws.uhc_canonical AS +#' SELECT blue_line_key, downstream_route_measure, +#' upstream_route_measure, species_code, +#' MAX(CASE WHEN habitat_type = 'spawning' +#' THEN habitat_ind::text END) AS spawning, +#' MAX(CASE WHEN habitat_type = 'rearing' +#' THEN habitat_ind::text END) AS rearing +#' FROM ws.user_habitat_classification_long +#' GROUP BY 1,2,3,4") +#' frs_habitat_overlay(conn, +#' from = "ws.uhc_canonical", +#' to = "fresh.streams_habitat", +#' bridge = "fresh.streams") #' } frs_habitat_overlay <- function(conn, from, to, bridge = NULL, @@ -104,12 +127,9 @@ frs_habitat_overlay <- function(conn, from, to, habitat_types = c("spawning", "rearing", "lake_rearing", "wetland_rearing"), by = c("blue_line_key", "downstream_route_measure"), - format = c("wide", "long"), - long_value_col = "habitat_ind", + species_col = "species_code", verbose = TRUE) { - format <- match.arg(format) - # --- Argument validation --- stopifnot( inherits(conn, "DBIConnection"), @@ -117,7 +137,7 @@ frs_habitat_overlay <- function(conn, from, to, is.character(to), length(to) == 1L, nchar(to) > 0, is.character(habitat_types), length(habitat_types) > 0, is.character(by), length(by) > 0, - is.character(long_value_col), length(long_value_col) == 1L + is.character(species_col), length(species_col) == 1L, nchar(species_col) > 0 ) if (!is.null(bridge)) { stopifnot(is.character(bridge), length(bridge) == 1L, nchar(bridge) > 0) @@ -125,7 +145,7 @@ frs_habitat_overlay <- function(conn, from, to, } .frs_validate_identifier(from, "from") .frs_validate_identifier(to, "to") - .frs_validate_identifier(long_value_col, "long_value_col") + .frs_validate_identifier(species_col, "species_col") for (b in by) .frs_validate_identifier(b, "by column") for (h in habitat_types) .frs_validate_identifier(h, "habitat_types entry") if (!is.null(species)) { @@ -137,8 +157,7 @@ frs_habitat_overlay <- function(conn, from, to, } } - # Validate habitat_types are columns in `to` so we don't UPDATE - # mid-loop and crash on a missing column halfway through. + # --- Validate target columns --- to_parts <- strsplit(to, "\\.", fixed = FALSE)[[1]] if (length(to_parts) != 2L) { stop("`to` must be schema-qualified (e.g. 'working.streams_habitat')", @@ -167,7 +186,7 @@ frs_habitat_overlay <- function(conn, from, to, } } - # --- Discover from-table columns --- + # --- Validate source columns --- from_parts <- strsplit(from, "\\.", fixed = FALSE)[[1]] if (length(from_parts) != 2L) { stop("`from` must be schema-qualified (e.g. 'working.user_habitat_classification')", @@ -182,19 +201,15 @@ frs_habitat_overlay <- function(conn, from, to, stop(sprintf("from table %s not found or has no columns", from), call. = FALSE) } - - # --- Long-format: validate required columns up front --- - if (format == "long") { - required_long <- c(by, "species_code", "habitat_type", long_value_col) - missing_long <- setdiff(required_long, from_cols) - if (length(missing_long) > 0) { - stop(sprintf( - "long-format `from` table %s missing required columns: %s", - from, paste(missing_long, collapse = ", ")), call. = FALSE) - } + required <- c(by, species_col, habitat_types) + missing_required <- setdiff(required, from_cols) + if (length(missing_required) > 0) { + stop(sprintf( + "`from` table %s missing required columns: %s", + from, paste(missing_required, collapse = ", ")), call. = FALSE) } - # --- Bridge validation: must have id_segment + by + range columns --- + # --- Validate bridge if provided --- if (!is.null(bridge)) { bridge_parts <- strsplit(bridge, "\\.", fixed = FALSE)[[1]] if (length(bridge_parts) != 2L) { @@ -218,15 +233,12 @@ frs_habitat_overlay <- function(conn, from, to, # --- Build join clause once --- if (is.null(bridge)) { - # Direct: to ↔ from on by columns from_clause <- sprintf("FROM %s AS k", from) join_pred <- paste(sprintf("h.%s = k.%s", by, by), collapse = " AND ") } else { # 3-way: to.id_segment = bridge.id_segment + bridge ranges contain from. # Range columns are handled by the >= / <= predicates below; strip - # them from the equality `by` clause so we don't double-constrain - # (point match on drm would always fail when the from range is - # wider than the bridge segment). + # them from the equality `by` clause so we don't double-constrain. range_cols <- c("downstream_route_measure", "upstream_route_measure") by_eq <- setdiff(by, range_cols) if (length(by_eq) == 0) { @@ -244,48 +256,21 @@ frs_habitat_overlay <- function(conn, from, to, # --- OR in flags per (habitat_type, species) --- total_updates <- 0L for (sp in species) { - sp_lower <- tolower(sp) for (hab in habitat_types) { - - sql <- if (format == "wide") { - col <- paste0(hab, "_", sp_lower) - if (!col %in% from_cols) { - if (verbose) { - cat(sprintf(" skip %s/%s (no column `%s` in %s)\n", - sp, hab, col, from)) - } - next - } - sprintf( - "UPDATE %s AS h - SET %s = TRUE - %s - WHERE %s - AND h.species_code = %s - AND k.%s IS TRUE - AND (h.%s IS NULL OR h.%s = FALSE)", - to, hab, from_clause, join_pred, - .frs_quote_string(sp), - col, hab, hab) - } else { - # long format - sprintf( - "UPDATE %s AS h - SET %s = TRUE - %s - WHERE %s - AND h.species_code = %s - AND k.species_code = %s - AND k.habitat_type = %s - AND (lower(trim(k.%s::text)) IN ('true', 't')) - AND (h.%s IS NULL OR h.%s = FALSE)", - to, hab, from_clause, join_pred, - .frs_quote_string(sp), - .frs_quote_string(sp), - .frs_quote_string(hab), - long_value_col, - hab, hab) - } + sql <- sprintf( + "UPDATE %s AS h + SET %s = TRUE + %s + WHERE %s + AND h.species_code = %s + AND k.%s = %s + AND lower(trim(k.%s::text)) IN ('true', 't', '1') + AND (h.%s IS NULL OR h.%s = FALSE)", + to, hab, from_clause, join_pred, + .frs_quote_string(sp), + species_col, .frs_quote_string(sp), + hab, + hab, hab) n <- .frs_db_execute(conn, sql) total_updates <- total_updates + n diff --git a/man/frs_aggregate.Rd b/man/frs_aggregate.Rd index 556536b..e76895b 100644 --- a/man/frs_aggregate.Rd +++ b/man/frs_aggregate.Rd @@ -156,7 +156,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_break.Rd b/man/frs_break.Rd index 781398a..f7e24e1 100644 --- a/man/frs_break.Rd +++ b/man/frs_break.Rd @@ -145,7 +145,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_break_apply.Rd b/man/frs_break_apply.Rd index c3fde77..1f83256 100644 --- a/man/frs_break_apply.Rd +++ b/man/frs_break_apply.Rd @@ -108,7 +108,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_break_find.Rd b/man/frs_break_find.Rd index 18364d3..0c6846d 100644 --- a/man/frs_break_find.Rd +++ b/man/frs_break_find.Rd @@ -118,7 +118,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_break_validate.Rd b/man/frs_break_validate.Rd index 445e84a..aeb5964 100644 --- a/man/frs_break_validate.Rd +++ b/man/frs_break_validate.Rd @@ -87,7 +87,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_categorize.Rd b/man/frs_categorize.Rd index faf0a1d..aa8f45a 100644 --- a/man/frs_categorize.Rd +++ b/man/frs_categorize.Rd @@ -75,7 +75,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_classify.Rd b/man/frs_classify.Rd index 123b452..6489a5e 100644 --- a/man/frs_classify.Rd +++ b/man/frs_classify.Rd @@ -201,7 +201,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_cluster.Rd b/man/frs_cluster.Rd index 9c4c393..d283054 100644 --- a/man/frs_cluster.Rd +++ b/man/frs_cluster.Rd @@ -151,7 +151,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_col_generate.Rd b/man/frs_col_generate.Rd index 82a63bc..c58487d 100644 --- a/man/frs_col_generate.Rd +++ b/man/frs_col_generate.Rd @@ -104,7 +104,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_col_join.Rd b/man/frs_col_join.Rd index 7a0c28f..2fc59ca 100644 --- a/man/frs_col_join.Rd +++ b/man/frs_col_join.Rd @@ -96,7 +96,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_extract.Rd b/man/frs_extract.Rd index 83aa3cc..c579933 100644 --- a/man/frs_extract.Rd +++ b/man/frs_extract.Rd @@ -110,7 +110,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_feature_find.Rd b/man/frs_feature_find.Rd index 39617b3..91213bd 100644 --- a/man/frs_feature_find.Rd +++ b/man/frs_feature_find.Rd @@ -118,7 +118,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_feature_index.Rd b/man/frs_feature_index.Rd index 4d9ab82..c776535 100644 --- a/man/frs_feature_index.Rd +++ b/man/frs_feature_index.Rd @@ -107,7 +107,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_habitat.Rd b/man/frs_habitat.Rd index 2b6e948..3f57094 100644 --- a/man/frs_habitat.Rd +++ b/man/frs_habitat.Rd @@ -253,7 +253,9 @@ Other habitat: \code{\link{frs_feature_index}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_habitat_access.Rd b/man/frs_habitat_access.Rd index bdb7156..ae8ba32 100644 --- a/man/frs_habitat_access.Rd +++ b/man/frs_habitat_access.Rd @@ -85,7 +85,9 @@ Other habitat: \code{\link{frs_feature_index}()}, \code{\link{frs_habitat}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_habitat_overlay.Rd b/man/frs_habitat_overlay.Rd index efe1a73..ec9db30 100644 --- a/man/frs_habitat_overlay.Rd +++ b/man/frs_habitat_overlay.Rd @@ -12,8 +12,7 @@ frs_habitat_overlay( species = NULL, habitat_types = c("spawning", "rearing", "lake_rearing", "wetland_rearing"), by = c("blue_line_key", "downstream_route_measure"), - format = c("wide", "long"), - long_value_col = "habitat_ind", + species_col = "species_code", verbose = TRUE ) } @@ -21,7 +20,8 @@ frs_habitat_overlay( \item{conn}{A \link[DBI:DBIConnection-class]{DBI::DBIConnection} object.} \item{from}{Character. Schema-qualified source table providing -the flags to overlay. Wide- or long-format per \code{format}.} +the flags to overlay. Must follow the canonical shape — see +"Source-table shape" above.} \item{to}{Character. Schema-qualified destination table to UPDATE in place. Must have boolean columns named in \code{habitat_types} @@ -44,19 +44,16 @@ schemas would need a follow-up parameterisation. (default) processes every species code present in \code{to}.} \item{habitat_types}{Character vector. Habitat-type columns to OR -in. Defaults to the four standard ones: \code{c("spawning", "rearing", "lake_rearing", "wetland_rearing")}. Must be a -subset of the columns present in \code{to}.} +in. Defaults to the four standard ones: \code{c("spawning", "rearing", "lake_rearing", "wetland_rearing")}. Each must be +present in both \code{to} (as a boolean column) and \code{from} (as a +per-row indicator column).} \item{by}{Character vector. Columns used to match \code{from} to either \code{to} (when \code{bridge = NULL}) or to \code{bridge} (when bridge supplied). Default \code{c("blue_line_key", "downstream_route_measure")}.} -\item{format}{Character. \code{"wide"} (default) or \code{"long"}.} - -\item{long_value_col}{Character. For \code{format = "long"}, the column -name in \code{from} that holds the indicator. Default -\code{"habitat_ind"}. Accepts boolean or \code{'true'}/\code{'t'} text -(case + whitespace insensitive).} +\item{species_col}{Character. Name of the column in \code{from} carrying +the species code per row. Default \code{"species_code"}.} \item{verbose}{Logical. Print per-species per-habitat summary. Default \code{TRUE}.} @@ -74,27 +71,36 @@ the mechanism is generic: any boolean-flagged source over any boolean-flagged target. } \details{ -Two source-table shapes (\code{format}): +\subsection{Source-table shape}{ + +One row per (segment × species). Each row has: \itemize{ -\item \strong{\code{"wide"}} — one row per segment, columns named -\verb{\{habitat_type\}_\{species_lower\}} (e.g. \code{spawning_sk}). Boolean. -Matches the bcfishpass \code{streams_habitat_known} convention. -\item \strong{\code{"long"}} — one row per (segment × species × habitat_type), -with \code{species_code}, \code{habitat_type}, and an indicator column -(\code{long_value_col}, default \code{habitat_ind}). Indicator can be -boolean or text (\code{'TRUE'}/\code{'true'}/\code{'t'} case + whitespace -insensitive). Matches link's \code{user_habitat_classification} -table. +\item the join keys named in \code{by} (default \code{c("blue_line_key", "downstream_route_measure")}) +\item a column carrying the species code (named in \code{species_col}, +default \code{"species_code"}) +\item one column per habitat type (named in \code{habitat_types}, default +\code{c("spawning", "rearing", "lake_rearing", "wetland_rearing")}) +} + +Indicator columns can be integer (\code{1} truthy, \code{0}/\code{NULL} falsy), +text (\code{'true'}/\code{'t'}/\code{'1'} truthy, anything else falsy, case + +whitespace insensitive), or boolean. + +Sources in other shapes — bcfishpass's pre-2026-04-26 long format +(\code{habitat_type} rows + \code{habitat_ind} indicator), or the +per-species-suffixed wide layout (\code{spawning_sk}, \code{rearing_sk}) — +transform first via a SQL view or \verb{data-raw/} script, then call +overlay. Shape-translation lives with the consumer. } -Two join modes (\code{bridge}): +\subsection{Two join modes (\code{bridge})}{ \itemize{ \item \strong{Direct (\code{bridge = NULL})} — the \code{to} table has the join keys directly. SQL does \verb{to. = from.} (point match). \item \strong{Bridged (\code{bridge = ""})} — the \code{to} table is keyed by \code{id_segment} (e.g. \code{fresh.streams_habitat}) and lacks the geographic keys in \code{by}. The bridge table provides the -link, with id_segment + range columns. SQL does a 3-way join: +link, with \code{id_segment} + range columns. SQL does a 3-way join: \if{html}{\out{
}}\preformatted{to.id_segment = bridge.id_segment AND bridge. = from. @@ -112,20 +118,36 @@ providing \code{id_segment} + the join-key columns + range columns works. Use cases include lake / wetland centerline segments or cottonwood-polygon segmentations pinned to a hydrology network. } +} \examples{ \dontrun{ # Direct join (target has the keys): frs_habitat_overlay(conn, from = "ws.user_habitat_classification", - to = "ws.streams_habitat_keyed", - format = "long") + to = "ws.streams_habitat_keyed") # Bridged join (target is fresh.streams_habitat, keyed by id_segment): frs_habitat_overlay(conn, from = "ws.user_habitat_classification", to = "fresh.streams_habitat", - bridge = "fresh.streams", - format = "long") + bridge = "fresh.streams") + +# Source uses a non-canonical shape (e.g. legacy long format): +# transform first via a SQL view, then overlay against the view. +DBI::dbExecute(conn, " + CREATE OR REPLACE VIEW ws.uhc_canonical AS + SELECT blue_line_key, downstream_route_measure, + upstream_route_measure, species_code, + MAX(CASE WHEN habitat_type = 'spawning' + THEN habitat_ind::text END) AS spawning, + MAX(CASE WHEN habitat_type = 'rearing' + THEN habitat_ind::text END) AS rearing + FROM ws.user_habitat_classification_long + GROUP BY 1,2,3,4") +frs_habitat_overlay(conn, + from = "ws.uhc_canonical", + to = "fresh.streams_habitat", + bridge = "fresh.streams") } } \seealso{ diff --git a/man/frs_habitat_partition.Rd b/man/frs_habitat_partition.Rd index c414ac5..ef04bc7 100644 --- a/man/frs_habitat_partition.Rd +++ b/man/frs_habitat_partition.Rd @@ -106,6 +106,8 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()}, \code{\link{frs_network_segment}()} } diff --git a/man/frs_habitat_species.Rd b/man/frs_habitat_species.Rd index 3e00d7b..3bc8ed6 100644 --- a/man/frs_habitat_species.Rd +++ b/man/frs_habitat_species.Rd @@ -85,7 +85,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_network_segment}()} } \concept{habitat} diff --git a/man/frs_network_segment.Rd b/man/frs_network_segment.Rd index 9a38a82..e091961 100644 --- a/man/frs_network_segment.Rd +++ b/man/frs_network_segment.Rd @@ -148,7 +148,9 @@ Other habitat: \code{\link{frs_habitat}()}, \code{\link{frs_habitat_access}()}, \code{\link{frs_habitat_classify}()}, +\code{\link{frs_habitat_overlay}()}, \code{\link{frs_habitat_partition}()}, +\code{\link{frs_habitat_predicates}()}, \code{\link{frs_habitat_species}()} } \concept{habitat} diff --git a/planning/findings.md b/planning/findings.md index 64f97ba..69c62fd 100644 --- a/planning/findings.md +++ b/planning/findings.md @@ -1,39 +1,86 @@ -# Findings +# Findings — drop format param -## frs_habitat current flow (to replace) +## Why drop, not extend -1. Load params from CSV -2. Build species spec per WSG via frs_wsg_species -3. Phase 1: frs_habitat_partition per WSG (extract + enrich + gradient breaks + habitat breaks) -4. Phase 2: frs_habitat_species per (WSG, species) (copy table + break_apply + classify) -5. Phase 3: persist via to_prefix -6. Phase 4: cleanup +`format = c("wide", "long")` was scoped for two source-table shapes: -## New flow +1. **wide (per-species suffix):** `spawning_sk`, `rearing_sk`, ... — one row per segment, all species' all habitats in one row. Originally for direct reads of `bcfishpass.streams_habitat_known`. **Zero current production consumers.** +2. **long:** `species_code` + `habitat_type` + `habitat_ind` text rows. One row per (segment × species × habitat_type). Was link's read of pre-2026-04-26 bcfishpass `user_habitat_classification.csv`. **Bcfishpass moved away from this shape.** -1. Load params from CSV -2. Build species spec per WSG -3. Per WSG (parallel via mirai): - a. Generate gradient barriers at unique access thresholds - b. frs_network_segment (extract + enrich + break at barriers + user sources) - c. frs_habitat_classify (long-format, species-specific accessibility) - d. Append to to_streams (if provided) - e. Cleanup working tables -4. Done — no separate Phase 2/3 +Bcfishpass's authoritative CSV (post-2026-04-26) is a third shape: row-per-(segment × species) with per-habitat indicator columns (`spawning`, `rearing`). PR #176 first tried to bolt this on as `species_col` parameter inside `format = "wide"`. That compounded the API. -## Key simplification +YAGNI says: ship the one shape that has a real consumer. Add format back when a real second consumer with a different shape appears. -Old: per-species tables with duplicated geometry, separate access + habitat breaks -New: one streams table per WSG, one habitat table (long format), classify by attribute +## What stays parameterized -## Worker isolation +Column names remain caller-customizable: -Each mirai worker: -- Opens own DB connection (password param) -- Creates working.streams_{wsg} + working.streams_{wsg}_breaks -- Writes to to_habitat (shared table — needs row-level isolation via species_code + id_segment) -- Appends to to_streams (shared table — needs WSG isolation via watershed_group_code) +- `species_col` — name of the column in source carrying species (default `"species_code"`) +- `by` — join keys (default `c("blue_line_key", "downstream_route_measure")`) +- `habitat_types` — habitat columns to overlay (default the four standard ones) -Potential conflict: two workers writing to to_habitat simultaneously. -PG handles concurrent INSERTs fine — no locking issue for appends. -The DELETE (idempotent cleanup) could conflict if two workers process same WSG — but we dedupe WSGs before dispatching so this won't happen. +Indicator coercion is universal: `lower(trim(::text)) IN ('true', 't', '1')` matches `1`, `'true'`, `'t'`, boolean TRUE; everything else (NULL, `0`, `'false'`, `'f'`) falsy. + +Bridge mode (3-way join via segments table) is orthogonal to source shape and stays. + +## Non-canonical sources (Option B) + +A consumer with a non-canonical source transforms first — SQL view, R pivot, or (forthcoming) link's `lnk_ingest_bcfishpass()`. fresh stays a thin SQL adapter for one shape; shape-knowledge lives where it belongs (with the consumer). + +That keeps fresh's API narrow and lets each consumer own its shape adapter. The pre-2026-04-26 bcfishpass long format is recoverable via a 5-line SQL view (the same one I drafted earlier in `lnk_pipeline_classify.R` and reverted). Wide-suffix similarly. + +## Test fixture audit + +Existing `test-frs_habitat_overlay.R` (~660 lines, 67 tests) breaks into: + +- **Argument validation** (lines 32-65, 149-173, 225-235): mostly retained, drop checks for dropped params (`format`, `long_value_col`) +- **Empty target table** (69-87): retained; canonical shape +- **Missing source column** (91-113): adapt — `mk_dbq` used wide-suffix; rewrite for canonical shape (missing habitat column instead of missing per-species-suffix) +- **SQL shape** (117-147): rewrite for canonical shape — verify single-dispatch SQL emits the right WHERE clause with `species_col` + indicator predicate +- **Custom `by`** (177-196): retained; bridge orthogonal +- **NULL species** (200-223): retained; species discovery from target table +- **Wide integration** (237-379): adapt to canonical shape +- **Long-format unit + integration** (382-528): drop entirely +- **Bridge** (532-660): retained for the canonical shape + +Net: ~150 lines of test code removed, ~60 lines added/adjusted. + +## Migration mapping for link + +Link's `lnk_pipeline_classify.R` currently calls: + +```r +fresh::frs_habitat_overlay(conn, + from = paste0(schema, ".user_habitat_classification"), + to = "fresh.streams_habitat", + bridge = "fresh.streams", + species = species, + format = "long", # drop + long_value_col = "habitat_ind", # drop + verbose = FALSE) +``` + +Becomes (after fresh 0.22.0): + +```r +fresh::frs_habitat_overlay(conn, + from = paste0(schema, ".user_habitat_classification"), + to = "fresh.streams_habitat", + bridge = "fresh.streams", + species = species, + species_col = "species_code", + habitat_types = c("spawning", "rearing"), + verbose = FALSE) +``` + +Source table shape (already loaded by `lnk_pipeline_prepare`) is the new bcfishpass shape since the daily auto-sync. Caller-side change is the function args — three lines. + +## Why species_col defaults to "species_code" + +Bcfishpass uses `species_code` as the column name. So does fresh's own `streams_habitat`. So does link's `parameters_fresh.csv`. Convention across the ecosystem. Default-args-match-convention is cleaner than "user must always specify". + +## Version bump rationale + +0.21.0 → 0.22.0. Minor (not patch) because of breaking signature change. Pre-1.0, single consumer (link), coordinated release window — documented breaking changes are acceptable per the convention reflected in 0.21.0's NEWS ("Pre-1.0 cleanup driven by review of v0.20.0; no deprecation alias"). + +No deprecation alias for `format` either. The two callers (link's broken `format = "long"` call site; fresh's own tests) both update in this same coordinated work. diff --git a/planning/progress.md b/planning/progress.md index 0f341a1..ce67553 100644 --- a/planning/progress.md +++ b/planning/progress.md @@ -1,9 +1,9 @@ -# Progress +# Progress — drop format param -## Session: 2026-04-05 +## Session 2026-04-27 -- [x] Plan created -- [x] Phase 1: Rewrite frs_habitat (183s ADMS sequential, species-specific access works) -- [ ] Phase 2: Test -- [ ] Phase 3: Benchmark -- [ ] Phase 4: Deprecate + release +- Branch `overlay-species-col` reset to main (was carrying species_col bolt-on; pivoted to drop-format simplification per design discussion) +- PWF baseline written +- Issue #177 updated in place: title + body now reflect "drop format" scope (was "layout decomposition") +- Cross-repo coordination: crate-Claude thread closed today (`link/comms/crate/20260427_fresh_bcfishpass_csv_consumers.md`); decision recorded — link will canonicalize at ingest via forthcoming `lnk_ingest_bcfishpass()`, so fresh sees only canonical shape +- Next: Phase 2 — rewrite `frs_habitat_overlay()` diff --git a/planning/task_plan.md b/planning/task_plan.md index 759d09f..17229fe 100644 --- a/planning/task_plan.md +++ b/planning/task_plan.md @@ -1,65 +1,56 @@ -# Refactor frs_habitat → unified pipeline - -## Goal - -Rewrite `frs_habitat()` to wrap `frs_network_segment()` + `frs_habitat_classify()` instead of the old `frs_habitat_partition()` + `frs_habitat_species()`. Support province-wide runs, mirai parallel, `to_streams`/`to_habitat` output params. - -## Current State - -- `frs_network_segment()` — exported, tested, working -- `frs_habitat_classify()` — exported, tested, working -- `frs_habitat()` — still uses old pipeline (partition + species) -- Old functions: `frs_habitat_partition`, `frs_habitat_species`, `frs_habitat_access` — to deprecate - -## Key Design Decisions - -- `to_streams` + `to_habitat` params on `frs_habitat()` (not `to_prefix`) -- Gradient barriers generated per WSG inside the function (user doesn't pre-generate) -- Access thresholds derived from `params_fresh$access_gradient_max` per species -- Labels: `gradient_{pct}` for gradient barriers, user controls labels for break_sources -- mirai parallel: each worker runs segment + classify for one WSG -- Workers need `password` param for DB reconnection +# Task: Drop frs_habitat_overlay format param; accept only canonical shape (#177) + +`frs_habitat_overlay()` carries `format = c("wide", "long")` and +`long_value_col` for two source-table shapes that have no current +production consumer. Bcfishpass changed its authoritative CSV shape on +2026-04-26 to a third shape (row-per-(segment × species) with +per-habitat indicator columns) — that's the only shape link's pipeline +needs to consume. + +PR #176's first attempt added a `species_col` parameter as an additive +third dispatch. That paper-over compounded the API. Cleaner answer: +drop `format` + `long_value_col` entirely. Hard-code the canonical +shape. Column names stay parameterized via `species_col`, `by`, +`habitat_types`. Callers with non-canonical sources transform first +(SQL view, R pivot, or link's forthcoming `lnk_ingest_bcfishpass()`). ## Phases -### Phase 1: Rewrite frs_habitat [pending] -- New params: `to_streams`, `to_habitat`, `break_sources`, `password`, `workers` -- Remove: `to_prefix`, `falls`, `falls_where` (already removed in earlier work) -- Flow per WSG: - 1. Generate gradient barriers at all unique access thresholds - 2. `frs_network_segment(aoi = wsg, to = working_table, break_sources = gradient + user sources)` - 3. `frs_habitat_classify(table = working_table, to = to_habitat, species = wsg_species)` - 4. Append working_table to `to_streams` (if provided) - 5. Clean up working tables -- mirai: each WSG is one task, workers run the full per-WSG flow +- [ ] Phase 1 — PWF baseline (this file + findings + progress) +- [ ] Phase 2 — Rewrite `frs_habitat_overlay()`: drop `format` + `long_value_col` from signature; require `species_col` (default `"species_code"`); single SQL dispatch path. Universal indicator coercion `lower(trim(::text)) IN ('true', 't', '1')`. Update validation: `from` must contain `by` + `species_col` + each `habitat_types` column. Bridge mode retained (orthogonal to source shape). +- [ ] Phase 3 — Update `frs_habitat_overlay.Rd` doc + examples. New canonical-shape worked example. Document the transform-first pattern for non-canonical sources. +- [ ] Phase 4 — Drop tests for `format = "wide"` (suffix) and `format = "long"` paths. Keep + adapt the `species_col` integration tests as primary. Adjust mocks accordingly. +- [ ] Phase 5 — `/code-check` on staged diff +- [ ] Phase 6 — `devtools::test()` full suite — verify no regressions in non-overlay tests +- [ ] Phase 7 — NEWS entry + version bump 0.21.0 → 0.22.0 (breaking — pre-1.0; documented) +- [ ] Phase 8 — Force-push to overlay-species-col branch; update PR #176 title + body to reflect simplified scope; mark ready for review + +## Critical files -### Phase 2: Test [pending] -- Sequential: single WSG (ADMS) -- Sequential: multi-WSG (ADMS + BULK) -- Parallel: multi-WSG with mirai (4 workers) -- Verify species-specific accessibility (CO < ST < BT) -- Verify persistent tables accumulate across WSGs -- Run devtools::test() — 528 should still pass +- `R/frs_habitat_overlay.R` — rewrite signature + dispatch (drops ~100 lines net) +- `tests/testthat/test-frs_habitat_overlay.R` — drop wide-suffix + long-format tests; canonical-shape tests become primary +- `man/frs_habitat_overlay.Rd` — regenerated +- `NAMESPACE` — unchanged (no new exports) +- `NEWS.md`, `DESCRIPTION` — release artifacts -### Phase 3: Benchmark + province test [pending] -- Script for province-wide run using the refactored frs_habitat -- Compare timing to old pipeline -- Log to scripts/habitat/logs/ +## Acceptance -### Phase 4: Deprecate old functions [pending] -- Add .Deprecated() to frs_habitat_partition, frs_habitat_species, frs_habitat_access -- Update CLAUDE.md architecture section -- NEWS.md + version bump to v0.8.0 +- `frs_habitat_overlay()` signature has no `format`, no `long_value_col` +- `species_col` is a parameter with default `"species_code"` +- Single dispatch path: `WHERE k. = '' AND lower(trim(k.::text)) IN ('true','t','1') AND additive_guard` +- All retained tests pass; dropped tests removed cleanly +- Bridge mode still works (verified by bridge-mode integration test, retained) +- `/code-check` clean +- NEWS + DESCRIPTION updated; PR #176 force-pushed and ready -## Files to Modify +## Risks -- `R/frs_habitat.R` — rewrite main function -- `R/frs_habitat.R` — add .Deprecated to old functions -- `man/frs_habitat.Rd` — regenerate -- `scripts/habitat/habitat_province.R` — update to use frs_habitat() -- `NEWS.md` — v0.8.0 entry -- `DESCRIPTION` — version bump +- **Existing test fixtures need rewriting**: tests previously using wide-suffix or long-format data must either be dropped (no current consumer) or rewritten to canonical shape. Audit each test's intent — if it tests overlay behavior generically, rewrite; if it tests a specific dropped path, drop. +- **Mock helpers** (`mk_dbq` etc.) are tied to wide-suffix expectations. Update or replace for canonical shape. +- **Link's call site is currently broken** against the new CSV. Coordinated update lands as the link follow-up PR. Until that lands, link's `tar_make()` won't run — acceptable, vignette is already pulled from the site. -## Errors Encountered +## Not in this PR -(none yet) +- crate's `lnk_ingest_bcfishpass()` registry-driven canonicalization — separate impl-plan thread + PRs in link + crate +- link's call-site update — separate PR after fresh merges (small, targeted; bumps fresh dep to >= 0.22.0) +- `lnk_pipeline_classify.R` SQL pivot view (the temporary unblock I drafted earlier and reverted) — not needed; link will pass canonical-shape data or transform first diff --git a/tests/testthat/test-frs_habitat_overlay.R b/tests/testthat/test-frs_habitat_overlay.R index dae017f..004c6cb 100644 --- a/tests/testthat/test-frs_habitat_overlay.R +++ b/tests/testthat/test-frs_habitat_overlay.R @@ -1,22 +1,28 @@ # Unit tests: frs_habitat_overlay # -# Mocked tests cover argument validation, column-skip behaviour, SQL -# shape. Integration tests at the bottom hit a small fixture on a -# live fwapg. - -# Helper: build a dbGetQuery mock that returns -# - species list for `SELECT DISTINCT species_code` -# - destination (`to`) table columns when info_schema targets `table_name = 'h'` -# - source (`from`) table columns when targeting `table_name = 'k'` -mk_dbq <- function(species, table_cols, known_cols) { +# Mocked tests cover argument validation, SQL shape, and edge cases. +# Integration tests at the bottom hit a small fixture on a live DB. +# +# Source-table shape contract (canonical): one row per (segment x +# species). Each row carries `by` columns + `species_col` (default +# "species_code") + one indicator column per habitat type. Indicator +# coercion accepts integer 1, text 'true'/'t'/'1' (case + whitespace +# insensitive), boolean. + +# Helper: build a dbGetQuery mock with separate species/target/source +# branches. `to_cols` and `from_cols` describe column inventories +# returned by information_schema.columns lookups. +mk_dbq <- function(species, to_cols, from_cols, bridge_cols = character(0)) { function(conn, sql) { if (grepl("DISTINCT species_code", sql)) { data.frame(species_code = species) } else if (grepl("information_schema", sql)) { if (grepl("table_name = 'h'", sql)) { - data.frame(column_name = table_cols) + data.frame(column_name = to_cols) + } else if (grepl("table_name = 's'", sql)) { + data.frame(column_name = bridge_cols) } else { - data.frame(column_name = known_cols) + data.frame(column_name = from_cols) } } else { data.frame() @@ -29,22 +35,27 @@ TBL_DEFAULT <- c("id_segment", "species_code", "blue_line_key", "downstream_route_measure", "spawning", "rearing", "lake_rearing", "wetland_rearing") +# Canonical-shape source columns (default `by` + species_col + habitat_types) +SRC_DEFAULT <- c("blue_line_key", "downstream_route_measure", + "species_code", + "spawning", "rearing", "lake_rearing", "wetland_rearing") + # -- Argument validation ------------------------------------------------------- -test_that("frs_habitat_overlay validates conn", { +test_that("validates conn", { expect_error( frs_habitat_overlay("not a conn", from = "x.z", to = "x.y"), "DBIConnection") }) -test_that("frs_habitat_overlay validates from / to are schema-qualified strings", { +test_that("validates from / to are schema-qualified strings", { fake <- structure(list(), class = "DBIConnection") expect_error(frs_habitat_overlay(fake, from = "x.y", to = "")) expect_error(frs_habitat_overlay(fake, from = "", to = "x.y")) expect_error(frs_habitat_overlay(fake, c("a", "b"), "x.y")) }) -test_that("frs_habitat_overlay rejects malicious from / to identifiers", { +test_that("rejects malicious from / to identifiers", { fake <- structure(list(), class = "DBIConnection") expect_error( frs_habitat_overlay(fake, from = "ws.k; DROP TABLE x; --", to = "ws.h")) @@ -52,84 +63,116 @@ test_that("frs_habitat_overlay rejects malicious from / to identifiers", { frs_habitat_overlay(fake, from = "ws.k", to = "ws.h; DROP TABLE x; --")) }) -test_that("frs_habitat_overlay requires schema-qualified known table", { +test_that("rejects malicious species_col identifier", { + fake <- structure(list(), class = "DBIConnection") + expect_error( + frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", + species = "CO", + species_col = "species; DROP TABLE x; --"), + "invalid") +}) + +test_that("rejects malformed species codes (non-alphabetic)", { + fake <- structure(list(), class = "DBIConnection") + expect_error( + frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", species = "CO; DROP --"), + "alphabetic") + expect_error( + frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", species = c("CO", "BAD CODE")), + "alphabetic") +}) + +test_that("requires schema-qualified from / to", { fake <- structure(list(), class = "DBIConnection") - # `table = "ws.h"` so the table_cols mock branch matches; known is - # the unqualified one being tested. mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", mk_dbq("CO", TBL_DEFAULT, character(0))) expect_error( frs_habitat_overlay(fake, from = "no_schema", to = "ws.h", - species = "CO"), + species = "CO"), "schema-qualified") }) -# -- Empty table -> no-op ------------------------------------------------------ +test_that("rejects habitat_types not in target table", { + fake <- structure(list(), class = "DBIConnection") + mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", function(conn, sql) { + if (grepl("information_schema", sql)) { + data.frame(column_name = c("id_segment", "species_code", "spawning")) + } else { + data.frame(species_code = "CO") + } + }) + expect_error( + frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", species = "CO", + habitat_types = c("spawning", "rearing"), + verbose = FALSE), + "habitat_types not found") +}) + +test_that("rejects from table missing required columns", { + fake <- structure(list(), class = "DBIConnection") + # Source missing `rearing` column + mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", + mk_dbq("CO", TBL_DEFAULT, + c("blue_line_key", "downstream_route_measure", + "species_code", "spawning"))) + expect_error( + frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", + species = "CO", + habitat_types = c("spawning", "rearing"), + verbose = FALSE), + "missing required columns") +}) -test_that("empty table -> early return without error", { +test_that("rejects from table missing species_col", { fake <- structure(list(), class = "DBIConnection") + # Source has habitat columns but no species_code + mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", + mk_dbq("CO", TBL_DEFAULT, + c("blue_line_key", "downstream_route_measure", + "spawning", "rearing"))) + expect_error( + frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", + species = "CO", + habitat_types = c("spawning", "rearing"), + verbose = FALSE), + "species_code") +}) +# -- Empty table -> no-op ------------------------------------------------------ + +test_that("empty target table -> early return without error", { + fake <- structure(list(), class = "DBIConnection") mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", function(conn, sql) { if (grepl("DISTINCT species_code", sql)) { data.frame(species_code = character(0)) } else if (grepl("information_schema", sql) && grepl("table_name = 'h'", sql)) { - # table validation: species discovery returned 0, so we still - # check habitat_types here. Provide all habitat columns. data.frame(column_name = TBL_DEFAULT) } else { data.frame(column_name = character(0)) } }) - expect_silent(suppressMessages( frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", verbose = FALSE) )) }) -# -- Missing column for a (species, habitat) pair -> skip --------------------- - -test_that("missing per-species column is skipped, not an error", { - fake <- structure(list(), class = "DBIConnection") - exec_calls <- list() - - mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", - mk_dbq( - species = "CT", - table_cols = TBL_DEFAULT, - known_cols = c("blue_line_key", "downstream_route_measure", - "spawning_bt", "rearing_bt", - "spawning_co", "rearing_co"))) - mockery::stub(frs_habitat_overlay, ".frs_db_execute", function(conn, sql) { - exec_calls[[length(exec_calls) + 1L]] <<- sql - 0L - }) - - out <- expect_output( - frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", species = "CT", verbose = TRUE), - "skip CT/spawning") - - # No UPDATEs should have run for CT (no matching columns) - expect_length(exec_calls, 0) -}) - -# -- SQL shape: correct columns + values --------------------------------------- +# -- SQL shape: canonical-shape dispatch --------------------------------------- -test_that("UPDATE SQL ORs in TRUE on matching segments only", { +test_that("UPDATE SQL emits canonical-shape WHERE clause", { fake <- structure(list(), class = "DBIConnection") captured <- list() mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", - mk_dbq("CO", TBL_DEFAULT, - c("blue_line_key", "downstream_route_measure", "spawning_co"))) + mk_dbq("CO", TBL_DEFAULT, SRC_DEFAULT)) mockery::stub(frs_habitat_overlay, ".frs_db_execute", function(conn, sql) { captured[[length(captured) + 1L]] <<- sql 3L }) invisible(frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", - species = "CO", - habitat_types = "spawning", - verbose = FALSE)) + species = "CO", + habitat_types = "spawning", + verbose = FALSE)) expect_length(captured, 1) sql <- captured[[1]] @@ -139,37 +182,34 @@ test_that("UPDATE SQL ORs in TRUE on matching segments only", { expect_match(sql, "h\\.blue_line_key = k\\.blue_line_key") expect_match(sql, "h\\.downstream_route_measure = k\\.downstream_route_measure") expect_match(sql, "h\\.species_code = 'CO'") - expect_match(sql, "k\\.spawning_co IS TRUE") + expect_match(sql, "k\\.species_code = 'CO'") + expect_match(sql, "lower\\(trim\\(k\\.spawning::text\\)\\) IN \\('true', 't', '1'\\)") # Idempotency / additive guard — assert the parens so a refactor - # that drops them (which would break AND/OR precedence) fails the - # test. + # that drops them (which would break AND/OR precedence) fails. expect_match(sql, "AND \\(h\\.spawning IS NULL OR h\\.spawning = FALSE\\)") }) -test_that("rejects malformed species codes (non-alphabetic)", { - fake <- structure(list(), class = "DBIConnection") - expect_error( - frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", species = "CO; DROP --"), - "alphabetic") - expect_error( - frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", species = c("CO", "BAD CODE")), - "alphabetic") -}) - -test_that("rejects habitat_types not in target table", { +test_that("custom species_col interpolated into SQL", { fake <- structure(list(), class = "DBIConnection") - mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", function(conn, sql) { - if (grepl("information_schema", sql)) { - data.frame(column_name = c("id_segment", "species_code", "spawning")) - } else { - data.frame(species_code = "CO") - } + captured <- list() + src_cols <- c("blue_line_key", "downstream_route_measure", + "sp_code", "spawning") + mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", + mk_dbq("CO", TBL_DEFAULT, src_cols)) + mockery::stub(frs_habitat_overlay, ".frs_db_execute", function(conn, sql) { + captured[[length(captured) + 1L]] <<- sql + 0L }) - expect_error( - frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", species = "CO", - habitat_types = c("spawning", "rearing"), - verbose = FALSE), - "habitat_types not found") + + invisible(frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", + species = "CO", + habitat_types = "spawning", + species_col = "sp_code", + verbose = FALSE)) + + expect_length(captured, 1) + expect_match(captured[[1]], "k\\.sp_code = 'CO'") + expect_no_match(captured[[1]], "k\\.species_code = ") }) # -- Custom join key parameter (`by`) ---------------------------------------- @@ -177,18 +217,18 @@ test_that("rejects habitat_types not in target table", { test_that("custom by= produces matching join predicate", { fake <- structure(list(), class = "DBIConnection") captured <- character(0) - + src_cols <- c("id_segment", "species_code", "spawning") mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", - mk_dbq("CO", TBL_DEFAULT, c("id_segment", "spawning_co"))) + mk_dbq("CO", TBL_DEFAULT, src_cols)) mockery::stub(frs_habitat_overlay, ".frs_db_execute", function(conn, sql) { captured <<- c(captured, sql); 0L }) frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", - species = "CO", - habitat_types = "spawning", - by = "id_segment", - verbose = FALSE) + species = "CO", + habitat_types = "spawning", + by = "id_segment", + verbose = FALSE) expect_length(captured, 1) expect_match(captured, "h\\.id_segment = k\\.id_segment") @@ -199,462 +239,285 @@ test_that("custom by= produces matching join predicate", { test_that("NULL species pulls from table.species_code", { fake <- structure(list(), class = "DBIConnection") - - species_seen <- character(0) + captured <- list() mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", - mk_dbq(c("BT", "CO"), TBL_DEFAULT, - c("blue_line_key", "downstream_route_measure", - "spawning_bt", "spawning_co"))) + mk_dbq(c("BT", "CO", "ST"), TBL_DEFAULT, SRC_DEFAULT)) mockery::stub(frs_habitat_overlay, ".frs_db_execute", function(conn, sql) { - if (grepl("species_code = '([A-Z]+)'", sql)) { - m <- regmatches(sql, regexpr("species_code = '([A-Z]+)'", sql)) - species_seen <<- c(species_seen, sub(".*'([A-Z]+)'.*", "\\1", m)) - } - 1L + captured[[length(captured) + 1L]] <<- sql; 0L }) frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", - habitat_types = "spawning", - verbose = FALSE) + habitat_types = "spawning", verbose = FALSE) - expect_setequal(species_seen, c("BT", "CO")) + # Three species x one habitat type = three UPDATE calls + expect_length(captured, 3) + expect_true(any(grepl("species_code = 'BT'", captured))) + expect_true(any(grepl("species_code = 'CO'", captured))) + expect_true(any(grepl("species_code = 'ST'", captured))) }) # -- Identifier injection ---------------------------------------------------- -test_that("malicious identifiers are rejected by validator", { +test_that("malicious identifiers rejected by validator", { fake <- structure(list(), class = "DBIConnection") expect_error( - frs_habitat_overlay(fake, "ws.h; DROP TABLE x; --", "ws.k")) - expect_error( - frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", by = "bad-name")) + frs_habitat_overlay(fake, from = "ws.k; DROP TABLE x; --", to = "ws.h")) expect_error( - frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", habitat_types = "spawn'; DROP --")) + frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", + habitat_types = c("ok_col", "bad; DROP --"))) }) # -- Integration: real DB, small fixture -------------------------------------- -test_that("integration: known segment flips FALSE -> TRUE on a fixture", { +test_that("integration: canonical shape flips matching species/habitat", { skip_if_not(.frs_db_available(), "DB not available") conn <- frs_db_conn() on.exit({ - .frs_test_drop(conn, "working.test_known_h") - .frs_test_drop(conn, "working.test_known_k") + .frs_test_drop(conn, "working.test_overlay_h") + .frs_test_drop(conn, "working.test_overlay_k") DBI::dbDisconnect(conn) }) - # Note: assumes `working` schema already exists. Other fresh - # integration tests rely on the same convention; CREATE SCHEMA - # requires DB-level DDL which the shared connection lacks. - - # Build a 3-segment streams_habitat fixture with all FALSE DBI::dbExecute(conn, - "CREATE TABLE working.test_known_h ( + "CREATE TABLE working.test_overlay_h ( id_segment integer, species_code text, blue_line_key bigint, downstream_route_measure double precision, spawning boolean, rearing boolean, lake_rearing boolean, wetland_rearing boolean)") DBI::dbExecute(conn, - "INSERT INTO working.test_known_h VALUES - (1, 'CO', 100, 10.0, FALSE, FALSE, FALSE, FALSE), - (2, 'CO', 100, 20.0, TRUE, FALSE, FALSE, FALSE), - (3, 'CO', 100, 30.0, FALSE, FALSE, FALSE, FALSE)") + "INSERT INTO working.test_overlay_h VALUES + (1, 'SK', 100, 10.0, FALSE, FALSE, FALSE, FALSE), + (2, 'CO', 100, 10.0, FALSE, FALSE, FALSE, FALSE), + (3, 'BT', 100, 20.0, FALSE, FALSE, FALSE, FALSE), + (4, 'CO', 100, 30.0, FALSE, FALSE, FALSE, FALSE)") - # Known-habitat fixture: segment at DRM=10 has spawning_co; segment - # at DRM=20 has rearing_co (already classified spawning, will flip - # rearing too); segment at DRM=30 has nothing. + # Mix of integer 1, NULL, integer 0 to exercise indicator coercion. DBI::dbExecute(conn, - "CREATE TABLE working.test_known_k ( + "CREATE TABLE working.test_overlay_k ( blue_line_key bigint, downstream_route_measure double precision, - spawning_co boolean, rearing_co boolean)") + species_code text, spawning integer, rearing integer)") DBI::dbExecute(conn, - "INSERT INTO working.test_known_k VALUES - (100, 10.0, TRUE, FALSE), - (100, 20.0, FALSE, TRUE), - (100, 30.0, FALSE, FALSE)") + "INSERT INTO working.test_overlay_k VALUES + (100, 10.0, 'SK', 1, NULL), + (100, 10.0, 'CO', 1, 1), + (100, 20.0, 'BT', NULL, 1), + (100, 30.0, 'CO', 0, NULL)") invisible(frs_habitat_overlay(conn, - to = "working.test_known_h", - from = "working.test_known_k", - species = "CO", + from = "working.test_overlay_k", + to = "working.test_overlay_h", + species = c("SK", "CO", "BT"), habitat_types = c("spawning", "rearing"), verbose = FALSE)) result <- DBI::dbGetQuery(conn, - "SELECT id_segment, spawning, rearing - FROM working.test_known_h ORDER BY id_segment") - - expect_equal(result$spawning, c(TRUE, TRUE, FALSE)) - expect_equal(result$rearing, c(FALSE, TRUE, FALSE)) + "SELECT id_segment, species_code, spawning, rearing + FROM working.test_overlay_h ORDER BY id_segment") + + # SK seg 1: spawning=1 -> TRUE; rearing NULL -> stays FALSE + expect_equal(result$spawning[result$id_segment == 1L], TRUE) + expect_equal(result$rearing[result$id_segment == 1L], FALSE) + # CO seg 2: both flip TRUE + expect_equal(result$spawning[result$id_segment == 2L], TRUE) + expect_equal(result$rearing[result$id_segment == 2L], TRUE) + # BT seg 3: spawning NULL -> stays; rearing=1 -> TRUE + expect_equal(result$spawning[result$id_segment == 3L], FALSE) + expect_equal(result$rearing[result$id_segment == 3L], TRUE) + # CO seg 4: spawning=0 -> stays FALSE (additive guard); rearing NULL -> stays + expect_equal(result$spawning[result$id_segment == 4L], FALSE) + expect_equal(result$rearing[result$id_segment == 4L], FALSE) }) -test_that("integration: already-TRUE rows are not re-touched (additive-only)", { +test_that("integration: text 'TRUE'/'true'/'t' indicators flip", { skip_if_not(.frs_db_available(), "DB not available") conn <- frs_db_conn() on.exit({ - .frs_test_drop(conn, "working.test_known_h3") - .frs_test_drop(conn, "working.test_known_k3") + .frs_test_drop(conn, "working.test_overlay_txt_h") + .frs_test_drop(conn, "working.test_overlay_txt_k") DBI::dbDisconnect(conn) }) DBI::dbExecute(conn, - "CREATE TABLE working.test_known_h3 ( + "CREATE TABLE working.test_overlay_txt_h ( id_segment integer, species_code text, blue_line_key bigint, downstream_route_measure double precision, spawning boolean, rearing boolean, lake_rearing boolean, wetland_rearing boolean)") - # Both segments START as spawning = TRUE DBI::dbExecute(conn, - "INSERT INTO working.test_known_h3 VALUES - (1, 'CO', 100, 10.0, TRUE, FALSE, FALSE, FALSE), - (2, 'CO', 100, 20.0, TRUE, FALSE, FALSE, FALSE)") + "INSERT INTO working.test_overlay_txt_h VALUES + (1, 'CO', 100, 10.0, FALSE, FALSE, FALSE, FALSE), + (2, 'CO', 100, 20.0, FALSE, FALSE, FALSE, FALSE), + (3, 'CO', 100, 30.0, FALSE, FALSE, FALSE, FALSE), + (4, 'CO', 100, 40.0, FALSE, FALSE, FALSE, FALSE)") + DBI::dbExecute(conn, - "CREATE TABLE working.test_known_k3 ( + "CREATE TABLE working.test_overlay_txt_k ( blue_line_key bigint, downstream_route_measure double precision, - spawning_co boolean)") - # Known says both are spawning. The additive guard should make the - # UPDATE skip both rows (already TRUE), returning rowcount 0. + species_code text, spawning text, rearing text)") DBI::dbExecute(conn, - "INSERT INTO working.test_known_k3 VALUES - (100, 10.0, TRUE), - (100, 20.0, TRUE)") + "INSERT INTO working.test_overlay_txt_k VALUES + (100, 10.0, 'CO', 'TRUE', 'false'), + (100, 20.0, 'CO', 'true', 'f'), + (100, 30.0, 'CO', 't', ''), + (100, 40.0, 'CO', 'maybe', NULL)") - # capture rowcount via verbose path: the cat() output reports - # `/: segments flipped`. Use direct dbExecute to read - # the matching count. invisible(frs_habitat_overlay(conn, - to = "working.test_known_h3", - from = "working.test_known_k3", - species = "CO", habitat_types = "spawning", + from = "working.test_overlay_txt_k", + to = "working.test_overlay_txt_h", + species = "CO", + habitat_types = c("spawning", "rearing"), verbose = FALSE)) - # Both rows should still be TRUE; nothing flipped — additive guard - # held. Verifying by query. result <- DBI::dbGetQuery(conn, - "SELECT spawning FROM working.test_known_h3 ORDER BY id_segment") - expect_equal(result$spawning, c(TRUE, TRUE)) + "SELECT id_segment, spawning, rearing + FROM working.test_overlay_txt_h ORDER BY id_segment") + # Seg 1-3: 'TRUE'/'true'/'t' all flip spawning; 'false'/'f'/'' do not + expect_equal(result$spawning, c(TRUE, TRUE, TRUE, FALSE)) + expect_equal(result$rearing, c(FALSE, FALSE, FALSE, FALSE)) }) -test_that("integration: idempotent on second call", { +test_that("integration: already-TRUE rows are not re-touched (additive-only)", { skip_if_not(.frs_db_available(), "DB not available") conn <- frs_db_conn() on.exit({ - .frs_test_drop(conn, "working.test_known_h2") - .frs_test_drop(conn, "working.test_known_k2") + .frs_test_drop(conn, "working.test_overlay_addh") + .frs_test_drop(conn, "working.test_overlay_addk") DBI::dbDisconnect(conn) }) - # Note: assumes `working` schema already exists. Other fresh - # integration tests rely on the same convention; CREATE SCHEMA - # requires DB-level DDL which the shared connection lacks. DBI::dbExecute(conn, - "CREATE TABLE working.test_known_h2 ( + "CREATE TABLE working.test_overlay_addh ( id_segment integer, species_code text, blue_line_key bigint, downstream_route_measure double precision, spawning boolean, rearing boolean, lake_rearing boolean, wetland_rearing boolean)") DBI::dbExecute(conn, - "INSERT INTO working.test_known_h2 VALUES - (1, 'CO', 100, 10.0, FALSE, FALSE, FALSE, FALSE)") + "INSERT INTO working.test_overlay_addh VALUES + (1, 'CO', 100, 10.0, TRUE, FALSE, FALSE, FALSE), + (2, 'CO', 100, 20.0, TRUE, FALSE, FALSE, FALSE)") DBI::dbExecute(conn, - "CREATE TABLE working.test_known_k2 ( + "CREATE TABLE working.test_overlay_addk ( blue_line_key bigint, downstream_route_measure double precision, - spawning_co boolean)") + species_code text, spawning integer)") DBI::dbExecute(conn, - "INSERT INTO working.test_known_k2 VALUES (100, 10.0, TRUE)") + "INSERT INTO working.test_overlay_addk VALUES + (100, 10.0, 'CO', 1), + (100, 20.0, 'CO', 1)") invisible(frs_habitat_overlay(conn, - from = "working.test_known_k2", - to = "working.test_known_h2", - species = "CO", habitat_types = "spawning", verbose = FALSE)) - invisible(frs_habitat_overlay(conn, - from = "working.test_known_k2", - to = "working.test_known_h2", - species = "CO", habitat_types = "spawning", verbose = FALSE)) + to = "working.test_overlay_addh", + from = "working.test_overlay_addk", + species = "CO", habitat_types = "spawning", + verbose = FALSE)) + # Both stay TRUE; the additive guard means no re-flip happened. result <- DBI::dbGetQuery(conn, - "SELECT spawning FROM working.test_known_h2") - expect_true(result$spawning) -}) - -# -- Long format ----------------------------------------------------------- - -test_that("long format: required columns checked up front", { - fake <- structure(list(), class = "DBIConnection") - mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", - mk_dbq("CO", TBL_DEFAULT, - c("blue_line_key", "downstream_route_measure"))) # missing species_code, habitat_type, habitat_ind - expect_error( - frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", - species = "CO", format = "long"), - "missing required columns") + "SELECT spawning FROM working.test_overlay_addh ORDER BY id_segment") + expect_equal(result$spawning, c(TRUE, TRUE)) }) -test_that("long format: SQL filters by species_code + habitat_type", { - fake <- structure(list(), class = "DBIConnection") - captured <- list() - mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", - mk_dbq("CO", TBL_DEFAULT, - c("blue_line_key", "downstream_route_measure", - "species_code", "habitat_type", "habitat_ind"))) - mockery::stub(frs_habitat_overlay, ".frs_db_execute", function(conn, sql) { - captured[[length(captured) + 1L]] <<- sql; 0L - }) - invisible(frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", - species = "CO", habitat_types = "spawning", - format = "long", verbose = FALSE)) - - expect_length(captured, 1) - sql <- captured[[1]] - expect_match(sql, "h\\.species_code = 'CO'") - expect_match(sql, "k\\.species_code = 'CO'") - expect_match(sql, "k\\.habitat_type = 'spawning'") - expect_match(sql, "lower\\(trim\\(k\\.habitat_ind::text\\)\\) IN \\('true', 't'\\)") - # Same additive guard - expect_match(sql, "AND \\(h\\.spawning IS NULL OR h\\.spawning = FALSE\\)") -}) +# -- Bridge: range-containment 3-way join ------------------------------------- -test_that("long format: custom long_value_col interpolated", { +test_that("bridge requires schema-qualified name", { fake <- structure(list(), class = "DBIConnection") - captured <- character(0) mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", - mk_dbq("CO", TBL_DEFAULT, - c("blue_line_key", "downstream_route_measure", - "species_code", "habitat_type", "is_known"))) - mockery::stub(frs_habitat_overlay, ".frs_db_execute", function(conn, sql) { - captured <<- c(captured, sql); 0L - }) - frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", - species = "CO", habitat_types = "spawning", - format = "long", long_value_col = "is_known", verbose = FALSE) - - expect_match(captured, "lower\\(trim\\(k\\.is_known::text\\)\\) IN \\(") - expect_no_match(captured, "habitat_ind") -}) - -test_that("long format: malicious long_value_col rejected by validator", { - fake <- structure(list(), class = "DBIConnection") + mk_dbq("CO", TBL_DEFAULT, SRC_DEFAULT)) expect_error( frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", - format = "long", long_value_col = "bad'; DROP --")) + bridge = "no_schema", species = "CO"), + "schema-qualified") }) -# -- Integration: long format with text TRUE/FALSE indicator ----------------- - -test_that("integration (long): text 'TRUE' indicator flips segment", { +test_that("bridge: missing required columns errors clearly", { skip_if_not(.frs_db_available(), "DB not available") conn <- frs_db_conn() on.exit({ - .frs_test_drop(conn, "working.test_known_long_h") - .frs_test_drop(conn, "working.test_known_long_k") + .frs_test_drop(conn, "working.test_bridge_h") + .frs_test_drop(conn, "working.test_bridge_s") + .frs_test_drop(conn, "working.test_bridge_k") DBI::dbDisconnect(conn) }) - DBI::dbExecute(conn, - "CREATE TABLE working.test_known_long_h ( + "CREATE TABLE working.test_bridge_h ( id_segment integer, species_code text, - blue_line_key bigint, downstream_route_measure double precision, spawning boolean, rearing boolean, lake_rearing boolean, wetland_rearing boolean)") DBI::dbExecute(conn, - "INSERT INTO working.test_known_long_h VALUES - (1, 'CO', 100, 10.0, FALSE, FALSE, FALSE, FALSE), - (2, 'CO', 100, 20.0, FALSE, FALSE, FALSE, FALSE)") - - # Long-format known table: text indicator + "INSERT INTO working.test_bridge_h VALUES (1, 'CO', FALSE, FALSE, FALSE, FALSE)") DBI::dbExecute(conn, - "CREATE TABLE working.test_known_long_k ( - blue_line_key bigint, downstream_route_measure double precision, - species_code text, habitat_type text, habitat_ind text)") + "CREATE TABLE working.test_bridge_s (id_segment integer)") # missing range cols DBI::dbExecute(conn, - "INSERT INTO working.test_known_long_k VALUES - (100, 10.0, 'CO', 'spawning', 'TRUE'), - (100, 20.0, 'CO', 'spawning', 'FALSE')") - - invisible(frs_habitat_overlay(conn, - to = "working.test_known_long_h", - from = "working.test_known_long_k", - species = "CO", habitat_types = "spawning", - format = "long", verbose = FALSE)) - - result <- DBI::dbGetQuery(conn, - "SELECT id_segment, spawning FROM working.test_known_long_h ORDER BY id_segment") - # Segment 1 flips (habitat_ind = 'TRUE'); segment 2 stays (habitat_ind = 'FALSE') - expect_equal(result$spawning, c(TRUE, FALSE)) -}) - -test_that("integration (long): boolean-typed indicator works", { - # Locks in the contract that long format accepts a boolean column, - # not just text. Postgres casts boolean to text as 'true'/'false' - # which the SQL filter normalises to lowercase before matching. - skip_if_not(.frs_db_available(), "DB not available") - conn <- frs_db_conn() - on.exit({ - .frs_test_drop(conn, "working.test_known_long_bool_h") - .frs_test_drop(conn, "working.test_known_long_bool_k") - DBI::dbDisconnect(conn) - }) - - DBI::dbExecute(conn, - "CREATE TABLE working.test_known_long_bool_h ( - id_segment integer, species_code text, + "CREATE TABLE working.test_bridge_k ( blue_line_key bigint, downstream_route_measure double precision, - spawning boolean, rearing boolean, - lake_rearing boolean, wetland_rearing boolean)") - DBI::dbExecute(conn, - "INSERT INTO working.test_known_long_bool_h VALUES - (1, 'CO', 100, 10.0, FALSE, FALSE, FALSE, FALSE), - (2, 'CO', 100, 20.0, FALSE, FALSE, FALSE, FALSE)") - - # Long-format known table: BOOLEAN indicator (not text) - DBI::dbExecute(conn, - "CREATE TABLE working.test_known_long_bool_k ( - blue_line_key bigint, downstream_route_measure double precision, - species_code text, habitat_type text, habitat_ind boolean)") - DBI::dbExecute(conn, - "INSERT INTO working.test_known_long_bool_k VALUES - (100, 10.0, 'CO', 'spawning', TRUE), - (100, 20.0, 'CO', 'spawning', FALSE)") - - invisible(frs_habitat_overlay(conn, - to = "working.test_known_long_bool_h", - from = "working.test_known_long_bool_k", - species = "CO", habitat_types = "spawning", - format = "long", verbose = FALSE)) - - result <- DBI::dbGetQuery(conn, - "SELECT id_segment, spawning FROM working.test_known_long_bool_h ORDER BY id_segment") - expect_equal(result$spawning, c(TRUE, FALSE)) -}) - -# -- Bridge: range-containment 3-way join ------------------------------------- - -test_that("bridge requires schema-qualified name", { - fake <- structure(list(), class = "DBIConnection") - mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", - mk_dbq("CO", TBL_DEFAULT, - c("blue_line_key", "downstream_route_measure", "spawning_co"))) - expect_error( - frs_habitat_overlay(fake, from = "ws.k", to = "ws.h", - bridge = "no_schema", species = "CO"), - "schema-qualified") -}) + upstream_route_measure double precision, + species_code text, spawning integer, rearing integer, + lake_rearing integer, wetland_rearing integer)") -test_that("bridge: missing required columns errors clearly", { - fake <- structure(list(), class = "DBIConnection") - # Helper that returns different cols depending on which info_schema query - mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", function(conn, sql) { - if (grepl("DISTINCT species_code", sql)) { - data.frame(species_code = "CO") - } else if (grepl("table_name = 'h'", sql)) { - data.frame(column_name = TBL_DEFAULT) - } else if (grepl("table_name = 'k'", sql)) { - data.frame(column_name = c("blue_line_key", "downstream_route_measure", - "spawning_co")) - } else { - data.frame(column_name = c("id_segment")) # bridge missing range cols - } - }) expect_error( - frs_habitat_overlay(fake, - from = "ws.k", to = "ws.h", bridge = "ws.s", - species = "CO"), - "bridge table.*missing required columns") -}) - -test_that("bridge: SQL emits 3-way range-containment join", { - fake <- structure(list(), class = "DBIConnection") - captured <- character(0) - mockery::stub(frs_habitat_overlay, "DBI::dbGetQuery", function(conn, sql) { - if (grepl("DISTINCT species_code", sql)) { - data.frame(species_code = "CO") - } else if (grepl("table_name = 'h'", sql)) { - data.frame(column_name = TBL_DEFAULT) - } else if (grepl("table_name = 'k'", sql)) { - data.frame(column_name = c("blue_line_key", "downstream_route_measure", - "spawning_co")) - } else { - data.frame(column_name = c("id_segment", "blue_line_key", - "downstream_route_measure", - "upstream_route_measure")) - } - }) - mockery::stub(frs_habitat_overlay, ".frs_db_execute", function(conn, sql) { - captured <<- c(captured, sql); 0L - }) - - frs_habitat_overlay(fake, - from = "ws.k", to = "ws.h", bridge = "ws.s", - species = "CO", habitat_types = "spawning", verbose = FALSE) - - expect_length(captured, 1) - sql <- captured[[1]] - expect_match(sql, "FROM ws\\.s AS s, ws\\.k AS k") - expect_match(sql, "h\\.id_segment = s\\.id_segment") - expect_match(sql, "s\\.blue_line_key = k\\.blue_line_key") - expect_match(sql, "s\\.downstream_route_measure >= k\\.downstream_route_measure") - expect_match(sql, "s\\.upstream_route_measure <= k\\.upstream_route_measure") + frs_habitat_overlay(conn, + from = "working.test_bridge_k", + to = "working.test_bridge_h", + bridge = "working.test_bridge_s", + species = "CO", verbose = FALSE), + "missing required columns") }) test_that("integration (bridge): range-containment flips contained segments", { skip_if_not(.frs_db_available(), "DB not available") conn <- frs_db_conn() on.exit({ - .frs_test_drop(conn, "working.test_bridge_to") - .frs_test_drop(conn, "working.test_bridge_from") - .frs_test_drop(conn, "working.test_bridge_streams") + .frs_test_drop(conn, "working.test_br_h") + .frs_test_drop(conn, "working.test_br_s") + .frs_test_drop(conn, "working.test_br_k") DBI::dbDisconnect(conn) }) - # Target: streams_habitat-style, keyed by id_segment only + # Target keyed by id_segment only DBI::dbExecute(conn, - "CREATE TABLE working.test_bridge_to ( + "CREATE TABLE working.test_br_h ( id_segment integer, species_code text, spawning boolean, rearing boolean, lake_rearing boolean, wetland_rearing boolean)") - # Three segments on BLK 100: drm 0-50, 50-100, 100-200 DBI::dbExecute(conn, - "INSERT INTO working.test_bridge_to VALUES - (1, 'CO', FALSE, FALSE, FALSE, FALSE), - (2, 'CO', FALSE, FALSE, FALSE, FALSE), - (3, 'CO', FALSE, FALSE, FALSE, FALSE)") + "INSERT INTO working.test_br_h VALUES + (1, 'SK', FALSE, FALSE, FALSE, FALSE), + (2, 'SK', FALSE, FALSE, FALSE, FALSE), + (3, 'SK', FALSE, FALSE, FALSE, FALSE)") - # Bridge: streams network with id_segment + ranges + # Bridge: id_segment + range DBI::dbExecute(conn, - "CREATE TABLE working.test_bridge_streams ( + "CREATE TABLE working.test_br_s ( id_segment integer, blue_line_key bigint, downstream_route_measure double precision, upstream_route_measure double precision)") DBI::dbExecute(conn, - "INSERT INTO working.test_bridge_streams VALUES - (1, 100, 0.0, 50.0), - (2, 100, 50.0, 100.0), - (3, 100, 100.0, 200.0)") - - # From: long-format, says CO spawning over drm [40, 110] - # — should include segment 2 (50-100) only; - # segment 1 starts at 0 (< 40) so urm 50 > drm 40 means it's - # PARTIALLY before — fails containment (drm 0 < 40), so excluded; - # segment 3 ends at 200 > 110, also fails containment. + "INSERT INTO working.test_br_s VALUES + (1, 100, 0, 50), + (2, 100, 50, 100), + (3, 100, 100, 200)") + + # Source: range [40, 110] for SK spawning DBI::dbExecute(conn, - "CREATE TABLE working.test_bridge_from ( - blue_line_key bigint, - downstream_route_measure double precision, + "CREATE TABLE working.test_br_k ( + blue_line_key bigint, downstream_route_measure double precision, upstream_route_measure double precision, - species_code text, habitat_type text, habitat_ind text)") + species_code text, spawning integer, rearing integer, + lake_rearing integer, wetland_rearing integer)") DBI::dbExecute(conn, - "INSERT INTO working.test_bridge_from VALUES - (100, 40.0, 110.0, 'CO', 'spawning', 'TRUE')") + "INSERT INTO working.test_br_k VALUES + (100, 40.0, 110.0, 'SK', 1, NULL, NULL, NULL)") invisible(frs_habitat_overlay(conn, - from = "working.test_bridge_from", - to = "working.test_bridge_to", - bridge = "working.test_bridge_streams", - species = "CO", habitat_types = "spawning", - format = "long", verbose = FALSE)) + from = "working.test_br_k", + to = "working.test_br_h", + bridge = "working.test_br_s", + species = "SK", + verbose = FALSE)) result <- DBI::dbGetQuery(conn, - "SELECT id_segment, spawning FROM working.test_bridge_to ORDER BY id_segment") - # Segment 2 (50-100) is the only one fully contained in [40, 110] + "SELECT id_segment, spawning, rearing + FROM working.test_br_h ORDER BY id_segment") + # Only segment 2 (50-100) is fully contained in [40, 110] expect_equal(result$spawning, c(FALSE, TRUE, FALSE)) + expect_equal(result$rearing, c(FALSE, FALSE, FALSE)) })