From 54b288a3600a619ae6438048f2f4612feec6948f Mon Sep 17 00:00:00 2001 From: almac2022 Date: Thu, 30 Apr 2026 08:20:40 -0700 Subject: [PATCH] Two bcfishpass-parity fixes: cluster phase-1 + trace_downstream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #186 — frs_cluster phase-1 + confluence-boost over-credits clusters .frs_cluster_both previously excluded on-spawning rearing segments from the clustering CTE. Removing those segments could shift cluster_minimums into the confluence-boost zone (DRM less than confluence_m) and validate clusters that bridge_gradient should deny. bcfishpass keeps on-spawning segments in clusters so its cluster_min stays high and the confluence boost does not fire. Phase-1 protection now lives only in the final UPDATE: on-spawning segments retain label_cluster TRUE regardless of cluster outcome, but cluster boundaries used for phase-2 / phase-3 testing are unchanged. Reproduction case: link MORR ST cluster 502 (Nado Creek + tributary 360704379, 12.58 percent gradient between trib confluence and downstream spawning) — bcfp denies the trib; fresh now matches. Fixes #187 — frs_trace_downstream FWA-averaged gradient hides barriers .frs_trace_downstream previously used whse_basemapping.fwa_downstreamtrace which iterates FWA-original linear_feature_id rows with feature-averaged gradients. Localized barriers on a sub-piece of a long FWA feature (e.g. a 7 m, 84 percent lake-outlet drop inside an otherwise flat 3 km feature) are invisible to that approach. Switched to a FWA_Downstream predicate join against the broken streams table arg — same pattern .frs_cluster_both phase-3 already uses. Localized gradients produced by frs_network_segment are now visible to the trace. New required columns in origins_sql: wscode_ltree, localcode_ltree (.frs_connected_waterbody is the only caller; updated). Reproduction case: link KISP SK at Kitwancool Lake (waterbody_key 329229702, area 780.6 ha). bcfp s load_habitat_linear_sk correctly stops at the lake-outlet drop; fresh now matches. Verified end-to-end: KISP SK spawning rollup pre-fix +42.3 percent, post-fix 0.0 percent exact match. Pre-existing test failure on test-frs_params.R:92 is unrelated to these changes (was failing on main before this branch). Version bump 0.24.1 -> 0.25.0. Co-Authored-By: Claude Opus 4.7 (1M context) --- DESCRIPTION | 2 +- NEWS.md | 10 +++++++++ R/frs_cluster.R | 51 +++++++++++++++++++++++++++++------------- R/frs_habitat.R | 59 +++++++++++++++++++++++++++++++++++-------------- 4 files changed, 90 insertions(+), 32 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5664182..7d4d804 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: fresh Title: Freshwater Referenced Spatial Hydrology -Version: 0.24.1 +Version: 0.25.0 Authors@R: c( person("Allan", "Irvine", , "al@newgraphenvironment.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-3495-2128")), diff --git a/NEWS.md b/NEWS.md index 14ecc66..a851f07 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +# fresh 0.25.0 + +Two bcfishpass-parity fixes surfaced during link's WSG-coverage expansion (link's MORR + KISP runs, 2026-04-30). + +**`frs_cluster` phase-1 + confluence-boost interaction** ([#186](https://github.com/NewGraphEnvironment/fresh/issues/186)). `.frs_cluster_both` previously excluded on-spawning rearing segments from the clustering CTE. Removing those segments could shift `cluster_minimums` into the confluence-boost zone (DRM < `confluence_m`) and validate clusters that `bridge_gradient` should deny. bcfishpass's path-2 keeps on-spawning segments in clusters, so its cluster_min stays high and the confluence boost doesn't fire. Phase-1 protection now lives only in the final `UPDATE` step — on-spawning segments retain `label_cluster = TRUE` regardless of cluster outcome, but the cluster boundaries used for phase-2 / phase-3 testing are unchanged. Reproduction case: MORR ST cluster 502 (Nado Creek + tributary 360704379, 12.58% gradient between trib confluence and downstream spawning) — bcfp denies the trib's rearing-eligible segments; fresh now matches. + +**`.frs_trace_downstream` averaged FWA gradient** ([#187](https://github.com/NewGraphEnvironment/fresh/issues/187)). `.frs_trace_downstream` previously used `whse_basemapping.fwa_downstreamtrace`, an iterator returning FWA-original `linear_feature_id` rows with feature-averaged gradients. Localized barriers on a sub-piece of a long FWA feature (e.g. a 7 m, 84% lake-outlet drop inside an otherwise flat 3 km feature) are invisible to that approach. Switched to a `FWA_Downstream` predicate join against the broken streams `table` arg — same pattern `.frs_cluster_both` phase-3 already uses. Localized gradients produced by `frs_network_segment()` are now visible to the trace. New required columns in `origins_sql`: `wscode_ltree`, `localcode_ltree` (`.frs_connected_waterbody` is the only caller; updated). Reproduction case: KISP SK at Kitwancool Lake — bcfp's `model/02_habitat_linear/sql/load_habitat_linear_sk.sql` correctly stops at the lake-outlet drop; fresh now matches. + +Both fixes tighten link's bcfishpass-config parity rollup. No API changes for end users; `.frs_trace_downstream` is an internal helper. NEWS bump from 0.24.1 → 0.25.0 because behaviour changes are observable in `frs_habitat_classify` / `frs_cluster` / `frs_habitat` outputs for any caller running connectivity or cluster-aware classifications. + # fresh 0.24.1 Refresh bundled `inst/extdata/parameters_habitat_rules.yaml` to match link's current default-bundle output. The bundled sample had drifted to pre-`in_waterbody` / pre-`area_only` shape (last synced 2026-04-12) while link's bundles evolved through three #69 phases. Sync recovers the canonical sample for any `frs_habitat()` caller using fresh's bundled defaults. diff --git a/R/frs_cluster.R b/R/frs_cluster.R index 3f1d349..3a175a2 100644 --- a/R/frs_cluster.R +++ b/R/frs_cluster.R @@ -411,19 +411,25 @@ frs_cluster <- function(conn, table, habitat, tmp_clusters <- sprintf("pg_temp.frs_clusters_%s", gsub("[^a-z0-9]", "", tolower(species))) - # Phase 1: on-spawning segments — always valid, excluded from clustering + # Phase 1: on-spawning segments — always valid (always retained as + # `label_cluster = TRUE` regardless of cluster validation outcome). + # + # Earlier this function excluded phase-1 segments from the clustering + # CTE itself, on the theory that they "didn't need" to participate. + # That introduced a parity defect with bcfishpass: removing on-spawning + # segments from clusters can shift `cluster_minimums` into the + # confluence-boost zone (DRM < confluence_m) and re-validate clusters + # that bridge_gradient should deny — see fresh#186 for the MORR ST + # repro. bcfishpass keeps on-spawning segments in clusters; we now + # match. + # + # Phase-1 protection moves to the final UPDATE: phase-1 segments are + # excluded from the rearing-strip step regardless of cluster outcome. phase1_ids <- DBI::dbGetQuery(conn, sprintf( "SELECT h.id_segment FROM %s h WHERE h.species_code = %s AND h.%s IS TRUE AND h.%s IS TRUE", habitat, sp_quoted, label_cluster, label_connect))$id_segment - # Cluster remaining (non-phase1) segments - phase1_filter <- if (length(phase1_ids) > 0) { - sprintf("AND h.id_segment NOT IN (%s)", paste(phase1_ids, collapse = ", ")) - } else { - "" - } - .frs_db_execute(conn, sprintf("DROP TABLE IF EXISTS %s", tmp_clusters)) .frs_db_execute(conn, sprintf( "CREATE TEMP TABLE %s AS @@ -437,8 +443,8 @@ frs_cluster <- function(conn, table, habitat, FROM %s h INNER JOIN %s s ON h.id_segment = s.id_segment WHERE h.species_code = %s - AND h.%s IS TRUE %s", - tmp_clusters, habitat, table, sp_quoted, label_cluster, phase1_filter)) + AND h.%s IS TRUE", + tmp_clusters, habitat, table, sp_quoted, label_cluster)) .frs_db_execute(conn, sprintf( "CREATE INDEX ON %s (cluster_id)", tmp_clusters)) @@ -548,14 +554,27 @@ frs_cluster <- function(conn, table, habitat, # Union: valid in Phase 2 OR Phase 3 all_valid <- unique(c(valid_phase2, valid_phase3)) - # UPDATE: set FALSE for clustered segments NOT valid in either phase + # UPDATE: set FALSE for clustered segments NOT valid in either phase. + # Phase-1 segments (on-spawning rearing) are always retained — they + # represent segments where label_cluster and label_connect are both + # TRUE, which is the unambiguous "this segment is itself the + # connection" case. Excluded from the strip regardless of cluster + # outcome. + phase1_protect <- if (length(phase1_ids) > 0) { + sprintf("AND h.id_segment NOT IN (%s)", + paste(as.integer(phase1_ids), collapse = ", ")) + } else { + "" + } + if (length(all_valid) == 0) { .frs_db_execute(conn, sprintf( "UPDATE %s h SET %s = FALSE FROM %s c WHERE h.id_segment = c.id_segment - AND h.species_code = %s", - habitat, label_cluster, tmp_clusters, sp_quoted)) + AND h.species_code = %s + %s", + habitat, label_cluster, tmp_clusters, sp_quoted, phase1_protect)) } else { valid_list <- paste(as.integer(all_valid), collapse = ", ") .frs_db_execute(conn, sprintf( @@ -563,8 +582,10 @@ frs_cluster <- function(conn, table, habitat, FROM %s c WHERE h.id_segment = c.id_segment AND h.species_code = %s - AND c.cluster_id NOT IN (%s)", - habitat, label_cluster, tmp_clusters, sp_quoted, valid_list)) + AND c.cluster_id NOT IN (%s) + %s", + habitat, label_cluster, tmp_clusters, sp_quoted, valid_list, + phase1_protect)) } .frs_db_execute(conn, sprintf("DROP TABLE IF EXISTS %s", tmp_clusters)) diff --git a/R/frs_habitat.R b/R/frs_habitat.R index 89825bc..b09cd19 100644 --- a/R/frs_habitat.R +++ b/R/frs_habitat.R @@ -1348,20 +1348,41 @@ frs_habitat_species <- function(conn, species_code, base_tbl, breaks, #' @noRd #' Trace downstream from origins with distance cap and gradient stop #' -#' Given a set of origin points (waterbody outlets), trace downstream via -#' `fwa_downstreamtrace`, accumulate distance per origin group, cap at -#' `distance_max`, and stop at the first segment exceeding `gradient_max`. +#' Given a set of origin points (waterbody outlets), trace downstream +#' via `FWA_Downstream` predicate join against the broken streams +#' `table`, accumulate distance per origin group, cap at `distance_max`, +#' and stop at the first segment whose gradient exceeds `gradient_max`. #' Returns qualifying `linear_feature_id`s into a target table. #' +#' Earlier this function used `whse_basemapping.fwa_downstreamtrace`, +#' which iterates FWA-original `fwa_stream_networks_sp` rows with their +#' feature-averaged gradients. Localized barriers on a sub-piece of a +#' long FWA feature (e.g. a 7 m, 84% lake-outlet drop inside an +#' otherwise flat 3 km feature) are invisible to that approach because +#' the gradient is averaged across the whole feature. Switching to a +#' predicate join against the broken streams table preserves the +#' localized gradient values that `frs_network_segment()` produced — +#' see fresh#187 for the KISP SK repro at Kitwancool Lake. +#' +#' Mainstem only (`blue_line_key = watershed_key`) — matches the bcfp +#' convention and avoids side-channel double counting. +#' #' @param conn DBI connection. +#' @param table Character. Schema-qualified broken-streams table to +#' trace against. Must carry `blue_line_key`, `downstream_route_measure`, +#' `wscode_ltree`, `localcode_ltree`, `watershed_key`, `length_metre`, +#' `gradient`, and `linear_feature_id`. #' @param origins_sql Character. SQL that produces columns: `origin_id` -#' (grouping key), `blue_line_key`, `downstream_route_measure`. +#' (grouping key), `blue_line_key`, `downstream_route_measure`, +#' `wscode_ltree`, `localcode_ltree`. (`wscode_ltree`/`localcode_ltree` +#' are new requirements compared to the previous interface — callers +#' must SELECT them.) #' @param target Character. Table to INSERT `linear_feature_id` results into. #' @param distance_max Numeric. Maximum cumulative trace distance (metres). -#' @param gradient_max Numeric. Gradient threshold — trace stops at the first -#' segment exceeding this value. +#' @param gradient_max Numeric. Gradient threshold — trace stops at the +#' first segment exceeding this value. #' @noRd -.frs_trace_downstream <- function(conn, origins_sql, target, +.frs_trace_downstream <- function(conn, table, origins_sql, target, distance_max, gradient_max) { dm <- .frs_sql_num(distance_max) gm <- .frs_sql_num(gradient_max) @@ -1371,28 +1392,31 @@ frs_habitat_species <- function(conn, species_code, base_tbl, breaks, WITH origins AS (%s), downstream AS ( SELECT o.origin_id, - t.linear_feature_id, t.gradient, t.wscode, + t.linear_feature_id, t.gradient, t.wscode_ltree, t.downstream_route_measure, -t.length_metre + SUM(t.length_metre) OVER ( PARTITION BY o.origin_id - ORDER BY t.wscode DESC, t.downstream_route_measure DESC + ORDER BY t.wscode_ltree DESC, t.downstream_route_measure DESC ) AS dist_to_origin FROM origins o - CROSS JOIN LATERAL whse_basemapping.fwa_downstreamtrace( - o.blue_line_key, o.downstream_route_measure) t + INNER JOIN %s t ON FWA_Downstream( + o.blue_line_key, o.downstream_route_measure, + o.wscode_ltree, o.localcode_ltree, + t.blue_line_key, t.downstream_route_measure, + t.wscode_ltree, t.localcode_ltree) WHERE t.blue_line_key = t.watershed_key ), downstream_capped AS ( SELECT row_number() OVER ( PARTITION BY origin_id - ORDER BY wscode DESC, downstream_route_measure DESC + ORDER BY wscode_ltree DESC, downstream_route_measure DESC ) AS rn, * FROM downstream WHERE dist_to_origin < %s ), nearest_barrier AS ( SELECT DISTINCT ON (origin_id) * FROM downstream_capped WHERE gradient > %s - ORDER BY origin_id, wscode DESC, downstream_route_measure DESC + ORDER BY origin_id, wscode_ltree DESC, downstream_route_measure DESC ), valid_downstream AS ( SELECT d.linear_feature_id FROM downstream_capped d @@ -1400,7 +1424,7 @@ frs_habitat_species <- function(conn, species_code, base_tbl, breaks, WHERE nb.rn IS NULL OR d.rn < nb.rn ) SELECT DISTINCT linear_feature_id FROM valid_downstream", - target, origins_sql, dm, gm)) + target, origins_sql, table, dm, gm)) } @@ -1466,10 +1490,13 @@ frs_habitat_species <- function(conn, species_code, base_tbl, breaks, "CREATE TEMP TABLE %s (linear_feature_id bigint)", lfid_tbl)) # Phase 1: Downstream trace from waterbody outlets + # origins_sql now also yields wscode_ltree + localcode_ltree, required + # by the predicate-join trace (fresh#187). origins_sql <- sprintf( "SELECT DISTINCT ON (s2.waterbody_key) s2.waterbody_key AS origin_id, - s2.blue_line_key, s2.downstream_route_measure + s2.blue_line_key, s2.downstream_route_measure, + s2.wscode_ltree, s2.localcode_ltree FROM %s s2 INNER JOIN %s hr ON s2.id_segment = hr.id_segment WHERE hr.species_code = %s AND hr.rearing IS TRUE @@ -1477,7 +1504,7 @@ frs_habitat_species <- function(conn, species_code, base_tbl, breaks, s2.downstream_route_measure", table, habitat, sp_quoted) - .frs_trace_downstream(conn, origins_sql, lfid_tbl, + .frs_trace_downstream(conn, table, origins_sql, lfid_tbl, distance_max, bridge_gradient) # Map traced linear_feature_ids back to id_segments