diff --git a/DESCRIPTION b/DESCRIPTION index a979d8b..b5031c9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: link Title: Crossing Connectivity Interpretation -Version: 0.5.0 +Version: 0.6.0 Authors@R: person("Allan", "Irvine", , "airvine@newgraphenvironment.com", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index a354028..73548fd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,14 @@ +# link 0.6.0 + +Honour `user_barriers_definite_control.csv` at the observation-override step. + +- `lnk_barrier_overrides()` now excludes observations upstream of control-flagged positions from counting toward the override threshold, matching bcfishpass's access SQL. Previously, controlled positions (concrete dams, long impassable falls, diversions) could be re-opened by upstream historical observations ([#44](https://github.com/NewGraphEnvironment/link/issues/44)). +- Gated per-species by a new `observation_control_apply` column in `parameters_fresh.csv` — TRUE for CH/CM/CO/PK/SK/ST; FALSE for BT/WCT; NA for CT/DV/RB. Residents routinely inhabit reaches upstream of anadromous-blocking falls (post-glacial headwater connectivity, no ocean-return requirement), so their observations still override. Matches bcfishpass's per-model application. +- Habitat-confirmation override path intentionally bypasses the control table — expert-confirmed habitat is higher-trust than observations, and bcfishpass's `hab_upstr` CTE has no control join either. +- `.lnk_pipeline_prep_overrides` now passes the control table to `lnk_barrier_overrides()` when the config manifest declares `barriers_definite_control`. Manifest key is the contract; no DB probe. +- `.lnk_pipeline_prep_load_aux` now always creates a schema-valid (possibly empty) `barriers_definite_control` table when the manifest declares the key — fixes an asymmetric gating bug that would have raised "relation does not exist" on AOIs with zero control rows. 
+- End-to-end validation WSG: DEAD (Deadman River) added to `data-raw/_targets.R`. It has a single `barrier_ind = TRUE` control row at FALLS (356361749, 45743) with six anadromous observations upstream and zero habitat coverage — the unique combination that actively exercises the filter. All four prior WSGs (ADMS/BULK/BABL/ELKR) were rescued by either the observation threshold or habitat path, making them parity checks rather than filter tests. + # link 0.5.0 Documentation and narrative for the targets pipeline. diff --git a/R/lnk_barrier_overrides.R b/R/lnk_barrier_overrides.R index cb36b92..b95e4c9 100644 --- a/R/lnk_barrier_overrides.R +++ b/R/lnk_barrier_overrides.R @@ -27,11 +27,20 @@ #' @param control Character or `NULL`. Schema-qualified table of barrier #' controls with columns: `blue_line_key`, `downstream_route_measure`, #' `barrier_ind`. Barriers in this table with `barrier_ind = TRUE` cannot -#' be overridden. +#' be overridden **by observations** — but only for species where +#' `params$observation_control_apply` is TRUE. Resident species routinely +#' inhabit reaches upstream of anadromous-blocking falls (post-glacial +#' connectivity, no ocean-return requirement), so their observations still +#' count unless this flag says otherwise. Habitat confirmations +#' (`habitat` argument) are higher-trust than observations — they bypass +#' the control table entirely, for all species. #' @param params Data frame with per-species parameters. Must have columns: #' `species_code`, `observation_threshold`, `observation_date_min`, -#' `observation_buffer_m`, `observation_species`. See -#' `configs/bcfishpass/parameters_fresh.csv` for format. +#' `observation_buffer_m`, `observation_species`. Optional column +#' `observation_control_apply` (logical) — when TRUE, the `control` table +#' blocks overrides for this species; when FALSE/NA/missing, the species +#' ignores control. Bcfishpass defaults: TRUE for CH/CM/CO/PK/SK/ST, +#' FALSE for BT/WCT. 
See `configs/bcfishpass/parameters_fresh.csv`. #' @param cols_index Character vector. Column names to index on the #' barriers table for `fwa_upstream()` performance. Indexes are created #' `IF NOT EXISTS`. Default `c("blue_line_key", "wscode_ltree", @@ -135,22 +144,36 @@ lnk_barrier_overrides <- function(conn, obs_sp_list <- if (is.na(obs_sp_str)) sp else trimws(strsplit(obs_sp_str, ";")[[1]]) obs_sp_sql <- paste0("'", obs_sp_list, "'", collapse = ", ") + # Species-level opt-in for the control filter. bcfishpass applies control + # only in the anadromous access models (CH/CM/CO/PK/SK, ST) — residents + # (BT, WCT) and sub-CT species routinely live upstream of anadromous + # barriers (post-glacial headwater connectivity, no ocean-return + # requirement), so their observations should still override. + ctrl_apply_col <- species_to_process$observation_control_apply[i] + ctrl_apply <- isTRUE(as.logical(ctrl_apply_col)) + overrides_found <- 0L - # Control table: any matching control row prevents the override. - # barrier_ind is used separately in barrier loading (true = keep, false = remove). - # Here we only care about presence — if a control row exists for this barrier - # position, observations/habitat don't override it. - ctrl_where <- if (!is.null(control)) { + # Control table: a matching control row with barrier_ind = TRUE + # blocks the override. `NOT EXISTS` (rather than a LEFT JOIN + filter) + # keeps two things right in one shot — the barrier is blocked only + # when at least one TRUE control row matches (mixed TRUE/FALSE within + # the 1 m tolerance resolves to "blocked"), and the outer GROUP BY / + # HAVING count(...) aggregation does not get row-multiplied by a join + # to control. Gated per-species by `observation_control_apply`. 
+ ctrl_where <- "" + ctrl_filter <- if (!is.null(control) && ctrl_apply) { sprintf( - "LEFT JOIN %s c - ON b.blue_line_key = c.blue_line_key - AND abs(b.downstream_route_measure - c.downstream_route_measure) < 1", + "AND NOT EXISTS ( + SELECT 1 FROM %s c + WHERE c.blue_line_key = b.blue_line_key + AND abs(b.downstream_route_measure - c.downstream_route_measure) < 1 + AND c.barrier_ind::boolean = true + )", control) } else { "" } - ctrl_filter <- if (!is.null(control)) "AND c.blue_line_key IS NULL" else "" # --- Observation-based overrides (JOIN pattern, not correlated subquery) --- if (!is.null(observations) && threshold > 0) { @@ -192,6 +215,11 @@ lnk_barrier_overrides <- function(conn, } # --- Habitat confirmation overrides (any confirmed habitat upstream) --- + # Control filter intentionally NOT applied here. Expert-confirmed + # habitat is a higher-trust signal than the control table — by the + # time a reviewer has marked habitat as confirmed upstream of a + # position, they have already considered the barrier's passability. + # bcfishpass does the same: `hab_upstr` CTE has no control join. 
if (!is.null(habitat)) { sql <- sprintf( "INSERT INTO %s (blue_line_key, downstream_route_measure, species_code) @@ -204,18 +232,15 @@ lnk_barrier_overrides <- function(conn, ON s.blue_line_key = h.blue_line_key AND round(h.upstream_route_measure::numeric) >= round(s.downstream_route_measure::numeric) AND round(h.upstream_route_measure::numeric) <= round(s.upstream_route_measure::numeric) - %s WHERE whse_basemapping.fwa_upstream( b.blue_line_key, b.downstream_route_measure, b.wscode_ltree, b.localcode_ltree, h.blue_line_key, h.upstream_route_measure, s.wscode_ltree, s.localcode_ltree, false, 200) - %s ON CONFLICT DO NOTHING", to, sp, - barriers, habitat, obs_sp_sql, - ctrl_where, ctrl_filter) + barriers, habitat, obs_sp_sql) n <- DBI::dbExecute(conn, sql) overrides_found <- overrides_found + n diff --git a/R/lnk_pipeline_prepare.R b/R/lnk_pipeline_prepare.R index 2c4c922..4979e6a 100644 --- a/R/lnk_pipeline_prepare.R +++ b/R/lnk_pipeline_prepare.R @@ -125,7 +125,12 @@ lnk_pipeline_prepare <- function(conn, aoi, cfg, schema, downstream_route_measure double precision)", schema)) } - # --- Barriers-definite control (per-WSG, used to prune gradient barriers) --- + # --- Barriers-definite control (per-WSG, used to prune gradient barriers + # AND to lock positions against observation-based overrides). Mirror the + # barriers_definite pattern above — whenever the manifest declares the + # key, ensure a schema-valid table exists even if this AOI has zero rows, + # so downstream steps can gate on the manifest field rather than probing + # the DB. 
ctrl_all <- cfg$overrides$barriers_definite_control if (!is.null(ctrl_all)) { ctrl <- ctrl_all[ctrl_all$watershed_group_code == aoi, ] @@ -133,6 +138,14 @@ lnk_pipeline_prepare <- function(conn, aoi, cfg, schema, DBI::dbWriteTable(conn, DBI::Id(schema = schema, table = "barriers_definite_control"), ctrl, overwrite = TRUE) + } else { + .lnk_db_execute(conn, sprintf( + "DROP TABLE IF EXISTS %s.barriers_definite_control", schema)) + .lnk_db_execute(conn, sprintf( + "CREATE TABLE %s.barriers_definite_control ( + blue_line_key integer, + downstream_route_measure double precision, + barrier_ind text)", schema)) } } @@ -252,10 +265,21 @@ lnk_pipeline_prepare <- function(conn, aoi, cfg, schema, .lnk_quote_literal(schema))) habitat_arg <- if (nrow(habitat_exists) > 0) habitat_tbl else NULL + # Manifest-driven gating. `.lnk_pipeline_prep_load_aux` writes + # `<schema>.barriers_definite_control` exactly when this manifest key + # is declared on the config bundle, so the config field itself is the + # direct contract for whether control is in play — no DB probe needed. 
+ control_arg <- if (!is.null(cfg$overrides$barriers_definite_control)) { + paste0(schema, ".barriers_definite_control") + } else { + NULL + } + lnk_barrier_overrides(conn, barriers = paste0(schema, ".natural_barriers"), observations = observations, habitat = habitat_arg, + control = control_arg, params = cfg$parameters_fresh, to = paste0(schema, ".barrier_overrides"), verbose = FALSE) diff --git a/comms/rtj/20260423_m1_r_worker_verified.md b/comms/rtj/20260423_m1_r_worker_verified.md new file mode 100644 index 0000000..fc33afd --- /dev/null +++ b/comms/rtj/20260423_m1_r_worker_verified.md @@ -0,0 +1,35 @@ +--- +from: rtj +to: link +topic: M1 verified as a ready R-worker host; crew.cluster 0.4.0 API gap +status: open +--- + +## 2026-04-23 — rtj + +### M1 verified as a ready R-worker host (2026-04-22) + +Ran `rtj/scripts/hosts/crew-worker_verify.R` to validate the infra primitive under whatever launcher you pick for PR 3-of-3. 7/7 checks pass, 1.1s M4→M1→M4 round-trip via raw `ssh m1 'Rscript -'` stdin pipe. + +Confirmed on M1: + +- R 4.5.2 with `link`, `fresh`, `targets`, `crew` all loading cleanly +- `.libPaths()` has user library first (`~/Library/R/arm64/4.5/library`) +- `PG_DB_SHARE` propagates to non-interactive SSH R via `~/.Renviron` +- tailnet ACL permits peer → M4 TCP callbacks + +### One pitfall worth flagging on launcher choice + +`crew.cluster` 0.4.0 does NOT export `crew_controller_cluster` — only HPC-scheduler variants (`crew_controller_sge/lsf/pbs/slurm`). If you were planning to use a generic "ssh" controller from crew.cluster, it doesn't exist. Options I see: + +- `crew::crew_controller_local()` on M4 + custom `crew_class_launcher` subclass for SSH +- `clustermq` (mature, ssh-native) +- Raw `mirai::daemon` + bespoke dispatcher + +Not pushing an opinion; just saving you the 5 min I spent discovering this. 
+ +### Landing note (per soul 2026-04-23 branch ruling) + +This thread is landing on your `44-barriers-definite-control` branch because that's where link's local clone is currently checked out. It won't reach `main` until PR #44 merges. If you need it visible on `main` sooner, cherry-pick or merge when convenient. + +Close when acknowledged. diff --git a/data-raw/_targets.R b/data-raw/_targets.R index f5dcbb3..af08f49 100644 --- a/data-raw/_targets.R +++ b/data-raw/_targets.R @@ -1,8 +1,10 @@ # data-raw/_targets.R # # Pipeline definition for the bcfishpass comparison. Orchestrates the -# six lnk_pipeline_* phase helpers across the four validated watershed -# groups and rolls up the per-WSG diff tibbles into one aggregate. +# six lnk_pipeline_* phase helpers across five watershed groups and +# rolls up the per-WSG diff tibbles into one aggregate. ADMS/BULK/BABL/ +# ELKR are the numerical-parity WSGs; DEAD (added 2026-04-23 with #44) +# is the end-to-end test for the `barriers_definite_control` filter. # # Run from the link repo root: # Rscript -e 'targets::tar_config_set(script = "data-raw/_targets.R", @@ -36,7 +38,13 @@ tar_option_set( packages = c("link", "fresh", "DBI", "RPostgres", "tibble", "dplyr") ) -wsgs <- c("ADMS", "BULK", "BABL", "ELKR") +# DEAD (Deadman River) is the end-to-end test for the control filter. +# It has one `barrier_ind = TRUE` control row with 6 observations upstream +# in the CH/CM/CO/PK/SK pool and zero habitat-classification coverage — +# the unique combination that actively exercises the filter. The other +# four WSGs are numerical-parity checks; their TRUE control rows are +# all rescued by the habitat path or sit below the observation threshold. 
+wsgs <- c("ADMS", "BULK", "BABL", "ELKR", "DEAD") list( tar_target(cfg, link::lnk_config("bcfishpass")), @@ -53,7 +61,8 @@ list( rollup, dplyr::bind_rows( comparison_ADMS, comparison_BULK, - comparison_BABL, comparison_ELKR + comparison_BABL, comparison_ELKR, + comparison_DEAD ) ) ) diff --git a/data-raw/logs/20260423_01_tar_make_post_44_phase2.txt b/data-raw/logs/20260423_01_tar_make_post_44_phase2.txt new file mode 100644 index 0000000..c7330e0 --- /dev/null +++ b/data-raw/logs/20260423_01_tar_make_post_44_phase2.txt @@ -0,0 +1,297 @@ ++ cfg dispatched +✔ cfg completed [239ms, 296.43 kB] ++ comparison_BABL dispatched +NOTICE: schema "fresh" already exists, skipping + +NOTICE: table "barriers_definite" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table 
"streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_st" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +NOTICE: table "frs_clusters_st" does not exist, skipping + +✔ comparison_BABL completed [1m 37.2s, 419 B] ++ comparison_ELKR dispatched +NOTICE: schema "fresh" already exists, skipping + +Override validation: working_elkr.pscis_fixes vs working_elkr.crossings + Total overrides: 324 + Valid (matched): 288 + Orphans: 36 <-- not found in crossings + Duplicates: 0 +Updated 288 of 7306 rows (barrier_status) +NOTICE: table "barriers_definite_control" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not 
exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_wct" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_wct" does not exist, skipping + +✔ comparison_ELKR completed [2m 35.7s, 319 B] ++ comparison_BULK dispatched +NOTICE: schema "fresh" already exists, skipping + +Override validation: working_bulk.pscis_fixes vs working_bulk.crossings + Total overrides: 580 + Valid (matched): 514 + Orphans: 66 <-- not found in crossings + Duplicates: 0 +Updated 514 of 5568 rows (barrier_status) +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table 
"barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_pk" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_st" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +NOTICE: table "frs_clusters_st" does not exist, skipping + +✔ comparison_BULK completed [2m 55.8s, 438 B] ++ comparison_ADMS dispatched +NOTICE: schema "fresh" already exists, skipping + +NOTICE: table "barriers_definite" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table 
"barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +✔ comparison_ADMS completed [1m 9.1s, 382 B] ++ rollup dispatched +✔ rollup completed [1ms, 758 B] +✔ ended pipeline [8m 18.3s, 6 completed, 0 skipped] diff --git a/data-raw/logs/20260423_02_tar_make_phase2a.txt b/data-raw/logs/20260423_02_tar_make_phase2a.txt new file 
mode 100644 index 0000000..fe8a5a7 --- /dev/null +++ b/data-raw/logs/20260423_02_tar_make_phase2a.txt @@ -0,0 +1,297 @@ ++ cfg dispatched +✔ cfg completed [237ms, 296.48 kB] ++ comparison_BABL dispatched +NOTICE: schema "fresh" already exists, skipping + +NOTICE: table "barriers_definite" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + 
+NOTICE: table "streams_acc_02_ovr_st" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +NOTICE: table "frs_clusters_st" does not exist, skipping + +✔ comparison_BABL completed [1m 38.8s, 418 B] ++ comparison_ELKR dispatched +NOTICE: schema "fresh" already exists, skipping + +Override validation: working_elkr.pscis_fixes vs working_elkr.crossings + Total overrides: 324 + Valid (matched): 288 + Orphans: 36 <-- not found in crossings + Duplicates: 0 +Updated 288 of 7306 rows (barrier_status) +NOTICE: table "barriers_definite_control" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, 
skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_wct" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_wct" does not exist, skipping + +✔ comparison_ELKR completed [2m 34.7s, 319 B] ++ comparison_BULK dispatched +NOTICE: schema "fresh" already exists, skipping + +Override validation: working_bulk.pscis_fixes vs working_bulk.crossings + Total overrides: 580 + Valid (matched): 514 + Orphans: 66 <-- not found in crossings + Duplicates: 0 +Updated 514 of 5568 rows (barrier_status) +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" 
does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_pk" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_st" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +NOTICE: table "frs_clusters_st" does not exist, skipping + +✔ comparison_BULK completed [2m 56.2s, 438 B] ++ comparison_ADMS dispatched +NOTICE: schema "fresh" already exists, skipping + +NOTICE: table "barriers_definite" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" 
does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +✔ comparison_ADMS completed [1m 7.3s, 387 B] ++ rollup dispatched +✔ rollup completed [1ms, 760 B] +✔ ended pipeline [8m 17.7s, 6 completed, 0 skipped] diff --git a/data-raw/logs/20260423_03_tar_make_phase2b.txt b/data-raw/logs/20260423_03_tar_make_phase2b.txt new file mode 100644 index 0000000..c465066 --- /dev/null +++ b/data-raw/logs/20260423_03_tar_make_phase2b.txt @@ -0,0 +1,297 @@ ++ cfg dispatched +✔ cfg completed [243ms, 296.48 kB] ++ comparison_BABL dispatched +NOTICE: schema "fresh" already exists, skipping + +NOTICE: table "barriers_definite" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: 
table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_st" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +NOTICE: table 
"frs_clusters_st" does not exist, skipping + +✔ comparison_BABL completed [1m 44.1s, 416 B] ++ comparison_ELKR dispatched +NOTICE: schema "fresh" already exists, skipping + +Override validation: working_elkr.pscis_fixes vs working_elkr.crossings + Total overrides: 324 + Valid (matched): 288 + Orphans: 36 <-- not found in crossings + Duplicates: 0 +Updated 288 of 7306 rows (barrier_status) +NOTICE: table "barriers_definite_control" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_wct" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, 
skipping + +NOTICE: table "frs_clusters_wct" does not exist, skipping + +✔ comparison_ELKR completed [2m 36.3s, 319 B] ++ comparison_BULK dispatched +NOTICE: schema "fresh" already exists, skipping + +Override validation: working_bulk.pscis_fixes vs working_bulk.crossings + Total overrides: 580 + Valid (matched): 514 + Orphans: 66 <-- not found in crossings + Duplicates: 0 +Updated 514 of 5568 rows (barrier_status) +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table 
"streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_pk" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_st" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +NOTICE: table "frs_clusters_st" does not exist, skipping + +✔ comparison_BULK completed [2m 51.2s, 438 B] ++ comparison_ADMS dispatched +NOTICE: schema "fresh" already exists, skipping + +NOTICE: table "barriers_definite" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table 
"streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +✔ comparison_ADMS completed [1m 6.6s, 379 B] ++ rollup dispatched +✔ rollup completed [1ms, 751 B] +✔ ended pipeline [8m 18.8s, 6 completed, 0 skipped] diff --git a/data-raw/logs/20260423_04_tar_make_repro.txt b/data-raw/logs/20260423_04_tar_make_repro.txt new file mode 100644 index 0000000..f15addb --- /dev/null +++ b/data-raw/logs/20260423_04_tar_make_repro.txt @@ -0,0 +1,297 @@ ++ cfg dispatched +✔ cfg completed [265ms, 296.48 kB] ++ comparison_BABL dispatched +NOTICE: schema "fresh" already exists, skipping + +NOTICE: table "barriers_definite" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, 
skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_st" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +NOTICE: table "frs_clusters_st" does not exist, skipping + +✔ comparison_BABL completed [1m 44.9s, 416 B] ++ comparison_ELKR dispatched +NOTICE: schema "fresh" already exists, skipping + +Override validation: working_elkr.pscis_fixes vs working_elkr.crossings + Total overrides: 324 + Valid (matched): 288 + Orphans: 36 <-- not found in crossings + Duplicates: 0 +Updated 288 of 7306 rows (barrier_status) +NOTICE: table 
"barriers_definite_control" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_wct" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_wct" does not exist, skipping + +✔ comparison_ELKR completed [2m 27.2s, 319 B] ++ comparison_BULK dispatched +NOTICE: schema "fresh" already exists, skipping + +Override validation: working_bulk.pscis_fixes vs working_bulk.crossings + Total overrides: 580 + Valid (matched): 514 + Orphans: 66 <-- not found in crossings + Duplicates: 0 +Updated 514 of 5568 rows 
(barrier_status) +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_pk" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_st" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, 
skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +NOTICE: table "frs_clusters_st" does not exist, skipping + +✔ comparison_BULK completed [2m 56.7s, 438 B] ++ comparison_ADMS dispatched +NOTICE: schema "fresh" already exists, skipping + +NOTICE: table "barriers_definite" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + 
+NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +✔ comparison_ADMS completed [1m 9s, 379 B] ++ rollup dispatched +✔ rollup completed [1ms, 751 B] +✔ ended pipeline [8m 18.4s, 6 completed, 0 skipped] diff --git a/data-raw/logs/20260423_05_tar_make_dead.txt b/data-raw/logs/20260423_05_tar_make_dead.txt new file mode 100644 index 0000000..82c74ed --- /dev/null +++ b/data-raw/logs/20260423_05_tar_make_dead.txt @@ -0,0 +1,73 @@ ++ comparison_DEAD dispatched +NOTICE: schema "working_dead" does not exist, skipping + +NOTICE: schema "fresh" already exists, skipping + +NOTICE: table "barriers_definite" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + 
+NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +NOTICE: table "frs_clusters_st" does not exist, skipping + +✔ comparison_DEAD completed [42.2s, 420 B] ++ rollup dispatched +✔ rollup completed [0ms, 866 B] +✔ ended pipeline [42.6s, 2 completed, 5 skipped] diff --git a/data-raw/logs/20260423_06_tar_make_repro_dead.txt b/data-raw/logs/20260423_06_tar_make_repro_dead.txt new file mode 100644 index 0000000..d358e4e --- /dev/null +++ b/data-raw/logs/20260423_06_tar_make_repro_dead.txt @@ -0,0 +1,365 @@ ++ cfg dispatched +✔ cfg completed [248ms, 296.48 kB] ++ comparison_BABL dispatched +NOTICE: schema "fresh" already exists, skipping + +NOTICE: table "barriers_definite" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not 
exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_st" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +NOTICE: table "frs_clusters_st" does not exist, skipping + +✔ comparison_BABL completed [1m 46s, 416 B] ++ comparison_ELKR dispatched +NOTICE: schema "fresh" already exists, skipping + +Override validation: working_elkr.pscis_fixes vs working_elkr.crossings + Total overrides: 324 + Valid (matched): 288 + Orphans: 36 <-- not found in crossings + Duplicates: 0 +Updated 
288 of 7306 rows (barrier_status) +NOTICE: table "barriers_definite_control" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_wct" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_wct" does not exist, skipping + +✔ comparison_ELKR completed [2m 35.6s, 319 B] ++ comparison_BULK dispatched +NOTICE: schema "fresh" already exists, skipping + +Override validation: working_bulk.pscis_fixes vs working_bulk.crossings + Total overrides: 580 + Valid (matched): 514 + Orphans: 66 <-- not found in crossings 
+ Duplicates: 0 +Updated 514 of 5568 rows (barrier_status) +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_02" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_pk" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "streams_acc_02_ovr_st" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + 
+NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +NOTICE: table "frs_clusters_st" does not exist, skipping + +✔ comparison_BULK completed [2m 58.9s, 438 B] ++ comparison_ADMS dispatched +NOTICE: schema "fresh" already exists, skipping + +NOTICE: table "barriers_definite" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_ch" does not exist, skipping + +NOTICE: table 
"streams_acc_015_ovr_co" does not exist, skipping + +NOTICE: table "streams_acc_015_ovr_sk" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +✔ comparison_ADMS completed [1m 9.8s, 379 B] ++ comparison_DEAD dispatched +NOTICE: schema "fresh" already exists, skipping + +NOTICE: table "barriers_definite" does not exist, skipping + +NOTICE: table "streams_blk" does not exist, skipping + +NOTICE: table "gradient_barriers_raw" does not exist, skipping + +NOTICE: table "natural_barriers" does not exist, skipping + +NOTICE: table "barrier_overrides" does not exist, skipping + +NOTICE: table "barriers_bt" does not exist, skipping + +NOTICE: table "barriers_bt_min" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk" does not exist, skipping + +NOTICE: table "barriers_ch_cm_co_pk_sk_min" does not exist, skipping + +NOTICE: table "barriers_st" does not exist, skipping + +NOTICE: table "barriers_st_min" does not exist, skipping + +NOTICE: table "barriers_wct" does not exist, skipping + +NOTICE: table "barriers_wct_min" does not exist, skipping + +NOTICE: table "gradient_barriers_minimal" does not exist, skipping + +NOTICE: table "streams" does not exist, skipping + +NOTICE: table "streams_habitat" does not exist, skipping + +NOTICE: table "observations_breaks" does not exist, skipping + +NOTICE: table "habitat_endpoints" does not exist, skipping + +NOTICE: table "crossings_breaks" does not exist, skipping + +NOTICE: table "streams_breaks" does not exist, skipping + +NOTICE: relation "streams_id_segment_idx" already exists, skipping + +NOTICE: table "streams_acc_015" does not exist, skipping + +NOTICE: table "streams_acc_02" 
does not exist, skipping + +NOTICE: table "streams_acc_025" does not exist, skipping + +NOTICE: table "streams_acc_025_ovr_bt" does not exist, skipping + +NOTICE: table "frs_clusters_bt" does not exist, skipping + +NOTICE: table "frs_clusters_ch" does not exist, skipping + +NOTICE: table "frs_clusters_co" does not exist, skipping + +NOTICE: table "frs_clusters_sk" does not exist, skipping + +NOTICE: table "frs_qual_spawn_sk" does not exist, skipping + +NOTICE: table "frs_trace_lfid_sk" does not exist, skipping + +NOTICE: table "frs_clusters_st" does not exist, skipping + +✔ comparison_DEAD completed [41.7s, 420 B] ++ rollup dispatched +✔ rollup completed [1ms, 866 B] +✔ ended pipeline [9m 12.7s, 7 completed, 0 skipped] diff --git a/inst/extdata/configs/bcfishpass/README.md b/inst/extdata/configs/bcfishpass/README.md index efa8c06..8b82cbe 100644 --- a/inst/extdata/configs/bcfishpass/README.md +++ b/inst/extdata/configs/bcfishpass/README.md @@ -1,6 +1,6 @@ # bcfishpass config -Reproduces bcfishpass output exactly for regression. All 4 watershed groups (ADMS, BULK, BABL, ELKR) are within 5% of bcfishpass when this config drives the pipeline. +Reproduces bcfishpass output exactly for regression. All five watershed groups (ADMS, BULK, BABL, ELKR, DEAD) are within 5% of bcfishpass when this config drives the pipeline. ADMS/BULK/BABL/ELKR are the numerical-parity set; DEAD is the end-to-end test for the `barriers_definite_control` override filter (see `research/bcfishpass_comparison.md`). 
## What is in here diff --git a/inst/extdata/configs/bcfishpass/parameters_fresh.csv b/inst/extdata/configs/bcfishpass/parameters_fresh.csv index 10cbc2d..9158358 100644 --- a/inst/extdata/configs/bcfishpass/parameters_fresh.csv +++ b/inst/extdata/configs/bcfishpass/parameters_fresh.csv @@ -1,12 +1,12 @@ -"species_code","access_gradient_max","spawn_gradient_min","rear_gradient_min","cluster_rearing","cluster_direction","cluster_bridge_gradient","cluster_bridge_distance","cluster_confluence_m","cluster_spawning","cluster_spawn_direction","cluster_spawn_bridge_gradient","cluster_spawn_bridge_distance","cluster_spawn_confluence_m","observation_threshold","observation_date_min","observation_buffer_m","observation_species" -"BT",0.25,0,0,TRUE,"both",0.05,10000,10,FALSE,"",NA,NA,NA,1,"1990-01-01",20,"BT;CH;CO;SK;PK;CM;ST" -"CH",0.15,0,0,TRUE,"both",0.05,10000,10,FALSE,"",NA,NA,NA,5,"1990-01-01",20,"CH;CM;CO;PK;SK" -"CM",0.15,0,0,FALSE,"",NA,NA,NA,FALSE,"",NA,NA,NA,5,"1990-01-01",20,"CH;CM;CO;PK;SK" -"CO",0.15,0,0,TRUE,"both",0.05,10000,10,FALSE,"",NA,NA,NA,5,"1990-01-01",20,"CH;CM;CO;PK;SK" -"CT",0.25,0,0,FALSE,"",NA,NA,NA,FALSE,"",NA,NA,NA,NA,NA,NA,NA -"DV",0.25,0,0,FALSE,"",NA,NA,NA,FALSE,"",NA,NA,NA,NA,NA,NA,NA -"PK",0.15,0,0,FALSE,"",NA,NA,NA,FALSE,"",NA,NA,NA,5,"1990-01-01",20,"CH;CM;CO;PK;SK" -"RB",0.25,0,0,FALSE,"",NA,NA,NA,FALSE,"",NA,NA,NA,NA,NA,NA,NA -"SK",0.15,0,0,TRUE,"both",0.05,10000,10,TRUE,"both",0.05,3000,10,5,"1990-01-01",20,"CH;CM;CO;PK;SK" -"ST",0.2,0,0,TRUE,"both",0.05,10000,10,FALSE,"",NA,NA,NA,5,"1990-01-01",20,"CH;CM;CO;PK;SK;ST" -"WCT",0.2,0,0,TRUE,"both",0.05,10000,10,FALSE,"",NA,NA,NA,1,"1900-01-01",20,"WCT" 
+"species_code","access_gradient_max","spawn_gradient_min","rear_gradient_min","cluster_rearing","cluster_direction","cluster_bridge_gradient","cluster_bridge_distance","cluster_confluence_m","cluster_spawning","cluster_spawn_direction","cluster_spawn_bridge_gradient","cluster_spawn_bridge_distance","cluster_spawn_confluence_m","observation_threshold","observation_date_min","observation_buffer_m","observation_species","observation_control_apply" +"BT",0.25,0,0,TRUE,"both",0.05,10000,10,FALSE,"",NA,NA,NA,1,"1990-01-01",20,"BT;CH;CO;SK;PK;CM;ST",FALSE +"CH",0.15,0,0,TRUE,"both",0.05,10000,10,FALSE,"",NA,NA,NA,5,"1990-01-01",20,"CH;CM;CO;PK;SK",TRUE +"CM",0.15,0,0,FALSE,"",NA,NA,NA,FALSE,"",NA,NA,NA,5,"1990-01-01",20,"CH;CM;CO;PK;SK",TRUE +"CO",0.15,0,0,TRUE,"both",0.05,10000,10,FALSE,"",NA,NA,NA,5,"1990-01-01",20,"CH;CM;CO;PK;SK",TRUE +"CT",0.25,0,0,FALSE,"",NA,NA,NA,FALSE,"",NA,NA,NA,NA,NA,NA,NA,NA +"DV",0.25,0,0,FALSE,"",NA,NA,NA,FALSE,"",NA,NA,NA,NA,NA,NA,NA,NA +"PK",0.15,0,0,FALSE,"",NA,NA,NA,FALSE,"",NA,NA,NA,5,"1990-01-01",20,"CH;CM;CO;PK;SK",TRUE +"RB",0.25,0,0,FALSE,"",NA,NA,NA,FALSE,"",NA,NA,NA,NA,NA,NA,NA,NA +"SK",0.15,0,0,TRUE,"both",0.05,10000,10,TRUE,"both",0.05,3000,10,5,"1990-01-01",20,"CH;CM;CO;PK;SK",TRUE +"ST",0.2,0,0,TRUE,"both",0.05,10000,10,FALSE,"",NA,NA,NA,5,"1990-01-01",20,"CH;CM;CO;PK;SK;ST",TRUE +"WCT",0.2,0,0,TRUE,"both",0.05,10000,10,FALSE,"",NA,NA,NA,1,"1900-01-01",20,"WCT",FALSE diff --git a/inst/extdata/vignette-data/rollup.rds b/inst/extdata/vignette-data/rollup.rds index 8ccbee7..ff55b92 100644 Binary files a/inst/extdata/vignette-data/rollup.rds and b/inst/extdata/vignette-data/rollup.rds differ diff --git a/inst/extdata/vignette-data/sub_ch.rds b/inst/extdata/vignette-data/sub_ch.rds index 1b886c3..67f42ef 100644 Binary files a/inst/extdata/vignette-data/sub_ch.rds and b/inst/extdata/vignette-data/sub_ch.rds differ diff --git a/inst/extdata/vignette-data/sub_ch_bcfp.rds b/inst/extdata/vignette-data/sub_ch_bcfp.rds index 
35ab336..26c20a0 100644 Binary files a/inst/extdata/vignette-data/sub_ch_bcfp.rds and b/inst/extdata/vignette-data/sub_ch_bcfp.rds differ diff --git a/planning/active/findings.md b/planning/active/findings.md index 6877e8b..3ee943e 100644 --- a/planning/active/findings.md +++ b/planning/active/findings.md @@ -1,96 +1,60 @@ -# Findings: _targets.R pipeline (#38) +# Findings: Wire barriers_definite_control (#44) -## Why targets (and not a monolithic lnk_habitat) +## Where the gap lives -Earlier session considered a big `lnk_habitat(conn, aoi, config)` wrapper that orchestrates the whole pipeline. Rejected: +Three places where `"barriers_definite_control"` is referenced in link today: -- Hides the DAG that rtj is trying to parallelize -- Duplicates what `tar_make()` already provides (caching, skipping, parallelism) -- Turns pipeline variants into if/else branches inside one function rather than separate target graphs -- Every DAG node collapsed to one black-box call — inspection, debugging, partial reruns all harder +1. **`R/lnk_pipeline_prepare.R` → `.lnk_pipeline_prep_load_aux`** — loads the per-AOI filtered CSV rows into `.barriers_definite_control` when `cfg$overrides$barriers_definite_control` is non-NULL. Already correct. +2. **`R/lnk_pipeline_prepare.R` → `.lnk_pipeline_prep_gradient`** — `information_schema` probe for the table, then `DELETE FROM gradient_barriers_raw g USING barriers_definite_control c WHERE ... c.barrier_ind::boolean = false`. Already correct. Removes **passable** positions from gradient set before minimal reduction. +3. **`R/lnk_pipeline_prepare.R` → `.lnk_pipeline_prep_overrides`** — does NOT pass `control` to `lnk_barrier_overrides`. This is the gap. `lnk_barrier_overrides` accepts a `control` parameter but is called without one from this site. -Targets solves these natively. `_targets.R` IS the pipeline definition. Each target is a named node. 
`tar_make()` runs, `tar_visnetwork()` / `tar_mermaid()` visualize, `tar_skip` inherits cache invalidation, parallelism via crew controllers. +## Latent bug in lnk_barrier_overrides control filter -link still owns interpretation helpers (the `R/lnk_habitat_*.R` phase functions). Those are called BY targets, not instead of it. +Read `R/lnk_barrier_overrides.R` lines 140–153. The implementation: -## Architectural constraints from rtj - -From `rtj/docs/distributed-fwapg.md` (cross-referenced; byte-identical fwapg restored on M1 as of 2026-04-22): - -1. **localhost DB per worker** — every worker creates its own `lnk_db_conn()` to localhost. No remote DB chatter over tailnet (latency blows up on hundreds of `dbGetQuery` calls). -2. **Small returns from `map()` targets** — KB-scale data frames only. No geometry, no raster, no wkb shipped over SSH. Our `compare_bcfishpass_wsg()` returns ~10 rows per WSG. -3. **M1 is optional** — `crew_controller_group` handles graceful degradation. Target graph has no M1 awareness. -4. **WSG is the parallelization unit** — ~220 WSGs province-wide, naturally independent. We start with 4 (ADMS, BULK, BABL, ELKR). -5. **Schema namespacing** — `working_` per rtj contract. Prevents parallel workers on the same host from colliding on `working.*`. - -## Design decisions - -### Per-phase helpers, not one wrapper -Six `lnk_pipeline_*.R` functions, one per DAG phase. Each is a clear unit; each can be targeted independently. Phase names read as verbs: setup → load → prepare → break → classify → connect. +```r +ctrl_where <- sprintf("LEFT JOIN %s c ON b.blue_line_key = c.blue_line_key + AND abs(b.downstream_route_measure - c.downstream_route_measure) < 1", control) +ctrl_filter <- "AND c.blue_line_key IS NULL" +``` -### `aoi` not `wsg` for the partition param -`wsg` hardcodes the bcfishpass WSG partition scheme. Fresh already uses `aoi` as the generic spatial filter (accepts WSG code, ltree, sf polygon). Link helpers inherit this convention. 
Today `aoi = "BULK"` works the same as the old `wsg = "BULK"`; tomorrow it extends to mapsheets, HUC basins, custom polygons. +Filter treats ANY control row as blocking override — including `barrier_ind = FALSE` rows. Docstring (lines 27–30) says only `barrier_ind = TRUE` rows block. -### Prefix is `lnk_pipeline_*` -Not `lnk_habitat_*` — only one of six phases (classify) is actually about habitat. The others are setup, loading, network prep, segmenting, connectivity. `lnk_pipeline_*` reads as "these are pipeline building blocks." +In practice on bcfishpass input this is masked because `.lnk_pipeline_prep_gradient`'s upstream DELETE removes `barrier_ind = FALSE` positions from `gradient_barriers_raw` before they reach the override step. But falls and user-definite positions are not pruned by control at load time — they stay in `natural_barriers`. If a control row with `barrier_ind = FALSE` exists for a fall or definite-barrier position, the current filter blocks observation overrides on it. Should not block. -### Static branching (`tar_map`) vs dynamic (`pattern = map(wsg)`) -Use `tar_map`. Static branching produces named targets (`comparison_BULK`, `comparison_ADMS`) — debuggable, inspectable, diffable. Dynamic branching hides per-element names behind indices — harder to trace. +Fix: `"AND (c.blue_line_key IS NULL OR c.barrier_ind::boolean = false)"`. -### Targets in `Suggests`, not `Imports` -Pipeline-dev dependency, not user-facing. Users who want to run the comparison can `install.packages(c("targets", "crew"))` on demand. `link` itself stays minimal. +## Manifest-driven gating decision -### Regenerate the research doc DAG -`tar_mermaid()` output replaces the hand-written Mermaid in `research/bcfishpass_comparison.md`. Single source of truth. Keep the glossary and `classDef` color-coding — those are human decoration, not pipeline structure. 
+`.lnk_pipeline_prep_overrides` could probe `information_schema.tables` to discover whether the control table exists (same pattern used there for habitat). Decided against: the manifest key is the direct contract. If `cfg$overrides$barriers_definite_control` is non-NULL the load step wrote the table; if it's NULL no table exists. Manifest gate, not DB probe. -### `compare_bcfishpass_wsg()` return shape -```r -tibble::tibble( - wsg = "BULK", - species = "BT", - habitat_type = c("spawning", "rearing"), - link_km = c(34.2, 71.8), - bcfishpass_km = c(33.1, 73.4), - diff_pct = c(+3.3, -2.2) -) -``` -Pulls from fresh's `streams_habitat` table joined against `bcfishpass.streams_habitat_linear_*` reference tables. Both live on the worker's localhost DB (byte-identical dumps on M4 and M1 per rtj). +Scope discipline: the existing `information_schema` probe for the habitat table in the same function, and the similar probe in `.lnk_pipeline_prep_gradient` for the control table, work correctly today. Leaving them alone in this PR; filing a follow-up issue for consistency. Using the manifest as the contract is the preferred pattern across the package. -## PR 2 design constraint: `fresh.streams` is not per-AOI +## Tests that need to exist -`lnk_pipeline_prepare` writes base segments to `fresh.streams`, matching the legacy compare script. Fresh's downstream functions (`frs_break_apply`, `frs_habitat_classify`, `frs_cluster`) assume that table path. Every pipeline run does `DROP TABLE IF EXISTS fresh.streams CASCADE` before rebuilding, so two parallel AOI runs on the same host would overwrite each other's segments and race the `streams_habitat` output. +- `tests/testthat/test-lnk_barrier_overrides.R` does not exist. Creating new. +- `tests/testthat/test-lnk_pipeline_prepare.R` exists — extending with prep_overrides control pass-through tests. -This is tolerable for single-host single-run today — it breaks the parallel `tar_map(wsg = ...)` + `crew_controller_local(workers = 2)` design. 
Three ways to handle in PR 2: +## No `information_schema` probe in the new code -1. `crew_controller_local(workers = 1)` — serialize runs on each host. Simplest, defeats half the parallelism gain. -2. Per-AOI fresh table names — patch fresh to accept a `streams_table` parameter across break/classify/cluster, or write into `.streams` and follow all downstream fresh calls with that. Substantial fresh work. -3. Separate database per worker — overkill. +`.lnk_pipeline_prep_overrides`'s new control guard reads `cfg$overrides$barriers_definite_control` directly. That field is populated by `lnk_config()` when the manifest declares the key. No DB round-trip needed. -Leaning toward option 1 for the initial PR 2, with option 2 as a fresh follow-up. Document explicitly. +## Expected rollup direction -## Other accepted fragilities (from code-check on `prepare`) +Running the pipeline pre-fix vs post-fix on bcfishpass config: -- `id_segment` assignment in `prep_network` uses `row_number() OVER (ORDER BY blue_line_key, downstream_route_measure)`. Ties produce arbitrary ordering across runs. Faithful to compare script; ties are rare on FWA in practice. -- `natural_barriers` re-joins gradient barriers to FWA instead of using the already-enriched ltree columns from `gradient_barriers_raw`. Silent drop if an enrichment UPDATE left NULL ltree. Faithful to compare script; hasn't hit in 4 WSGs of real data. -- Per-model gradient class sets (bt, ch_cm_co_pk_sk, st, wct) are hardcoded in `prep_minimal`. TODO in-code to move into `cfg$pipeline$gradient_models` so variants can swap them. +- WSGs with `user_barriers_definite_control.csv` rows having `barrier_ind = TRUE` and upstream observations at those positions → rollup `link_km` shrinks for affected species (positions that were wrongly overridden are no longer overridden). Moves toward bcfishpass reference. +- WSGs with no such rows → rollup unchanged. -## Unknowns to resolve during implementation +Magnitude: unknown. 
Control-TRUE rows on the four validated WSGs are uncommon, so likely small. Direction matters more than magnitude. -- How cleanly does `frs_habitat_classify()` accept a `working_` schema? Does it assume `working.*`? If so, we need a `working_schema` arg in fresh. If `lnk_habitat_classify` writes to a schema name that fresh doesn't know about, classification may fail. -- Per-WSG schema cleanup contract — `on.exit(DROP SCHEMA working_ CASCADE)` inside `compare_bcfishpass_wsg()`, or let the next run drop + recreate? -- Does `frs_break_apply()` need to know the schema for the streams table, or does the input table name carry it? +## Reproducibility -Document findings as discovered. +The change is a deterministic additional filter clause on a `LEFT JOIN`. No new randomness, no schedule-dependent behaviour. Two back-to-back `tar_make()` runs must produce bit-identical rollups. Will verify with `digest::digest()`. ## Cross-refs -- rtj/docs/distributed-fwapg.md — architectural source of truth -- fresh 0.14.0 — `frs_barriers_minimal()` is prerequisite for `lnk_habitat_build_network` -- link 0.2.0 — `lnk_config()` feeds all phases - -## Versions - -- fresh: 0.14.0 -- link: main (0.2.0 → 0.3.0) -- bcfishpass: ea3c5d8 -- fwapg: Docker (FWA 20240830) +- Plan file: `/Users/airvine/.claude/plans/stateful-hopping-feather.md` +- Issue: link#44 +- Parallel cleanup issue (separate PR): link#45 (gradient classes) +- Follow-up to file at end of this PR: "Migrate remaining pipeline probes to manifest-driven gating" diff --git a/planning/active/progress.md b/planning/active/progress.md index 5fedb39..2b5960d 100644 --- a/planning/active/progress.md +++ b/planning/active/progress.md @@ -1,47 +1,25 @@ # Progress -## Session 2026-04-22 +## Session 2026-04-23 -- Archived lnk_config PWF (shipped as link 0.2.0 via PR #39) -- Starting link#38: `_targets.R` pipeline -- Dependencies cleared: fresh 0.14.0 (frs_barriers_minimal) and link 0.2.0 (lnk_config) are on main -- rtj data parity on M4 + M1 
confirmed; R install on M1 (Phase 3) still pending but not blocking — single-host first -- Issue #38 updated with package-vs-pipeline split (helpers in `R/`, `_targets.R` + comparison in `data-raw/`) -- PR 1 Phase 1.1 done: `lnk_pipeline_setup()` (originally `lnk_habitat_setup_schema`, renamed before building more). Mocked tests for SQL shape + identifier validation (8 passing). Live DB test intentionally skipped — CREATE SCHEMA semantics are Postgres's, not ours to test. -- Naming decision: prefix is `lnk_pipeline_*` (not `lnk_habitat_*` — only 1 of 6 phases is actually about habitat). Phase names read as verbs: setup → load → prepare → break → classify → connect. -- Param decision: canonical `(conn, aoi, cfg, schema)`. `aoi` follows fresh convention — accepts a WSG code today; extends to ltree filters, sf polygons, mapsheets later. `setup` is the only outlier: `(conn, schema, overwrite)`. -- PR 1 Phase 1.2 done: `lnk_pipeline_load()` — loads crossings + misc crossings + applies modelled fixes (NONE/OBS → PASSABLE) + PSCIS barrier status overrides. Split into three internal `@noRd` helpers for readability. Cleaner scope than the original "load_inputs" plan: falls, definite barriers, observation exclusions, and habitat classification moved to `prepare` where they're actually consumed. 12 tests (4 input validation + 4 fixes SQL/branching + 1 apply_pscis branching + 3 structure). 169 link tests total. -- PR 1 Phase 1.3 done: `lnk_pipeline_prepare()` — thin orchestrator over 6 internal sub-helpers (prep_load_aux, prep_gradient, prep_natural, prep_overrides, prep_minimal, prep_network). First real consumer of `frs_barriers_minimal()` from fresh 0.14.0. `.lnk_quote_literal()` added to utils.R for safe SQL literal interpolation. 31 new tests (input validation + SQL shape + 4 model minimal reductions + union). Full link suite at 200 passing. 
-- Code-check found one genuine architectural concern for PR 2: `fresh.streams` is a shared schema, parallel WSG runs on one host would collide. Noted in findings.md with three mitigation options (leaning toward `workers = 1` for initial PR 2). -- PR 1 Phase 1.4 done: `lnk_pipeline_break()` — builds observations_breaks (species-filtered via `cfg$wsg_species` + data-error exclusions), habitat_endpoints (DRM + URM union), crossings_breaks, then sequential `frs_break_apply` respecting `cfg$pipeline$break_order` with `id_segment` reassignment between rounds. Four internal `@noRd` sub-helpers. 13 new tests (input validation + obs species derivation incl. CT expansion + SQL shape per branch + break_order honored). Full link suite at 229 passing. -- PR 1 Phase 1.5/1.6 done: `lnk_pipeline_classify()` + `lnk_pipeline_connect()` — classify builds `fresh.streams_breaks` (gradient FULL + falls + definite + crossings, WSG-filtered) then calls `frs_habitat_classify()` with rules YAML + barrier overrides. Connect wraps fresh's `.frs_run_connectivity` for per-species cluster + connected_waterbody. Both auto-derive species from `cfg$parameters_fresh` ∩ `cfg$wsg_species` presence for the AOI; both accept explicit `species =` override. 22 tests covering input validation, species derivation, access-gating breaks SQL shape, no-species error. Full link suite at 251 passing. -- **All six pipeline helpers complete.** -- PR 1 Phase 1.7 done: compare_bcfishpass.R rewritten from 635 lines to 136 lines using the six helpers. ADMS run 67s end-to-end, all species within 5%, spawning values identical to research doc, rearing within ~1% (acceptable ordering variance from id_segment tie-breaking). -- Fix along the way: added `cfg$species` (parsed from rules YAML at load) so `lnk_pipeline_classify_species` intersects against rules species (8) instead of parameters_fresh species (11). parameters_fresh has CT/DV/RB which bcfishpass doesn't model. 
Also added `barriers_definite` to `config.yaml` `break_order` (was missing). -- PR 1 ready to close. Remaining: NEWS/DESCRIPTION bump, final `/code-check`, PR with SRED tag. -- PR 1 MERGED as link 0.3.0 (PR #41). Branch deleted. - -## PR 2 kickoff - -- Branched `38-targets-pipeline-pr2` off main. -- Wrote `data-raw/compare_bcfishpass_wsg(wsg, config)` — wraps the six phase helpers for one WSG, returns a small tibble (wsg × species × habitat_type × link_km × bcfishpass_km × diff_pct). KB-scale return — no geometry, ships cleanly over SSH when distributed. -- Wrote `data-raw/_targets.R` — `tar_map(wsg = 4 WSGs)` over the per-WSG target, `crew_controller_local(workers = 1)`, rollup target binds all four tibbles. Serial because `fresh.streams` is a shared schema across workers on the same host (findings.md). -- Added `targets` / `crew` / `tibble` / `dplyr` to DESCRIPTION Suggests. -- Drift lesson from PR 1 → Issue #40 filed (CSV provenance + runtime stamps). Scope expands `lnk_stamp` (#24) into the lineage source. -- Next: `/code-check` on PR 2 staged diff, then `tar_make()` end-to-end, commit stamped verification log. -- Reframing (per user): the correctness bar is **bit-identical output from the same inputs**, not "within 5% of bcfishpass." The 5% comparison is parity diagnostics only. Saved to memory (`feedback_reproducibility.md`) + CLAUDE.md. Research-doc drift from earlier today (BT rearing -0.7 → -1.1) is env-state drift, not pipeline non-determinism — to be traceable once stamps/lineage ship (#40). -- tar_make end-to-end done. Three successive runs (10, 11, 12) produced bit-identical 34-row rollup tibbles — reproducibility proven. Wall clock ~8m 30s per run (serial). -- Promoted `.lnk_pipeline_classify_species` → exported `lnk_pipeline_species(cfg, aoi)` to remove duplication with the data-raw inline helper. Tests moved to `test-lnk_pipeline_species.R`. classify + connect internals updated. Compare wrapper uses `link::lnk_pipeline_species()`. 
-- Code-check surfaced a real connection leak (second `dbConnect` could throw before `on.exit` registered) and SQL quoting inconsistency on species list. Both fixed; 12th run confirms numbers unchanged. -- DESCRIPTION bumped to 0.4.0. NEWS entry captures the reproducibility + parity distinction. Committing and pushing PR 2 next. -- PR 2 MERGED as link 0.4.0 (PR #42). Branch deleted. - -## PR 3 kickoff - -- Branched `38-targets-pipeline-pr3` off main. -- `tar_mermaid()` reviewed — output is hashed-ID graph unsuitable as a research-doc DAG. Kept the hand-written pipeline DAG and added a clean "Targets orchestration" Mermaid beside it. -- Research doc results table refreshed with run 12 numbers (2026-04-22), correctness-bar section added at top. -- Vignette `reproducing-bcfishpass.Rmd` written — three-line entrypoint, rollup table, BULK CH habitat mapgl map. Pre-computes artifacts via `data-raw/vignette_reproducing_bcfishpass.R` → `inst/extdata/vignette-data/{rollup,bulk_ch}.rds`. Rendered clean on local test. -- Retired `data-raw/compare_bcfishpass.R`; `_targets.R` + `compare_bcfishpass_wsg.R` supersede it. -- DESCRIPTION bumped to 0.5.0; mapgl + sf added to Suggests. -- Next: `/code-check` on staged diff, commit, push, PR with SRED tag. +- Archived `2026-04-23-targets-pipeline/` — link#38 closed via PRs #41/#42/#43. Three consecutive `tar_make()` runs produced bit-identical rollups. All species within 5% of bcfishpass reference on all four WSGs. +- Branched `44-barriers-definite-control` off main. +- Plan approved. PWF initialized for #44. +- Pre-flight complete: identified the `ctrl_filter` bug in `lnk_barrier_overrides` (all rows block, not just `barrier_ind = TRUE`), and confirmed `.lnk_pipeline_prep_overrides` doesn't pass `control`. Same PR fixes both — filter semantics + missing pass-through. +- Next: Phase 1 — fix `R/lnk_barrier_overrides.R` `ctrl_filter` and add `tests/testthat/test-lnk_barrier_overrides.R`. 
+- Phase 1 committed (d1a7109) — `NOT EXISTS` control filter, 11 tests, 269 PASS. +- Phase 2 committed (53bedbd) — manifest-gated `control` pass-through in `.lnk_pipeline_prep_overrides`, fixed asymmetric load_aux (schema-valid empty table), 271 PASS. +- Post-Phase-2 `tar_make()` (log: `data-raw/logs/20260423_01_tar_make_post_44.txt`) showed 11–22pp drift AWAY from bcfishpass on ADMS/BABL; BULK/ELKR unchanged. Root cause: bcfishpass applies control filter only in CH/CM/CO/PK/SK and ST models (not BT/WCT/CT/DV/RB). My implementation applied it across all species in the `params` loop. +- Phase 2a: new `observation_control_apply` column in `parameters_fresh.csv` (TRUE for CH/CM/CO/PK/SK/ST; FALSE for BT/WCT; NA for CT/DV/RB), per-species NOT EXISTS gate in `lnk_barrier_overrides()`, three new tests. 279 PASS. Amendment pushed to issue #44 documenting the species-scoped approach and biological rationale. +- Next: Phase 3 — `pak::local_install()`, `tar_make()`, compare rollup to bcfishpass; expected direction — BT/WCT/ST on ADMS/BABL recover to near pre-fix, CH/CM/CO/PK/SK slightly closer to bcfishpass. +- Phase 2a alone was insufficient — CH/CO/SK/ST on ADMS/BABL still drifted -15 to -22pp. Investigation traced the residual to my ctrl_filter also blocking the habitat-path INSERT in lnk_barrier_overrides. bcfishpass's `hab_upstr` CTE has no control join — habitat is higher-trust and bypasses the filter. +- Phase 2b (6f3bc46) — removed ctrl_where/ctrl_filter from habitat INSERT; flipped the "control applies to habitat" test to assert absence; docstring notes habitat bypass. 279 PASS. +- Post-Phase-2b rollup exactly matches pre-fix baseline on all 4 parity WSGs. Investigation showed all 6 TRUE control rows on ADMS/BULK/BABL are rescued by observation threshold or habitat path — filter correctly wired but inactive on these WSGs. 
+- Phase 2c: province-wide hunt for TRUE control rows with ≥ threshold obs AND zero habitat upstream produced CAMB (11 obs), DEAD (6), LFRA (16 but too large), SALM (7). Picked DEAD — single TRUE control row at FALLS (356361749, 45743) with exactly 6 CH-group obs and zero habitat. Added DEAD to `data-raw/_targets.R`, incremental tar_make builds only comparison_DEAD + rollup (42s). +- DEAD rollup: all species within 3% of bcfishpass reference. Direct inspection of `working_dead.barrier_overrides` at (356361749, 45743): BT only, confirming per-species gate (BT bypass + CH/CM/CO/PK/SK/ST blocked). Commit fb8a0db. +- Log files committed (1c683e3): 20260423_01_phase2, _02_phase2a, _03_phase2b, _04_repro, _05_dead, _06_repro_dead. +- 5-WSG rebuild reproducibility confirmed: two consecutive `tar_destroy + tar_make` produce rollup digest `210c3f8254c47ac88573a80d96a2701e`, 46 rows, identical. +- Phase 4 (f52dcbc): NEWS 0.6.0, DESCRIPTION 0.5.0→0.6.0, research doc (DEAD table + key-fixes row + three-part-fix subsection + DAG update), vignette (5-WSG narrative + pivot column), bcfishpass config README updated, vignette artifacts regenerated. +- Follow-up filed: #46 (migrate `.lnk_pipeline_prep_gradient()` + `.lnk_pipeline_prep_overrides()` probes to manifest-driven gating). +- Branch pushed. PR #47 opened. SRED tag `Relates to NewGraphEnvironment/sred-2025-2026#24` in body. +- Flagged in PR: commit 22ac1dd ("comms(→link): M1 verified as R-worker host") landed on this branch from a parallel session's branch-landing policy; orthogonal to #44 scope. 
diff --git a/planning/active/task_plan.md b/planning/active/task_plan.md index af41346..7f8ca92 100644 --- a/planning/active/task_plan.md +++ b/planning/active/task_plan.md @@ -1,73 +1,92 @@ -# Task Plan: _targets.R pipeline (#38) +# Task Plan: Wire barriers_definite_control into lnk_barrier_overrides (#44) ## Goal -Replace the 635-line `data-raw/compare_bcfishpass.R` script with a targets-driven pipeline that: -- Runs each DAG node as a `tar_target()` — inspectable, cacheable, skippable -- Parallelizes across watershed groups via `tar_map(wsg = c(...))` -- Regenerates the research doc DAG from `tar_mermaid()` -- Single-host on M4 first; distributed swap to `crew_controller_group(local=M4, cluster=M1)` is a follow-up after rtj Phase 4 - -Uses `lnk_config("bcfishpass")` (shipped in 0.2.0) and `frs_barriers_minimal()` (fresh 0.14.0). - -## Package vs pipeline split - -Helpers (`lnk_habitat_*`) go in `R/` as exported package functions — generic building blocks any caller can compose. `_targets.R` + `compare_bcfishpass_wsg()` go in `data-raw/` — this specific comparison pipeline, not part of the installed package. `data-raw/` is the canonical R-package home for "code that USES this package to produce outputs." - -## PR 1: Extract helpers to R/lnk_pipeline_*.R - -Break the 635-line script into small named functions (one per pipeline phase). Canonical signature `(conn, aoi, cfg, schema)` — `aoi` follows fresh convention (accepts a WSG code today; extends to ltree filters, sf polygons, mapsheets later). `setup` is the only outlier: `(conn, schema, overwrite)`. - -- [x] `R/lnk_pipeline_setup.R` — create working schema, ensure `fresh` schema -- [x] `R/lnk_pipeline_load.R` — crossings + modelled fixes + PSCIS status overrides. 
Falls, definite barriers, observation exclusions, habitat classification moved to `prepare` (load stays focused on anthropogenic crossings) -- [x] `R/lnk_pipeline_prepare.R` — loads falls + definite + control + habitat confirms; detects gradient barriers (`frs_break_find`) with control pruning + ltree enrichment; builds natural_barriers; computes barrier overrides via `lnk_barrier_overrides`; per-model non-minimal reduction via `frs_barriers_minimal` (fresh 0.14.0); loads fresh.streams with channel_width + stream_order_parent + GENERATED cols + id_segment. Six internal `@noRd` sub-helpers -- [x] `R/lnk_pipeline_break.R` — builds observations_breaks (species-filtered + exclusions), habitat_endpoints (DRM + URM), crossings_breaks; runs sequential `frs_break_apply` in config-defined order with `id_segment` reassignment between rounds -- [x] `R/lnk_pipeline_classify.R` — builds access-gating `fresh.streams_breaks` (gradient + falls + definite + crossings), calls `frs_habitat_classify` with rules YAML + thresholds + barrier overrides. Species default derives from `cfg$parameters_fresh` ∩ `cfg$wsg_species` presence for the AOI. -- [x] `R/lnk_pipeline_connect.R` — calls fresh's `.frs_run_connectivity` (per-species cluster + connected_waterbody driven by `cfg$parameters_fresh` flags). Fresh internal access flagged as a follow-up (export a stable API in fresh). 
-- [x] Update existing `data-raw/compare_bcfishpass.R` to call the helpers — verified on ADMS (635 lines → 136 lines, all species within 5%, sub-1% rearing drift from research doc acceptable) -- [ ] Tests + runnable examples for each helper (live-DB tests skip without `.lnk_db_available()`) -- [ ] pkgdown reference entries -- [ ] `/code-check` before each commit -- [ ] PR 1: SRED tag (NewGraphEnvironment/sred-2025-2026#24) — Relates to #38 - -## PR 2: Add _targets.R + per-partition target fn - -- [x] `data-raw/compare_bcfishpass_wsg.R` — wraps pipeline phases for one WSG, returns ~10-row tibble (wsg × species × habitat_type × link_km × bcfishpass_km × diff_pct). Creates own conn + conn_ref with fail-early on missing `PG_PASS_SHARE`, registers on.exit cleanup per-conn (no leak on second conn failure), cleans up on exit. Defensive drop of `fresh.streams*` at entry. -- [x] Pulls comparison diff against `bcfishpass.habitat_linear_*` reference over tunnel. All interpolated strings go through `DBI::dbQuoteLiteral`. -- [x] `data-raw/_targets.R` with static `tar_map(wsg = c("ADMS","BULK","BABL","ELKR"))` + synchronous execution (crew removed after the controller hung on dispatched-but-never-complete behavior; shared `fresh.streams` prevents parallel anyway). -- [x] `targets` + `tarchetypes` + `tibble` + `dplyr` → DESCRIPTION Suggests (crew dropped). -- [x] **Promote `.lnk_pipeline_classify_species` → exported `lnk_pipeline_species(cfg, aoi)`** — canonical public helper for "species this config classifies in this AOI". Used by classify + connect internally and by data-raw externally. Removes both the duplicated private helper and the inlined `.wsg_species_present` from data-raw. -- [x] Run `tar_make()` end-to-end on all 4 WSGs. Rollup = 34 rows, all within 5% of bcfishpass. Reproducibility check: runs 10 + 11 produced bit-identical rollup tibbles. 
-- [x] Log the run under `data-raw/logs/20260422_10_tar_make_from_dataraw.txt` + `20260422_11_tar_make_final.txt` (plus `20260422_12_*` post-fix re-verify). -- [x] `/code-check` before commit — found a real conn leak (second dbConnect could throw before on.exit registered) and a SQL quoting inconsistency on species; both fixed and re-verified. -- [x] **Correctness framing** — reframed verification from "within 5% of bcfishpass" to "bit-identical across repeated runs". Added section to CLAUDE.md + memory entry. Confirmed across three runs (10, 11, 12) — all 34 rollup rows identical. -- [ ] PR 2: SRED tag — Relates to #38 - -## PR 3: Retire old script + research doc refresh + vignette - -- [x] `tar_mermaid()` reviewed — output is hashed-ID orchestration graph, poor replacement for the hand-written pipeline-phase DAG. Kept the pipeline DAG in `research/bcfishpass_comparison.md`; added a small "Targets orchestration" Mermaid showing cfg → 4 WSGs → rollup. -- [x] Research doc refreshed with 2026-04-22 rollup numbers (was 2026-04-15) + reproducibility framing at top. -- [x] Delete `data-raw/compare_bcfishpass.R` — superseded by `_targets.R` + `compare_bcfishpass_wsg.R`. Git history preserves. -- [x] Vignette `vignettes/reproducing-bcfishpass.Rmd` — narrative, three-line entrypoint, rollup table, BULK CH habitat mapgl map, reproducibility note, pointers to future default-variant vignette. -- [x] `data-raw/vignette_reproducing_bcfishpass.R` — pre-computes `rollup.rds` + `bulk_ch.rds` into `inst/extdata/vignette-data/` so the vignette doesn't hit the DB at build time. CLAUDE.md vignette convention. -- [x] `mapgl`, `sf` added to DESCRIPTION Suggests. -- [x] NEWS entry + bump to 0.5.0. +Honour `user_barriers_definite_control.csv`'s `barrier_ind = TRUE` rows at the observation-override step. Positions marked as non-overridable (known fish-blocking dams, long impassable falls, diversions) must never be re-opened by historical upstream observations. 
Matches bcfishpass's per-species access SQL. + +Bit-identical-across-reruns reproducibility preserved. Rollup direction expected: toward bcfishpass reference, not away. + +## Phase 1: lnk_barrier_overrides control filter fix + +- [x] Read `R/lnk_barrier_overrides.R` control block. Confirmed: current filter treated ANY control row as blocking; docstring said only `barrier_ind = TRUE` rows block. +- [x] Updated `ctrl_filter` to `"AND (c.blue_line_key IS NULL OR c.barrier_ind::boolean = false)"`. +- [x] Updated the inline comment to describe the fixed semantics. +- [x] New test file `tests/testthat/test-lnk_barrier_overrides.R` with mocked SQL assertions — 7 tests covering observation-path control filter, NULL-control path, habitat-path control filter. +- [x] `devtools::test()` green: 265 PASS. +- [x] lintr clean on changed R/test files (only pre-existing indentation style notes, consistent with the rest of the codebase). - [ ] `/code-check` before commit -- [ ] PR 3: SRED tag — Fixes #38 -## Follow-up (out of scope) +## Phase 2: Wire control through .lnk_pipeline_prep_overrides + +- [x] Updated `.lnk_pipeline_prep_overrides` with manifest-gated `control_arg` computation; passes `control = control_arg` to `lnk_barrier_overrides`. +- [x] Fixed asymmetric gating — `.lnk_pipeline_prep_load_aux` now always creates a schema-valid (possibly empty) `.barriers_definite_control` table when the manifest declares the key, even if the AOI has zero control rows. Mirrors the `barriers_definite` pattern above. Lets `.lnk_pipeline_prep_overrides` gate on the manifest without worrying about the per-AOI row count. +- [x] Two new `.lnk_pipeline_prep_overrides` tests in `test-lnk_pipeline_prepare.R` — manifest present → `control = ".barriers_definite_control"`; manifest absent → `control = NULL`. +- [x] `devtools::test()` green: 271 PASS. +- [x] `/code-check` surfaced the asymmetric-gating bug — fixed and re-verified before commit. 
+ +## Phase 2a: Per-species control gate (observation_control_apply) + +Post-Phase-2 `tar_make()` drifted 11–22pp *away* from bcfishpass on ADMS/BABL because bcfishpass applies the control filter per-species (CH/CM/CO/PK/SK and ST only), while my implementation applied it across all species. Residents (BT, WCT) inhabit reaches upstream of anadromous-blocking falls — their observations should still override. + +- [x] Add `observation_control_apply` column to `inst/extdata/configs/bcfishpass/parameters_fresh.csv`. TRUE for CH/CM/CO/PK/SK/ST; FALSE for BT/WCT; NA for CT/DV/RB. +- [x] `lnk_barrier_overrides()` gates the NOT EXISTS clause per-species on `params$observation_control_apply[i]`. Missing column or NA ⇒ no filter (resident default). +- [x] Updated `@param control` / `@param params` roxygen to document the gate. +- [x] Extended `.stub_params()` in `test-lnk_barrier_overrides.R` with optional `control_apply`. Three new tests: FALSE ⇒ no clause, NA ⇒ no clause, mixed-species params ⇒ per-species gating. +- [x] `devtools::test()`: 279 PASS. +- [x] Amend issue #44 body with Phase 2a scope and biological rationale. +- [x] `/code-check` before commit — two rounds, both Clean. + +## Phase 2b: Ungate habitat override path from control + +Phase 2a species-gating fixed BT/WCT drift but CH/CM/CO/PK/SK/ST still dropped 11–22pp on ADMS/BABL. Root cause: my `ctrl_filter` was applied to BOTH the observation and habitat paths of `lnk_barrier_overrides()`. bcfishpass's `hab_upstr` CTE has no control join at all — expert-confirmed habitat is higher-trust than the control designation and bypasses the filter. + +- [x] Removed `ctrl_where` / `ctrl_filter` from the habitat INSERT in `lnk_barrier_overrides()`. Observation path unchanged. +- [x] Updated roxygen: control parameter now notes it applies only to observations; habitat bypasses. +- [x] Flipped the existing "control filter applies to habitat too" test to assert the opposite (bcfishpass parity). `devtools::test()` 279 PASS. 
+- [x] Committed (6f3bc46). +- [x] `tar_make()` — Phase 2b rollup numerically identical to pre-fix baseline on all 34 rows, all 4 WSGs within 5% of bcfishpass reference. + +## Phase 2c: Add DEAD as the filter's end-to-end test WSG + +Discovered post-Phase 2b: none of ADMS/BULK/BABL/ELKR actually exercises the new control filter end-to-end. All 6 TRUE control rows across these WSGs are rescued by either the observation threshold (obs < 5) or the habitat path (classification upstream). That's why post-fix == pre-fix — correct, but information-less. + +Province-wide hunt for TRUE control rows with ≥ threshold observations upstream AND zero habitat coverage turned up 4 candidates: CAMB (11 obs), DEAD (6), LFRA (16, but too large), SALM (7). Picked **DEAD** (Deadman River) — smallest runtime, 6 obs just above CH threshold, single TRUE control row at FALLS (356361749, 45743). bcfishpass reference keeps this fall in `barriers_ch_cm_co_pk_sk` (control worked); pre-fix link would have overridden via observations. + +- [x] Added DEAD to `data-raw/_targets.R` wsgs vector. +- [ ] `tar_make()` incremental — builds `comparison_DEAD` + new rollup (ADMS/BULK/BABL/ELKR cached from Phase 2b run). +- [ ] Verify DEAD's diff_pct on CH/CO/SK/ST is small (post-fix link ≈ bcfishpass — filter working). +- [ ] Verify the specific fall at (356361749, 45743) is NOT in `working_dead.barrier_overrides` for CH/CM/CO/PK/SK/ST (filter blocked the override). + +## Phase 3: End-to-end verification + +- [x] `pak::local_install()` to pick up pipeline changes. +- [x] First post-fix run: `20260423_02_tar_make_phase2a.txt`, `20260423_03_tar_make_phase2b.txt`. +- [x] Inspect rollup against pre-change baseline — matches exactly on 4 WSGs (filter moot on those; DEAD being added to exercise it). +- [ ] Reproducibility run (Phase 2b state): `20260423_04_tar_make_repro.txt` in progress. Rollup must be bit-identical to Phase 2b. +- [ ] `digest::digest()` on two Phase 2b rollup tibbles → same hash. 
+- [ ] Post-DEAD reproducibility: two consecutive `tar_make()` runs with DEAD present produce bit-identical 5-WSG rollups. + +## Phase 4: Artifact updates -- Distributed execution — swap `crew_controller_local()` for `crew_controller_group(local=M4, cluster=M1)` after rtj Phase 4 passes the M4→M1 SSH exec check -- `configs/default/` variant wired into a second `_targets.R` or CLI arg — tracked via #19/#20/#21 biological decisions +- [ ] Regenerate vignette data: `Rscript data-raw/vignette_reproducing_bcfishpass.R`. Produces new `rollup.rds` + `sub_ch.rds` + `sub_ch_bcfp.rds`. +- [ ] Render vignette locally to verify pivot tables + map update cleanly +- [ ] Update `research/bcfishpass_comparison.md`: + - Four per-WSG parity tables with new numbers + - Short paragraph under "Key fixes during comparison" documenting the control wiring + numeric direction +- [ ] `NEWS.md` 0.6.0 entry: "Honour `user_barriers_definite_control.csv` at the observation-override step. Previously controlled positions could be re-opened by upstream observations; now they can't." +- [ ] `DESCRIPTION` version bump 0.5.0 → 0.6.0 -## Follow-up (out of scope for this PR) +## Phase 5: Ship -- Distributed execution — swap `crew_controller_local()` for `crew_controller_group(local=M4, cluster=M1)` after rtj Phase 4 passes the M4→M1 SSH exec check -- `configs/default/` variant wired into a second `_targets.R` or CLI arg — tracked via #19/#20/#21 biological decisions +- [ ] `/code-check` on full staged diff +- [ ] Commit atomically per the plan's commit layout +- [ ] Push branch +- [ ] Open PR with SRED tag `Relates to NewGraphEnvironment/sred-2025-2026#24` +- [ ] **File follow-up issue** (before closing PR 44): "Migrate remaining pipeline probes to manifest-driven gating". See `/Users/airvine/.claude/plans/stateful-hopping-feather.md` for scope. 
## Versions at start - fresh: 0.14.0 -- link: main (0.2.0, target 0.3.0) +- link: main (0.5.0, target 0.6.0) - bcfishpass: ea3c5d8 - fwapg: Docker (FWA 20240830) diff --git a/planning/archive/2026-04-23-targets-pipeline/README.md b/planning/archive/2026-04-23-targets-pipeline/README.md new file mode 100644 index 0000000..9b70d03 --- /dev/null +++ b/planning/archive/2026-04-23-targets-pipeline/README.md @@ -0,0 +1,22 @@ +# Archive: targets-pipeline refactor (link#38, closed 2026-04-23) + +## Outcome + +Three-PR arc completed: + +- **PR #41 (link 0.3.0)** — six `lnk_pipeline_*` phase helpers extracted from the 635-line compare script +- **PR #42 (link 0.4.0)** — `data-raw/_targets.R` + `compare_bcfishpass_wsg()`, exported `lnk_pipeline_species()`, reproducibility framing +- **PR #43 (link 0.5.0)** — vignette `reproducing-bcfishpass.Rmd`, research doc refresh, retired legacy `compare_bcfishpass.R` + +Three consecutive `tar_make()` runs produced bit-identical 34-row rollup tibbles. All species within 5% of bcfishpass reference on all four WSGs (ADMS, BULK, BABL, ELKR). 
+ +## What superseded it + +- New PWF cycle 2026-04-23 for #44 (wire `barriers_definite_control` into `lnk_barrier_overrides`) +- Issue #45 filed for gradient-class cleanup, parallel-safe +- Issue #40 filed for config CSV provenance + pipeline run stamps (supersedes narrow scope of #24) + +## Key lessons captured + +- `feedback_verification_logs.md` — always stamp env state in pipeline verification logs +- `feedback_reproducibility.md` — correctness bar is bit-identical output, not "within 5% of bcfishpass" diff --git a/planning/archive/2026-04-23-targets-pipeline/findings.md b/planning/archive/2026-04-23-targets-pipeline/findings.md new file mode 100644 index 0000000..6877e8b --- /dev/null +++ b/planning/archive/2026-04-23-targets-pipeline/findings.md @@ -0,0 +1,96 @@ +# Findings: _targets.R pipeline (#38) + +## Why targets (and not a monolithic lnk_habitat) + +Earlier session considered a big `lnk_habitat(conn, aoi, config)` wrapper that orchestrates the whole pipeline. Rejected: + +- Hides the DAG that rtj is trying to parallelize +- Duplicates what `tar_make()` already provides (caching, skipping, parallelism) +- Turns pipeline variants into if/else branches inside one function rather than separate target graphs +- Every DAG node collapsed to one black-box call — inspection, debugging, partial reruns all harder + +Targets solves these natively. `_targets.R` IS the pipeline definition. Each target is a named node. `tar_make()` runs, `tar_visnetwork()` / `tar_mermaid()` visualize, `tar_skip` inherits cache invalidation, parallelism via crew controllers. + +link still owns interpretation helpers (the `R/lnk_habitat_*.R` phase functions). Those are called BY targets, not instead of it. + +## Architectural constraints from rtj + +From `rtj/docs/distributed-fwapg.md` (cross-referenced; byte-identical fwapg restored on M1 as of 2026-04-22): + +1. **localhost DB per worker** — every worker creates its own `lnk_db_conn()` to localhost. 
No remote DB chatter over tailnet (latency blows up on hundreds of `dbGetQuery` calls). +2. **Small returns from `map()` targets** — KB-scale data frames only. No geometry, no raster, no wkb shipped over SSH. Our `compare_bcfishpass_wsg()` returns ~10 rows per WSG. +3. **M1 is optional** — `crew_controller_group` handles graceful degradation. Target graph has no M1 awareness. +4. **WSG is the parallelization unit** — ~220 WSGs province-wide, naturally independent. We start with 4 (ADMS, BULK, BABL, ELKR). +5. **Schema namespacing** — `working_<wsg>` per rtj contract. Prevents parallel workers on the same host from colliding on `working.*`. + +## Design decisions + +### Per-phase helpers, not one wrapper +Six `lnk_pipeline_*.R` functions, one per DAG phase. Each is a clear unit; each can be targeted independently. Phase names read as verbs: setup → load → prepare → break → classify → connect. + +### `aoi` not `wsg` for the partition param +`wsg` hardcodes the bcfishpass WSG partition scheme. Fresh already uses `aoi` as the generic spatial filter (accepts WSG code, ltree, sf polygon). Link helpers inherit this convention. Today `aoi = "BULK"` works the same as the old `wsg = "BULK"`; tomorrow it extends to mapsheets, HUC basins, custom polygons. + +### Prefix is `lnk_pipeline_*` +Not `lnk_habitat_*` — only one of six phases (classify) is actually about habitat. The others are setup, loading, network prep, segmenting, connectivity. `lnk_pipeline_*` reads as "these are pipeline building blocks." + +### Static branching (`tar_map`) vs dynamic (`pattern = map(wsg)`) +Use `tar_map`. Static branching produces named targets (`comparison_BULK`, `comparison_ADMS`) — debuggable, inspectable, diffable. Dynamic branching hides per-element names behind indices — harder to trace. + +### Targets in `Suggests`, not `Imports` +Pipeline-dev dependency, not user-facing. Users who want to run the comparison can `install.packages(c("targets", "crew"))` on demand. `link` itself stays minimal. 
+ +### Regenerate the research doc DAG +`tar_mermaid()` output replaces the hand-written Mermaid in `research/bcfishpass_comparison.md`. Single source of truth. Keep the glossary and `classDef` color-coding — those are human decoration, not pipeline structure. + +### `compare_bcfishpass_wsg()` return shape +```r +tibble::tibble( + wsg = "BULK", + species = "BT", + habitat_type = c("spawning", "rearing"), + link_km = c(34.2, 71.8), + bcfishpass_km = c(33.1, 73.4), + diff_pct = c(+3.3, -2.2) +) +``` +Pulls from fresh's `streams_habitat` table joined against `bcfishpass.streams_habitat_linear_*` reference tables. Both live on the worker's localhost DB (byte-identical dumps on M4 and M1 per rtj). + +## PR 2 design constraint: `fresh.streams` is not per-AOI + +`lnk_pipeline_prepare` writes base segments to `fresh.streams`, matching the legacy compare script. Fresh's downstream functions (`frs_break_apply`, `frs_habitat_classify`, `frs_cluster`) assume that table path. Every pipeline run does `DROP TABLE IF EXISTS fresh.streams CASCADE` before rebuilding, so two parallel AOI runs on the same host would overwrite each other's segments and race the `streams_habitat` output. + +This is tolerable for single-host single-run today, but it breaks the parallel `tar_map(wsg = ...)` + `crew_controller_local(workers = 2)` design. Three ways to handle in PR 2: + +1. `crew_controller_local(workers = 1)` — serialize runs on each host. Simplest, defeats half the parallelism gain. +2. Per-AOI fresh table names — patch fresh to accept a `streams_table` parameter across break/classify/cluster, or write into `<schema>.streams` and follow all downstream fresh calls with that. Substantial fresh work. +3. Separate database per worker — overkill. + +Leaning toward option 1 for the initial PR 2, with option 2 as a fresh follow-up. Document explicitly. 
+ +## Other accepted fragilities (from code-check on `prepare`) + +- `id_segment` assignment in `prep_network` uses `row_number() OVER (ORDER BY blue_line_key, downstream_route_measure)`. Ties produce arbitrary ordering across runs. Faithful to compare script; ties are rare on FWA in practice. +- `natural_barriers` re-joins gradient barriers to FWA instead of using the already-enriched ltree columns from `gradient_barriers_raw`. Silent drop if an enrichment UPDATE left NULL ltree. Faithful to compare script; hasn't hit in 4 WSGs of real data. +- Per-model gradient class sets (bt, ch_cm_co_pk_sk, st, wct) are hardcoded in `prep_minimal`. TODO in-code to move into `cfg$pipeline$gradient_models` so variants can swap them. + +## Unknowns to resolve during implementation + +- How cleanly does `frs_habitat_classify()` accept a `working_<wsg>` schema? Does it assume `working.*`? If so, we need a `working_schema` arg in fresh. If `lnk_habitat_classify` writes to a schema name that fresh doesn't know about, classification may fail. +- Per-WSG schema cleanup contract — `on.exit(DROP SCHEMA working_<wsg> CASCADE)` inside `compare_bcfishpass_wsg()`, or let the next run drop + recreate? +- Does `frs_break_apply()` need to know the schema for the streams table, or does the input table name carry it? + +Document findings as discovered. 
+ +## Cross-refs + +- rtj/docs/distributed-fwapg.md — architectural source of truth +- fresh 0.14.0 — `frs_barriers_minimal()` is prerequisite for `lnk_habitat_build_network` +- link 0.2.0 — `lnk_config()` feeds all phases + +## Versions + +- fresh: 0.14.0 +- link: main (0.2.0 → 0.3.0) +- bcfishpass: ea3c5d8 +- fwapg: Docker (FWA 20240830) diff --git a/planning/archive/2026-04-23-targets-pipeline/progress.md b/planning/archive/2026-04-23-targets-pipeline/progress.md new file mode 100644 index 0000000..5fedb39 --- /dev/null +++ b/planning/archive/2026-04-23-targets-pipeline/progress.md @@ -0,0 +1,47 @@ +# Progress + +## Session 2026-04-22 + +- Archived lnk_config PWF (shipped as link 0.2.0 via PR #39) +- Starting link#38: `_targets.R` pipeline +- Dependencies cleared: fresh 0.14.0 (frs_barriers_minimal) and link 0.2.0 (lnk_config) are on main +- rtj data parity on M4 + M1 confirmed; R install on M1 (Phase 3) still pending but not blocking — single-host first +- Issue #38 updated with package-vs-pipeline split (helpers in `R/`, `_targets.R` + comparison in `data-raw/`) +- PR 1 Phase 1.1 done: `lnk_pipeline_setup()` (originally `lnk_habitat_setup_schema`, renamed before building more). Mocked tests for SQL shape + identifier validation (8 passing). Live DB test intentionally skipped — CREATE SCHEMA semantics are Postgres's, not ours to test. +- Naming decision: prefix is `lnk_pipeline_*` (not `lnk_habitat_*` — only 1 of 6 phases is actually about habitat). Phase names read as verbs: setup → load → prepare → break → classify → connect. +- Param decision: canonical `(conn, aoi, cfg, schema)`. `aoi` follows fresh convention — accepts a WSG code today; extends to ltree filters, sf polygons, mapsheets later. `setup` is the only outlier: `(conn, schema, overwrite)`. +- PR 1 Phase 1.2 done: `lnk_pipeline_load()` — loads crossings + misc crossings + applies modelled fixes (NONE/OBS → PASSABLE) + PSCIS barrier status overrides. 
Split into three internal `@noRd` helpers for readability. Cleaner scope than the original "load_inputs" plan: falls, definite barriers, observation exclusions, and habitat classification moved to `prepare` where they're actually consumed. 12 tests (4 input validation + 4 fixes SQL/branching + 1 apply_pscis branching + 3 structure). 169 link tests total. +- PR 1 Phase 1.3 done: `lnk_pipeline_prepare()` — thin orchestrator over 6 internal sub-helpers (prep_load_aux, prep_gradient, prep_natural, prep_overrides, prep_minimal, prep_network). First real consumer of `frs_barriers_minimal()` from fresh 0.14.0. `.lnk_quote_literal()` added to utils.R for safe SQL literal interpolation. 31 new tests (input validation + SQL shape + 4 model minimal reductions + union). Full link suite at 200 passing. +- Code-check found one genuine architectural concern for PR 2: `fresh.streams` is a shared schema, parallel WSG runs on one host would collide. Noted in findings.md with three mitigation options (leaning toward `workers = 1` for initial PR 2). +- PR 1 Phase 1.4 done: `lnk_pipeline_break()` — builds observations_breaks (species-filtered via `cfg$wsg_species` + data-error exclusions), habitat_endpoints (DRM + URM union), crossings_breaks, then sequential `frs_break_apply` respecting `cfg$pipeline$break_order` with `id_segment` reassignment between rounds. Four internal `@noRd` sub-helpers. 13 new tests (input validation + obs species derivation incl. CT expansion + SQL shape per branch + break_order honored). Full link suite at 229 passing. +- PR 1 Phase 1.5/1.6 done: `lnk_pipeline_classify()` + `lnk_pipeline_connect()` — classify builds `fresh.streams_breaks` (gradient FULL + falls + definite + crossings, WSG-filtered) then calls `frs_habitat_classify()` with rules YAML + barrier overrides. Connect wraps fresh's `.frs_run_connectivity` for per-species cluster + connected_waterbody. 
Both auto-derive species from `cfg$parameters_fresh` ∩ `cfg$wsg_species` presence for the AOI; both accept explicit `species =` override. 22 tests covering input validation, species derivation, access-gating breaks SQL shape, no-species error. Full link suite at 251 passing. +- **All six pipeline helpers complete.** +- PR 1 Phase 1.7 done: compare_bcfishpass.R rewritten from 635 lines to 136 lines using the six helpers. ADMS run 67s end-to-end, all species within 5%, spawning values identical to research doc, rearing within ~1% (acceptable ordering variance from id_segment tie-breaking). +- Fix along the way: added `cfg$species` (parsed from rules YAML at load) so `lnk_pipeline_classify_species` intersects against rules species (8) instead of parameters_fresh species (11). parameters_fresh has CT/DV/RB which bcfishpass doesn't model. Also added `barriers_definite` to `config.yaml` `break_order` (was missing). +- PR 1 ready to close. Remaining: NEWS/DESCRIPTION bump, final `/code-check`, PR with SRED tag. +- PR 1 MERGED as link 0.3.0 (PR #41). Branch deleted. + +## PR 2 kickoff + +- Branched `38-targets-pipeline-pr2` off main. +- Wrote `data-raw/compare_bcfishpass_wsg(wsg, config)` — wraps the six phase helpers for one WSG, returns a small tibble (wsg × species × habitat_type × link_km × bcfishpass_km × diff_pct). KB-scale return — no geometry, ships cleanly over SSH when distributed. +- Wrote `data-raw/_targets.R` — `tar_map(wsg = 4 WSGs)` over the per-WSG target, `crew_controller_local(workers = 1)`, rollup target binds all four tibbles. Serial because `fresh.streams` is a shared schema across workers on the same host (findings.md). +- Added `targets` / `crew` / `tibble` / `dplyr` to DESCRIPTION Suggests. +- Drift lesson from PR 1 → Issue #40 filed (CSV provenance + runtime stamps). Scope expands `lnk_stamp` (#24) into the lineage source. +- Next: `/code-check` on PR 2 staged diff, then `tar_make()` end-to-end, commit stamped verification log. 
+- Reframing (per user): the correctness bar is **bit-identical output from the same inputs**, not "within 5% of bcfishpass." The 5% comparison is parity diagnostics only. Saved to memory (`feedback_reproducibility.md`) + CLAUDE.md. Research-doc drift from earlier today (BT rearing -0.7 → -1.1) is env-state drift, not pipeline non-determinism — to be traceable once stamps/lineage ship (#40). +- tar_make end-to-end done. Three successive runs (10, 11, 12) produced bit-identical 34-row rollup tibbles — reproducibility proven. Wall clock ~8m 30s per run (serial). +- Promoted `.lnk_pipeline_classify_species` → exported `lnk_pipeline_species(cfg, aoi)` to remove duplication with the data-raw inline helper. Tests moved to `test-lnk_pipeline_species.R`. classify + connect internals updated. Compare wrapper uses `link::lnk_pipeline_species()`. +- Code-check surfaced a real connection leak (second `dbConnect` could throw before `on.exit` registered) and SQL quoting inconsistency on species list. Both fixed; 12th run confirms numbers unchanged. +- DESCRIPTION bumped to 0.4.0. NEWS entry captures the reproducibility + parity distinction. Committing and pushing PR 2 next. +- PR 2 MERGED as link 0.4.0 (PR #42). Branch deleted. + +## PR 3 kickoff + +- Branched `38-targets-pipeline-pr3` off main. +- `tar_mermaid()` reviewed — output is hashed-ID graph unsuitable as a research-doc DAG. Kept the hand-written pipeline DAG and added a clean "Targets orchestration" Mermaid beside it. +- Research doc results table refreshed with run 12 numbers (2026-04-22), correctness-bar section added at top. +- Vignette `reproducing-bcfishpass.Rmd` written — three-line entrypoint, rollup table, BULK CH habitat mapgl map. Pre-computes artifacts via `data-raw/vignette_reproducing_bcfishpass.R` → `inst/extdata/vignette-data/{rollup,bulk_ch}.rds`. Rendered clean on local test. +- Retired `data-raw/compare_bcfishpass.R`; `_targets.R` + `compare_bcfishpass_wsg.R` supersede it. 
+- DESCRIPTION bumped to 0.5.0; mapgl + sf added to Suggests. +- Next: `/code-check` on staged diff, commit, push, PR with SRED tag. diff --git a/planning/archive/2026-04-23-targets-pipeline/task_plan.md b/planning/archive/2026-04-23-targets-pipeline/task_plan.md new file mode 100644 index 0000000..af41346 --- /dev/null +++ b/planning/archive/2026-04-23-targets-pipeline/task_plan.md @@ -0,0 +1,73 @@ +# Task Plan: _targets.R pipeline (#38) + +## Goal + +Replace the 635-line `data-raw/compare_bcfishpass.R` script with a targets-driven pipeline that: +- Runs each DAG node as a `tar_target()` — inspectable, cacheable, skippable +- Parallelizes across watershed groups via `tar_map(wsg = c(...))` +- Regenerates the research doc DAG from `tar_mermaid()` +- Single-host on M4 first; distributed swap to `crew_controller_group(local=M4, cluster=M1)` is a follow-up after rtj Phase 4 + +Uses `lnk_config("bcfishpass")` (shipped in 0.2.0) and `frs_barriers_minimal()` (fresh 0.14.0). + +## Package vs pipeline split + +Helpers (`lnk_habitat_*`) go in `R/` as exported package functions — generic building blocks any caller can compose. `_targets.R` + `compare_bcfishpass_wsg()` go in `data-raw/` — this specific comparison pipeline, not part of the installed package. `data-raw/` is the canonical R-package home for "code that USES this package to produce outputs." + +## PR 1: Extract helpers to R/lnk_pipeline_*.R + +Break the 635-line script into small named functions (one per pipeline phase). Canonical signature `(conn, aoi, cfg, schema)` — `aoi` follows fresh convention (accepts a WSG code today; extends to ltree filters, sf polygons, mapsheets later). `setup` is the only outlier: `(conn, schema, overwrite)`. + +- [x] `R/lnk_pipeline_setup.R` — create working schema, ensure `fresh` schema +- [x] `R/lnk_pipeline_load.R` — crossings + modelled fixes + PSCIS status overrides. 
Falls, definite barriers, observation exclusions, habitat classification moved to `prepare` (load stays focused on anthropogenic crossings) +- [x] `R/lnk_pipeline_prepare.R` — loads falls + definite + control + habitat confirms; detects gradient barriers (`frs_break_find`) with control pruning + ltree enrichment; builds natural_barriers; computes barrier overrides via `lnk_barrier_overrides`; per-model non-minimal reduction via `frs_barriers_minimal` (fresh 0.14.0); loads fresh.streams with channel_width + stream_order_parent + GENERATED cols + id_segment. Six internal `@noRd` sub-helpers +- [x] `R/lnk_pipeline_break.R` — builds observations_breaks (species-filtered + exclusions), habitat_endpoints (DRM + URM), crossings_breaks; runs sequential `frs_break_apply` in config-defined order with `id_segment` reassignment between rounds +- [x] `R/lnk_pipeline_classify.R` — builds access-gating `fresh.streams_breaks` (gradient + falls + definite + crossings), calls `frs_habitat_classify` with rules YAML + thresholds + barrier overrides. Species default derives from `cfg$parameters_fresh` ∩ `cfg$wsg_species` presence for the AOI. +- [x] `R/lnk_pipeline_connect.R` — calls fresh's `.frs_run_connectivity` (per-species cluster + connected_waterbody driven by `cfg$parameters_fresh` flags). Fresh internal access flagged as a follow-up (export a stable API in fresh). 
+- [x] Update existing `data-raw/compare_bcfishpass.R` to call the helpers — verified on ADMS (635 lines → 136 lines, all species within 5%, sub-1% rearing drift from research doc acceptable) +- [ ] Tests + runnable examples for each helper (live-DB tests skip without `.lnk_db_available()`) +- [ ] pkgdown reference entries +- [ ] `/code-check` before each commit +- [ ] PR 1: SRED tag (NewGraphEnvironment/sred-2025-2026#24) — Relates to #38 + +## PR 2: Add _targets.R + per-partition target fn + +- [x] `data-raw/compare_bcfishpass_wsg.R` — wraps pipeline phases for one WSG, returns ~10-row tibble (wsg × species × habitat_type × link_km × bcfishpass_km × diff_pct). Creates own conn + conn_ref with fail-early on missing `PG_PASS_SHARE`, registers on.exit cleanup per-conn (no leak on second conn failure), cleans up on exit. Defensive drop of `fresh.streams*` at entry. +- [x] Pulls comparison diff against `bcfishpass.habitat_linear_*` reference over tunnel. All interpolated strings go through `DBI::dbQuoteLiteral`. +- [x] `data-raw/_targets.R` with static `tar_map(wsg = c("ADMS","BULK","BABL","ELKR"))` + synchronous execution (crew removed after the controller hung on dispatched-but-never-complete behavior; shared `fresh.streams` prevents parallel anyway). +- [x] `targets` + `tarchetypes` + `tibble` + `dplyr` → DESCRIPTION Suggests (crew dropped). +- [x] **Promote `.lnk_pipeline_classify_species` → exported `lnk_pipeline_species(cfg, aoi)`** — canonical public helper for "species this config classifies in this AOI". Used by classify + connect internally and by data-raw externally. Removes both the duplicated private helper and the inlined `.wsg_species_present` from data-raw. +- [x] Run `tar_make()` end-to-end on all 4 WSGs. Rollup = 34 rows, all within 5% of bcfishpass. Reproducibility check: runs 10 + 11 produced bit-identical rollup tibbles. 
+- [x] Log the run under `data-raw/logs/20260422_10_tar_make_from_dataraw.txt` + `20260422_11_tar_make_final.txt` (plus `20260422_12_*` post-fix re-verify). +- [x] `/code-check` before commit — found a real conn leak (second dbConnect could throw before on.exit registered) and a SQL quoting inconsistency on species; both fixed and re-verified. +- [x] **Correctness framing** — reframed verification from "within 5% of bcfishpass" to "bit-identical across repeated runs". Added section to CLAUDE.md + memory entry. Confirmed across three runs (10, 11, 12) — all 34 rollup rows identical. +- [ ] PR 2: SRED tag — Relates to #38 + +## PR 3: Retire old script + research doc refresh + vignette + +- [x] `tar_mermaid()` reviewed — output is hashed-ID orchestration graph, poor replacement for the hand-written pipeline-phase DAG. Kept the pipeline DAG in `research/bcfishpass_comparison.md`; added a small "Targets orchestration" Mermaid showing cfg → 4 WSGs → rollup. +- [x] Research doc refreshed with 2026-04-22 rollup numbers (was 2026-04-15) + reproducibility framing at top. +- [x] Delete `data-raw/compare_bcfishpass.R` — superseded by `_targets.R` + `compare_bcfishpass_wsg.R`. Git history preserves. +- [x] Vignette `vignettes/reproducing-bcfishpass.Rmd` — narrative, three-line entrypoint, rollup table, BULK CH habitat mapgl map, reproducibility note, pointers to future default-variant vignette. +- [x] `data-raw/vignette_reproducing_bcfishpass.R` — pre-computes `rollup.rds` + `bulk_ch.rds` into `inst/extdata/vignette-data/` so the vignette doesn't hit the DB at build time. CLAUDE.md vignette convention. +- [x] `mapgl`, `sf` added to DESCRIPTION Suggests. +- [x] NEWS entry + bump to 0.5.0. 
+- [ ] `/code-check` before commit +- [ ] PR 3: SRED tag — Fixes #38 + +## Follow-up (out of scope) + +- Distributed execution — swap `crew_controller_local()` for `crew_controller_group(local=M4, cluster=M1)` after rtj Phase 4 passes the M4→M1 SSH exec check +- `configs/default/` variant wired into a second `_targets.R` or CLI arg — tracked via #19/#20/#21 biological decisions + +## Follow-up (out of scope for this PR) + +- Distributed execution — swap `crew_controller_local()` for `crew_controller_group(local=M4, cluster=M1)` after rtj Phase 4 passes the M4→M1 SSH exec check +- `configs/default/` variant wired into a second `_targets.R` or CLI arg — tracked via #19/#20/#21 biological decisions + +## Versions at start + +- fresh: 0.14.0 +- link: main (0.2.0, target 0.3.0) +- bcfishpass: ea3c5d8 +- fwapg: Docker (FWA 20240830) diff --git a/research/bcfishpass_comparison.md b/research/bcfishpass_comparison.md index 83ad7f3..595060a 100644 --- a/research/bcfishpass_comparison.md +++ b/research/bcfishpass_comparison.md @@ -47,6 +47,18 @@ All species within 5% of bcfishpass reference. Pipeline runs serially in ~8.5 mi | BT | +3.4% | -0.7% | | WCT | +4.0% | +1.6% | +### DEAD + +Added 2026-04-23 (#44) as the end-to-end test for `barriers_definite_control`. DEAD has a single `barrier_ind = TRUE` control row at FALLS (356361749, 45743) with six anadromous observations upstream in the CH/CM/CO/PK/SK pool and zero habitat-classification coverage — the unique combination that actively exercises the filter. Pre-fix link would have overridden the fall (six observations exceed the threshold of five; habitat-path coverage absent); post-fix link correctly blocks the override for anadromous species and matches bcfishpass, which keeps the fall in `barriers_ch_cm_co_pk_sk` post-override. BT is allowed to override the fall because `observation_control_apply = FALSE` for BT — mirrors bcfishpass's `model_access_bt.sql` which has no control join. 
+ +| Species | Spawning | Rearing | +|---------|----------|---------| +| BT | +2.1% | -0.2% | +| CH | +1.4% | +1.4% | +| CO | +1.3% | -0.3% | +| PK | +1.1% | N/A | +| ST | +1.3% | +0.0% | + ## DAG ```mermaid @@ -95,17 +107,19 @@ flowchart LR cfg --> BULK["compare_bcfishpass_wsg
BULK"] cfg --> BABL["compare_bcfishpass_wsg
BABL"] cfg --> ELKR["compare_bcfishpass_wsg
ELKR"] + cfg --> DEAD["compare_bcfishpass_wsg
DEAD"] - ADMS --> rollup["rollup
34 rows · wsg × species × habitat_type × km × diff_pct"] + ADMS --> rollup["rollup
46 rows · wsg × species × habitat_type × km × diff_pct"] BULK --> rollup BABL --> rollup ELKR --> rollup + DEAD --> rollup classDef root fill:#eef,stroke:#336; classDef wsg fill:#efe,stroke:#363; classDef sink fill:#fee,stroke:#633; class cfg root - class ADMS,BULK,BABL,ELKR wsg + class ADMS,BULK,BABL,ELKR,DEAD wsg class rollup sink ``` @@ -131,6 +145,17 @@ Composite steps in the DAG that aren't a single function call: | SK spawn_connected additive step | -9.6% → -0.7% | fresh code (0.13.6) | | Three-phase cluster | CH +6% → +2.6% | fresh code (0.13.8) | | Index input tables | 228s → 6.6s classification | fresh code (0.13.4) | +| Wire `barriers_definite_control` into override step, per-species, observation-path only | DEAD CH/CO/PK/ST +1.1 to +1.4% (moot on ADMS/BULK/BABL/ELKR) | link code (0.6.0) | + +### barriers_definite_control wiring (#44) + +bcfishpass pairs `user_barriers_definite.csv` with a control table that flags positions as non-overridable (`barrier_ind = TRUE`) — known fish-blocking dams, long impassable falls, diversions. Historical observations upstream should not re-open these barriers. link's override step was not honouring this table. Three fixes land together in 0.6.0: + +1. **Observation-path filter.** `lnk_barrier_overrides()` excludes observations from counting toward the override threshold when the barrier position has a matching TRUE control row. Uses `NOT EXISTS` rather than a LEFT JOIN so the outer `HAVING count(...) >= threshold` aggregation is not row-multiplied. +2. **Per-species application.** New column `observation_control_apply` in `parameters_fresh.csv` (TRUE for CH/CM/CO/PK/SK/ST; FALSE for BT/WCT) gates the filter. Residents inhabit reaches upstream of anadromous-blocking falls routinely (post-glacial headwater connectivity), so their observations still override. Matches bcfishpass's per-model SQL — `model_access_bt.sql` has no control join; `model_access_ch_cm_co_pk_sk.sql` and `model_access_st.sql` do. +3. 
**Habitat path untouched.** Expert-confirmed habitat is higher-trust than observations; it bypasses the control table, consistent with bcfishpass's `hab_upstr` CTE which has no control join. + +End-to-end validation on DEAD (added specifically for this reason — see section above). Numerical impact on the four original WSGs is zero because every TRUE control row is already rescued by the observation threshold or the habitat path; the filter is correctly wired but inactive on those WSGs. ## Remaining gaps diff --git a/tests/testthat/test-lnk_barrier_overrides.R b/tests/testthat/test-lnk_barrier_overrides.R new file mode 100644 index 0000000..b6a5dd6 --- /dev/null +++ b/tests/testthat/test-lnk_barrier_overrides.R @@ -0,0 +1,269 @@ +# -- ctrl_filter honours barrier_ind (mocked SQL) ----------------------------- + +# The ctrl_where / ctrl_filter pattern appears only inside the +# observation-based override SQL; the habitat-based override SQL omits it. +# These tests capture the rendered SQL and assert the filter shape. + +.stub_params <- function(control_apply = TRUE) { + data.frame( + species_code = "BT", + observation_threshold = 1L, + observation_date_min = "2000-01-01", + observation_buffer_m = 20, + observation_species = "BT", + observation_control_apply = control_apply, + stringsAsFactors = FALSE + ) +} + +test_that("lnk_barrier_overrides honours barrier_ind = true via NOT EXISTS", { + captured <- character(0) + local_mocked_bindings( + dbExecute = function(conn, sql, ...) { + captured <<- c(captured, sql) + 1L + }, + dbGetQuery = function(conn, sql, ...) 
{ + if (grepl("information_schema.columns", sql)) { + return(data.frame( + column_name = c("blue_line_key", "wscode_ltree", "localcode_ltree"), + stringsAsFactors = FALSE)) + } + if (grepl("SELECT count", sql, ignore.case = TRUE)) { + return(data.frame(count = 0L)) + } + data.frame() + }, + .package = "DBI" + ) + + lnk_barrier_overrides( + conn = "mock", + barriers = "working.natural_barriers", + observations = "bcfishobs.observations", + control = "working.barriers_definite_control", + params = .stub_params(), + to = "working.barrier_overrides", + verbose = FALSE + ) + + joined <- paste(captured, collapse = "\n") + # NOT EXISTS subquery in WHERE, not a LEFT JOIN in FROM + expect_match(joined, "AND NOT EXISTS", fixed = TRUE) + expect_match(joined, "FROM working.barriers_definite_control c", + fixed = TRUE) + expect_match(joined, "c.barrier_ind::boolean = true", + fixed = TRUE) + expect_no_match(joined, "LEFT JOIN.*barriers_definite_control") +}) + +test_that("lnk_barrier_overrides omits ctrl_filter when control is NULL", { + captured <- character(0) + local_mocked_bindings( + dbExecute = function(conn, sql, ...) { + captured <<- c(captured, sql) + 1L + }, + dbGetQuery = function(conn, sql, ...) 
{ + if (grepl("information_schema.columns", sql)) { + return(data.frame( + column_name = c("blue_line_key", "wscode_ltree", "localcode_ltree"), + stringsAsFactors = FALSE)) + } + if (grepl("SELECT count", sql, ignore.case = TRUE)) { + return(data.frame(count = 0L)) + } + data.frame() + }, + .package = "DBI" + ) + + lnk_barrier_overrides( + conn = "mock", + barriers = "working.natural_barriers", + observations = "bcfishobs.observations", + control = NULL, + params = .stub_params(), + to = "working.barrier_overrides", + verbose = FALSE + ) + + joined <- paste(captured, collapse = "\n") + expect_no_match(joined, "NOT EXISTS.*barriers_definite_control") + expect_no_match(joined, "LEFT JOIN.*barriers_definite_control") + expect_no_match(joined, "c\\.barrier_ind::boolean") +}) + +test_that("habitat override path is NOT gated by control (bcfishpass parity)", { + # bcfishpass's hab_upstr CTE has no control join. Expert-confirmed + # habitat is higher-trust than observations; the control table does not + # block it. Any drift here would silently under-override bcfishpass. + captured <- character(0) + local_mocked_bindings( + dbExecute = function(conn, sql, ...) { + captured <<- c(captured, sql) + 1L + }, + dbGetQuery = function(conn, sql, ...) 
{ + if (grepl("information_schema.columns", sql)) { + return(data.frame( + column_name = c("blue_line_key", "wscode_ltree", "localcode_ltree"), + stringsAsFactors = FALSE)) + } + if (grepl("SELECT count", sql, ignore.case = TRUE)) { + return(data.frame(count = 0L)) + } + data.frame() + }, + .package = "DBI" + ) + + # No observations; habitat path only + lnk_barrier_overrides( + conn = "mock", + barriers = "working.natural_barriers", + observations = NULL, + habitat = "working.user_habitat_classification", + control = "working.barriers_definite_control", + params = .stub_params(), + to = "working.barrier_overrides", + verbose = FALSE + ) + + habitat_sql <- captured[grepl("working.user_habitat_classification h", + captured, fixed = TRUE)] + expect_true(length(habitat_sql) >= 1) + habitat_joined <- paste(habitat_sql, collapse = "\n") + expect_no_match(habitat_joined, "NOT EXISTS") + expect_no_match(habitat_joined, "barriers_definite_control") + expect_no_match(habitat_joined, "c\\.barrier_ind::boolean") +}) + +# -- per-species control gate (observation_control_apply) -------------------- + +test_that("ctrl_filter omitted when observation_control_apply = FALSE", { + captured <- character(0) + local_mocked_bindings( + dbExecute = function(conn, sql, ...) { + captured <<- c(captured, sql) + 1L + }, + dbGetQuery = function(conn, sql, ...) 
{ + if (grepl("information_schema.columns", sql)) { + return(data.frame( + column_name = c("blue_line_key", "wscode_ltree", "localcode_ltree"), + stringsAsFactors = FALSE)) + } + if (grepl("SELECT count", sql, ignore.case = TRUE)) { + return(data.frame(count = 0L)) + } + data.frame() + }, + .package = "DBI" + ) + + lnk_barrier_overrides( + conn = "mock", + barriers = "working.natural_barriers", + observations = "bcfishobs.observations", + habitat = "working.user_habitat_classification", + control = "working.barriers_definite_control", + params = .stub_params(control_apply = FALSE), + to = "working.barrier_overrides", + verbose = FALSE + ) + + joined <- paste(captured, collapse = "\n") + # Control is declared at call site, but this species opts out. + expect_no_match(joined, "NOT EXISTS.*barriers_definite_control") + expect_no_match(joined, "c\\.barrier_ind::boolean") +}) + +test_that("ctrl_filter omitted when observation_control_apply = NA", { + captured <- character(0) + local_mocked_bindings( + dbExecute = function(conn, sql, ...) { + captured <<- c(captured, sql) + 1L + }, + dbGetQuery = function(conn, sql, ...) 
{ + if (grepl("information_schema.columns", sql)) { + return(data.frame( + column_name = c("blue_line_key", "wscode_ltree", "localcode_ltree"), + stringsAsFactors = FALSE)) + } + if (grepl("SELECT count", sql, ignore.case = TRUE)) { + return(data.frame(count = 0L)) + } + data.frame() + }, + .package = "DBI" + ) + + lnk_barrier_overrides( + conn = "mock", + barriers = "working.natural_barriers", + observations = "bcfishobs.observations", + control = "working.barriers_definite_control", + params = .stub_params(control_apply = NA), + to = "working.barrier_overrides", + verbose = FALSE + ) + + joined <- paste(captured, collapse = "\n") + expect_no_match(joined, "NOT EXISTS.*barriers_definite_control") + expect_no_match(joined, "c\\.barrier_ind::boolean") +}) + +test_that("ctrl_filter gated per-species across mixed params", { + captured <- character(0) + local_mocked_bindings( + dbExecute = function(conn, sql, ...) { + captured <<- c(captured, sql) + 1L + }, + dbGetQuery = function(conn, sql, ...) { + if (grepl("information_schema.columns", sql)) { + return(data.frame( + column_name = c("blue_line_key", "wscode_ltree", "localcode_ltree"), + stringsAsFactors = FALSE)) + } + if (grepl("SELECT count", sql, ignore.case = TRUE)) { + return(data.frame(count = 0L)) + } + data.frame() + }, + .package = "DBI" + ) + + mixed_params <- data.frame( + species_code = c("BT", "CH"), + observation_threshold = c(1L, 5L), + observation_date_min = c("1990-01-01", "1990-01-01"), + observation_buffer_m = c(20, 20), + observation_species = c("BT;CH", "CH;CM;CO;PK;SK"), + observation_control_apply = c(FALSE, TRUE), + stringsAsFactors = FALSE + ) + + lnk_barrier_overrides( + conn = "mock", + barriers = "working.natural_barriers", + observations = "bcfishobs.observations", + control = "working.barriers_definite_control", + params = mixed_params, + to = "working.barrier_overrides", + verbose = FALSE + ) + + # Two per-species INSERTs were emitted. BT's should have no NOT EXISTS; + # CH's should. 
Identify by the species-code literal in SELECT. + bt_sql <- captured[grepl("SELECT b.blue_line_key, b.downstream_route_measure, 'BT'", + captured, fixed = TRUE)] + ch_sql <- captured[grepl("SELECT b.blue_line_key, b.downstream_route_measure, 'CH'", + captured, fixed = TRUE)] + expect_true(length(bt_sql) >= 1) + expect_true(length(ch_sql) >= 1) + expect_no_match(paste(bt_sql, collapse = "\n"), "NOT EXISTS") + expect_match(paste(ch_sql, collapse = "\n"), "NOT EXISTS", fixed = TRUE) +}) diff --git a/tests/testthat/test-lnk_pipeline_prepare.R b/tests/testthat/test-lnk_pipeline_prepare.R index db34d3f..49f42ed 100644 --- a/tests/testthat/test-lnk_pipeline_prepare.R +++ b/tests/testthat/test-lnk_pipeline_prepare.R @@ -187,3 +187,78 @@ test_that(".lnk_pipeline_prep_network loads fresh.streams with FWA filters", { expect_match(joined, "wscode_ltree <@ '999'::ltree IS FALSE") expect_match(joined, "ADD COLUMN id_segment integer") }) + +# -- prep_overrides control pass-through (manifest-driven) ------------------- + +test_that(".lnk_pipeline_prep_overrides passes control when manifest declares it", { + cfg_stub <- structure(list( + parameters_fresh = data.frame( + species_code = "BT", + observation_threshold = 1L, + observation_date_min = "2000-01-01", + observation_buffer_m = 20, + observation_species = "BT", + stringsAsFactors = FALSE + ), + overrides = list( + barriers_definite_control = data.frame( + blue_line_key = 360873822L, + downstream_route_measure = 1000, + barrier_ind = "t", + stringsAsFactors = FALSE + ) + ) + ), class = c("lnk_config", "list")) + + captured <- list() + local_mocked_bindings( + lnk_barrier_overrides = function(conn, ...) { + captured[["args"]] <<- list(...) + invisible(NULL) + } + ) + local_mocked_bindings( + dbGetQuery = function(conn, sql, ...) 
{ + data.frame() # no habitat table + }, + .package = "DBI" + ) + + .lnk_pipeline_prep_overrides("mock-conn", cfg = cfg_stub, + schema = "working_bulk", observations = "bcfishobs.observations") + + expect_equal(captured$args$control, "working_bulk.barriers_definite_control") +}) + +test_that(".lnk_pipeline_prep_overrides passes control = NULL when manifest omits it", { + cfg_stub <- structure(list( + parameters_fresh = data.frame( + species_code = "BT", + observation_threshold = 1L, + observation_date_min = "2000-01-01", + observation_buffer_m = 20, + observation_species = "BT", + stringsAsFactors = FALSE + ), + overrides = list() # no barriers_definite_control key + ), class = c("lnk_config", "list")) + + captured <- list() + local_mocked_bindings( + lnk_barrier_overrides = function(conn, ...) { + captured[["args"]] <<- list(...) + invisible(NULL) + } + ) + local_mocked_bindings( + dbGetQuery = function(conn, sql, ...) { + data.frame() + }, + .package = "DBI" + ) + + .lnk_pipeline_prep_overrides("mock-conn", cfg = cfg_stub, + schema = "working_bulk", observations = "bcfishobs.observations") + + expect_null(captured$args$control) +}) diff --git a/vignettes/reproducing-bcfishpass.Rmd b/vignettes/reproducing-bcfishpass.Rmd index 715a86b..5a51507 100644 --- a/vignettes/reproducing-bcfishpass.Rmd +++ b/vignettes/reproducing-bcfishpass.Rmd @@ -31,6 +31,27 @@ to run it, and how the output compares to bcfishpass reference tables. Full per-phase pipeline detail lives in [`research/bcfishpass_comparison.md`](https://github.com/NewGraphEnvironment/link/blob/main/research/bcfishpass_comparison.md). +## Prerequisites + +The pipeline reads from a PostgreSQL database with +[fwapg](https://github.com/smnorris/fwapg) loaded. 
fwapg is the +processed form of the BC Freshwater Atlas — it adds `wscode_ltree` +and `localcode_ltree` columns to the stream-network tables (PostgreSQL +`ltree` types encoding watershed topology) and provides the SQL +functions the pipeline uses to traverse the network: +[`fwa_upstream`](https://github.com/smnorris/fwapg/blob/main/sql/functions/FWA_Upstream.sql), +[`fwa_downstream`](https://github.com/smnorris/fwapg/blob/main/sql/functions/FWA_Downstream.sql), +[`fwa_watershedatmeasure`](https://github.com/smnorris/fwapg/blob/main/sql/functions/FWA_WatershedAtMeasure.sql), +and others. See fwapg's repository for installation. + +[bcfishobs](https://github.com/smnorris/bcfishobs) is optional but +recommended — it populates `bcfishobs.observations`, the table that +drives per-species overrides of natural barriers below. + +The comparison layer in the map at the end of this vignette reads +from a read-only tunnel to the bcfishpass reference database. That is +a validation convenience, not a requirement for running link. + ## How the bcfishpass configuration works The rollup measures **intrinsic habitat potential conditioned on @@ -45,7 +66,7 @@ suitable *and* accessible — accessibility and intrinsic potential are separable in general, and a fuller treatment would report both. ``` -FWA streams (raw) +FWA stream network (via fwapg, ltree-enriched) │ │ gradient thresholds detect barriers @ 15 / 20 / 25 / 30 % ▼ @@ -78,16 +99,19 @@ segment is one classification unit. Breaks therefore fall at positions where the decision can change: - **Observations.** bcfishpass's per-species access models flip a - natural-barrier reach to accessible when the count of fish - observations on the upstream flow path meets a threshold. Thresholds - and species filters vary per model (see the SQL under + natural-barrier reach (gradient barrier, falls, or user-definite + barrier) to accessible when the count of upstream fish + observations meets a threshold. 
Thresholds and species filters + vary per model (see the SQL under [`model/access/`](https://github.com/smnorris/bcfishpass/tree/ea3c5d8/model)). Per-species parameters used by link live in the bundled `"bcfishpass"` config's [`parameters_fresh.csv`](https://github.com/NewGraphEnvironment/link/blob/main/inst/extdata/configs/bcfishpass/parameters_fresh.csv) (`observation_threshold`, `observation_date_min`, - `observation_buffer_m`, `observation_species`). For BULK (bcfishpass - commit `ea3c5d8`): + `observation_buffer_m`, `observation_species`). Override counting + is done in SQL via + [`fwa_upstream`](https://github.com/smnorris/fwapg/blob/main/sql/functions/FWA_Upstream.sql) + by `lnk_barrier_overrides`. For BULK (bcfishpass commit `ea3c5d8`): - BT — ≥ 1 observation of BT, CH, CM, CO, PK, SK, or ST; any date - CH / CM / CO / PK / SK — ≥ 5 observations in that salmon set, @@ -104,6 +128,17 @@ where the decision can change: so segmentation doesn't split reaches that would end up in the same access state. +- **User-identified definite barriers** — positions listed in + bcfishpass's + [`user_barriers_definite.csv`](https://github.com/smnorris/bcfishpass/blob/ea3c5d8/data/user_barriers_definite.csv) + (mirrored at + [`inst/extdata/configs/bcfishpass/overrides/user_barriers_definite.csv`](https://github.com/NewGraphEnvironment/link/blob/main/inst/extdata/configs/bcfishpass/overrides/user_barriers_definite.csv)). + Each row specifies `blue_line_key` and `downstream_route_measure` + for a barrier that always blocks access. Treated the same as falls + — always-blocking, always a break position, eligible for + per-species override via `lnk_barrier_overrides` when enough + upstream observations clear the threshold. 
+ - **Habitat classification endpoints** — manual spawning / rearing delineations from bcfishpass's [`user_habitat_classification.csv`](https://github.com/smnorris/bcfishpass/blob/ea3c5d8/data/user_habitat_classification.csv) @@ -167,18 +202,21 @@ library(targets) # `_targets.R` lives in data-raw/; run from that directory. setwd("data-raw") -tar_make() # 4 WSGs, serial +tar_make() # 5 WSGs, serial rollup <- tar_read(rollup) # per-WSG × species × habitat tibble ``` `tar_make()` runs [`compare_bcfishpass_wsg()`](https://github.com/NewGraphEnvironment/link/blob/main/data-raw/compare_bcfishpass_wsg.R) -once each for Adams (ADMS), Bulkley (BULK), Babine (BABL), and Elk -(ELKR), binding the per-WSG tibbles into one rollup. Each call -exercises the six `lnk_pipeline_*` phases. All four are run so the -rollup spans the species assemblages and watershed structures used -in bcfishpass validation — BT with CH, CO, SK on ADMS; PK and ST -added on BULK and BABL; BT with WCT on ELKR. Method agreement across +once each for Adams (ADMS), Bulkley (BULK), Babine (BABL), Elk +(ELKR), and Deadman (DEAD), binding the per-WSG tibbles into one +rollup. Each call exercises the six `lnk_pipeline_*` phases. ADMS/BULK/ +BABL/ELKR span the species assemblages used in bcfishpass validation — +BT with CH, CO, SK on ADMS; PK and ST added on BULK and BABL; BT with +WCT on ELKR. DEAD is an end-to-end test for the +`barriers_definite_control` wiring: it has a single `barrier_ind = TRUE` +control row with enough anadromous observations upstream to exercise +the filter, which the other four WSGs don't. Method agreement across this spread is stronger evidence than agreement on a single WSG. ## The rollup @@ -200,7 +238,8 @@ bcfishpass_km × 100`. 
w <- stats::reshape(x, idvar = "species", timevar = "wsg", direction = "wide", v.names = "diff_pct") names(w)[-1] <- sub("diff_pct\\.", "", names(w)[-1]) - cols <- intersect(c("species", "ADMS", "BULK", "BABL", "ELKR"), names(w)) + cols <- intersect(c("species", "ADMS", "BULK", "BABL", "ELKR", "DEAD"), + names(w)) w <- w[order(w$species), cols] row.names(w) <- NULL w