diff --git a/CLAUDE.md b/CLAUDE.md index e74a4fe..a2d2f0c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -62,7 +62,7 @@ conn <- lnk_db_conn() # reads PG_DB_SHARE, PG_HOST_SHARE, etc. - `lnk_score(conn, crossings, method)` — `method = "severity"` for biological impact classification (high/moderate/low). `method = "rank"` for weighted multi-criteria prioritization. Threshold-driven, NULL-safe, column-agnostic. ### Rules family -- `lnk_rules_build(csv, to, edge_types)` — transforms a species habitat dimensions CSV into the rules YAML format consumed by `frs_habitat()`. Two CSVs: NGE defaults (`parameters_habitat_dimensions.csv`) and bcfishpass comparison (`parameters_habitat_dimensions_bcfishpass.csv`). +- `lnk_rules_build(csv, to, edge_types)` — transforms a species habitat dimensions CSV into the rules YAML format consumed by `frs_habitat()`. Two CSVs: newgraph defaults (`inst/extdata/parameters_habitat_dimensions.csv`) and bcfishpass comparison variant (`inst/extdata/configs/bcfishpass/dimensions.csv`). ### Barrier overrides - `lnk_barrier_overrides(conn, barriers, observations, habitat, params, to)` — processes fish observations and habitat confirmations into a barrier skip list for fresh. Counts observations upstream of each barrier via `fwa_upstream()` SQL, applies per-species thresholds, unions with habitat confirmations. Output: `(blue_line_key, downstream_route_measure, species_code)` table that fresh skips during access gating. diff --git a/DESCRIPTION b/DESCRIPTION index f7ecb06..777fad7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: link Title: Crossing Connectivity Interpretation -Version: 0.1.0 +Version: 0.2.0 Authors@R: person("Allan", "Irvine", , "airvine@newgraphenvironment.com", role = c("aut", "cre"), @@ -21,7 +21,8 @@ Depends: Imports: DBI, RPostgres, - utils + utils, + yaml Remotes: NewGraphEnvironment/fresh Suggests: diff --git a/NAMESPACE b/NAMESPACE index cc68954..02b85ba 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,9 @@ # Generated by roxygen2: do not edit by hand +S3method(print,lnk_config) export(lnk_aggregate) export(lnk_barrier_overrides) +export(lnk_config) export(lnk_db_conn) export(lnk_load) export(lnk_match) diff --git a/NEWS.md b/NEWS.md index cbca443..a087b06 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# link 0.2.0 + +Config bundles for pipeline variants. + +- Add `lnk_config(name_or_path)` — load a config bundle (rules YAML, dimensions CSV, parameters_fresh, overrides, pipeline knobs) as one list object. Bundles live at `inst/extdata/configs//` with a `config.yaml` manifest, or any directory containing `config.yaml` for custom variants ([#37](https://github.com/NewGraphEnvironment/link/issues/37)) +- Relocate bcfishpass config files into `inst/extdata/configs/bcfishpass/` (rules.yaml, dimensions.csv, parameters_fresh.csv, overrides/). All R scripts and data-raw/ references updated. + # link 0.0.0.9000 Initial release. Crossing connectivity interpretation layer — scores, diff --git a/R/lnk_barrier_overrides.R b/R/lnk_barrier_overrides.R index e1502ef..cb36b92 100644 --- a/R/lnk_barrier_overrides.R +++ b/R/lnk_barrier_overrides.R @@ -31,7 +31,7 @@ #' @param params Data frame with per-species parameters. Must have columns: #' `species_code`, `observation_threshold`, `observation_date_min`, #' `observation_buffer_m`, `observation_species`. See -#' `parameters_fresh_bcfishpass.csv` for format. +#' `configs/bcfishpass/parameters_fresh.csv` for format. #' @param cols_index Character vector. Column names to index on the #' barriers table for `fwa_upstream()` performance. Indexes are created #' `IF NOT EXISTS`. Default `c("blue_line_key", "wscode_ltree", @@ -45,8 +45,8 @@ #' \dontrun{ #' conn <- lnk_db_conn() #' -#' params <- read.csv(system.file("extdata", -#' "parameters_fresh_bcfishpass.csv", package = "link")) +#' params <- read.csv(system.file("extdata", "configs", "bcfishpass", +#' "parameters_fresh.csv", package = "link")) #' #' lnk_barrier_overrides(conn, #' barriers = "fresh.streams_breaks", diff --git a/R/lnk_config.R b/R/lnk_config.R new file mode 100644 index 0000000..09a69c0 --- /dev/null +++ b/R/lnk_config.R @@ -0,0 +1,191 @@ +#' Load a Pipeline Config Bundle +#' +#' Reads a config bundle manifest (`config.yaml`) and returns a single +#' list object containing everything a pipeline needs to classify +#' habitat for a given interpretation variant — rules YAML, parameters, +#' overrides, observation exclusions, habitat confirmations, and +#' pipeline knobs (break order, cluster params, spawn_connected rules). +#' +#' A config bundle is a directory under `inst/extdata/configs//` +#' (for bundled variants) or an arbitrary directory path (for custom +#' variants) containing `config.yaml` plus the files the manifest +#' references. All file paths in the manifest are resolved relative to +#' the bundle directory. +#' +#' The returned list is the single object passed around the pipeline +#' (e.g. into `_targets.R`), so pipeline variants become a config +#' authoring exercise, not a code fork. +#' +#' @param name_or_path Character. Either a bundled config name +#' (`"bcfishpass"`, `"default"`) or an absolute path to a config +#' directory. Bundled names resolve to `system.file("extdata", +#' "configs", name, package = "link")`. +#' +#' @return An `lnk_config` S3 list with these slots: +#' +#' - `name` — config name (from `name_or_path` or the manifest) +#' - `dir` — absolute path to the config directory +#' - `rules_yaml` — absolute path to the rules YAML (consumed by +#' `fresh::frs_habitat_classify()`) +#' - `dimensions_csv` — absolute path to the dimensions CSV (source +#' of `rules_yaml` via `lnk_rules_build()`) +#' - `parameters_fresh` — data frame of per-species fresh overrides +#' - `habitat_classification` — data frame of expert-confirmed +#' habitat endpoints (or `NULL` if the manifest does not reference +#' one) +#' - `observation_exclusions` — data frame of observation IDs to +#' skip (or `NULL`) +#' - `wsg_species` — data frame of species per watershed group (or +#' `NULL`) +#' - `overrides` — named list of data frames, one per override CSV +#' listed in the manifest +#' - `pipeline` — named list of pipeline knobs from the manifest +#' (`break_order`, `cluster`, `spawn_connected`) +#' +#' @export +#' +#' @examples +#' # Load the bundled bcfishpass variant +#' cfg <- lnk_config("bcfishpass") +#' +#' # Inspect +#' cfg$name +#' cfg$dir +#' file.exists(cfg$rules_yaml) +#' head(cfg$parameters_fresh) +#' names(cfg$overrides) +#' cfg$pipeline$break_order +#' +#' \dontrun{ +#' # Custom config: point at any directory containing config.yaml +#' my_cfg <- lnk_config("/path/to/my/variant") +#' +#' # Feed into the pipeline +#' fresh::frs_habitat_classify(conn, ..., +#' rules = cfg$rules_yaml, +#' params = cfg$parameters_fresh) +#' } +lnk_config <- function(name_or_path) { + if (!is.character(name_or_path) || length(name_or_path) != 1L) { + stop("name_or_path must be a single string", call. = FALSE) + } + + dir <- .lnk_config_resolve_dir(name_or_path) + + manifest_path <- file.path(dir, "config.yaml") + if (!file.exists(manifest_path)) { + stop("config.yaml not found in ", dir, call. = FALSE) + } + manifest <- yaml::read_yaml(manifest_path) + + required_top <- c("name", "files") + missing_top <- setdiff(required_top, names(manifest)) + if (length(missing_top) > 0L) { + stop("config.yaml missing required keys: ", + paste(missing_top, collapse = ", "), call. = FALSE) + } + + files <- manifest$files %||% list() + required_files <- c("rules_yaml", "dimensions_csv", "parameters_fresh") + missing_files <- setdiff(required_files, names(files)) + if (length(missing_files) > 0L) { + stop("config.yaml `files:` missing required entries: ", + paste(missing_files, collapse = ", "), call. = FALSE) + } + + resolve <- function(rel) { + if (is.null(rel)) return(NULL) + file.path(dir, rel) + } + + resolve_required <- function(key) { + p <- resolve(files[[key]]) + if (!file.exists(p)) { + stop("config.yaml `files$", key, "` references missing file: ", p, + call. = FALSE) + } + p + } + + read_csv_optional <- function(key) { + rel <- files[[key]] + if (is.null(rel)) return(NULL) + p <- resolve(rel) + if (!file.exists(p)) { + stop("config.yaml `files$", key, "` references missing file: ", p, + call. = FALSE) + } + utils::read.csv(p, stringsAsFactors = FALSE) + } + + overrides <- lapply(manifest$overrides %||% list(), function(rel) { + p <- resolve(rel) + if (!file.exists(p)) { + stop("config.yaml `overrides:` references missing file: ", p, + call. = FALSE) + } + utils::read.csv(p, stringsAsFactors = FALSE) + }) + + out <- list( + name = manifest$name, + dir = dir, + rules_yaml = resolve_required("rules_yaml"), + dimensions_csv = resolve_required("dimensions_csv"), + parameters_fresh = utils::read.csv(resolve_required("parameters_fresh"), + stringsAsFactors = FALSE), + habitat_classification = read_csv_optional("habitat_classification"), + observation_exclusions = read_csv_optional("observation_exclusions"), + wsg_species = read_csv_optional("wsg_species"), + overrides = overrides, + pipeline = manifest$pipeline %||% list() + ) + class(out) <- c("lnk_config", "list") + out +} + +#' @export +print.lnk_config <- function(x, ...) { + cat(" ", x$name, "\n", sep = "") + cat(" dir: ", x$dir, "\n", sep = "") + cat(" rules: ", basename(x$rules_yaml), "\n", sep = "") + cat(" dimensions: ", basename(x$dimensions_csv), "\n", sep = "") + cat(" parameters_fresh: ", nrow(x$parameters_fresh), " rows\n", sep = "") + if (length(x$overrides) > 0L) { + cat(" overrides: ", paste(names(x$overrides), collapse = ", "), + "\n", sep = "") + } + if (length(x$pipeline) > 0L) { + cat(" pipeline: ", paste(names(x$pipeline), collapse = ", "), + "\n", sep = "") + } + invisible(x) +} + +.lnk_config_resolve_dir <- function(name_or_path) { + # Heuristic: inputs containing a path separator are treated as + # filesystem paths; bare identifiers are looked up as bundled names + # first. Without this, a `bcfishpass/` directory in the current + # working directory would silently shadow the bundled config. + looks_like_path <- grepl("[/\\\\]", name_or_path) + + if (looks_like_path) { + if (!dir.exists(name_or_path)) { + stop("No config directory found at path: ", name_or_path, + call. = FALSE) + } + return(normalizePath(name_or_path, mustWork = TRUE)) + } + + bundled <- system.file("extdata", "configs", name_or_path, package = "link") + if (nzchar(bundled) && dir.exists(bundled)) { + return(normalizePath(bundled, mustWork = TRUE)) + } + + stop("No config bundle found for name: ", name_or_path, + "\n Bundled configs are in: ", + system.file("extdata", "configs", package = "link"), + "\n To load a custom config, pass an absolute or relative path", + " (must contain '/').", + call. = FALSE) +} diff --git a/R/lnk_rules_build.R b/R/lnk_rules_build.R index 5b859bb..4194042 100644 --- a/R/lnk_rules_build.R +++ b/R/lnk_rules_build.R @@ -27,10 +27,11 @@ #' to = "inst/extdata/parameters_habitat_rules.yaml" #' ) #' -#' # bcfishpass v0.5.0 comparison +#' # bcfishpass comparison variant #' lnk_rules_build( -#' csv = system.file("extdata", "parameters_habitat_dimensions_bcfishpass.csv", package = "link"), -#' to = "inst/extdata/parameters_habitat_rules_bcfishpass.yaml", +#' csv = system.file("extdata", "configs", "bcfishpass", "dimensions.csv", +#' package = "link"), +#' to = "inst/extdata/configs/bcfishpass/rules.yaml", #' edge_types = "explicit" #' ) #' } diff --git a/R/utils.R b/R/utils.R index 9c0bbbd..a1c2bd5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,6 +1,10 @@ # Internal utilities — not exported # These are the building blocks every lnk_* function uses. +#' Null-coalescing operator +#' @noRd +`%||%` <- function(x, y) if (is.null(x)) y else x # nolint: object_name_linter. + #' Execute SQL statement with error context #' @noRd .lnk_db_execute <- function(conn, sql) { diff --git a/_pkgdown.yml b/_pkgdown.yml index ea07191..7a58ac7 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -4,6 +4,11 @@ template: bootstrap: 5 reference: + - title: Configs + desc: Load a pipeline config bundle (rules, parameters, overrides) + contents: + - lnk_config + - title: Thresholds desc: Configurable scoring defaults contents: diff --git a/comms/README.md b/comms/README.md index 94fae93..3c89913 100644 --- a/comms/README.md +++ b/comms/README.md @@ -8,11 +8,11 @@ When a Claude working in one repo has a question or handoff for a Claude working ``` comms/ - / - YYYYMMDD_topic.md + / + YYYYMMDD_.md ``` -`` is the name of the other Claude's primary repo (`rtj/`, `kdot/`, `compost/`, `soul/`, etc). Subdir holds the conversation channel with that party. +`` = the other Claude's primary repo name (`rtj`, `kdot`, `soul`, `compost`, `fresh`, `link`, …). `` must be **unique within a day per peer**. If starting a second thread same-day on a related subject, pick a topic specific enough to disambiguate (e.g. `install_r_pat` vs `install_r_pkg_installer` vs `install_pak_site_lib`). ## File format @@ -21,7 +21,7 @@ comms/ from: to: topic: short description -status: open # open | answered | closed +status: open # open | closed --- ## YYYY-MM-DD — @@ -33,45 +33,65 @@ Initial message. Reply. Append, don't edit previous entries. ``` -Messages append in chronological order. Each starts with a date + sender header. `status` flips to `answered` when the other party replies, `closed` when the thread is resolved. +Status is binary: `open` (awaiting response) or `closed` (last writer is done). Either party can reopen a closed thread by appending a new message and flipping back to `open`. ## Which repo hosts the thread? -The **receiver's** repo. If soul-Claude has a question for rtj-Claude, the file lives in `rtj/comms/soul/` (hosted in rtj, channel named after the sender = soul). Rtj-Claude sees it when scanning its own repo on session start. +The **receiver's** repo. If soul-Claude has a question for rtj-Claude, the file lives in `rtj/comms/soul/` (hosted in rtj, channel named after the sender = soul). -This means writing a cross-repo message requires having the other repo cloned locally. All NGE Claudes do. +Writing a cross-repo message requires having the other repo cloned locally. All NGE Claudes do. -## Discovery +## Discovery (both sides) -On session start, scan `comms/` in this repo. Any file with `status: open` and a mtime newer than your last commit touching `comms/` is mail you haven't answered yet. Surface it to the user before starting other work: _"there's an open thread from rtj about X — handle it first, or defer?"_ +Both receivers and senders scan for active threads on session start: -## Commit conventions +- **Inbound (receiver-side):** scan `/comms/*/` for files with `status: open` and mtime newer than your last comms commit. That's mail for you. +- **Outbound (sender-side):** for each peer, scan `/comms//*.md` for files with `from: , status: open`. That's your un-answered sent mail. -- **One commit per appended message.** In practice = one per session per thread — each Claude writes one turn before yielding. -- **Push immediately.** Comms is broken if unpushed — the other Claude cannot see your message until you push. Never end a session with an un-pushed comms commit. -- **Status flips bundle with the message that caused them.** `open → answered` lands in the same commit as the reply that answered it. No separate status-only commit unless you're closing a truly stale thread with no final reply. -- **Code + comms = separate commits.** Code commits do the work; the comms commit appends the thread reply and references the code commit SHAs in its body. Keeps the comms log lightweight. +The peer list (who to scan) is maintained in `soul/conventions/comms.md` and injected into each comms-enabled repo's `CLAUDE.md`. -### Commit message format +If either scan surfaces open threads, flag to the user before starting other work: _"open thread from rtj about X / un-answered thread I sent to kdot about Y — handle first or defer?"_ -``` -comms(→rtj): handoff M1 bootstrap # outbound -comms(←rtj): reply re: allowlist completeness # inbound -comms: close thread on distributed-fwapg # meta (close/rename/reopen) -``` +## Commit conventions -- `→` = outbound (opening a thread, or sending in a thread hosted elsewhere) -- `←` = inbound (replying in a thread hosted in your own repo) -- No arrow = meta operation (close, reopen, rename) -- Co-Author tag as usual. +| Action | Commit subject | +|---|---| +| Open or add to a thread hosted in a peer's repo | `comms(→peer): topic` | +| Reply to a thread hosted in your own repo | `comms(←peer): topic` | +| Meta (close, reopen, rename, README change) | `comms: topic` | + +**Arrow points to the repo whose `comms/` contains the file you committed.** `→peer` = file lives in peer's repo. `←peer` = file lives in your own repo. + +- **One commit per appended message.** In practice = one per session per thread. +- **Push immediately.** Comms is broken if unpushed — the other Claude can't see your message. +- **Status flips bundle with the message that caused them.** Same commit as the reply. +- **Code + comms = separate commits.** Code commits do the work; comms commit appends the thread reply and references the code commit SHAs in its body. ### Reopening -Rare. Append a message, flip `status: closed → open`, one commit: `comms(←rtj): reopen — `. If the follow-up is really a new topic, start a new thread file instead. +Append a message, flip `status: closed → open`, one commit with the appropriate arrow. If the follow-up is really a new topic, start a new thread file instead and cross-reference the earlier one via relative link: `[install_r_pat](./20260422_install_r_pat.md)`. + +## Worked example + +A real four-turn thread from 2026-04-22, rtj ↔ kdot: + +**Turn 1 — rtj opens a question for kdot.** Creates `kdot/comms/rtj/20260422_install_r_pat.md` with `status: open` and a `## 2026-04-22 — rtj` section containing the question. +Commit (in kdot repo): `comms(→kdot): install_r.sh — which PAT path` + +**Turn 2 — kdot replies and closes.** Appends `## 2026-04-22 — kdot` with the answer and shipped-commit refs. Flips `status: closed`. +Commit (in kdot repo): `comms(←rtj): install_r.sh — canonical PAT via .Renviron` + +**Turn 3 — rtj reopens with pushback.** Appends `## 2026-04-22 — rtj (reopening)` with the design objection. Flips `status: open`. +Commit (in kdot repo): `comms(→kdot): reopen — push back on silent R auto-upgrade` + +**Turn 4 — kdot accepts and re-closes.** Appends `## 2026-04-22 — kdot` with acceptance + shipped fix. Flips `status: closed`. +Commit (in kdot repo): `comms(←rtj): accept pushback, --upgrade flag added` + +One file. Four commits. Clean audit trail. Full worked-out example at `kdot/comms/rtj/20260422_install_r_private_repo_pat.md`. ## Etiquette - Append, don't rewrite. The thread is the audit trail. -- Close the loop: flip `status` to `answered` or `closed` when you reply. -- Keep it short. If it needs a long discussion, open a GitHub issue and link to it from the thread. -- One topic per file. Start a new thread for a new question. +- Close the loop: flip `status` to `closed` when you're done. +- Keep threads short. If a discussion needs length, open a GitHub issue and link to it from the thread. +- One topic per thread file. If a follow-up surfaces a genuinely new topic, start a new file and cross-reference via relative link. diff --git a/data-raw/build_rules.R b/data-raw/build_rules.R index 18834a8..094f6c6 100644 --- a/data-raw/build_rules.R +++ b/data-raw/build_rules.R @@ -6,15 +6,15 @@ # Usage: # source("data-raw/build_rules.R") -# NGE defaults (categories: stream, canal, wetland) +# newgraph defaults (categories: stream, canal, wetland) link::lnk_rules_build( csv = "inst/extdata/parameters_habitat_dimensions.csv", to = "inst/extdata/parameters_habitat_rules.yaml" ) -# bcfishpass v0.5.0 comparison (explicit edge_type integers) +# bcfishpass comparison variant (explicit edge_type integers) link::lnk_rules_build( - csv = "inst/extdata/parameters_habitat_dimensions_bcfishpass.csv", - to = "inst/extdata/parameters_habitat_rules_bcfishpass.yaml", + csv = "inst/extdata/configs/bcfishpass/dimensions.csv", + to = "inst/extdata/configs/bcfishpass/rules.yaml", edge_types = "explicit" ) diff --git a/data-raw/compare_adms.R b/data-raw/compare_adms.R index 0e0b4d8..ca38427 100644 --- a/data-raw/compare_adms.R +++ b/data-raw/compare_adms.R @@ -214,8 +214,8 @@ falls_spec <- list( ) # bcfishpass-matching params -rules_path <- "inst/extdata/parameters_habitat_rules_bcfishpass.yaml" -params_fresh_path <- "inst/extdata/parameters_fresh_bcfishpass.csv" +rules_path <- "inst/extdata/configs/bcfishpass/rules.yaml" +params_fresh_path <- "inst/extdata/configs/bcfishpass/parameters_fresh.csv" params_fresh_df <- read.csv(params_fresh_path, stringsAsFactors = FALSE) # Match bcfishpass: exclude subsurface flow (6010) and placeholder streams (999) diff --git a/data-raw/compare_bcfishpass.R b/data-raw/compare_bcfishpass.R index a86d0d4..135b920 100644 --- a/data-raw/compare_bcfishpass.R +++ b/data-raw/compare_bcfishpass.R @@ -21,9 +21,10 @@ wsg <- if (length(commandArgs(TRUE)) > 0) commandArgs(TRUE)[1] else "ADMS" # Sub-basin AOI for fast iteration (NULL = full WSG) aoi <- NULL # aoi <- "wscode_ltree <@ '400.431358'::ltree" # ~300 segments, fast -bcfishpass_data <- system.file("extdata", "bcfishpass", package = "link") -rules_path <- system.file("extdata", "parameters_habitat_rules_bcfishpass.yaml", package = "link") -params_fresh_path <- system.file("extdata", "parameters_fresh_bcfishpass.csv", package = "link") +cfg <- link::lnk_config("bcfishpass") +bcfishpass_data <- file.path(cfg$dir, "overrides") +rules_path <- cfg$rules_yaml +params_fresh_df <- cfg$parameters_fresh # Species from wsg_species_presence — same source bcfishpass uses wsg_spp <- read.csv(system.file("extdata", "wsg_species_presence.csv", package = "fresh"), @@ -34,10 +35,8 @@ species_compare <- toupper(spp_cols[vapply(spp_cols, function(x) identical(wsg_row[[x]], "t"), logical(1))]) # Species to model: from habitat dimensions CSV (defines which species this config covers) # Intersect with WSG species presence to get species for this specific watershed -dims <- read.csv(system.file("extdata", "parameters_habitat_dimensions_bcfishpass.csv", - package = "link"), stringsAsFactors = FALSE) +dims <- read.csv(cfg$dimensions_csv, stringsAsFactors = FALSE) species_compare <- intersect(unique(dims$species), species_compare) -params_fresh_df <- read.csv(params_fresh_path, stringsAsFactors = FALSE) message("Species for ", wsg, ": ", paste(species_compare, collapse = ", ")) # =========================================================================== diff --git a/inst/extdata/configs/bcfishpass/README.md b/inst/extdata/configs/bcfishpass/README.md new file mode 100644 index 0000000..efa8c06 --- /dev/null +++ b/inst/extdata/configs/bcfishpass/README.md @@ -0,0 +1,36 @@ +# bcfishpass config + +Reproduces bcfishpass output exactly for regression. All 4 watershed groups (ADMS, BULK, BABL, ELKR) are within 5% of bcfishpass when this config drives the pipeline. + +## What is in here + +| File | Role | +|------|------| +| `config.yaml` | Manifest — points at everything below, plus pipeline parameters | +| `rules.yaml` | Built rules YAML (consumed by `frs_habitat_classify()`). Regenerate from `dimensions.csv` via `lnk_rules_build()` | +| `dimensions.csv` | Source of `rules.yaml` — species × habitat biology encoded for bcfishpass-match | +| `parameters_fresh.csv` | Per-species fresh overrides (spawn_gradient_min, observation_threshold, etc.) | +| `overrides/` | Synced from `smnorris/bcfishpass/data/` — expert-curated corrections + confirmed habitat + observation exclusions | + +## What NOT to do here + +- Do not hand-edit `rules.yaml` — edit `dimensions.csv` and run `lnk_rules_build()` +- Do not hand-edit files under `overrides/` — they are synced from bcfishpass upstream; file your correction there instead (see [smnorris/bcfishpass](https://github.com/smnorris/bcfishpass)) + +## Regenerating rules.yaml + +```r +link::lnk_rules_build( + csv = system.file("extdata", "configs", "bcfishpass", "dimensions.csv", + package = "link"), + to = "inst/extdata/configs/bcfishpass/rules.yaml", + edge_types = "explicit" +) +``` + +See `data-raw/build_rules.R` for the canonical invocation. + +## See also + +- `research/bcfishpass_comparison.md` — pipeline DAG and per-WSG results +- [link#37](https://github.com/NewGraphEnvironment/link/issues/37) — `lnk_config` loader that consumes `config.yaml` diff --git a/inst/extdata/configs/bcfishpass/config.yaml b/inst/extdata/configs/bcfishpass/config.yaml new file mode 100644 index 0000000..dd10125 --- /dev/null +++ b/inst/extdata/configs/bcfishpass/config.yaml @@ -0,0 +1,31 @@ +name: bcfishpass +description: | + Validation config — reproduces bcfishpass output exactly for regression. + Used as the reference against which newgraph defaults and other pipeline + variants are compared. Do not modify without running the full comparison + suite across ADMS, BULK, BABL, ELKR. +files: + rules_yaml: rules.yaml + dimensions_csv: dimensions.csv + parameters_fresh: parameters_fresh.csv + habitat_classification: overrides/user_habitat_classification.csv + observation_exclusions: overrides/observation_exclusions.csv + wsg_species: overrides/wsg_species_presence.csv +overrides: + modelled_fixes: overrides/user_modelled_crossing_fixes.csv + pscis_barrier_status: overrides/user_pscis_barrier_status.csv + pscis_xref: overrides/pscis_modelledcrossings_streams_xref.csv + barriers_definite: overrides/user_barriers_definite.csv + barriers_definite_control: overrides/user_barriers_definite_control.csv + crossings_misc: overrides/user_crossings_misc.csv +pipeline: + break_order: + - observations + - gradient_minimal + - habitat_endpoints + - crossings + cluster: + three_phase: true + spawn_connected: + SK: + gradient_max: 0.05 diff --git a/inst/extdata/parameters_habitat_dimensions_bcfishpass.csv b/inst/extdata/configs/bcfishpass/dimensions.csv similarity index 100% rename from inst/extdata/parameters_habitat_dimensions_bcfishpass.csv rename to inst/extdata/configs/bcfishpass/dimensions.csv diff --git a/inst/extdata/bcfishpass/cabd_additions.csv b/inst/extdata/configs/bcfishpass/overrides/cabd_additions.csv similarity index 100% rename from inst/extdata/bcfishpass/cabd_additions.csv rename to inst/extdata/configs/bcfishpass/overrides/cabd_additions.csv diff --git a/inst/extdata/bcfishpass/cabd_blkey_xref.csv b/inst/extdata/configs/bcfishpass/overrides/cabd_blkey_xref.csv similarity index 100% rename from inst/extdata/bcfishpass/cabd_blkey_xref.csv rename to inst/extdata/configs/bcfishpass/overrides/cabd_blkey_xref.csv diff --git a/inst/extdata/bcfishpass/cabd_exclusions.csv b/inst/extdata/configs/bcfishpass/overrides/cabd_exclusions.csv similarity index 100% rename from inst/extdata/bcfishpass/cabd_exclusions.csv rename to inst/extdata/configs/bcfishpass/overrides/cabd_exclusions.csv diff --git a/inst/extdata/bcfishpass/cabd_passability_status_updates.csv b/inst/extdata/configs/bcfishpass/overrides/cabd_passability_status_updates.csv similarity index 100% rename from inst/extdata/bcfishpass/cabd_passability_status_updates.csv rename to inst/extdata/configs/bcfishpass/overrides/cabd_passability_status_updates.csv diff --git a/inst/extdata/bcfishpass/dfo_known_sockeye_lakes.csv b/inst/extdata/configs/bcfishpass/overrides/dfo_known_sockeye_lakes.csv similarity index 100% rename from inst/extdata/bcfishpass/dfo_known_sockeye_lakes.csv rename to inst/extdata/configs/bcfishpass/overrides/dfo_known_sockeye_lakes.csv diff --git a/inst/extdata/bcfishpass/observation_exclusions.csv b/inst/extdata/configs/bcfishpass/overrides/observation_exclusions.csv similarity index 100% rename from inst/extdata/bcfishpass/observation_exclusions.csv rename to inst/extdata/configs/bcfishpass/overrides/observation_exclusions.csv diff --git a/inst/extdata/bcfishpass/pscis_modelledcrossings_streams_xref.csv b/inst/extdata/configs/bcfishpass/overrides/pscis_modelledcrossings_streams_xref.csv similarity index 100% rename from inst/extdata/bcfishpass/pscis_modelledcrossings_streams_xref.csv rename to inst/extdata/configs/bcfishpass/overrides/pscis_modelledcrossings_streams_xref.csv diff --git a/inst/extdata/bcfishpass/user_barriers_definite.csv b/inst/extdata/configs/bcfishpass/overrides/user_barriers_definite.csv similarity index 100% rename from inst/extdata/bcfishpass/user_barriers_definite.csv rename to inst/extdata/configs/bcfishpass/overrides/user_barriers_definite.csv diff --git a/inst/extdata/bcfishpass/user_barriers_definite_control.csv b/inst/extdata/configs/bcfishpass/overrides/user_barriers_definite_control.csv similarity index 100% rename from inst/extdata/bcfishpass/user_barriers_definite_control.csv rename to inst/extdata/configs/bcfishpass/overrides/user_barriers_definite_control.csv diff --git a/inst/extdata/bcfishpass/user_crossings_misc.csv b/inst/extdata/configs/bcfishpass/overrides/user_crossings_misc.csv similarity index 100% rename from inst/extdata/bcfishpass/user_crossings_misc.csv rename to inst/extdata/configs/bcfishpass/overrides/user_crossings_misc.csv diff --git a/inst/extdata/bcfishpass/user_habitat_classification.csv b/inst/extdata/configs/bcfishpass/overrides/user_habitat_classification.csv similarity index 100% rename from inst/extdata/bcfishpass/user_habitat_classification.csv rename to inst/extdata/configs/bcfishpass/overrides/user_habitat_classification.csv diff --git a/inst/extdata/bcfishpass/user_modelled_crossing_fixes.csv b/inst/extdata/configs/bcfishpass/overrides/user_modelled_crossing_fixes.csv similarity index 100% rename from inst/extdata/bcfishpass/user_modelled_crossing_fixes.csv rename to inst/extdata/configs/bcfishpass/overrides/user_modelled_crossing_fixes.csv diff --git a/inst/extdata/bcfishpass/user_modelled_crossing_fixes_20240825.csv b/inst/extdata/configs/bcfishpass/overrides/user_modelled_crossing_fixes_20240825.csv similarity index 100% rename from inst/extdata/bcfishpass/user_modelled_crossing_fixes_20240825.csv rename to inst/extdata/configs/bcfishpass/overrides/user_modelled_crossing_fixes_20240825.csv diff --git a/inst/extdata/bcfishpass/user_pscis_barrier_status.csv b/inst/extdata/configs/bcfishpass/overrides/user_pscis_barrier_status.csv similarity index 100% rename from inst/extdata/bcfishpass/user_pscis_barrier_status.csv rename to inst/extdata/configs/bcfishpass/overrides/user_pscis_barrier_status.csv diff --git a/inst/extdata/bcfishpass/wcrp_watersheds.csv b/inst/extdata/configs/bcfishpass/overrides/wcrp_watersheds.csv similarity index 100% rename from inst/extdata/bcfishpass/wcrp_watersheds.csv rename to inst/extdata/configs/bcfishpass/overrides/wcrp_watersheds.csv diff --git a/inst/extdata/bcfishpass/wsg_species_presence.csv b/inst/extdata/configs/bcfishpass/overrides/wsg_species_presence.csv similarity index 100% rename from inst/extdata/bcfishpass/wsg_species_presence.csv rename to inst/extdata/configs/bcfishpass/overrides/wsg_species_presence.csv diff --git a/inst/extdata/parameters_fresh_bcfishpass.csv b/inst/extdata/configs/bcfishpass/parameters_fresh.csv similarity index 100% rename from inst/extdata/parameters_fresh_bcfishpass.csv rename to inst/extdata/configs/bcfishpass/parameters_fresh.csv diff --git a/inst/extdata/parameters_habitat_rules_bcfishpass.yaml b/inst/extdata/configs/bcfishpass/rules.yaml similarity index 100% rename from inst/extdata/parameters_habitat_rules_bcfishpass.yaml rename to inst/extdata/configs/bcfishpass/rules.yaml diff --git a/man/lnk_barrier_overrides.Rd b/man/lnk_barrier_overrides.Rd index fecbb33..e585691 100644 --- a/man/lnk_barrier_overrides.Rd +++ b/man/lnk_barrier_overrides.Rd @@ -46,7 +46,7 @@ be overridden.} \item{params}{Data frame with per-species parameters. Must have columns: \code{species_code}, \code{observation_threshold}, \code{observation_date_min}, \code{observation_buffer_m}, \code{observation_species}. See -\code{parameters_fresh_bcfishpass.csv} for format.} +\code{configs/bcfishpass/parameters_fresh.csv} for format.} \item{cols_index}{Character vector. Column names to index on the barriers table for \code{fwa_upstream()} performance. Indexes are created @@ -74,8 +74,8 @@ output as a simple skip list via \code{barrier_overrides}. \dontrun{ conn <- lnk_db_conn() -params <- read.csv(system.file("extdata", - "parameters_fresh_bcfishpass.csv", package = "link")) +params <- read.csv(system.file("extdata", "configs", "bcfishpass", + "parameters_fresh.csv", package = "link")) lnk_barrier_overrides(conn, barriers = "fresh.streams_breaks", diff --git a/man/lnk_config.Rd b/man/lnk_config.Rd new file mode 100644 index 0000000..ac49c61 --- /dev/null +++ b/man/lnk_config.Rd @@ -0,0 +1,76 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lnk_config.R +\name{lnk_config} +\alias{lnk_config} +\title{Load a Pipeline Config Bundle} +\usage{ +lnk_config(name_or_path) +} +\arguments{ +\item{name_or_path}{Character. Either a bundled config name +(\code{"bcfishpass"}, \code{"default"}) or an absolute path to a config +directory. Bundled names resolve to \code{system.file("extdata", "configs", name, package = "link")}.} +} +\value{ +An \code{lnk_config} S3 list with these slots: +\itemize{ +\item \code{name} — config name (from \code{name_or_path} or the manifest) +\item \code{dir} — absolute path to the config directory +\item \code{rules_yaml} — absolute path to the rules YAML (consumed by +\code{fresh::frs_habitat_classify()}) +\item \code{dimensions_csv} — absolute path to the dimensions CSV (source +of \code{rules_yaml} via \code{lnk_rules_build()}) +\item \code{parameters_fresh} — data frame of per-species fresh overrides +\item \code{habitat_classification} — data frame of expert-confirmed +habitat endpoints (or \code{NULL} if the manifest does not reference +one) +\item \code{observation_exclusions} — data frame of observation IDs to +skip (or \code{NULL}) +\item \code{wsg_species} — data frame of species per watershed group (or +\code{NULL}) +\item \code{overrides} — named list of data frames, one per override CSV +listed in the manifest +\item \code{pipeline} — named list of pipeline knobs from the manifest +(\code{break_order}, \code{cluster}, \code{spawn_connected}) +} +} +\description{ +Reads a config bundle manifest (\code{config.yaml}) and returns a single +list object containing everything a pipeline needs to classify +habitat for a given interpretation variant — rules YAML, parameters, +overrides, observation exclusions, habitat confirmations, and +pipeline knobs (break order, cluster params, spawn_connected rules). +} +\details{ +A config bundle is a directory under \verb{inst/extdata/configs//} +(for bundled variants) or an arbitrary directory path (for custom +variants) containing \code{config.yaml} plus the files the manifest +references. All file paths in the manifest are resolved relative to +the bundle directory. + +The returned list is the single object passed around the pipeline +(e.g. into \verb{_targets.R}), so pipeline variants become a config +authoring exercise, not a code fork. +} +\examples{ +# Load the bundled bcfishpass variant +cfg <- lnk_config("bcfishpass") + +# Inspect +cfg$name +cfg$dir +file.exists(cfg$rules_yaml) +head(cfg$parameters_fresh) +names(cfg$overrides) +cfg$pipeline$break_order + +\dontrun{ +# Custom config: point at any directory containing config.yaml +my_cfg <- lnk_config("/path/to/my/variant") + +# Feed into the pipeline +fresh::frs_habitat_classify(conn, ..., + rules = cfg$rules_yaml, + params = cfg$parameters_fresh) +} +} diff --git a/man/lnk_rules_build.Rd b/man/lnk_rules_build.Rd index a2fa550..895672b 100644 --- a/man/lnk_rules_build.Rd +++ b/man/lnk_rules_build.Rd @@ -45,10 +45,11 @@ lnk_rules_build( to = "inst/extdata/parameters_habitat_rules.yaml" ) -# bcfishpass v0.5.0 comparison +# bcfishpass comparison variant lnk_rules_build( - csv = system.file("extdata", "parameters_habitat_dimensions_bcfishpass.csv", package = "link"), - to = "inst/extdata/parameters_habitat_rules_bcfishpass.yaml", + csv = system.file("extdata", "configs", "bcfishpass", "dimensions.csv", + package = "link"), + to = "inst/extdata/configs/bcfishpass/rules.yaml", edge_types = "explicit" ) } diff --git a/planning/active/findings.md b/planning/active/findings.md index cfdb695..f803efd 100644 --- a/planning/active/findings.md +++ b/planning/active/findings.md @@ -1,183 +1,89 @@ -# Findings - -## Confirmed: what matches bcfishpass - -### Base segments -`localcode_ltree IS NOT NULL`, `edge_type != 6010`, `wscode_ltree <@ '999' IS FALSE`. 10,458 segments on ADMS — exact match. - -### Gradient barrier detection -50,063 barriers across 8 classes, matching to 3/50,063. Island grouping, vertex extraction, 100m lookahead — identical algorithm. - -### Non-minimal barrier removal -`fwa_upstream` self-join deletes barriers with another barrier downstream. ADMS: 27,443 → 677. Without this: +149% segments. - -### Sequential breaking -Observations → gradient barriers → habitat endpoints → crossings. GENERATED columns recompute. 1m guard. Matches bcfishpass `break_streams()`. - -### Access model -bcfishpass uses ONLY natural barriers for access (`barriers_bt_dnstr`, `barriers_ch_cm_co_pk_sk_dnstr`, etc.). Anthropogenic barriers (crossings) are recorded but do NOT block access. Our `label_block = "blocked"` is correct. Tested `label_block = c("blocked", "barrier", "potential")` — caused -52% across all species. - -### Channel width -Synced from tunnel (75,736 field measurements). Tightened results ~0.5%. - -### Observation filtering -Species filtered by `wsg_species_presence.csv`. CT remaps (CCT, ACT, CT/RB). 179 vs 178 unique positions on ADMS. - -### Habitat endpoints -Both `downstream_route_measure` and `upstream_route_measure` as break positions. 143 vs 145 on ADMS. - -### SK rearing -Exact match on all WSGs (0.0%). - -## Confirmed: what doesn't match - -### ST spawning/rearing -22%/-25% (BABL) — ROOT CAUSE FOUND -Segment-level comparison: 223 bcfishpass-only ST spawning segments. 382 of 383 overlapping segments are **inaccessible in our system**. The falls at BLK 360886207 DRM 4127 blocks them. Overridden for BT/CH/CM/CO/PK/SK but NOT for ST. - -Cause: `observation_species` in `parameters_fresh_bcfishpass.csv` was `"ST"` (only ST obs counted). bcfishpass `model_access_st.sql` counts ALL salmon + steelhead: `'CH','CM','CO','PK','SK','ST'` with threshold >= 5 post-1990. Zero ST obs upstream but salmon obs exist → barrier stays for ST in our system but gets removed in bcfishpass. - -Fix: changed `observation_species` for ST from `"ST"` to `"CH;CM;CO;PK;SK;ST"`. - -**Result: ST spawning -22.0% → +3.8%, ST rearing -25.4% → +2.4%.** One CSV cell. - -### WCT observation override missing -bcfishpass `model_access_wct.sql` uses WCT-only observations with threshold = 1 (any WCT obs removes barrier). Our CSV had `observation_threshold = NA` (no override). Fixed to threshold = 1, species = WCT. - -**Result: WCT spawning -3.4% → +4.0%, WCT rearing -4.2% → +3.0%.** 685 barriers overridden on ELKR. - -### SK spawning -22.6% (BULK, after ST/WCT fix) -Segment-level comparison: 13 bcfishpass-only segments (7.26 km), 9 ours-only (2.16 km). All bcfishpass-only segments are accessible in our system — it's not access. They're on 3 BLKs near rearing lakes (edge_type 1050/1200 wetland/lake + 1000 stream), low gradient, good channel width. The downstream trace from rearing lakes in bcfishpass reaches these segments but our `frs_connected_spawning` doesn't. - -This is a boundary effect at the 3km distance cap. The bcfishpass-only segments on BLK 360846413 are 3.0-3.4 km from the rearing lake outlet (our outlet DRM 9718, segments start at DRM 6362). Different segment boundaries resolve the cumulative distance slightly differently — some segments fall just inside 3km in bcfishpass but just outside in our system. - -**Root cause found and proven:** `.frs_connected_spawning` line 1385 picks lake outlets with `ORDER BY s2.downstream_route_measure ASC`. bcfishpass uses `ORDER BY s.waterbody_key, s.wscode_ltree, s.localcode_ltree, s.downstream_route_measure`. DRM ordering picks an arbitrary segment on any BLK with the smallest measure. wscode ordering picks the actual network-topological outlet. - -Example: waterbody_key 329064462 spans 10 BLKs. Fresh picks BLK 360504780 DRM 0 (wrong tributary). bcfishpass picks BLK 360846413 DRM 9718 (actual outlet). Downstream trace from the wrong outlet misses 7+ km of spawning habitat. - -**Outlet ordering fix (PR #152):** Corrected and merged in fresh 0.13.5. The outlet ordering and partition are now correct. - -**Remaining -9.6% (BULK):** 9 segments in the downstream trace that don't meet spawning classification thresholds (3 are edge_type 1050 wetland, 3 have cw < 2.0, 1 has gradient > 0.025). The subtractive filter correctly excludes them. - -bcfishpass applies spawning thresholds INSIDE the downstream trace CTE (`load_habitat_linear_sk.sql` lines 100-114 — checks gradient, channel_width, access within the CASE statement). Fresh applies thresholds via `frs_habitat_classify` BEFORE connected_waterbody runs, and the subtractive filter only KEEPS segments already flagged as spawning. - -**Proven additive fix:** After subtractive step, ADD spawning for segments in the downstream trace that meet permissive connected rules (gradient <= 0.05, no cw min, no edge_type filter, accessible). Result: 63 → 71 segments, 22.04 → 24.22 km (bcfishpass 24.38 km). From -9.6% to -0.7%. - -The `spawn_connected` rules in the YAML define the permissive thresholds for the trace zone. Direction = downstream. The additive step applies these after the standard subtractive filter. Generalizable to any species + waterbody type. - -The ST/WCT observation_species fix improved SK from -39.9% to -22.6% by opening access at barriers that previously blocked salmon-accessible habitat. - -### BT rearing +5.4% to +7.1% (BULK, ELKR) -Slightly over. Stream order exception added more segments (+7.1% on ELKR). May be from segment boundary differences or classification predicates we're applying that bcfishpass doesn't. - -### WCT -3.4% spawning, -4.2% rearing (ELKR) -Stream order exception closed 1 point on rearing. Same unknown cause as ST but less severe. - -## Tested and eliminated as ST/WCT cause - -| Hypothesis | Test | Result | -|-----------|------|--------| -| Per-model non-minimal removal | Built bt/salmon/st/wct barrier tables, removed non-minimal within each | No change on ST/WCT | -| Crossings blocking access | `label_block = c("blocked", "barrier", "potential")` | -52% all species, wrong | -| Stream order rearing bypass | Post-classification UPDATE for stream_order=1, parent>=5 | +3 points ST rearing, not main cause | -| Gradient/channel_width thresholds | Compared params_obj vs tunnel parameters_habitat_thresholds | Exact match | -| Access gating mechanism | Compared access counts | Close (12,728 vs 11,673 ST accessible on BABL) | - -## Bugs found - -### bcfishpass access_st checks SK instead of ST -`load_streams_access.sql` line 120: `'SK' = any(obsrvtn_species_codes_upstr)` should be `'ST'`. Copy-paste from SK block. Filed NewGraphEnvironment/bcfishpass#9, referenced in link#33. Does not affect access blocking, only the access code label (1 vs 2). - -## Methodology lessons - -### Segment-level comparison finds root causes in minutes -Guessing from SQL differences wasted hours — tested per-model non-minimal, label_block, stream order exception, three-phase rearing. None were the cause. Dumping bcfishpass segments to a local table, diffing by spatial overlap, and checking accessibility on mismatches found the real cause (wrong observation_species) in one query chain. Do this first next time. - -### One CSV cell can account for -22% -The ST gap was entirely from `observation_species = "ST"` instead of `"CH;CM;CO;PK;SK;ST"` in `parameters_fresh_bcfishpass.csv`. Always verify per-species params against the actual bcfishpass SQL before guessing at architectural causes. - -### Read the per-model SQL, don't assume symmetry -Each bcfishpass model_access_*.sql has different observation species lists and thresholds. BT counts all salmon+steelhead (threshold 1). Salmon counts salmon only (threshold 5, post-1990). ST counts all salmon+steelhead (threshold 5, post-1990). WCT counts WCT only (threshold 1, any date). Don't assume they're all the same. - -### Network topology ordering matters for spatial queries -`ORDER BY downstream_route_measure` picks an arbitrary segment with the smallest measure on any BLK. `ORDER BY wscode_ltree, localcode_ltree, downstream_route_measure` picks the actual network-topological position. For lake outlets spanning multiple BLKs, these give completely different results. DRM is a measure within a BLK — it says nothing about where that BLK sits in the network. wscode is the network position. Any DISTINCT ON query that needs "the most downstream point" must use wscode ordering, not DRM. - -### Prove before filing -We guessed three times at the SK cause (per-model non-minimal, label_block, boundary effect) and filed/commented on fresh#147 prematurely. Each guess wasted time and muddied the issue. The segment comparison found the real cause (wrong outlet ordering) in minutes. The proof query (24.41 vs 24.38 km) took one SQL statement. Always prove with data before attributing a cause. - -## Unverified hypotheses - -### Rearing waterbody filter OR vs AND -bcfishpass rearing: `wb.waterbody_type = 'R' OR (wb.waterbody_type IS NULL OR edge_type IN (...))` — permissive, includes NULL waterbody. -bcfishpass spawning: `wb.waterbody_type = 'R' OR (wb.waterbody_type IS NULL AND edge_type IN (...))` — restrictive. -Not verified whether our rules produce the same SQL. Could add rearing segments on non-standard edge types. - -### Three-phase rearing -bcfishpass rearing runs 3 phases for BT/CH/CO/ST/WCT: -1. On spawning streams (spawning AND rearing thresholds, no connectivity) -2. Downstream of spawning (cluster + fwa_upstream trace) -3. Upstream of spawning (cluster + fwa_downstream trace, 10km, 5% gradient bridge) - -Our frs_cluster does a single pass removing disconnected rearing. Not verified whether the three-phase approach classifies more rearing. - -### Stream order exception across species -Applied to BT, CH, CO, ST, WCT in bcfishpass. Tested as post-classification UPDATE. Adds segments but not enough to explain gaps. CM, PK, SK do not have the exception. CM/PK have spawning only (no rearing model). - -## Results summary (latest: ST/WCT obs fix + stream order exception) - -BULK and ELKR updated with ST/WCT observation_species fix. SK spawning proven +0.1% with outlet ordering fix (not yet in fresh). - -| Species | Metric | ADMS | BULK | BABL | ELKR | -|---------|--------|------|------|------|------| -| BT | spawn | +1.8% | +3.1% | +4.1% | +3.4% | -| BT | rear | -0.3% | +1.3% | +0.4% | +1.7% | -| CH | spawn | +0.5% | +1.9% | +3.8% | — | -| CH | rear | +2.1% | +6.0% | +6.1% | — | -| CO | spawn | +1.6% | +3.1% | +4.8% | — | -| CO | rear | -1.0% | +0.9% | +0.0% | — | -| PK | spawn | — | +2.3% | — | — | -| SK | spawn | +2.6% | -22.6% (proven +0.1% with fix) | -13.6% | — | -| SK | rear | +0.0% | +0.0% | +0.0% | — | -| ST | spawn | — | +1.9% | +3.8% | — | -| ST | rear | — | +3.6% | +2.4% | — | -| WCT | spawn | — | — | — | +4.0% | -| WCT | rear | — | — | — | +3.0% | - -### BT rearing +7% — ROOT CAUSE FOUND -Segment comparison: 646 ours-only segments (224 km) vs 36 bcfishpass-only (8.6 km). We over-classify. BT `cluster_rearing = FALSE` in our params — no rearing connectivity filter. bcfishpass applies three-phase rearing connectivity to BT (on-spawning, downstream, upstream). 224 km of disconnected rearing included in our results. - -Fix: set BT `cluster_rearing = TRUE`. **Result: BT rearing BULK +7.0% → +1.3%, ELKR +7.1% → +1.7%, BABL +2.8% → +0.4%, ADMS +2.6% → -0.3%.** All within 5%. - -### CH rearing +6.0% BULK, +6.1% BABL — frs_cluster upstream boolean too permissive -Segment comparison: 442 ours-only (103 km) vs 26 bcfishpass-only (8.5 km). Breakdown: -- 14.6 km from stream order exception (we add rearing, bcfishpass doesn't at this stage) -- 46.7 km on BLKs with CH spawning — should be "on-spawning rearing" in bcfishpass but isn't classified -- 41.6 km on BLKs WITHOUT CH spawning — our frs_cluster considers connected, bcfishpass three-phase doesn't reach - -Our frs_cluster connects rearing to spawning via network proximity (upstream/downstream within bridge_gradient + bridge_distance). bcfishpass three-phase is more restrictive: on-spawning only (Phase 1), then downstream clusters connected to spawning (Phase 2), then upstream clusters within 10km + 5% gradient bridge (Phase 3). - -**Root cause confirmed:** frs_cluster checks network proximity (is rearing upstream/downstream of spawning within bridge_distance?). bcfishpass Phase 3 traces downstream from each rearing cluster and STOPS at the first >5% gradient — rearing above a steep section doesn't count even if spawning exists below. - -97 of 180 same-BLK CH ours-only segments have a >5% gradient between them and the nearest downstream spawning. bcfishpass excludes them; frs_cluster includes them. - -This is a real difference in how connectivity is evaluated. frs_cluster checks "is there spawning within range?" bcfishpass checks "can fish get from this rearing to the spawning without crossing a gradient barrier?" The 5% gradient bridge in frs_cluster is applied to the cluster boundary, not to every segment along the path. - -This applies to ALL species with cluster_rearing. The excess is from the upstream boolean check keeping rearing that the downstream path gradient would reject. - -Tested `direction = "downstream"`: -93% BT rearing. Too restrictive — loses bcfishpass Phase 2 (rearing downstream of spawning). `direction = "both"` is correct for coverage but the upstream boolean is too permissive. - -bcfishpass Phase 3 traces DOWNSTREAM from each rearing cluster to find spawning — linear, path gradient works. Our `frs_cluster_downstream` does the same. But `direction = "both"` combines it with upstream boolean, which overrides downstream rejections. - -Not a CSV fix. Needs architectural change: `_both` should require downstream path gradient pass, not OR with upstream boolean. Or: fresh implements bcfishpass-style three-phase rearing (on-spawning, downstream-of-spawning, upstream-of-spawning-traced-downstream). Filed as fresh#153. - -Accepted at +6% for now. Only affects CH rearing consistently. Other species within 5%. - -### fresh#153 regression: BT rearing -87.9% on ADMS -The upstream path gradient check in frs_cluster is fundamentally broken. `FWA_Upstream` returns ALL upstream segments (4,770 for one cluster) including tributaries. `row_number` ordered by wscode sorts tributaries before mainstem continuation. A >5% gradient on a distant tributary gets a lower row_number than the adjacent spawning segment on the mainstem, blocking the connection. - -Example: segment 15319 (rearing) has spawning at segment 15320 — adjacent, 123m, zero gradient. But `FWA_Upstream` finds 3,217 steep segments across all upstream tributaries. The wscode ordering places some tributary segments between the rearing and the mainstem spawning. - -bcfishpass only traces DOWNSTREAM for path gradient (linear, no branching). Upstream uses simple boolean `FWA_Upstream` without path constraints. The upstream check can't do path gradient because the network branches upstream. - -Fix: upstream check should use boolean `FWA_Upstream` (exists/not exists) without path gradient. Only the downstream check can apply segment-by-segment gradient constraints because the trace is linear. - -### CO spawning: +4.8% BABL — borderline +# Findings: lnk_config (#37) + +## Current state of config data + +Scattered across the repo: + +- `inst/extdata/parameters_habitat_rules_bcfishpass.yaml` — built rules YAML +- `inst/extdata/parameters_habitat_dimensions_bcfishpass.csv` — source of rules YAML +- `inst/extdata/parameters_fresh_bcfishpass.csv` — spawn_gradient_min etc. overrides +- `inst/extdata/wsg_species_presence.csv` — species per watershed group +- `inst/extdata/observation_exclusions.csv` — obs IDs to skip +- Override CSVs — referenced from `data-raw/compare_bcfishpass.R` but live in bcfishpass/data (external) +- Break order, cluster params, spawn_connected rules — hardcoded in `compare_bcfishpass.R` + +## Decision: directory-per-config with manifest + +Each variant = `inst/extdata/configs//` with `config.yaml` manifest pointing at all files. + +Benefits: +- Portable — user can drop a directory anywhere, pass absolute path to `lnk_config()` +- One place to look — no more hunting across `inst/extdata/` roots +- Per-variant README — each bundle documents its intent + +## Return shape (from issue #37) + +```r +list( + name = "bcfishpass", + dir = "", + rules_yaml = "", + dimensions_csv = "", + parameters_fresh = tibble(...), + wsg_species = tibble(...), + observation_excl = tibble(...), + overrides = list( + modelled_fixes = tibble(...), + pscis_barrier_status = tibble(...), + pscis_xref = tibble(...), + barriers_definite = tibble(...) + ), + break_order = c("observations", "gradient_minimal", "habitat_endpoints", "crossings"), + cluster_params = list(three_phase = TRUE, distance_cap = ...), + spawn_connected = list(SK = list(gradient_max = 0.05, ...)) +) +``` + +Keys: `name`, `dir`, `rules_yaml`, `dimensions_csv` stay as paths (rules YAML is consumed by `frs_habitat_classify()` as a path, no reason to parse it here). Other CSVs load eagerly into tibbles. + +## Manifest schema (first draft) + +```yaml +# inst/extdata/configs/bcfishpass/config.yaml +name: bcfishpass +description: | + Validation config — reproduces bcfishpass output exactly for regression. + Do not modify without running the full comparison suite. +files: + rules_yaml: rules.yaml + dimensions_csv: dimensions.csv + parameters_fresh: parameters_fresh.csv + wsg_species: wsg_species_presence.csv + observation_exclusions: observation_exclusions.csv +overrides: + modelled_fixes: overrides/user_modelled_crossing_fixes.csv + pscis_barrier_status: overrides/user_pscis_barrier_status.csv + pscis_xref: overrides/pscis_modelledcrossings_streams_xref.csv + barriers_definite: overrides/user_barriers_definite.csv +pipeline: + break_order: [observations, gradient_minimal, habitat_endpoints, crossings] + cluster: + three_phase: true + spawn_connected: + SK: + gradient_max: 0.05 + distance_max: ... +``` + +All file paths in the manifest are relative to the config dir. + +## Not in scope for #37 + +- Actually running the pipeline (that's `_targets.R`, link#38) +- Populating `default/` with real departures from bcfishpass (intermittent streams etc. — #19, #20, #21) +- Per-WSG overrides (AOI-agnostic; pipeline handles per-WSG) + +## Cross-refs + +- rtj/docs/distributed-fwapg.md — targets will use the `$schema_working` convention `working_`; `lnk_config` is AOI-agnostic, schema naming is the pipeline's job +- `fresh` package — consumers of `lnk_config` (`frs_habitat_classify`, etc.) are already wired for the file paths/tibbles this returns diff --git a/planning/active/progress.md b/planning/active/progress.md index ff2d8c8..5be4729 100644 --- a/planning/active/progress.md +++ b/planning/active/progress.md @@ -1,26 +1,12 @@ # Progress -## Session 2026-04-13 (continued) -- Tested per-model non-minimal on BABL, ELKR, ADMS — no effect on ST/WCT -- Tested label_block with crossings — -52% regression, confirmed crossings don't block in bcfishpass -- Read load_streams_access.sql: access uses ONLY natural barriers, NOT anthropogenic -- Found bcfishpass access_st bug: checks 'SK' instead of 'ST' (filed bcfishpass#9, link#33) -- Read load_habitat_linear_st.sql line by line -- Found stream order exception: tested, +3 points on ST rearing, not the main cause -- Found rearing waterbody filter OR vs AND — not verified as cause -- Found three-phase rearing pattern — not verified as cause -- Read all 8 load_habitat_linear_*.sql files for cross-species comparison -- **Key lesson: stop guessing from SQL differences, compare segments directly against tunnel** -- Commits: 88e5af4, 67b67b6, 7b5e888 - -## Session 2026-04-14 -- Segment-level ST comparison: loaded bcfishpass_ref.st_babl + diff tables for QGIS -- Found 382/383 bcfishpass-only segments are inaccessible in our system -- Traced to falls at BLK 360886207 not overridden for ST -- Root cause: observation_species = "ST" should be "CH;CM;CO;PK;SK;ST" -- Fix: one CSV cell. ST spawning -22% → +3.8%, rearing -25% → +2.4% -- Also fixed WCT: added observation_threshold=1, species=WCT (not yet tested) -- **Key lesson: segment-level comparison finds root causes in minutes, guessing from SQL wastes hours** -- SK spawning: traced to wrong lake outlet ordering in frs_connected_spawning line 1385 - ORDER BY downstream_route_measure picks wrong BLK. bcfishpass uses wscode_ltree ordering. - Proven: corrected query gives 24.41 km vs bcfishpass 24.38 km (+0.1%) +## Session 2026-04-22 +- Archived prior PWF (bcfishpass comparison — all 4 WSGs within 5%, shipped fresh 0.13.5–0.13.8) +- fresh#160 shipped: `frs_barriers_minimal()` extracts non-minimal removal into fresh 0.14.0 (merged) +- Filed link#37 (lnk_config) + link#38 (_targets.R pipeline); closed link#36 (targets supersedes CSV DAG) +- Starting link#37: config bundle loader +- Phase 1 done: relocated files under `inst/extdata/configs/bcfishpass/` (rules.yaml, dimensions.csv, parameters_fresh.csv, overrides/), wrote config.yaml manifest + README, updated refs in R/ scripts, data-raw/, CLAUDE.md +- Phase 2/3 done: `lnk_config()` loader with validation, S3 print method, 9 tests (identifier, missing manifest, missing keys, missing files, custom path, bcfishpass bundle, print, override missing). All 146 link tests passing, lint clean. Added `yaml` to Imports, moved `%||%` to utils.R. pkgdown reference updated, NEWS entry, bumped to 0.2.0. +- Phase 5 done: compare_bcfishpass.R now uses `lnk_config("bcfishpass")` for rules_yaml, parameters_fresh, and dimensions paths. Parse-check passes. Full BULK run deferred — change is path-source only, no structural edits. +- Code-check round 1 surfaced one real bug (resolver foot-gun: bare names could be shadowed by a local dir in CWD). Fixed in `.lnk_config_resolve_dir` (require `/` for path inputs), regression test added. 28 lnk_config tests, 149 link tests passing. +- Next: PR with SRED tag diff --git a/planning/active/task_plan.md b/planning/active/task_plan.md index 9993735..5d4dd35 100644 --- a/planning/active/task_plan.md +++ b/planning/active/task_plan.md @@ -1,41 +1,57 @@ -# Task Plan: ST/WCT classification gap (#31) +# Task Plan: lnk_config() config bundle loader (#37) ## Goal -Identify and close the ST -22% spawning / -25% rearing gap on BABL, WCT -4% on ELKR. - -## Status -- Per-model non-minimal: tested, no effect on ST/WCT -- label_block with crossings: tested, -52% regression (crossings don't block in bcfishpass) -- Stream order exception: tested, closed 3 points on ST rearing (-28% → -25%) -- Root cause NOT confirmed. Hypotheses tested and eliminated. Need segment-level comparison. - -## Phase 1: Segment-level ST comparison on BABL -- [x] Query tunnel: bcfishpass ST segments → bcfishpass_ref.st_babl (2,334 rows with geometry) -- [x] Query local: our ST classification → bcfishpass_ref.st_babl_ours (31,580 rows) -- [x] Diff: 223 bcfishpass-only spawning segments (87.9 km), 688 bcfishpass-only rearing (277.7 km) -- [x] For mismatches: 382/383 are inaccessible in our system. Falls at BLK 360886207 blocks them. -- [x] Root cause: observation_species for ST was "ST" only. bcfishpass counts all salmon+steelhead. -- [x] Fix: one CSV cell. ST spawning -22% → +3.8%, ST rearing -25% → +2.4%. - -## Phase 2: WCT + ELKR verification -- [ ] Run ELKR with WCT observation override fix -- [ ] Run BULK with ST fix - -## Phase 3: SK spawning segment-level comparison -- [ ] Same approach: dump bcfishpass SK spawning segments for BULK/BABL -- [ ] Diff against ours -- [ ] Identify whether it's access, classification, or cluster algorithm - -## Tested and eliminated -- Per-model non-minimal barrier removal (no effect) -- label_block with crossings (-52%, crossings don't block access in bcfishpass) -- Stream order exception (3 points, not the main cause) -- Thresholds (spawn_gradient_max, rear_gradient_max, channel_width ranges — all match exactly) -- Access gating (bcfishpass uses only natural barriers, same as us) - -## Filed -- NewGraphEnvironment/bcfishpass#9 — access_st checks 'SK' instead of 'ST' (copy-paste bug) -- NewGraphEnvironment/link#33 — reference to bcfishpass#9 - -## Versions -- fresh: 0.13.4, bcfishpass: v0.5.0, link: 0.1.0 + +Create a config abstraction so pipeline variants (bcfishpass validation, newgraph defaults, min-spawn, channel-type-first breaking) stop being copy-paste script forks. Each variant = a directory under `inst/extdata/configs//` bundled with rules YAML, dimensions CSV, parameters, wsg_species_presence, observation_exclusions, and override CSVs. `lnk_config(name_or_path)` loads the bundle into one list object. + +Unblocks `_targets.R` (link#38). + +## Phase 1: Directory layout + move existing files + +- [x] Design `config.yaml` manifest schema (which files go where, required vs optional) +- [x] Create `inst/extdata/configs/bcfishpass/` directory +- [x] Move existing bcfishpass files into it (rules, dimensions, parameters_fresh, wsg_species, observation_exclusions, overrides) +- [x] Write `inst/extdata/configs/bcfishpass/config.yaml` manifest +- [x] Write `inst/extdata/configs/bcfishpass/README.md` describing the variant +- [x] Verify no broken references — grep for old paths across the repo (R scripts, data-raw, CLAUDE.md) + +## Phase 2: Loader function + +- [x] Write `R/lnk_config.R` — the loader, returns `lnk_config` S3 list +- [x] Implement manifest validation (missing files, wrong keys, bad CSVs) +- [x] Define the return list slot names and types +- [x] Runnable example showing inspection of the loaded object +- [x] Add `yaml` to DESCRIPTION Imports +- [x] Move `%||%` helper into `R/utils.R` + +## Phase 3: Tests + +- [x] Unit tests: identifier validation, missing manifest, missing referenced file, missing required keys, missing required files entries +- [x] Integration tests: load `"bcfishpass"` via name, via path, return shape checks, print method +- [x] Full test suite green (146 / 146 passing) + +## Phase 4: Seed default variant (DEFERRED) + +Deferred — the `default` variant belongs in its own PR where real departures from bcfishpass are added (intermittent streams, saner spawn gradient min, expanded lake rearing). Tracked in #19, #20, #21. An empty clone adds no value. + +## Phase 5: Wire into compare script + +- [x] Update `data-raw/compare_bcfishpass.R` to call `lnk_config("bcfishpass")` instead of hardcoded paths +- [x] Parse-check passes +- [ ] Run BULK end-to-end to verify byte-identical output (deferred — sanity check only; no structural changes, just path source) + +## Phase 6: Docs + release + +- [x] Roxygen examples (runnable + `\dontrun{}` for pipeline wiring) +- [x] pkgdown reference entry (`_pkgdown.yml`) +- [x] NEWS.md entry +- [x] Bump to 0.2.0 +- [x] `/code-check` on staged diff — one real issue found (name-shadowing foot-gun), fixed + regression test added +- [ ] PR with SRED tag (NewGraphEnvironment/sred-2025-2026#24) — Fixes #37 + +## Versions at start + +- fresh: 0.14.0 (just merged — adds frs_barriers_minimal) +- link: main (0.1.0, target 0.2.0) +- bcfishpass: ea3c5d8 +- fwapg: Docker (FWA 20240830) diff --git a/planning/archive/2026-04-22-bcfishpass-comparison/README.md b/planning/archive/2026-04-22-bcfishpass-comparison/README.md new file mode 100644 index 0000000..5a78174 --- /dev/null +++ b/planning/archive/2026-04-22-bcfishpass-comparison/README.md @@ -0,0 +1,15 @@ +# Archive: bcfishpass comparison (closed 2026-04-22) + +## Outcome + +All 4 watershed groups (ADMS, BULK, BABL, ELKR) within 5% of bcfishpass across all species. Key fixes landed in fresh 0.13.5–0.13.8 (SK outlet ordering, spawn_connected, three-phase cluster) and in link CSVs (ST observation_species, WCT threshold, BT cluster_rearing). Final results + DAG documented in `research/bcfishpass_comparison.md`. + +## Closed via + +- Issues: link#16 (end-to-end ADMS), link#31 (ST/WCT gap), fresh#147, fresh#153, fresh#154, fresh#157 +- Merges: fresh PRs #151 (SK outlet), #155 (spawn_connected), #157 (bridge gradient), #159 (three-phase cluster) + +## What superseded it + +- New PWF cycle starts 2026-04-22 for `lnk_config` (link#37) and `_targets.R` pipeline (link#38) +- Generic non-minimal barrier removal extracted to fresh 0.14.0 (`frs_barriers_minimal`, #160) diff --git a/planning/archive/2026-04-22-bcfishpass-comparison/findings.md b/planning/archive/2026-04-22-bcfishpass-comparison/findings.md new file mode 100644 index 0000000..cfdb695 --- /dev/null +++ b/planning/archive/2026-04-22-bcfishpass-comparison/findings.md @@ -0,0 +1,183 @@ +# Findings + +## Confirmed: what matches bcfishpass + +### Base segments +`localcode_ltree IS NOT NULL`, `edge_type != 6010`, `wscode_ltree <@ '999' IS FALSE`. 10,458 segments on ADMS — exact match. + +### Gradient barrier detection +50,063 barriers across 8 classes, matching to 3/50,063. Island grouping, vertex extraction, 100m lookahead — identical algorithm. + +### Non-minimal barrier removal +`fwa_upstream` self-join deletes barriers with another barrier downstream. ADMS: 27,443 → 677. Without this: +149% segments. + +### Sequential breaking +Observations → gradient barriers → habitat endpoints → crossings. GENERATED columns recompute. 1m guard. Matches bcfishpass `break_streams()`. + +### Access model +bcfishpass uses ONLY natural barriers for access (`barriers_bt_dnstr`, `barriers_ch_cm_co_pk_sk_dnstr`, etc.). Anthropogenic barriers (crossings) are recorded but do NOT block access. Our `label_block = "blocked"` is correct. Tested `label_block = c("blocked", "barrier", "potential")` — caused -52% across all species. + +### Channel width +Synced from tunnel (75,736 field measurements). Tightened results ~0.5%. + +### Observation filtering +Species filtered by `wsg_species_presence.csv`. CT remaps (CCT, ACT, CT/RB). 179 vs 178 unique positions on ADMS. + +### Habitat endpoints +Both `downstream_route_measure` and `upstream_route_measure` as break positions. 143 vs 145 on ADMS. + +### SK rearing +Exact match on all WSGs (0.0%). + +## Confirmed: what doesn't match + +### ST spawning/rearing -22%/-25% (BABL) — ROOT CAUSE FOUND +Segment-level comparison: 223 bcfishpass-only ST spawning segments. 382 of 383 overlapping segments are **inaccessible in our system**. The falls at BLK 360886207 DRM 4127 blocks them. Overridden for BT/CH/CM/CO/PK/SK but NOT for ST. + +Cause: `observation_species` in `parameters_fresh_bcfishpass.csv` was `"ST"` (only ST obs counted). bcfishpass `model_access_st.sql` counts ALL salmon + steelhead: `'CH','CM','CO','PK','SK','ST'` with threshold >= 5 post-1990. Zero ST obs upstream but salmon obs exist → barrier stays for ST in our system but gets removed in bcfishpass. + +Fix: changed `observation_species` for ST from `"ST"` to `"CH;CM;CO;PK;SK;ST"`. + +**Result: ST spawning -22.0% → +3.8%, ST rearing -25.4% → +2.4%.** One CSV cell. + +### WCT observation override missing +bcfishpass `model_access_wct.sql` uses WCT-only observations with threshold = 1 (any WCT obs removes barrier). Our CSV had `observation_threshold = NA` (no override). Fixed to threshold = 1, species = WCT. + +**Result: WCT spawning -3.4% → +4.0%, WCT rearing -4.2% → +3.0%.** 685 barriers overridden on ELKR. + +### SK spawning -22.6% (BULK, after ST/WCT fix) +Segment-level comparison: 13 bcfishpass-only segments (7.26 km), 9 ours-only (2.16 km). All bcfishpass-only segments are accessible in our system — it's not access. They're on 3 BLKs near rearing lakes (edge_type 1050/1200 wetland/lake + 1000 stream), low gradient, good channel width. The downstream trace from rearing lakes in bcfishpass reaches these segments but our `frs_connected_spawning` doesn't. + +This is a boundary effect at the 3km distance cap. The bcfishpass-only segments on BLK 360846413 are 3.0-3.4 km from the rearing lake outlet (our outlet DRM 9718, segments start at DRM 6362). Different segment boundaries resolve the cumulative distance slightly differently — some segments fall just inside 3km in bcfishpass but just outside in our system. + +**Root cause found and proven:** `.frs_connected_spawning` line 1385 picks lake outlets with `ORDER BY s2.downstream_route_measure ASC`. bcfishpass uses `ORDER BY s.waterbody_key, s.wscode_ltree, s.localcode_ltree, s.downstream_route_measure`. DRM ordering picks an arbitrary segment on any BLK with the smallest measure. wscode ordering picks the actual network-topological outlet. + +Example: waterbody_key 329064462 spans 10 BLKs. Fresh picks BLK 360504780 DRM 0 (wrong tributary). bcfishpass picks BLK 360846413 DRM 9718 (actual outlet). Downstream trace from the wrong outlet misses 7+ km of spawning habitat. + +**Outlet ordering fix (PR #152):** Corrected and merged in fresh 0.13.5. The outlet ordering and partition are now correct. + +**Remaining -9.6% (BULK):** 9 segments in the downstream trace that don't meet spawning classification thresholds (3 are edge_type 1050 wetland, 3 have cw < 2.0, 1 has gradient > 0.025). The subtractive filter correctly excludes them. + +bcfishpass applies spawning thresholds INSIDE the downstream trace CTE (`load_habitat_linear_sk.sql` lines 100-114 — checks gradient, channel_width, access within the CASE statement). Fresh applies thresholds via `frs_habitat_classify` BEFORE connected_waterbody runs, and the subtractive filter only KEEPS segments already flagged as spawning. + +**Proven additive fix:** After subtractive step, ADD spawning for segments in the downstream trace that meet permissive connected rules (gradient <= 0.05, no cw min, no edge_type filter, accessible). Result: 63 → 71 segments, 22.04 → 24.22 km (bcfishpass 24.38 km). From -9.6% to -0.7%. + +The `spawn_connected` rules in the YAML define the permissive thresholds for the trace zone. Direction = downstream. The additive step applies these after the standard subtractive filter. Generalizable to any species + waterbody type. + +The ST/WCT observation_species fix improved SK from -39.9% to -22.6% by opening access at barriers that previously blocked salmon-accessible habitat. + +### BT rearing +5.4% to +7.1% (BULK, ELKR) +Slightly over. Stream order exception added more segments (+7.1% on ELKR). May be from segment boundary differences or classification predicates we're applying that bcfishpass doesn't. + +### WCT -3.4% spawning, -4.2% rearing (ELKR) +Stream order exception closed 1 point on rearing. Same unknown cause as ST but less severe. + +## Tested and eliminated as ST/WCT cause + +| Hypothesis | Test | Result | +|-----------|------|--------| +| Per-model non-minimal removal | Built bt/salmon/st/wct barrier tables, removed non-minimal within each | No change on ST/WCT | +| Crossings blocking access | `label_block = c("blocked", "barrier", "potential")` | -52% all species, wrong | +| Stream order rearing bypass | Post-classification UPDATE for stream_order=1, parent>=5 | +3 points ST rearing, not main cause | +| Gradient/channel_width thresholds | Compared params_obj vs tunnel parameters_habitat_thresholds | Exact match | +| Access gating mechanism | Compared access counts | Close (12,728 vs 11,673 ST accessible on BABL) | + +## Bugs found + +### bcfishpass access_st checks SK instead of ST +`load_streams_access.sql` line 120: `'SK' = any(obsrvtn_species_codes_upstr)` should be `'ST'`. Copy-paste from SK block. Filed NewGraphEnvironment/bcfishpass#9, referenced in link#33. Does not affect access blocking, only the access code label (1 vs 2). + +## Methodology lessons + +### Segment-level comparison finds root causes in minutes +Guessing from SQL differences wasted hours — tested per-model non-minimal, label_block, stream order exception, three-phase rearing. None were the cause. Dumping bcfishpass segments to a local table, diffing by spatial overlap, and checking accessibility on mismatches found the real cause (wrong observation_species) in one query chain. Do this first next time. + +### One CSV cell can account for -22% +The ST gap was entirely from `observation_species = "ST"` instead of `"CH;CM;CO;PK;SK;ST"` in `parameters_fresh_bcfishpass.csv`. Always verify per-species params against the actual bcfishpass SQL before guessing at architectural causes. + +### Read the per-model SQL, don't assume symmetry +Each bcfishpass model_access_*.sql has different observation species lists and thresholds. BT counts all salmon+steelhead (threshold 1). Salmon counts salmon only (threshold 5, post-1990). ST counts all salmon+steelhead (threshold 5, post-1990). WCT counts WCT only (threshold 1, any date). Don't assume they're all the same. + +### Network topology ordering matters for spatial queries +`ORDER BY downstream_route_measure` picks an arbitrary segment with the smallest measure on any BLK. `ORDER BY wscode_ltree, localcode_ltree, downstream_route_measure` picks the actual network-topological position. For lake outlets spanning multiple BLKs, these give completely different results. DRM is a measure within a BLK — it says nothing about where that BLK sits in the network. wscode is the network position. Any DISTINCT ON query that needs "the most downstream point" must use wscode ordering, not DRM. + +### Prove before filing +We guessed three times at the SK cause (per-model non-minimal, label_block, boundary effect) and filed/commented on fresh#147 prematurely. Each guess wasted time and muddied the issue. The segment comparison found the real cause (wrong outlet ordering) in minutes. The proof query (24.41 vs 24.38 km) took one SQL statement. Always prove with data before attributing a cause. + +## Unverified hypotheses + +### Rearing waterbody filter OR vs AND +bcfishpass rearing: `wb.waterbody_type = 'R' OR (wb.waterbody_type IS NULL OR edge_type IN (...))` — permissive, includes NULL waterbody. +bcfishpass spawning: `wb.waterbody_type = 'R' OR (wb.waterbody_type IS NULL AND edge_type IN (...))` — restrictive. +Not verified whether our rules produce the same SQL. Could add rearing segments on non-standard edge types. + +### Three-phase rearing +bcfishpass rearing runs 3 phases for BT/CH/CO/ST/WCT: +1. On spawning streams (spawning AND rearing thresholds, no connectivity) +2. Downstream of spawning (cluster + fwa_upstream trace) +3. Upstream of spawning (cluster + fwa_downstream trace, 10km, 5% gradient bridge) + +Our frs_cluster does a single pass removing disconnected rearing. Not verified whether the three-phase approach classifies more rearing. + +### Stream order exception across species +Applied to BT, CH, CO, ST, WCT in bcfishpass. Tested as post-classification UPDATE. Adds segments but not enough to explain gaps. CM, PK, SK do not have the exception. CM/PK have spawning only (no rearing model). + +## Results summary (latest: ST/WCT obs fix + stream order exception) + +BULK and ELKR updated with ST/WCT observation_species fix. SK spawning proven +0.1% with outlet ordering fix (not yet in fresh). + +| Species | Metric | ADMS | BULK | BABL | ELKR | +|---------|--------|------|------|------|------| +| BT | spawn | +1.8% | +3.1% | +4.1% | +3.4% | +| BT | rear | -0.3% | +1.3% | +0.4% | +1.7% | +| CH | spawn | +0.5% | +1.9% | +3.8% | — | +| CH | rear | +2.1% | +6.0% | +6.1% | — | +| CO | spawn | +1.6% | +3.1% | +4.8% | — | +| CO | rear | -1.0% | +0.9% | +0.0% | — | +| PK | spawn | — | +2.3% | — | — | +| SK | spawn | +2.6% | -22.6% (proven +0.1% with fix) | -13.6% | — | +| SK | rear | +0.0% | +0.0% | +0.0% | — | +| ST | spawn | — | +1.9% | +3.8% | — | +| ST | rear | — | +3.6% | +2.4% | — | +| WCT | spawn | — | — | — | +4.0% | +| WCT | rear | — | — | — | +3.0% | + +### BT rearing +7% — ROOT CAUSE FOUND +Segment comparison: 646 ours-only segments (224 km) vs 36 bcfishpass-only (8.6 km). We over-classify. BT `cluster_rearing = FALSE` in our params — no rearing connectivity filter. bcfishpass applies three-phase rearing connectivity to BT (on-spawning, downstream, upstream). 224 km of disconnected rearing included in our results. + +Fix: set BT `cluster_rearing = TRUE`. **Result: BT rearing BULK +7.0% → +1.3%, ELKR +7.1% → +1.7%, BABL +2.8% → +0.4%, ADMS +2.6% → -0.3%.** All within 5%. + +### CH rearing +6.0% BULK, +6.1% BABL — frs_cluster upstream boolean too permissive +Segment comparison: 442 ours-only (103 km) vs 26 bcfishpass-only (8.5 km). Breakdown: +- 14.6 km from stream order exception (we add rearing, bcfishpass doesn't at this stage) +- 46.7 km on BLKs with CH spawning — should be "on-spawning rearing" in bcfishpass but isn't classified +- 41.6 km on BLKs WITHOUT CH spawning — our frs_cluster considers connected, bcfishpass three-phase doesn't reach + +Our frs_cluster connects rearing to spawning via network proximity (upstream/downstream within bridge_gradient + bridge_distance). bcfishpass three-phase is more restrictive: on-spawning only (Phase 1), then downstream clusters connected to spawning (Phase 2), then upstream clusters within 10km + 5% gradient bridge (Phase 3). + +**Root cause confirmed:** frs_cluster checks network proximity (is rearing upstream/downstream of spawning within bridge_distance?). bcfishpass Phase 3 traces downstream from each rearing cluster and STOPS at the first >5% gradient — rearing above a steep section doesn't count even if spawning exists below. + +97 of 180 same-BLK CH ours-only segments have a >5% gradient between them and the nearest downstream spawning. bcfishpass excludes them; frs_cluster includes them. + +This is a real difference in how connectivity is evaluated. frs_cluster checks "is there spawning within range?" bcfishpass checks "can fish get from this rearing to the spawning without crossing a gradient barrier?" The 5% gradient bridge in frs_cluster is applied to the cluster boundary, not to every segment along the path. + +This applies to ALL species with cluster_rearing. The excess is from the upstream boolean check keeping rearing that the downstream path gradient would reject. + +Tested `direction = "downstream"`: -93% BT rearing. Too restrictive — loses bcfishpass Phase 2 (rearing downstream of spawning). `direction = "both"` is correct for coverage but the upstream boolean is too permissive. + +bcfishpass Phase 3 traces DOWNSTREAM from each rearing cluster to find spawning — linear, path gradient works. Our `frs_cluster_downstream` does the same. But `direction = "both"` combines it with upstream boolean, which overrides downstream rejections. + +Not a CSV fix. Needs architectural change: `_both` should require downstream path gradient pass, not OR with upstream boolean. Or: fresh implements bcfishpass-style three-phase rearing (on-spawning, downstream-of-spawning, upstream-of-spawning-traced-downstream). Filed as fresh#153. + +Accepted at +6% for now. Only affects CH rearing consistently. Other species within 5%. + +### fresh#153 regression: BT rearing -87.9% on ADMS +The upstream path gradient check in frs_cluster is fundamentally broken. `FWA_Upstream` returns ALL upstream segments (4,770 for one cluster) including tributaries. `row_number` ordered by wscode sorts tributaries before mainstem continuation. A >5% gradient on a distant tributary gets a lower row_number than the adjacent spawning segment on the mainstem, blocking the connection. + +Example: segment 15319 (rearing) has spawning at segment 15320 — adjacent, 123m, zero gradient. But `FWA_Upstream` finds 3,217 steep segments across all upstream tributaries. The wscode ordering places some tributary segments between the rearing and the mainstem spawning. + +bcfishpass only traces DOWNSTREAM for path gradient (linear, no branching). Upstream uses simple boolean `FWA_Upstream` without path constraints. The upstream check can't do path gradient because the network branches upstream. + +Fix: upstream check should use boolean `FWA_Upstream` (exists/not exists) without path gradient. Only the downstream check can apply segment-by-segment gradient constraints because the trace is linear. + +### CO spawning: +4.8% BABL — borderline diff --git a/planning/archive/2026-04-22-bcfishpass-comparison/progress.md b/planning/archive/2026-04-22-bcfishpass-comparison/progress.md new file mode 100644 index 0000000..ff2d8c8 --- /dev/null +++ b/planning/archive/2026-04-22-bcfishpass-comparison/progress.md @@ -0,0 +1,26 @@ +# Progress + +## Session 2026-04-13 (continued) +- Tested per-model non-minimal on BABL, ELKR, ADMS — no effect on ST/WCT +- Tested label_block with crossings — -52% regression, confirmed crossings don't block in bcfishpass +- Read load_streams_access.sql: access uses ONLY natural barriers, NOT anthropogenic +- Found bcfishpass access_st bug: checks 'SK' instead of 'ST' (filed bcfishpass#9, link#33) +- Read load_habitat_linear_st.sql line by line +- Found stream order exception: tested, +3 points on ST rearing, not the main cause +- Found rearing waterbody filter OR vs AND — not verified as cause +- Found three-phase rearing pattern — not verified as cause +- Read all 8 load_habitat_linear_*.sql files for cross-species comparison +- **Key lesson: stop guessing from SQL differences, compare segments directly against tunnel** +- Commits: 88e5af4, 67b67b6, 7b5e888 + +## Session 2026-04-14 +- Segment-level ST comparison: loaded bcfishpass_ref.st_babl + diff tables for QGIS +- Found 382/383 bcfishpass-only segments are inaccessible in our system +- Traced to falls at BLK 360886207 not overridden for ST +- Root cause: observation_species = "ST" should be "CH;CM;CO;PK;SK;ST" +- Fix: one CSV cell. ST spawning -22% → +3.8%, rearing -25% → +2.4% +- Also fixed WCT: added observation_threshold=1, species=WCT (not yet tested) +- **Key lesson: segment-level comparison finds root causes in minutes, guessing from SQL wastes hours** +- SK spawning: traced to wrong lake outlet ordering in frs_connected_spawning line 1385 + ORDER BY downstream_route_measure picks wrong BLK. bcfishpass uses wscode_ltree ordering. + Proven: corrected query gives 24.41 km vs bcfishpass 24.38 km (+0.1%) diff --git a/planning/archive/2026-04-22-bcfishpass-comparison/task_plan.md b/planning/archive/2026-04-22-bcfishpass-comparison/task_plan.md new file mode 100644 index 0000000..9993735 --- /dev/null +++ b/planning/archive/2026-04-22-bcfishpass-comparison/task_plan.md @@ -0,0 +1,41 @@ +# Task Plan: ST/WCT classification gap (#31) + +## Goal +Identify and close the ST -22% spawning / -25% rearing gap on BABL, WCT -4% on ELKR. + +## Status +- Per-model non-minimal: tested, no effect on ST/WCT +- label_block with crossings: tested, -52% regression (crossings don't block in bcfishpass) +- Stream order exception: tested, closed 3 points on ST rearing (-28% → -25%) +- Root cause NOT confirmed. Hypotheses tested and eliminated. Need segment-level comparison. + +## Phase 1: Segment-level ST comparison on BABL +- [x] Query tunnel: bcfishpass ST segments → bcfishpass_ref.st_babl (2,334 rows with geometry) +- [x] Query local: our ST classification → bcfishpass_ref.st_babl_ours (31,580 rows) +- [x] Diff: 223 bcfishpass-only spawning segments (87.9 km), 688 bcfishpass-only rearing (277.7 km) +- [x] For mismatches: 382/383 are inaccessible in our system. Falls at BLK 360886207 blocks them. +- [x] Root cause: observation_species for ST was "ST" only. bcfishpass counts all salmon+steelhead. +- [x] Fix: one CSV cell. ST spawning -22% → +3.8%, ST rearing -25% → +2.4%. + +## Phase 2: WCT + ELKR verification +- [ ] Run ELKR with WCT observation override fix +- [ ] Run BULK with ST fix + +## Phase 3: SK spawning segment-level comparison +- [ ] Same approach: dump bcfishpass SK spawning segments for BULK/BABL +- [ ] Diff against ours +- [ ] Identify whether it's access, classification, or cluster algorithm + +## Tested and eliminated +- Per-model non-minimal barrier removal (no effect) +- label_block with crossings (-52%, crossings don't block access in bcfishpass) +- Stream order exception (3 points, not the main cause) +- Thresholds (spawn_gradient_max, rear_gradient_max, channel_width ranges — all match exactly) +- Access gating (bcfishpass uses only natural barriers, same as us) + +## Filed +- NewGraphEnvironment/bcfishpass#9 — access_st checks 'SK' instead of 'ST' (copy-paste bug) +- NewGraphEnvironment/link#33 — reference to bcfishpass#9 + +## Versions +- fresh: 0.13.4, bcfishpass: v0.5.0, link: 0.1.0 diff --git a/research/bcfishpass_comparison.md b/research/bcfishpass_comparison.md index 21d227b..d06b741 100644 --- a/research/bcfishpass_comparison.md +++ b/research/bcfishpass_comparison.md @@ -45,131 +45,48 @@ All species within 5% on all 4 WSGs. Three-phase cluster, no stream order bypass ## DAG -``` -┌─────────────────────────────────────────────────────────────────┐ -│ Config CSVs │ -│ crossings.csv · falls.csv · override CSVs · habitat confirms │ -│ parameters_fresh · parameters_habitat_dimensions · rules YAML │ -│ wsg_species_presence · observation_exclusions │ -└───────┬──────────┬──────────────┬──────────────┬────────────────┘ - │ │ │ │ - ▼ ▼ │ │ - ┌───────────────────────┐ │ │ - │ lnk_load + lnk_override│ │ │ - │ Load crossings, apply │ │ │ - │ modelled fixes + PSCIS │ │ │ - │ overrides │ │ │ - └───────────┬────────────┘ │ │ - │ │ │ - │ ┌──────────────┘ │ - │ ▼ │ - │ ┌────────────────────────┐ │ - │ │ frs_break_find │ │ - │ │ Gradient barriers on │ │ - │ │ raw FWA (4 classes) │ │ - │ └──────────┬─────────────┘ │ - │ │ │ - │ ▼ │ - │ ┌────────────────────────┐ │ - │ │ lnk_barrier_overrides │◄─────┘ - │ │ Observations + habitat │ - │ │ confirms → skip list │ - │ └──────────┬─────────────┘ - │ │ - │ ▼ - │ ┌────────────────────────┐ - │ │ Non-minimal removal │ - │ │ fwa_upstream self-join │ - │ │ 27,443 → 677 (ADMS) │ - │ └──────────┬─────────────┘ - │ │ - ▼ ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ Load base segments │ -│ Raw FWA + filters + channel_width + stream_order_parent │ -│ frs_col_generate (GENERATED gradient/measures/length) │ -└──────────────────────────┬──────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ Sequential breaking (frs_break_apply × 4) │ -│ 1. Observations 2. Gradient barriers (minimal) │ -│ 3. Habitat endpoints 4. Crossings │ -│ GENERATED columns recompute · 1m guard · id_segment reassigned │ -└──────────────────────────┬──────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ Build breaks table for access gating │ -│ FULL gradient barriers + falls + crossings (WSG-filtered) │ -└──────────────────────────┬──────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ frs_habitat_classify │ -│ Rules YAML + thresholds · access gating + barrier overrides │ -│ Per-species: gradient · channel_width · edge_type · waterbody │ -└──────────────────────────┬──────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ frs_cluster (three-phase) │ -│ Phase 1: on-spawning rearing always valid │ -│ Phase 2: upstream boolean (rearing below spawning) │ -│ Phase 3: FWA_Downstream mainstem trace (rearing above spawning) │ -└──────────────────────────┬──────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ frs_connected_waterbody (SK) │ -│ Subtractive: remove spawning not near rearing lake │ -│ Additive: spawn_connected thresholds in downstream trace zone │ -└─────────────────────────────────────────────────────────────────┘ -``` - -## Pipeline - -### 1. Detect gradient barriers on raw FWA +```mermaid +flowchart TD + CSVs["Config CSVs
crossings · falls · overrides · habitat confirms
rules YAML · dimensions · wsg_species_presence"] -`frs_break_find` on `fwa_stream_networks_sp`. 4 classes (15/20/25/30%). Barrier counts match bcfishpass to within 3/50,063. + CSVs --> Load["lnk_load + lnk_override
Load crossings
Apply modelled fixes + PSCIS overrides"] + CSVs --> Find["frs_break_find
Gradient barriers on raw FWA
4 classes (15/20/25/30 percent)"] + CSVs --> BarOver["lnk_barrier_overrides
Observations + habitat confirms
skip list per species"] -### 2. Build barrier overrides + Find --> Minimal["Non-minimal removal
fwa_upstream self-join
27,443 → 677 barriers (ADMS)"] -`lnk_barrier_overrides` counts observations upstream of each barrier via `fwa_upstream()`. Per-species thresholds: -- BT: >= 1 obs (BT + all salmon/steelhead), any date -- CH/CM/CO/PK/SK: >= 5 obs (salmon only), post-1990 -- ST: >= 5 obs (all salmon + steelhead), post-1990 -- WCT: >= 1 obs (WCT only), any date + Load --> Base["Load base segments
Raw FWA + filters + channel_width
stream_order_parent + frs_col_generate"] + Minimal --> Base -### 3. Remove non-minimal barriers + Base --> Break["Sequential breaking (frs_break_apply × 4)
Observations → Gradient minimal → Habitat endpoints → Crossings"] -`fwa_upstream()` self-join deletes barriers with another barrier downstream. 27,443 → 677 on ADMS. + Break --> Breaks["Build breaks table
FULL gradient + falls + crossings
WSG-filtered for access gating"] -### 4. Load base segments + Breaks --> Classify["frs_habitat_classify
Rules YAML + thresholds
Access gating + barrier overrides
Per-species: gradient · channel_width · edge_type · waterbody"] + BarOver --> Classify -Raw FWA with filters: `localcode_ltree IS NOT NULL`, `edge_type != 6010`, `wscode_ltree <@ '999' IS FALSE`. Channel width joined from `fwa_stream_networks_channel_width`. Stream order parent from `fwa_stream_networks_order_parent`. + Classify --> Cluster["frs_cluster (three-phase)
Phase 1: on-spawning always valid
Phase 2: upstream boolean (below spawning)
Phase 3: FWA_Downstream mainstem (above spawning)"] -### 5. Sequential breaking + Cluster --> Conn["frs_connected_waterbody (SK)
Subtractive: remove spawning not near rearing lake
Additive: spawn_connected in downstream trace"] -Observations → gradient barriers (minimal) → habitat endpoints (both DRM and URM) → crossings. Each round: `frs_break_apply` in-place, GENERATED columns recompute, 1m guard, `id_segment` reassigned. - -### 6. Access gating + classification - -`frs_habitat_classify` with full gradient barriers for access gating (not minimal). Barrier overrides selectively open access per species. Rules YAML defines edge types, waterbody types, thresholds. + classDef fresh fill:#e1f5ff,stroke:#0066cc,color:#003366 + classDef link fill:#fff3e1,stroke:#cc6600,color:#663300 + classDef op fill:#f0f0f0,stroke:#666,color:#333 + class Find,Classify,Cluster,Conn fresh + class Load,BarOver link + class CSVs,Minimal,Base,Break,Breaks op +``` -### 7. Three-phase rearing connectivity +Blue = `fresh` functions. Orange = `lnk_` functions. Grey = composite operations (multiple function calls bundled into one step). -`frs_cluster` with three-phase approach (fresh 0.13.8): -- Phase 1: on-spawning segments always valid (both spawning AND rearing) -- Phase 2: upstream boolean (rearing below spawning) -- Phase 3: `FWA_Downstream` on broken streams table, mainstem only, gradient bridge + distance cap (rearing above spawning) +## Pipeline operations -### 8. Connected waterbody spawning (SK) +Composite steps in the DAG that aren't a single function call: -`frs_connected_waterbody` with `spawn_connected` rules: -- Subtractive: remove spawning not connected to rearing lake -- Additive: add spawning for segments in downstream trace meeting permissive thresholds (gradient_max 0.05, no cw minimum, all edge types) -- Outlet ordering: `wscode_ltree, localcode_ltree, downstream_route_measure` (network topology, not just DRM) +- **Non-minimal removal** — `fwa_upstream()` self-join that deletes gradient barriers which have another gradient barrier downstream. 27,443 → 677 on ADMS. Leaves only the furthest-downstream barrier per reach so the sequential breaking pass isn't redundant. +- **Load base segments** — raw FWA filtered to the AOI (`localcode_ltree IS NOT NULL`, `edge_type != 6010`, no coastlines), with `channel_width` joined from `fwa_stream_networks_channel_width` and `stream_order_parent` from `fwa_stream_networks_order_parent`. `frs_col_generate` adds GENERATED columns for gradient, measures, length. +- **Sequential breaking** — `frs_break_apply` called 4 times in order: observations → minimal gradient barriers → habitat endpoints (DRM + URM) → crossings. Each round reassigns `id_segment`, recomputes GENERATED columns; 1m guard prevents duplicate breaks. +- **Build breaks table** — reassembly of gradient barriers (FULL, not minimal) + falls + crossings, filtered to WSG. Used for access gating during classification. ## Key fixes during comparison diff --git a/tests/testthat/test-lnk_config.R b/tests/testthat/test-lnk_config.R new file mode 100644 index 0000000..0809c4d --- /dev/null +++ b/tests/testthat/test-lnk_config.R @@ -0,0 +1,156 @@ +# lnk_config loads and validates config bundles under +# inst/extdata/configs// (or any custom directory containing a +# config.yaml manifest). + +test_that("lnk_config rejects invalid input", { + expect_error(lnk_config(NULL), "single string") + expect_error(lnk_config(c("a", "b")), "single string") + expect_error(lnk_config(123), "single string") +}) + +test_that("lnk_config errors when bundle not found", { + expect_error( + lnk_config("definitely_not_a_real_config"), + "No config bundle found" + ) +}) + +test_that("lnk_config loads the bundled bcfishpass variant", { + cfg <- lnk_config("bcfishpass") + + expect_s3_class(cfg, "lnk_config") + expect_equal(cfg$name, "bcfishpass") + expect_true(dir.exists(cfg$dir)) + + # Required file paths + expect_true(file.exists(cfg$rules_yaml)) + expect_true(file.exists(cfg$dimensions_csv)) + + # Required CSV tibbles + expect_s3_class(cfg$parameters_fresh, "data.frame") + expect_gt(nrow(cfg$parameters_fresh), 0) + + # Overrides list + expect_type(cfg$overrides, "list") + expect_true(all(vapply(cfg$overrides, is.data.frame, logical(1)))) + + # Pipeline section from manifest + expect_type(cfg$pipeline, "list") + expect_true("break_order" %in% names(cfg$pipeline)) +}) + +test_that("lnk_config does not shadow bundled names with local directories", { + # Regression: bare names must resolve to bundled configs even if a + # directory of the same name exists in the current working directory. + # The old resolver checked `dir.exists(name)` first and silently used + # the local dir. + tmp_parent <- withr::local_tempdir() + dir.create(file.path(tmp_parent, "bcfishpass")) + # Write a decoy manifest so, if shadowed, the test would see "decoy" + yaml::write_yaml( + list(name = "decoy", + files = list(rules_yaml = "r", dimensions_csv = "d", + parameters_fresh = "p")), + file.path(tmp_parent, "bcfishpass", "config.yaml") + ) + + withr::with_dir(tmp_parent, { + cfg <- lnk_config("bcfishpass") + expect_equal(cfg$name, "bcfishpass") + expect_false(grepl(normalizePath(tmp_parent), cfg$dir, fixed = TRUE)) + }) +}) + +test_that("lnk_config errors on path that does not exist", { + expect_error( + lnk_config("./no/such/directory"), + "No config directory found at path" + ) +}) + +test_that("lnk_config accepts a custom path", { + bundled <- system.file("extdata", "configs", "bcfishpass", package = "link") + cfg <- lnk_config(bundled) + + expect_s3_class(cfg, "lnk_config") + expect_equal(cfg$name, "bcfishpass") +}) + +test_that("lnk_config prints a readable summary", { + cfg <- lnk_config("bcfishpass") + out <- capture.output(print(cfg)) + + expect_match(out[1], "^ bcfishpass") + expect_true(any(grepl("rules:", out))) + expect_true(any(grepl("overrides:", out))) +}) + +test_that("lnk_config errors on missing manifest", { + tmp <- withr::local_tempdir() + # No config.yaml written + expect_error(lnk_config(tmp), "config.yaml not found") +}) + +test_that("lnk_config errors on manifest missing required keys", { + tmp <- withr::local_tempdir() + yaml::write_yaml(list(description = "no name"), + file.path(tmp, "config.yaml")) + + expect_error(lnk_config(tmp), "missing required keys.*name") +}) + +test_that("lnk_config errors on manifest missing required files entries", { + tmp <- withr::local_tempdir() + yaml::write_yaml( + list(name = "x", files = list(rules_yaml = "r.yaml")), + file.path(tmp, "config.yaml") + ) + + expect_error(lnk_config(tmp), + "missing required entries.*dimensions_csv.*parameters_fresh") +}) + +test_that("lnk_config errors on manifest referencing missing file", { + tmp <- withr::local_tempdir() + yaml::write_yaml( + list( + name = "x", + files = list( + rules_yaml = "rules.yaml", + dimensions_csv = "dims.csv", + parameters_fresh = "params.csv" + ) + ), + file.path(tmp, "config.yaml") + ) + # None of the referenced files exist + + expect_error(lnk_config(tmp), + "references missing file") +}) + +test_that("lnk_config errors when an override file is missing", { + tmp <- withr::local_tempdir() + # Write valid required files + file.create(file.path(tmp, "rules.yaml")) + write.csv(data.frame(a = 1), file.path(tmp, "dims.csv"), row.names = FALSE) + write.csv(data.frame(a = 1), file.path(tmp, "params.csv"), + row.names = FALSE) + yaml::write_yaml( + list( + name = "x", + files = list( + rules_yaml = "rules.yaml", + dimensions_csv = "dims.csv", + parameters_fresh = "params.csv" + ), + overrides = list( + missing_one = "overrides/nope.csv" + ) + ), + file.path(tmp, "config.yaml") + ) + + expect_error(lnk_config(tmp), + "overrides.*references missing file") +})