Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 78 additions & 10 deletions R/cd_compare.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,48 @@
#' Compare arbitrary time windows
#'
#' Computes the mean value for two user-defined time windows and
#' the difference between them. Enables custom comparisons beyond
#' standard baseline anomalies.
#' Computes the mean value for two user-defined time windows, the
#' difference between them, and (by default) a p-value testing
#' whether the two windows differ.
#'
#' Defaults compare a recent decade (`2015:2025`) to the WMO-style
#' standard normal reference period (`1951:1980`) — the framing
#' used in the package vignettes. This is a *cumulative-impact*
#' comparison ("how much warmer is the recent decade than the
#' pre-warming reference?") rather than a rate-of-change
#' comparison; for the latter use [cd_trend()].
#'
#' The window-vs-window p-value (`test = "t"` Welch t-test or
#' `test = "wilcox"` Mann-Whitney U) tests whether the means of
#' the two windows differ — a different question than
#' [cd_trend()]'s Mann-Kendall test, which checks for a monotonic
#' trend across the full series. Two windows can differ
#' significantly without a monotonic trend (step changes, U-shapes)
#' and a non-significant trend doesn't always mean the windows are
#' indistinguishable. The test treats annual values as independent;
#' for series with strong autocorrelation the t-test p is mildly
#' anti-conservative — the Wilcoxon alternative is more robust to
#' non-Gaussian tails / outliers but shares the independence
#' assumption.
#'
#' @param x A tibble from [cd_extract()] with columns `variable`,
#' `period`, `year`, `value`.
#' @param window_a Integer vector of years for the first window.
#' @param window_b Integer vector of years for the second window.
#' @param window_a Integer vector of years for the recent / impact
#' window. Default `2015:2025`.
#' @param window_b Integer vector of years for the reference
#' window. Default `1951:1980` (WMO-style standard normal).
#' @param method Character. Comparison method: `"mean_diff"` for
#' `mean_a - mean_b`, or `"pct_change"` for percentage change
#' relative to window_b.
#' @param test Character or NULL. Window-vs-window significance
#' test: `"t"` for Welch's two-sample t-test (default) or
#' `"wilcox"` for Mann-Whitney U. Set to `NULL` to skip — output
#' then drops the `p_value` column. Rows where either window has
#' fewer than 8 non-NA values get `p_value = NA` and a single
#' batched warning.
#'
#' @return A tibble with columns `variable`, `period`, `mean_a`,
#' `mean_b`, `difference`, `method`.
#' `mean_b`, `difference`, `method`, and (when `test` is
#' non-NULL) `p_value`.
#'
#' @examples
#' catalog <- cd_catalog(
Expand All @@ -25,15 +54,24 @@
#' )
#' ts <- cd_extract(catalog, aoi)
#'
#' # How has the recent period shifted from the early period?
#' cd_compare(ts, window_a = 1956:1960, window_b = 1951:1955)
#' # The toy example catalog only spans 1951:1960, so the windows
#' # below override the defaults (2015:2025 vs 1951:1980) — those
#' # are the right choice on the live STAC catalog. test = NULL
#' # because the toy windows have < 8 years.
#' cd_compare(ts, window_a = 1956:1960, window_b = 1951:1955, test = NULL)
#'
#' # Same comparison as percentage change
#' cd_compare(ts, window_a = 1956:1960, window_b = 1951:1955, method = "pct_change")
#' cd_compare(ts, window_a = 1956:1960, window_b = 1951:1955,
#' method = "pct_change", test = NULL)
#'
#' @export
cd_compare <- function(x, window_a, window_b, method = "mean_diff") {
cd_compare <- function(x,
window_a = 2015:2025,
window_b = 1951:1980,
method = "mean_diff",
test = "t") {
method <- match.arg(method, c("mean_diff", "pct_change"))
if (!is.null(test)) test <- match.arg(test, c("t", "wilcox"))

mean_a <- x |>
dplyr::filter(.data$year %in% window_a) |>
Expand All @@ -56,5 +94,35 @@ cd_compare <- function(x, window_a, window_b, method = "mean_diff") {
)
out$method <- method

if (!is.null(test)) {
p <- vapply(seq_len(nrow(out)), function(i) {
v <- out$variable[i]
p_ <- out$period[i]
a <- x$value[x$variable == v & x$period == p_ & x$year %in% window_a]
b <- x$value[x$variable == v & x$period == p_ & x$year %in% window_b]
a <- a[!is.na(a)]
b <- b[!is.na(b)]
if (length(a) < 8 || length(b) < 8) return(NA_real_)
pv <- tryCatch(
switch(test,
t = stats::t.test(a, b, var.equal = FALSE)$p.value,
wilcox = stats::wilcox.test(a, b, exact = FALSE)$p.value
),
error = function(e) NA_real_
)
round(pv, 4)
}, numeric(1))

if (any(is.na(p))) {
bad <- which(is.na(p))
warning(sprintf(
"p_value set to NA for %d row(s) with < 8 years in either window: %s",
length(bad),
paste(sprintf("%s/%s", out$variable[bad], out$period[bad]), collapse = ", ")
), call. = FALSE)
}
out$p_value <- p
}

out
}
10 changes: 4 additions & 6 deletions data-raw/kootenay_lake_vignette_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,10 @@ regional_ts <- cd_extract(catalog, aoi)
regional_bl <- cd_baseline(regional_ts, baseline_years = 1951:1980)
regional_ano <- cd_anomaly(regional_ts, regional_bl)
regional_trn <- cd_trend(regional_ano, trend_start = c(1951, 1981))
regional_cmp <- cd_compare(regional_ts,
window_a = 2015:2025, window_b = 1951:1980,
method = "mean_diff")
regional_cmp <- cd_compare(regional_ts, method = "mean_diff") # defaults: 2015:2025 vs 1951:1980, Welch t
regional_cmp_pct <- cd_compare(
regional_ts[regional_ts$variable %in% pct_normal_vars, ],
window_a = 2015:2025, window_b = 1951:1980, method = "pct_change"
method = "pct_change"
)

# -- Per-ecoregion -----------------------------------------------------------
Expand All @@ -65,7 +63,7 @@ ecoregion_results <- lapply(seq_len(nrow(ecoregions)), function(i) {
trn <- cd_trend(ano, trend_start = c(1951, 1981))
cmp_pct <- cd_compare(
ts[ts$variable %in% pct_normal_vars, ],
window_a = 2015:2025, window_b = 1951:1980, method = "pct_change"
method = "pct_change"
)
list(ts = ts, ano = ano, trn = trn, cmp_pct = cmp_pct)
})
Expand All @@ -81,7 +79,7 @@ wsg_results <- lapply(seq_len(nrow(wsgs)), function(i) {
trn <- cd_trend(ano, trend_start = c(1951, 1981))
cmp_pct <- cd_compare(
ts[ts$variable %in% pct_normal_vars, ],
window_a = 2015:2025, window_b = 1951:1980, method = "pct_change"
method = "pct_change"
)
list(ts = ts, ano = ano, trn = trn, cmp_pct = cmp_pct)
})
Expand Down
8 changes: 3 additions & 5 deletions data-raw/peace_fwcp_vignette_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,14 @@ regional_ts <- cd_extract(catalog, aoi)
regional_bl <- cd_baseline(regional_ts, baseline_years = 1951:1980)
regional_ano <- cd_anomaly(regional_ts, regional_bl)
regional_trn <- cd_trend(regional_ano, trend_start = c(1951, 1981))
regional_cmp <- cd_compare(regional_ts,
window_a = 2015:2025, window_b = 1951:1980,
method = "mean_diff")
regional_cmp <- cd_compare(regional_ts, method = "mean_diff") # defaults: 2015:2025 vs 1951:1980, Welch t
# pct_change for vars with pct_normal anomaly type (#48 added swe / snowfall /
# snowmelt to this list; snow_cover and snowfall_fraction are pct_point_diff so
# mean_diff is the right comparison method for those — already in regional_cmp).
pct_normal_vars <- c("prcp", "soil_moisture", "swe", "snowfall", "snowmelt")
regional_cmp_pct <- cd_compare(
regional_ts[regional_ts$variable %in% pct_normal_vars, ],
window_a = 2015:2025, window_b = 1951:1980, method = "pct_change"
method = "pct_change"
)

# -- Per-ecoregion -----------------------------------------------------------
Expand All @@ -63,7 +61,7 @@ ecoregion_results <- lapply(seq_len(nrow(ecoregions)), function(i) {
trn <- cd_trend(ano, trend_start = c(1951, 1981))
cmp_pct <- cd_compare(
ts[ts$variable %in% pct_normal_vars, ],
window_a = 2015:2025, window_b = 1951:1980, method = "pct_change"
method = "pct_change"
)
list(ts = ts, ano = ano, trn = trn, cmp_pct = cmp_pct)
})
Expand Down
11 changes: 8 additions & 3 deletions man/cd_anomaly.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 51 additions & 10 deletions man/cd_compare.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 13 additions & 6 deletions man/cd_variables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading