Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v0.9.2: hierarchical K options #177

Merged
merged 14 commits into from
Mar 24, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: streamMetabolizer
Type: Package
Title: Estimate Stream Metabolism from Dissolved Oxygen Data
Version: 0.9.1
Date: 2016-03-20
Version: 0.9.2
Date: 2016-03-23
Author: Alison Appling, Jordan Read, Luke Winslow, Bob Hall
Maintainer: Alison Appling <aappling@usgs.gov>
Description: Modeling package for stream metabolism.
Expand Down
15 changes: 14 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
# streamMetabolizer 0.9.2

## Changes

* Hierarchical constraints on K600 are now available! Options are 'normal',
'linear', and 'binned'; see the details section on pool_K600 in ?mm_name and the
description of parameters starting with K600_daily in ?specs.

* Interface change: specs lists now print more prettily and have class 'specs'
(though they're still fundamentally just lists)

* Vignette: see vignette('getstarted')

# streamMetabolizer 0.9.0

## Changes
Expand All @@ -6,7 +19,7 @@
specs to metab() and expect the appropriate model to be chosen and called based
on model_name in the specs list.

* New function: data_metab() produces a dataset for testing/demonstration, with
* New function: data_metab() produces a dataset for testing/demonstration, with
options for the resolution & flaws to introduce.

* Newly public function: mm_model_by_ply is now public. Its interface has also
Expand Down
2 changes: 1 addition & 1 deletion R/metab_Kmodel.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ metab_Kmodel <- function(
stop("specs$transforms['K600'] should be NA or 'log'")

# Check data for correct column names & units
dat_list <- mm_validate_data(if(missing(data)) NULL else data, data_daily, "metab_Kmodel")
dat_list <- mm_validate_data(if(missing(data)) NULL else data, if(missing(data_daily)) NULL else data_daily, "metab_Kmodel")
data <- dat_list[['data']]
data_daily <- dat_list[['data_daily']]

Expand Down
121 changes: 107 additions & 14 deletions R/metab_bayes.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ NULL
#' @family metab_model
metab_bayes <- function(
specs=specs(mm_name('bayes')),
data=mm_data(solar.time, DO.obs, DO.sat, depth, temp.water, light),
data_daily=mm_data(NULL),
data=mm_data(solar.time, DO.obs, DO.sat, depth, temp.water, light, discharge, optional='discharge'),
data_daily=mm_data(date, discharge.daily, optional='all'),
info=NULL
) {

Expand All @@ -43,10 +43,91 @@ metab_bayes <- function(
}
fitting_time <- system.time({
# Check data for correct column names & units
dat_list <- mm_validate_data(data, if(missing(data_daily)) NULL else data_daily, "metab_bayes")
data <- v(dat_list[['data']])
data_daily <- v(dat_list[['data_daily']])
dat_list <- mm_validate_data(if(missing(data)) NULL else data, if(missing(data_daily)) NULL else data_daily, "metab_bayes")
num_discharge_cols <- length(grep('discharge', c(names(dat_list$data), names(dat_list$data_daily))))
pool_K600 <- mm_parse_name(specs$model_name)$pool_K600
if(xor(num_discharge_cols > 0, pool_K600 %in% c('linear','binned')))
stop('discharge data should be included if & only if pool_K600 indicates hierarchy')
if(num_discharge_cols > 1)
stop('either discharge or discharge.daily may be specified, but not both')

# Handle discharge. If K600 is a hierarchical function of discharge and
# data$discharge was given, compute daily discharge and store in data.daily
# where it'll be accessible for the user to inspect it after model fitting
if((pool_K600 %in% c('linear','binned')) && ('discharge' %in% names(dat_list$data))) {
# calculate daily discharge
dailymean <- function(data_ply, data_daily_ply, day_start, day_end, ply_date, ply_validity, timestep_days, ...) {
data.frame(discharge.daily = if(ply_validity) mean(data_ply$discharge) else NA)
}
dischdaily <- mm_model_by_ply(
model_fun=dailymean, data=v(dat_list$data), day_start=specs$day_start, day_end=specs$day_end)

# add units if either of the input dfs were unitted
if(is.unitted(dat_list$data) || is.unitted(dat_list$data_daily)) {
dischdaily_units <- get_units(mm_data(date, discharge.daily))
if(is.unitted(dat_list$data)) {
if(dischdaily_units['discharge.daily'] != get_units(dat_list$data$discharge))
stop('mismatch between discharge units in data (',get_units(dat_list$data$discharge),
') and requirement for data_daily (', dischdaily_units['discharge.daily'] ,')')
}
dischdaily <- u(dischdaily, dischdaily_units)
}

# merge with any existing dat_list$data_daily
if(is.null(v(dat_list$data_daily))) {
dat_list$data_daily <- dischdaily
} else {
# need both or neither dfs to be unitted for the full_join. if
# data_daily was unitted then dischdaily will already also be unitted
# (see add units chunk above), so just check/fix the case where
# data_daily lacks units but data & therefore dischdaily has them
if(is.unitted(dischdaily) && !is.unitted(dat_list$data_daily)) {
dat_list$data_daily <- u(dat_list$data_daily, get_units(mm_data()[names(dat_list$data_daily)]))
}
dat_list$data_daily <- full_join(dat_list$data_daily, dischdaily, by='date')
}
}
# If we have discharge.daily, then we need logged discharge.daily. compute
# and store it now
if('discharge.daily' %in% names(dat_list$data_daily)) {
dat_list$data_daily$ln.discharge.daily <- log(v(dat_list$data_daily$discharge.daily))
}
# If we need discharge bins, compute & store those now, as well
if(pool_K600 %in% c('binned')) {
if(is.character(specs$K600_daily_beta_cuts)) {
if(!requireNamespace('ggplot2', quietly=TRUE)) {
stop("need ggplot2 for K600_pool='binned' and character value for K600_daily_beta_cuts. ",
"either install the ggplot2 package or switch to a numeric vector for K600_daily_beta_cuts")
}
cut_fun <- switch(
specs$K600_daily_beta_cuts,
interval = ggplot2::cut_interval,
number = ggplot2::cut_number)
# run once with high dig.lab to parse the breaks from levels(cuts) as numeric
cuts <- cut_fun(v(dat_list$data_daily$ln.discharge.daily), n=specs$K600_daily_beta_num, dig.lab=20)
specs$K600_daily_beta_breaks <- levels(cuts) %>%
strsplit('\\[|\\(|\\]|,') %>%
lapply(function(lev) as.numeric(lev[2:3])) %>%
unlist() %>%
unique()
# run again with default dig.lab for prettier bin labels
cuts <- cut_fun(v(dat_list$data_daily$ln.discharge.daily), n=specs$K600_daily_beta_num)
} else {
# we already know breaks precisely because we're supplying them
specs$K600_daily_beta_breaks <- specs$K600_daily_beta_cuts
cuts <- cut(dat_list$data_daily$ln.discharge.daily, breaks=specs$K600_daily_beta_cuts, ordered_result=TRUE)
}
dat_list$data_daily$discharge.bin.daily <- as.numeric(cuts)
specs$K600_daily_beta_bins <- levels(cuts)
# you should be able to retrieve the bin names with
# specs$K600_daily_beta_bins[dat_list[['data_daily']]$discharge.bin.daily] or, later,
# get_specs(fit)$K600_daily_beta_bins[get_data_daily(fit)$discharge.bin.daily]
}

# Use de-unitted version until we pack up the model to return
data <- v(dat_list$data)
data_daily <- v(dat_list$data_daily)

# Check and parse model file path. First try the streamMetabolizer models
# dir, then try a regular path, then complain / continue depending on
# whether we found a file. Add the complete path to specs. This is
Expand Down Expand Up @@ -89,9 +170,10 @@ metab_bayes <- function(

} else if(specs$split_dates == FALSE) {
# all days at a time, after first filtering out bad days
if((specs$day_end - specs$day_start) > 24) warning("multi-day models should probably have day_end - day_start <= 24 hours")
filtered <- mm_filter_valid_days(
data, data_daily, day_start=specs$day_start, day_end=specs$day_end, day_tests=specs$day_tests)
if(length(unique(filtered$data$date)) > 1 && (specs$day_end - specs$day_start) > 24)
warning("multi-day models should probably have day_end - day_start <= 24 hours")
bayes_all_list <- bayes_allply(
data_all=filtered$data, data_daily_all=filtered$data_daily, removed=filtered$removed,
specs=specs)
Expand All @@ -111,8 +193,8 @@ metab_bayes <- function(
mcmc=bayes_mcmc,
fitting_time=fitting_time,
specs=specs,
data=dat_list[['data']], # keep the units if given
data_daily=dat_list[['data_daily']])
data=dat_list$data, # keep the units if given
data_daily=dat_list$data_daily)

# Update data with DO predictions
mm@data <- predict_DO(mm)
Expand Down Expand Up @@ -323,7 +405,18 @@ prepdata_bayes <- function(
d = num_dates,

# Daily
n = num_daily_obs, # one value applicable to every day
n = num_daily_obs # one value applicable to every day
),

switch(
features$pool_K600,
linear = list(ln_discharge_daily = data_daily$ln.discharge.daily),
binned = list(
b = specs$K600_daily_beta_num,
discharge_bin_daily = data_daily$discharge.bin.daily)
),

list(
DO_obs_1 = array(time_by_date_matrix(data$DO.obs)[1,], dim=num_dates), # duplication of effort below should be small compared to MCMC time

# Every timestep
Expand Down Expand Up @@ -351,8 +444,8 @@ prepdata_bayes <- function(
features$pool_K600,
none=c('K600_daily_mu', 'K600_daily_sigma'),
normal=c('K600_daily_mu_mu', 'K600_daily_mu_sigma', 'K600_daily_sigma_shape', 'K600_daily_sigma_rate'),
linear=c('K600_daily_beta0_mu', 'K600_daily_beta0_sigma', 'K600_daily_beta1_mu', 'K600_daily_beta1_sigma', 'K600_daily_sigma_shape', 'K600_daily_sigma_rate'),
binned=stop('need to think about this one')),
linear=c('K600_daily_beta_mu', 'K600_daily_beta_sigma', 'K600_daily_sigma_shape', 'K600_daily_sigma_rate'),
binned=c('K600_daily_beta_mu', 'K600_daily_beta_sigma', 'K600_daily_sigma_shape', 'K600_daily_sigma_rate')),
if(features$err_obs_iid) c('err_obs_iid_sigma_shape', 'err_obs_iid_sigma_rate'),
if(features$err_proc_acor) c('err_proc_acor_phi_shape', 'err_proc_acor_phi_rate', 'err_proc_acor_sigma_shape', 'err_proc_acor_sigma_rate'),
if(features$err_proc_iid) c('err_proc_iid_sigma_shape', 'err_proc_iid_sigma_rate')
Expand Down Expand Up @@ -562,7 +655,7 @@ runstan_bayes <- function(data_list, model_path, params_out, split_dates, keep_m
stat <- val <- . <- rowname <- variable <- index <- varstat <- '.dplyr_var'

# determine how many unique nrows, & therefore data.frames, there should be
var_table <- table(gsub("\\[[[:digit:]]\\]", "", rownames(stan_mat)))
var_table <- table(gsub("\\[[[:digit:]]+\\]", "", rownames(stan_mat)))
all_dims <- unique(var_table) %>% setNames(., .)
names(all_dims)[all_dims == 1] <- "overall"
names(all_dims)[all_dims == data_list$d] <- "daily" # overrides 'overall' if d==1. that's OK
Expand All @@ -577,8 +670,8 @@ runstan_bayes <- function(data_list, model_path, params_out, split_dates, keep_m
as.data.frame(stan_mat[dim_rows,]) %>%
add_rownames() %>%
gather(stat, value=val, 2:ncol(.)) %>%
mutate(variable=gsub("\\[[[:digit:]]\\]", "", rowname),
index=if(odim == 1) 1 else sapply(strsplit(rowname, "\\[|\\]"), `[[`, 2),
mutate(variable=gsub("\\[[[:digit:]]+\\]", "", rowname),
index=if(odim == 1) 1 else as.numeric(sapply(strsplit(rowname, "\\[|\\]"), `[[`, 2)),
varstat=ordered(paste0(variable, '_', stat), varstat_order)) %>%
select(index, varstat, val) %>%
spread(varstat, val)
Expand Down
9 changes: 3 additions & 6 deletions R/metab_inputs.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ metab_inputs <- function(type=c('bayes','mle','night','Kmodel','sim'), input=c('
input <- match.arg(input)

if(input == 'specs') {
eg <- paste0("specs(mm_name('",type,"'))",
paste0("specs(mm_name('",type,"'))",
" # see ?mm_name, ?mm_specs for more options")
} else if(input %in% c('data','data_daily')) {
mfun <- paste0('metab_',type)
Expand All @@ -36,7 +36,6 @@ metab_inputs <- function(type=c('bayes','mle','night','Kmodel','sim'), input=c('
},
units = {
get_units(eg) %>%
replace(.=='', NA) %>%
unname()
},
need = {
Expand All @@ -50,12 +49,10 @@ metab_inputs <- function(type=c('bayes','mle','night','Kmodel','sim'), input=c('
}
}
)
}
}
# bind_rows %>%
# u(c(type=NA, get_units(mm_data(everything())))[names(.)])
} else if(input == 'info') {
eg <- "info may be NULL, a list, or any other data you want to attach to the output of metab()"
"info may be NULL, a list, or any other data you want to attach to the output of metab()"
}

eg
}
2 changes: 1 addition & 1 deletion R/metab_mle.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ metab_mle <- function(
}
fitting_time <- system.time({
# Check data for correct column names & units
dat_list <- mm_validate_data(data, if(missing(data_daily)) NULL else data_daily, "metab_mle")
dat_list <- mm_validate_data(if(missing(data)) NULL else data, if(missing(data_daily)) NULL else data_daily, "metab_mle")
data <- v(dat_list[['data']])
data_daily <- v(dat_list[['data_daily']])

Expand Down
2 changes: 1 addition & 1 deletion R/metab_night.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ metab_night <- function(
}
fitting_time <- system.time({
# Check data for correct column names & units
dat_list <- mm_validate_data(data, if(missing(data_daily)) NULL else data_daily, "metab_night")
dat_list <- mm_validate_data(if(missing(data)) NULL else data, if(missing(data_daily)) NULL else data_daily, "metab_night")
data <- v(dat_list[['data']])
data_daily <- v(dat_list[['data_daily']])

Expand Down
2 changes: 1 addition & 1 deletion R/metab_sim.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ metab_sim <- function(
}
fitting_time <- system.time({
# Check data for correct column names & units
dat_list <- mm_validate_data(data, if(missing(data_daily)) NULL else data_daily, "metab_sim")
dat_list <- mm_validate_data(if(missing(data)) NULL else data, if(missing(data_daily)) NULL else data_daily, "metab_sim")

# Move the simulation-relevant parameters to calc_DO_args for use in predict_DO
calc_DO_arg_names <- c('err.obs.sigma','err.obs.phi','err.proc.sigma','err.proc.phi','ODE_method')
Expand Down
1 change: 1 addition & 0 deletions R/mm_check_mcmc_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ mm_check_mcmc_file <- function(model_file) {
#' \dontrun{
#' # takes a long time, so run only when needed
#' checks <- streamMetabolizer:::mm_check_mcmc_files()
#' saveRDS(checks, file='temp/bayes_model_checks.Rds')
#' checks <- streamMetabolizer:::mm_check_mcmc_files("\\.jags")
#' checks <- streamMetabolizer:::mm_check_mcmc_files("*ko\\.stan")
#' checks <- streamMetabolizer:::mm_check_mcmc_files("b_np_.*_ko\\.stan")
Expand Down
6 changes: 3 additions & 3 deletions R/mm_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@
#' \item{ \code{DO.obs} dissolved oxygen concentration observations, \eqn{mg
#' O[2] L^{-1}}{mg O2 / L}}
#'
#' \item{ \code{GPP} daily estimates of GPP, \eqn{g m^-2 d^-1}}
#' \item{ \code{GPP} daily estimates of GPP, \eqn{g O[2] m^-2 d^-1}}
#'
#' \item{ \code{ER} daily estimates of ER, \eqn{g m^-2 d^-1}}
#' \item{ \code{ER} daily estimates of ER, \eqn{g O[2] m^-2 d^-1}}
#'
#' \item{ \code{K600} daily estimates of K600, \eqn{d^-1}}
#'
#' \item{ \code{discharge.daily} daily mean river discharge, \eqn{ft^3 s^-1}}
#' \item{ \code{discharge.daily} daily mean river discharge, \eqn{m^3 s^-1}}
#'
#' \item{ \code{velocity.daily} daily mean river flow velocity, \eqn{m s^-1}}
#'
Expand Down
7 changes: 5 additions & 2 deletions R/mm_filter_valid_days.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,12 @@ mm_filter_valid_days <- function(

# filter the daily data to match & return
if(!is.null(data_daily)) {
daily_unmatched <- as.Date(setdiff(
as.character(data_daily$date),
c(unique(format(data$solar.time, "%Y-%m-%d")), as.character(removed$date))))
daily_removed <- data.frame(
date=as.Date(setdiff(as.character(data_daily$date), c(unique(format(data$solar.time, "%Y-%m-%d")), as.character(removed$date)))),
errors="date in data_daily but not data")
date=daily_unmatched,
errors=rep("date in data_daily but not data", length(daily_unmatched)))
removed <- rbind(removed, daily_removed)
removed <- removed[order(removed$date),]
rownames(removed) <- NULL
Expand Down
Loading