Skip to content

Commit

Permalink
Merge pull request #380 from ldecicco-USGS/main
Browse files Browse the repository at this point in the history
New closest code
  • Loading branch information
ldecicco-USGS committed Feb 8, 2024
2 parents 9cc2d17 + 70fe123 commit 2a070f9
Show file tree
Hide file tree
Showing 7 changed files with 591 additions and 259 deletions.
5 changes: 4 additions & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ vignettes/figure
^\.Rproj\.user$
README.Rmd
README_files/
^Temp$
CONDUCT.md
DISCLAIMER.md
LICENSE.md
Expand Down Expand Up @@ -40,7 +41,9 @@ vignettes/pairResults2.rds
vignettes/WRTDSK.Rmd
vignettes/ChainBridge.TP.RData
vignettes/dataPreperation.Rmd
vignettes/Join_clpsest.Rmd
vignettes/Join_closest.Rmd
vignettes/Compare_QW_and_UV.Rmd
vignettes/helper_functions.R
vignettes/Method.bib
vignettes/Extend_method.bib
vignettes/Regional_studies.bib
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:

- name: Build site
run: |
install.packages('zoo') |
install.packages(c('zoo', 'data.table')) |
pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE, dest_dir = "public") |
file.copy(from = "./public/articles/logo.png",to = "./public/reference/logo.png")
shell: Rscript {0}
Expand Down
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ getready:
- mkdir -p $R_LIBS_USER
- mkdir -p $APT_CACHE
- echo "options(Ncpus=$(nproc --all), repos=c(CRAN='$CRAN'))" >> $R_PROFILE
- Rscript -e "install.packages(c('devtools', 'pkgdown', 'covr', 'connectapi', 'zoo', 'rsconnect'))"
- Rscript -e "install.packages(c('devtools', 'pkgdown', 'covr', 'connectapi', 'zoo', 'rsconnect', 'data.table'))"
- Rscript -e 'remotes::install_deps(dependencies=TRUE)'
cache:
paths:
Expand Down
4 changes: 4 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ navbar:
href: articles/AlternativeQMethod.html
- text: Annual_Hydrograph_Timing
href: articles/Annual_Hydrograph_Timing.html
- text: Join discrete and sensor data
href: articles/Join_closest.html
- text: Compare QW and UV
href: articles/Compare_QW_and_UV.html
- text: Custom Units
href: articles/units.html
- text: Bibliograpy
Expand Down
381 changes: 381 additions & 0 deletions vignettes/Compare_QW_and_UV.Rmd

Large diffs are not rendered by default.

367 changes: 111 additions & 256 deletions vignettes/Join_closest.Rmd

Large diffs are not rendered by default.

89 changes: 89 additions & 0 deletions vignettes/helper_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@

join_qw_uv <- function(qw_data, # data from readWQP
uv_flow_qw, # data from readNWISuv
hour_threshold = 24, # hours threshold for joining
join_by_qw = "ActivityStartDateTime",
join_by_uv = "dateTime",
qw_val = "ResultMeasureValue",
qw_rmk = "ResultDetectionConditionText",
qw_det_val = "DetectionQuantitationLimitMeasure.MeasureValue",
qw_val_uv, # water quality value column in uv data
qw_rmk_uv, # water quality remark column in uv data
flow_val = "X_00060_00000", # uv flow parameter
flow_rmk = "X_00060_00000_cd"){ # uv flow parameter cd

library(data.table)
req_cols <- c(join_by_qw, qw_val, qw_rmk, qw_det_val)
if(!all(req_cols %in% names(qw_data))){
stop(paste('qw_data missing columns:', req_cols[!req_cols %in% names(qw_data)]))
}

req_cols_uv <- c(join_by_uv)
if(!all(req_cols_uv %in% names(uv_flow_qw))){
stop(paste('uv_data missing columns:', req_cols_uv[!req_cols_uv %in% names(uv_flow_qw)]))
}

data.table::setDT(qw_data)[, eval(parse(text = paste("join_date :=", join_by_qw)))]

data.table::setDT(uv_flow_qw)[, eval(parse(text = paste("join_date :=", join_by_uv)))]

# rolling join
x <- uv_flow_qw[qw_data, on = .(join_date), roll = "nearest"]

setnames(x, c(qw_val, join_by_uv, join_by_qw, qw_rmk, qw_det_val),
c("val_qw","uv_date", "qw_date", "qw_rmk", "qw_det_val"))

x <- x[order(qw_date)]

x_tib <- as_tibble(x)

if(!is.na(flow_val) | flow_val != ""){
x_tib$flow_uv <- x_tib[[flow_val]]
}
if(!is.na(flow_rmk) | flow_rmk != ""){
x_tib$flow_rmk_uv <- x_tib[[flow_rmk]]
}

if(!is.na(qw_val_uv) | qw_val_uv != ""){
x_tib$qw_val_uv <- x_tib[[qw_val_uv]]
}
if(!is.na(qw_rmk_uv) | qw_rmk_uv != ""){
x_tib$qw_rmk_uv <- x_tib[[qw_rmk_uv]]
}

toMatch <- c("NON-DETECT", "NON DETECT", "NOT DETECTED",
"DETECTED NOT QUANTIFIED", "BELOW QUANTIFICATION LIMIT")

x_tib <- x_tib |>
mutate(delta_time = difftime(qw_date, uv_date, units = "hours"),
qw_val_uv = if_else(abs(as.numeric(delta_time)) >= hour_threshold,
NA, qw_val_uv),
qualifier = if_else(grepl(paste(toMatch,collapse="|"),
toupper(qw_rmk)),
"<", ""),
value = if_else(qualifier == "<", qw_det_val, val_qw),
date = as.Date(qw_date)) |>
select(any_of(c("uv_date", "qw_date", "delta_time", "date",
"qw_val_uv", "qw_rmk_uv",
"value", "qualifier",
"flow_uv", "flow_rmk_uv"))) |>
rename(dateTime = qw_date)


compressedData <- EGRET::compressData(x_tib[, c("date",
"qualifier",
"value")],
verbose = FALSE)
Sample <- EGRET::populateSampleColumns(compressedData)
Sample <- Sample |>
left_join(x_tib |>
select(-qualifier) |>
rename(qw_dateTime = dateTime,
uv_dateTime = uv_date,
Date = date,
ConcHigh = value),
by = c("Date", "ConcHigh"))

return(Sample)

}

0 comments on commit 2a070f9

Please sign in to comment.