diff --git a/.Rbuildignore b/.Rbuildignore
index 3431e72..793b48b 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -14,3 +14,4 @@ >>>>>>> 79146b21e9e5f977d8ddab23bba1f84504844c48
 ^doc$
 ^Meta$
+^Rdev$
diff --git a/.gitignore b/.gitignore
index 2118eda..471318b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,39 @@
-.Rproj.user
+# History files
 .Rhistory
+.Rapp.history
+
+# Session Data files
 .RData
-.Ruserdata
\ No newline at end of file
+
+# User-specific files
+.Ruserdata
+
+# Example code in package build process
+*-Ex.R
+
+# Output files from R CMD build
+/*.tar.gz
+
+# Output files from R CMD check
+/*.Rcheck/
+
+# RStudio files
+.Rproj.user/
+
+# produced vignettes
+vignettes/*.html
+vignettes/*.pdf
+
+# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
+.httr-oauth
+
+# knitr and R markdown default cache directories
+*_cache/
+/cache/
+
+# Temporary files created by R markdown
+*.utf8.md
+*.knit.md
+
+# Downloads
+/downloads/
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
index 790539f..81f7d58 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,7 +4,6 @@ language: R
 cache: packages
 before_install:
   - sudo apt-get install libgdal-dev
-  - R -e 'install.packages("rgdal", repos=c("http://R-Forge.R-project.org", "http://cran.rstudio.com"))'
 r_packages:
   - covr
diff --git a/DESCRIPTION b/DESCRIPTION
index 120ef12..ba1e6ca 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,11 +1,17 @@
 Package: isoWater
 Type: Package
-Title: Infer source of water using stable isotope data
-Version: 0.2.0.9000
-Author: Gabe Bowen
-Maintainer: Gabe Bowen
-Description: Bayesian inference of the source and source isotope composition of water samples that may have experienced evaporation. Original algorithms presented in Bowen et al. (2018).
-Imports: R2jags, parallel, abind, R2WinBUGS, doParallel, foreach
+Title: Discovery, Retrieval, and Analysis of Water Isotope Data
+Version: 0.2.1
+Authors@R: person("Gabriel", "Bowen", email = "gabe.bowen@utah.edu",
+    role = c("aut", "cre"))
+Description: wiDB_ functions provide an interface to the public API of
+    the wiDB: build, check, and submit queries, and receive and
+    unpack responses. Data analysis functions support Bayesian
+    inference of the source and source isotope composition of water
+    samples that may have experienced evaporation. Algorithms
+    adapted from Bowen et al. (2018).
+Imports: R2jags, parallel, abind, R2WinBUGS, doParallel, foreach,
+    httr, jsonlite
 Depends: R (>= 3.5)
 Suggests: knitr,
diff --git a/NAMESPACE b/NAMESPACE
index 67726b3..9c9d1db 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,10 +2,16 @@ export(mwlSource)
 export(mixSource)
 export(mwl)
 export(iso)
+export(wiDB_sites)
+export(wiDB_data)
+export(wiDB_values)
+importFrom(httr, GET, content)
+importFrom(jsonlite, fromJSON)
 importFrom(doParallel, registerDoParallel, stopImplicitCluster)
 importFrom(foreach, foreach, "%dopar%")
 importFrom(R2jags, jags)
 importFrom(R2WinBUGS, monitor)
 importFrom(stats, sd, var)
 importFrom(graphics, abline, lines, par, points)
-importFrom(abind, abind)
\ No newline at end of file
+importFrom(abind, abind)
+importFrom(utils, read.csv, unzip)
\ No newline at end of file
diff --git a/NEWS.md b/NEWS.md
index 1e83ed5..12eca36 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,7 +1,9 @@
 # isoWater news
 
-## isoWater 0.2.0.9000
-* Updated documentation
+## isoWater 0.2.1
+* Merged functions and documentation from the wiDButil package
+* Updated documentation and vignette
+
 ## isoWater 0.2.0
 * Reformatted code to create package
diff --git a/R/constructors.R b/R/constructors.R
index 8cc91b6..cf47b5b 100644
--- a/R/constructors.R
+++ b/R/constructors.R
@@ -25,6 +25,11 @@ mwl = function(HO, plot = TRUE){
     stop("Non-numeric values in HO")
   }
   ns = nrow(HO)
+  HO = HO[!(is.na(HO[,1]) | is.na(HO[,2])),]
+  if(nrow(HO) != ns){
+    warning("Missing values removed from HO")
+    ns = nrow(HO)
+  }
   if(ns < 3){
     stop("At least 3 sample values required")
   }
diff --git a/R/watercomp.R b/R/watercomp.R
index b7ad892..cf3c821 100644
--- a/R/watercomp.R
+++ b/R/watercomp.R
@@ -89,7 +89,7 @@ mwlSource = function(obs, MWL=c(8.01, 9.57, -8.096, 2564532.2, 5.76, 80672),
 #takes values of observed and hypothesized endmember source waters (each of type 'iso'), hypothesized EL slope,
 #prior (as relative contribution of each source to mixture), and number of parameter draws
 mixSource = function(obs, sources, slope, prior=rep(1,nrow(sources)),
-                     shp=2, ngens=1e5, ncores = 1){
+                     shp=1, ngens=1e5, ncores = 1){
 
   if(class(obs)[2] != "iso"){
     warning("Expecting iso object for obs, this argument may be
diff --git a/R/wiDBfunctions.R b/R/wiDBfunctions.R
new file mode 100644
index 0000000..cbd247e
--- /dev/null
+++ b/R/wiDBfunctions.R
@@ -0,0 +1,223 @@
+#####
+#Validate query input
+#####
+wiDB_validate = function(minLat, maxLat, minLong, maxLong, minElev, maxElev,
+                         minDate, maxDate, countries, states, types, projects){
+
+  qStr = ""
+
+  if(!is.null(minLat)){
+    if(class(minLat) != "numeric"){stop("minLat must be numeric")}
+    qStr = paste0(qStr, "&minLat=", minLat)
+  }
+  if(!is.null(maxLat)){
+    if(class(maxLat) != "numeric"){stop("maxLat must be numeric")}
+    qStr = paste0(qStr, "&maxLat=", maxLat)
+  }
+  if(!is.null(minLong)){
+    if(class(minLong) != "numeric"){stop("minLong must be numeric")}
+    qStr = paste0(qStr, "&minLong=", minLong)
+  }
+  if(!is.null(maxLong)){
+    if(class(maxLong) != "numeric"){stop("maxLong must be numeric")}
+    qStr = paste0(qStr, "&maxLong=", maxLong)
+  }
+  if(!is.null(minElev)){
+    if(class(minElev) != "numeric"){stop("minElev must be numeric")}
+    qStr = paste0(qStr, "&minElev=", minElev)
+  }
+  if(!is.null(maxElev)){
+    if(class(maxElev) != "numeric"){stop("maxElev must be numeric")}
+    qStr = paste0(qStr, "&maxElev=", maxElev)
+  }
+  if(!is.null(minDate)){
+    if(class(minDate) != "character"){stop("minDate must be string")}
+    td = c(as.numeric(substr(minDate, 1, 4)), as.numeric(substr(minDate, 6, 7)),
+           as.numeric(substr(minDate, 9, 10)))
if(NA %in% td){stop("minDate format must be YYYY-MM-DD")} + qStr = paste0(qStr, "&minDate=", minDate) + } + if(!is.null(maxDate)){ + if(class(maxDate) != "character"){stop("maxDate must be string")} + td = c(as.numeric(substr(maxDate, 1, 4)), as.numeric(substr(maxDate, 6, 7)), + as.numeric(substr(maxDate, 9, 10))) + if(NA %in% td){stop("maxDate format must be YYYY-MM-DD")} + qStr = paste0(qStr, "&maxDate=", maxDate) + } + if(!is.null(countries)){ + if(class(countries) != "character"){stop("countries must be string")} + countries = gsub(" ", "", countries) + countries = gsub(" ", "", countries) + if(length(countries > 1)){ + countries = paste0(countries, collapse = ",") + } + qStr = paste0(qStr, "&countries=", countries) + } + if(!is.null(states)){ + if(class(states) != "character"){stop("states must be string")} + states = gsub(" ", "", states) + states = gsub(" ", "", states) + if(length(states > 1)){ + states = paste0(states, collapse = ",") + } + qStr = paste0(qStr, "&states=", states) + } + if(!is.null(types)){ + if(class(types) != "character"){stop("types must be string")} + types = gsub(" ", "", types) + types = gsub(" ", "", types) + if(length(types > 1)){ + types = paste0(types, collapse = ",") + } + qStr = paste0(qStr, "&types=", types) + } + if(!is.null(projects)){ + if(class(projects) != "character"){stop("projects must be string")} + projects = gsub(" ", "", projects) + projects = gsub(" ", "", projects) + if(length(types > 1)){ + types = paste0(types, collapse = ",") + } + qStr = paste0(qStr, "&projects=", projects) + } + + if(nchar(qStr) == 0){stop("No query arguments provided")} + + qStr = paste0("?", substr(qStr, 2, nchar(qStr))) + + return(qStr) +} + +##### +#Find sites +##### +wiDB_sites = function(minLat = NULL, maxLat = NULL, minLong = NULL, maxLong = NULL, + minElev = NULL, maxElev = NULL, minDate = NULL, maxDate = NULL, + countries = NULL, states = NULL, types = NULL, projects = NULL){ + + qStr = wiDB_validate(minLat, maxLat, minLong, maxLong, minElev, maxElev, + minDate, maxDate, countries, states, types, projects) + + baseStr = "https://wateriso.utah.edu/api/v1/sites.php" + q = paste0(baseStr, qStr) + d = GET(q) + + if(d$status_code != 200){stop(paste("Request returned error code", d$status_code))} + + resp = fromJSON(content(d, as = "text", encoding = "UTF-8")) + + if(length(resp$sites) == 0){ + warning("No sites returned") + return(NULL) + } + + return(resp$sites) +} + +##### +#Obtain data +##### +wiDB_data = function(minLat = NULL, maxLat = NULL, minLong = NULL, maxLong = NULL, + minElev = NULL, maxElev = NULL, minDate = NULL, maxDate = NULL, + countries = NULL, states = NULL, types = NULL, projects = NULL, + fields = NULL, tmpdir = tempdir(), clean = TRUE){ + + qStr = wiDB_validate(minLat, maxLat, minLong, maxLong, minElev, maxElev, + minDate, maxDate, countries, states, types, projects) + + if(!dir.exists(tmpdir)){ + warning("Directory doesn't exist, trying to create") + dir.create(tmpdir) + if(!dir.exists(tmpdir)){stop("Unable to create directory")} + } + + if(class(clean) != "logical"){stop("clean must be TRUE/FALSE")} + + flist = c("Site_ID", "Site_Name", "Latitude", "Longitude", "Elevation", + "Sample_ID", "Type", "Start_Date", "Start_Time_Zone", + "Collection_Date", "Collection_Time_Zone", "Phase", + "Depth_meters", "Sample_Comments", "d2H", "d18O", + "d2H_Analytical_SD", "d18O_Analytical_SD", "WI_Analysis_Source", + "Project_ID") + + if(!is.null(fields)){ + if(class(fields) != "character"){stop("fields must be a string")} + fields = gsub(" ", "", fields) 
+    fields = gsub(" ", "", fields)
+    fels = strsplit(fields, ",")
+    fels = fels[[1]]
+    for(i in 1:length(fels)){
+      if(!(fels[i] %in% flist)){stop(paste("Value", i, "in fields is not a valid field name"))}
+    }
+    qStr = paste0(qStr, "&return=", fields)
+  }
+
+  baseStr = "https://wateriso.utah.edu/api/v1/download.php"
+  q = paste0(baseStr, qStr)
+  g = GET(q)
+
+  if(g$status_code != 200){stop(paste("Request returned error code", g$status_code))}
+
+  fn = g$headers$`content-disposition`
+  fn = strsplit(fn, "=")[[1]][2]
+  writeBin(g$content, paste0(tmpdir, "/", fn))
+
+  #unzip and output .csv
+  unzip(paste0(tmpdir, "/", fn), exdir = paste0(tmpdir, "/downloads"))
+
+  #get and order file list
+  froot = strsplit(fn, "-")[[1]][1]
+  df = paste0(tmpdir, "/downloads/", froot, "-data.csv")
+  pf = paste0(tmpdir, "/downloads/", froot, "-project.csv")
+
+  if(file.size(df) == 0){
+    file.remove(c(paste0(tmpdir, "/", fn), df, pf))
+    warning("No records returned")
+    return(NULL)
+  }
+
+  #read in data
+  d = read.csv(df)
+
+  #read in projects
+  p = read.csv(pf)
+
+  file.remove(paste0(tmpdir, "/", fn))
+
+  if(clean){
+    file.remove(c(df, pf))
+  }
+
+  return(list("data" = d, "projects" = p))
+}
+
+#####
+#Get field values
+#####
+wiDB_values = function(fields){
+
+  if(!is.character(fields)){
+    stop("fields values must be character strings")
+  }
+  for(i in 1:length(fields)){
+    if(!(fields[i] %in% c("countries", "states", "types", "projects",
+                          "Site_ID", "Sample_ID", "WI_Analysis_ID",
+                          "Climate_ID"))){
+      stop("One or more fields values not supported")
+    }
+  }
+
+  baseStr = "https://wateriso.utah.edu/api/v1/values.php?fields="
+  q = baseStr
+  for(i in fields){
+    q = paste0(q, i, ",")
+  }
+  q = substr(q, 1, nchar(q) - 1)
+  d = GET(q)
+
+  if(d$status_code != 200){stop(paste("Request returned error code", d$status_code))}
+
+  resp = fromJSON(content(d, as = "text", encoding = "UTF-8"))
+
+  return(resp)
+}
diff --git a/README.md b/README.md
index abd733f..9be0c94 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,15 @@
 # isoWater
 
-An R package for inferring the source of waters based on their isotopic composition and allowing for isotope effects associated with partial evaporation. Install from GitHub using the devtools package:
+An R package for obtaining and analyzing water isotope data. Includes interfaces to the Waterisotopes Database and tools for inferring the source of waters based on their isotopic composition, accounting for isotope effects associated with partial evaporation. Install the latest release from GitHub using the devtools package:
 
 ```
 library(devtools)
-install_GitHub("SPATIAL-Lab/isoWater")
+install_github("SPATIAL-Lab/isoWater@*release")
 library(isoWater)
 ```
 
 References:
 
-Bowen, G. J., Putman, A., Brooks, J. R., Bowling, D. R., Oerter, E. J., & Good, S. P. (2018). Inferring the source of evaporated waters using stable H and O isotopes. Oecologia, 187(4), 1025-1039.
\ No newline at end of file
+Bowen, G. J., Putman, A. L., Brooks, J. R., Bowling, D. R., Oerter, E. J., & Good, S. P. (2018) Inferring the source of evaporated waters using stable H and O isotopes. Oecologia, 187(4), 1025-1039.
+
+Putman, A. L., & Bowen, G. J. (2019) A global database of the stable isotopic ratios of meteoric and terrestrial waters. Hydrology and Earth System Sciences 23(10), 4389-4396.
\ No newline at end of file
diff --git a/Rdev/checkCountries.R b/Rdev/checkCountries.R
new file mode 100644
index 0000000..76305d3
--- /dev/null
+++ b/Rdev/checkCountries.R
@@ -0,0 +1,202 @@
+library(openxlsx)
+library(sp)
+library(rgdal)
+options(stringsAsFactors = FALSE)
+setwd("C:/Users/gjbowen/Dropbox/HypoMirror/WIDB/wiDB_data_staging/to_work_on")
+
+#combines old and new comments; defined up front because it is called below
+annotate = function(old, new){
+  if(is.na(old)){
+    return(new)
+  }else{
+    return(paste(old, new, sep = "; "))
+  }
+}
+
+#File to read
+ifile = "Isoforensics_dbMatch.xlsx"
+
+#Read in sites
+sites = read.xlsx(ifile, sheet = "Sites")
+
+#Read in countries, use high res for initial matching
+cty = readOGR("../aux_data/TM_WORLD_BORDERS-0.3.shp")
+
+#Make sites spatial; assumes 1) all sites have lat/lon, 2) lat/lon are WGS84
+sites.sp = SpatialPointsDataFrame(data.frame(sites$Longitude, sites$Latitude),
+                                  data = sites, proj4string = CRS(proj4string(cty)))
+
+#Spatial intersect gets map values for each point
+mcs = over(sites.sp, cty)
+#Use this to track whether the site has already been resolved, allowing
+#stop/restart for developers
+mcs$RES = rep(FALSE)
+
+#Count the number of missing or mis-matched values
+ct = 0
+for(i in seq_along(sites.sp)){
+  if(is.na(sites.sp$Country[i]) | is.na(mcs$ISO2[i])){
+    ct = ct + 1
+  }else if(sites.sp$Country[i] != mcs$ISO2[i]){
+    ct = ct + 1
+  }
+}
+
+#Does the user want to review the sites?
+g = readline(paste(ct, "sites to review. Continue (y/n)? "))
+
+#If so, here we go...
+if(g == "y" | g == "Y"){
+  #Read in countries, use low res for map display
+  cty = readOGR("../aux_data/TM_WORLD_BORDERS_SIMPL-0.3.shp")
+
+  for(i in seq_along(sites.sp)){
+    #Grab the map code for the site
+    map.code = mcs$ISO2[i]
+    #If there's no map code for the coordinates set to "MISSING"
+    if(is.na(map.code)){map.code = "MISSING"}
+
+    #If there is no country code for the site or
+    #if the country code doesn't match the map
+    if(!mcs$RES[i] & (is.na(sites.sp$Country[i]) |
+                      sites.sp$Country[i] != map.code)){
+      cat("Point or country may be incorrect\n")
+      print(sites.sp@data[i, c(2,3,4,7,8,9)])
+
+      #If the site's country code is not ISO2 compliant
+      if(!(sites.sp$Country[i] %in% cty$ISO2)){
+        #Plot point and matching country
+        plot(cty, xlim=c(max(sites.sp$Longitude[i] - 30, -180),
+                         min(sites.sp$Longitude[i] + 30, 180)),
+             ylim=c(max(sites.sp$Latitude[i] - 30, -90),
+                    min(sites.sp$Latitude[i] + 30, 90)))
+        points(sites.sp[i,], pch=21, bg="red")
+
+        #If there is no map code for location
+        if(map.code == "MISSING"){
+          cat("Unknown country code. No new code available.\n")
+          f = readline("Delete (d), type new code (t), or return to keep. ")
+          if(f == "d"){
+            sites.sp$Country[i] = NA
+            sites.sp$Site_Comments[i] = annotate(sites.sp$Site_Comments[i],
+                                                 "unknown country code deleted")
+          }else if(f == "t"){
+            c = readline("Type 2-letter ISO code: ")
+            sites.sp$Country[i] = c
+            sites.sp$Site_Comments[i] = annotate(sites.sp$Site_Comments[i],
+                                                 "country code manually edited")
+          }
+
+        #If there is a map code for the location
+        }else{
+          cat("Unknown country code. Replace with", map.code, "?\n")
+          f = readline("Replace (r), type new code (t), or return to keep. ")
") + if(f == "r"){ + sites.sp$Country[i] = map.code + sites.sp$Site_Comments[i] = annotate(sites.sp$Site_Comments[i], + "country code updated based on lat/lon coordinates") + }else if(f == "t"){ + c = readline("Type 2-letter ISO code: ") + sites.sp$Country[i] = c + annotate(sites.sp$Site_Comments[i], "country code manually edited") + } + } + + #If the code is complient but doesn't match + }else{ + #Report map code + cat("Map code is", map.code, "\n") + #Report country name equivalent to site code + cind = match(sites.sp$Country[i], cty$ISO2) + if(!is.na(cind)){ + cat("Country name for site code is", cty$NAME[cind], "\n") + } + #Get coordinates of country matching code for plotting + cty.bb = bbox(cty[cty$ISO2 == sites.sp$Country[i],]) + #Plot point and matching country + plot(cty, xlim=c(max(min(cty.bb[1,1], sites.sp$Longitude[i], + sites.sp$Longitude[i] - 20), -180), + min(max(cty.bb[1,2], sites.sp$Longitude[i], + sites.sp$Longitude[i] + 20), 180)), + ylim=c(max(min(cty.bb[2,1], sites.sp$Latitude[i], + sites.sp$Latitude[i] - 20), -90), + min(max(cty.bb[2,2], sites.sp$Latitude[i], + sites.sp$Latitude[i] + 20), 90))) + plot(cty[cty$ISO2 == sites.sp$Country[i],], col="grey", add=TRUE) + points(sites.sp[i,], pch=21, bg="red") + #Prompt for changes + cat("Fix common problems: + invert latitude (a), + invert longitude (o), + invert both (b), + swap lat/lon (s), + type lat/lon (t), + type country code (c), + use country code from map (u)?\n") + f = readline("Selection (or return for no action): ") + #If latitude is inverted fix and annotate + if(f == "a"){ + sites.sp$Latitude[i] = -sites.sp$Latitude[i] + sites.sp$Site_Comments[i] = annotate(sites.sp$Site_Comments[i], + "latitude sign inverted") + #If longitude is inverted fix and annotate + }else if(f == "o"){ + sites.sp$Longitude[i] = -sites$Longitude[i] + sites.sp$Site_Comments[i] = annotate(sites.sp$Site_Comments[i], + "longitude sign inverted") + }else if(f == "b"){ + sites.sp$Latitude[i] = -sites.sp$Latitude[i] + sites.sp$Longitude[i] = -sites$Longitude[i] + sites.sp$Site_Comments[i] = annotate(sites.sp$Site_Comments[i], + "latitude and longitude signs inverted") + #If both are inverted fix and annotate + }else if(f == "s"){ + sites.sp$Latitude[i] = sites.sp$Longitude[i] + sites.sp$Longitude[i] = sites$Latitude[i] + sites.sp$Site_Comments[i] = annotate(sites.sp$Site_Comments[i], + "latitude and longitude switched") + #If user wants to manually enter values + }else if(f == "t"){ + lat = readline("Type latitude (S negative) or return to keep value: ") + lon = readline("Type longidude (W negative) or return to keep value: ") + if(lat != ""){ + sites.sp$Latitude[i] = as.numeric(lat) + } + if(lon != ""){ + sites.sp$Longitude[i] = as.numeric(lon) + } + sites.sp$Site_Comments[i] = annotate(sites.sp$Site_Comments[i], + "latitude and/or longitude manually edited") + #If country code is wrong fix it manually + }else if(f == "c"){ + c = readline("Type new country code: ") + sites.sp$Country[i] = c + sites.sp$Site_Comments[i] = annotate(sites.sp$Site_Comments[i], + "country code manually edited") + #If country code is wrong fix it by using map value + }else if(f == "u"){ + sites.sp$Country[i] = map.code + sites.sp$Site_Comments[i] = annotate(sites.sp$Site_Comments[i], + "country code updated based on lat/lon coordinates") + } + } + } + mcs$RES[i] = TRUE + } +} + +#Output file name +ofile = sub(".xlsx", "_checkCountries.xlsx", ifile) + +#Get the input file +s = getSheetNames(ifile) +shts = list() +for(i in seq(s)){ + shts[[i]] = read.xlsx(ifile, i) +} 
+names(shts) = s
+
+#Replace the old site data with updated records
+shts$Sites = sites.sp@data
+
+#Write output
+write.xlsx(shts, ofile)
diff --git a/Rdev/checkElevations.R b/Rdev/checkElevations.R
new file mode 100644
index 0000000..dd62d08
--- /dev/null
+++ b/Rdev/checkElevations.R
@@ -0,0 +1,143 @@
+library(openxlsx)
+library(httr)
+library(jsonlite)
+options(stringsAsFactors = FALSE)
+setwd("C:/Users/gjbowen/Dropbox/HypoMirror/WIDB/wiDB_data_staging/to_work_on")
+
+#combines old and new comments; defined up front because it is called below
+annotate = function(old, new){
+  if(is.na(old)){
+    return(new)
+  }else{
+    return(paste(old, new, sep = "; "))
+  }
+}
+
+#Free elevation API: https://elevation-api.io/; limit 2000 req/min from
+#5km DEM w/ key
+api_root = "https://elevation-api.io/api/elevation?points="
+api_key = "&key=kVzeefC0JPIIcfaVaC2r-qofepiJe3"
+
+#File to read
+ifile = "Isoforensics_dbMatch_checkCountries.xlsx"
+
+#Read in sites
+sites = read.xlsx(ifile, sheet = "Sites")
+
+#Space for values
+elevs = data.frame(character(), numeric())
+
+#Chunks of 10 sites; 10 points allowed per GET
+for(i in 1:(ceiling(nrow(sites)/10))){
+  #Subset sites for this chunk
+  sites.sub = sites[((i-1)*10+1):min((i*10), nrow(sites)),]
+
+  #Space for point coordinates string
+  pts = character()
+
+  #Build character string of coordinates
+  for(j in seq(sites.sub$Site_ID)){
+    pts = paste0(pts, "(", sites.sub$Latitude[j], ",", sites.sub$Longitude[j], "),")
+  }
+  #Remove final comma
+  pts = substr(pts, 1, nchar(pts)-1)
+
+  #Full GET request
+  api_call = paste0(api_root, pts, api_key)
+
+  #Make the request
+  e = GET(api_call)
+
+  #Check status
+  if(e$status_code != 200){stop(paste("Error code", e$status_code,
+                                      "returned by server"))}
+  #Extract data from the response body
+  ep = parse_json(content(e, as = "text", encoding = "UTF-8"))
+
+  #Add Site_ID and elevation to data frame of results
+  for(j in seq(sites.sub$Site_ID)){
+    elevs = rbind(elevs, c(sites.sub$Site_ID[j], ep$elevations[[j]]$elevation))
+  }
+}
+
+names(elevs) = c("Site_ID", "Elev")
+elevs$Elev = as.numeric(elevs$Elev)
+
+elevs.sub = elevs[elevs$Elev / sites$Elevation_mabsl < 0.35 &
+                    elevs$Elev / sites$Elevation_mabsl > 0.25,]
+sites.sub = sites[elevs$Elev / sites$Elevation_mabsl < 0.35 &
+                    elevs$Elev / sites$Elevation_mabsl > 0.25,]
+
+elevs.sub = elevs.sub[!is.na(elevs.sub$Site_ID),]
+sites.sub = sites.sub[!is.na(sites.sub$Site_ID),]
+
+#First check whether it looks like all elevations are in feet
+#If so update all non-NA elevations and annotate
+if(nrow(sites.sub) > 0.75 * nrow(sites)){
+  yn = readline("It looks like elevation is in feet, not meters. Convert to meters (y/n)? ")
+  if(yn == "y" | yn == "Y"){
+    sites$Elevation_mabsl = sites$Elevation_mabsl * 0.3048
+    for(i in seq(sites$Site_ID)){
+      if(!is.na(sites$Elevation_mabsl[i])){
+        sites$Site_Comments[i] = annotate(sites$Site_Comments[i],
+                                          "elevation converted from feet to meters")
+      }
+    }
+  }
+#review and update requested elevations
+#and annotate which ones were changed
+}else{
+  for(i in seq(sites.sub$Site_ID)){
+    print(sites.sub[i,c(2,3,4,5,9,10)])
+    yn = readline("Elevation may be in feet, convert to meters (y/n)? ")
") + if(yn == "y"){ + sites[sites$Site_ID == sites.sub$Site_ID[i],]$Elevation_mabsl = + sites.sub$Elevation_mabsl[i] * 0.304 + sites[sites$Site_ID == sites.sub$Site_ID[i],]$Site_Comments = + annotate(sites[sites$Site_ID == sites.sub$Site_ID[i],]$Site_Comments, + "elevation converted from feet to meters") + } + } +} + +elevs.sub = elevs[abs(elevs$Elev - sites$Elevation_mabsl) > 500,] +sites.sub = sites[abs(elevs$Elev - sites$Elevation_mabsl) > 500,] + +elevs.sub = elevs.sub[!is.na(elevs.sub$Site_ID),] +sites.sub = sites.sub[!is.na(sites.sub$Site_ID),] + +for(i in seq(sites.sub$Site_ID)){ + print(sites.sub[i,c(1,2,3,4,5,9,10)]) + print(elevs.sub[i,]) + yn = readline("Elevation may be wrong, replace from DEM (y/n)? ") + if(yn == "y" | yn == "Y"){ + sites[sites$Site_ID == sites.sub$Site_ID[i],]$Elevation_mabsl = + sites.sub$Elevation_mabsl[i] * 0.304 + sites[sites$Site_ID == sites.sub$Site_ID[i],]$Site_Comments = + annotate(sites[sites$Site_ID == sites.sub$Site_ID[i],]$Site_Comments, + "elevation replaced from 5km DEM") + } +} + +#Output file name +ofile = sub(".xlsx", "_checkElevations.xlsx", ifile) + +#Get the input file +s = getSheetNames(ifile) +shts = list() +for(i in seq(s)){ + shts[[i]] = read.xlsx(ifile, i) +} +names(shts) = s + +#Replace the old site data with updated records +shts$Sites = sites + +#Write output +write.xlsx(shts, ofile) + +#combines old and new comments +annotate = function(old, new){ + if(is.na(old)){ + return(new) + }else{ + return(paste(old, new, sep = "; ")) + } +} \ No newline at end of file diff --git a/Rdev/cleanComments.R b/Rdev/cleanComments.R new file mode 100644 index 0000000..a0ef7d2 --- /dev/null +++ b/Rdev/cleanComments.R @@ -0,0 +1,44 @@ +library(openxlsx) +library(sp) +library(rgdal) +options(stringsAsFactors = FALSE) +setwd("C:/Users/gjbowen/Dropbox/HypoMirror/WIDB/wiDB_data_staging/to_work_on") + +#File to read +ifile = "Isoforensics_dbMatch_checkCountries_checkElevations.xlsx" + +#Read in sites +sites = read.xlsx(ifile, sheet = "Sites") + +#Replace some bad characters +sites$Site_Comments = gsub("\n", " ", sites$Site_Comments) +sites$Site_Comments = gsub("'", "", sites$Site_Comments) +sites$Site_Comments = gsub(",", ";", sites$Site_Comments) + +#Condense internal whitespace +sc = sites$Site_Comments +for(i in 1:20){ + scn = gsub(" ", " ", sc) + if(identical(scn, sc)){break()} + sc = scn +} + +#Add the edited comments back to the DF, also trim any trailing or leading ws +sites$Site_Comments = trimws(scn) + +#Output file name +ofile = sub(".xlsx", "_cleanComments.xlsx", ifile) + +#Get the input file +s = getSheetNames(ifile) +shts = list() +for(i in seq(s)){ + shts[[i]] = read.xlsx(ifile, i) +} +names(shts) = s + +#Replace the old site data with updated records +shts$Sites = sites + +#Write output +write.xlsx(shts, ofile) diff --git a/Rdev/dbMatch.R b/Rdev/dbMatch.R new file mode 100644 index 0000000..525eac6 --- /dev/null +++ b/Rdev/dbMatch.R @@ -0,0 +1,154 @@ +library(devtools) +install_github("SPATIAL-Lab/wiDButil", force = TRUE) +library(wiDButil) +library(openxlsx) +options(stringsAsFactors = FALSE) +setwd("C:/Users/gjbowen/Dropbox/HypoMirror/WIDB/wiDB_data_staging/to_work_on") + +newSites = read.xlsx("Isoforensics_2019_processed.xlsx", 1) +newSamples = read.xlsx("Isoforensics_2019_processed.xlsx", 2) +newWI = read.xlsx("Isoforensics_2019_processed.xlsx", 3) +newSamples$Collection_Date = convertToDate(newSamples$Collection_Date) + +#space to store matching records +dupes = data.frame(character(), character(), character(), character(), 
+                   character(), character(), character(), character(), character(),
+                   character(), character())
+
+#cycle through sites
+for(i in seq(newSites$Site_ID)){
+  #using i = 1361 as test
+  #get site info and all new sample and wi data for that site
+  site = newSites[i, c(1, 3, 4)]
+  samples = newSamples[newSamples$Site_ID == site$Site_ID, c(1, 3, 4, 7, 15)]
+  wi = newWI[newWI$Sample_ID %in% samples$Sample_ID, c(1, 2, 3, 4)]
+
+  if(nrow(samples) > 0 & nrow(wi) > 0){
+    #match based on site location and sample date range
+    if(!is.na(min(samples$Collection_Date))){
+      wiDB = wiDB_data(minLat = site$Latitude - 0.1,
+                       maxLat = site$Latitude + 0.1,
+                       minLong = site$Longitude - 0.1,
+                       maxLong = site$Longitude + 0.1,
+                       minDate = as.character(min(samples$Collection_Date)),
+                       maxDate = as.character(max(samples$Collection_Date)))
+    }else{
+      wiDB = NULL
+    }
+
+    #cycle through retrieved records
+    if(!is.null(wiDB)){
+      wiDBdata = wiDB[[1]]
+
+      wiDBdata$Collection_Date = as.Date(wiDBdata$Collection_Date)
+
+      #find new samples that are potential dupes based on same date
+      samples.dup = samples[samples$Collection_Date %in%
+                              wiDBdata$Collection_Date,]
+
+      #cycle through potential duplicates
+      for(j in seq(samples.dup$Sample_ID)){
+
+        #pull out isotope info for new samples that are potential dups
+        samples.dup.wi = wi[wi$Sample_ID == samples.dup$Sample_ID[j],]
+
+        #find old samples that are date matches for this new sample
+        wiDB.dup = wiDBdata[wiDBdata$Collection_Date ==
+                              samples.dup$Collection_Date[j],]
+
+        #compare isotope values for old and new
+        for(k in seq(samples.dup.wi$WI_Analysis_ID)){ #new wi values
+          for(l in seq(wiDB.dup$Site_ID)){ #old wi values
+            #if both have d2H values
+            if(!is.na(samples.dup.wi$d2H[k]) & !is.na(wiDB.dup$d2H[l])){
+              #if those d2H values are equal
+              if(round(samples.dup.wi$d2H[k], 1) == round(wiDB.dup$d2H[l], 1)){
+                #if neither has d18O values
+                if(is.na(samples.dup.wi$d18O[k]) & is.na(wiDB.dup$d18O[l])){
+                  #add the new wi row to the list of duplicates
+                  dupes = rbind(dupes, data.frame(samples.dup[j,],
+                                                  samples.dup.wi[k,1],
+                                                  wiDB.dup[l,c(1, 6, 7, 10, 14)]))
+                #If both have d18O values
+                }else if(!is.na(samples.dup.wi$d18O[k]) &
+                         !is.na(wiDB.dup$d18O[l])){
+                  #if those d18O values are equal
+                  if(round(samples.dup.wi$d18O[k], 1) ==
+                     round(wiDB.dup$d18O[l], 1)){
+                    #add the new wi row to the list of duplicates
+                    dupes = rbind(dupes, data.frame(samples.dup[j,],
+                                                    samples.dup.wi[k,1],
+                                                    wiDB.dup[l,c(1, 6, 7, 10, 14)]))
+                  }
+                }
+              }
+            #if neither has d2H values
+            }else if(is.na(samples.dup.wi$d2H[k]) & is.na(wiDB.dup$d2H[l])){
+              #If both have d18O values
+              if(!is.na(samples.dup.wi$d18O[k]) &
+                 !is.na(wiDB.dup$d18O[l])){
+                #if those d18O values are equal
+                if(round(samples.dup.wi$d18O[k], 1) ==
+                   round(wiDB.dup$d18O[l], 1)){
+                  #add the new wi row to the list of duplicates
+                  dupes = rbind(dupes, data.frame(samples.dup[j,],
+                                                  samples.dup.wi[k,1],
+                                                  wiDB.dup[l,c(1, 6, 7, 10, 14)]))
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+####end of the sites loop
+
+#review matches
+names(dupes) = c("Sample_ID_new", "Site_ID_new", "Type_new", "Date_new",
+                 "Comments_new", "WI_ID_new", "Site_ID_old", "Sample_ID_old",
+                 "Type_old", "Date_old", "Comments_old")
+View(dupes)
+
+#remove duplicate analyses
+newWI.clean = newWI[!(newWI$WI_Analysis_ID %in% dupes$WI_ID_new),]
+#now remove orphaned samples and sites
+newSamples.clean = newSamples[newSamples$Sample_ID %in% newWI.clean$Sample_ID,]
+newSites.clean = newSites[newSites$Site_ID %in% newSamples.clean$Site_ID,]
+
+#for each sample with one or more wi matches,
if all matches have the same +#old Sample_ID assume that any other new wi data from that new sample +#match to the same old sample. Remove new sample and associate new wi data +#with the old sample +for(i in unique(dupes$Sample_ID_new)){ + old.ids = unique(dupes[dupes$Sample_ID_new == i,]$Sample_ID_old) + if(length(old.ids) == 1){ + if(nrow(newSamples.clean[newSamples.clean$Sample_ID == i,]) > 0){ + newSamples.clean = newSamples.clean[newSamples.clean$Sample_ID != i,] + } + if(nrow(newWI.clean[newWI.clean$Sample_ID == i,]) > 0){ + newWI.clean[newWI.clean$Sample_ID == i,]$Sample_ID = old.ids + } + } +} + +#same for sites +for(i in unique(dupes$Site_ID_new)){ + old.ids = unique(dupes[dupes$Site_ID_new == i,]$Site_ID_old) + if(length(old.ids) == 1){ + if(nrow(newSites.clean[newSites.clean$Site_ID == i,]) > 0){ + newSites.clean = newSites.clean[newSites.clean$Site_ID != i,] + } + if(nrow(newSamples.clean[newSamples.clean$Site_ID == i,]) > 0){ + newSamples.clean[newSamples.clean$Site_ID == i,]$Site_ID = old.ids + } + } +} + +sheets = list(Sites = newSites.clean, Samples = newSamples.clean, + Water_Isotope_Data = newWI.clean) +write.xlsx(sheets, "Isoforensics_dbMatch.xlsx") diff --git a/Rdev/shiny sort app.R b/Rdev/shiny sort app.R new file mode 100644 index 0000000..fc9d8b7 --- /dev/null +++ b/Rdev/shiny sort app.R @@ -0,0 +1,668 @@ +library(shiny) +library(readxl) +runApp( + list( + ui = fluidPage( + #options(stringsAsFactors = FALSE), + shinyalert::useShinyalert(), + titlePanel(""), + tabsetPanel( + tabPanel("File Input", fluid = TRUE, + sidebarLayout( + sidebarPanel( + checkboxInput('header', "Contains header in the first row"), + checkboxInput('one', "Data is stored on one sheet"), + fileInput('file', 'Choose a xlsx file', accept = c(".xlsx"), ), + ), + + mainPanel( + tableOutput('table') + ) + ) + ), + tabPanel("Sites", fluid = TRUE, + sidebarLayout( + sidebarPanel( + p("Choose column that best fits, otherwise, choose NA"), + uiOutput('id',), + p("If Site_ID is NA, please enter an ID prefix. 
Otherwise, leave empty."), + uiOutput('projectname',), + uiOutput('sitename',), + uiOutput('latitude',), + uiOutput('longitude',), + uiOutput('elevation',), + uiOutput('address',), + uiOutput('city',), + uiOutput('state',), + uiOutput('country',), + uiOutput('scomment',), + actionButton('done', 'Next') + + ), + mainPanel( + tableOutput('siteTable') + ) + ) + ), + tabPanel("Samples", fluid = TRUE, + sidebarLayout( + sidebarPanel( + p("Choose column that best fits, otherwise, choose NA"), + uiOutput('sid',), + uiOutput('type',), + uiOutput('startD',), + uiOutput('startTZ',), + uiOutput('collectionD',), + uiOutput('collectionTZ',), + uiOutput('sampleVol',), + uiOutput('collectT',), + uiOutput('phase',), + uiOutput('depth',), + uiOutput('sampleSource',), + uiOutput('sampIgnore',), + uiOutput('sampCom',), + uiOutput('projectID',), + actionButton('done1', 'Next') + ), + mainPanel( + tableOutput('sampleTable') + ) + ) + ), + tabPanel("Export", fluid = TRUE, + sidebarLayout( + sidebarPanel( + downloadButton('d1', 'Export') + ), + mainPanel( + #tabsetPanel(type = "tabs", + #tabPanel("Sites", tableOutput("ss"))) + ) + ) + + ) + ), + + + + ), + server = function(input, output){ + options(stringsAsFactors = FALSE) + + #list of sheets for download + sheets_list <- reactive({ + list( + Sites = sitesSheet(), + Samples = sampleSheet() + ) + }) + + + conSites = function(sites, samples, siteField = "Site_ID", latField = "Latitude", lonField = "Longitude"){ + sites = sites[order(sites[,4]),] + sites = sites[order(sites[,3]),] + for(i in 1:nrow(sites)){ + if(i == 1){ + cid = sites[i, 1] + } else if(sites[i, 3] != sites[i-1, 3] | sites[i, 4] != sites[i-1, 4]){ + cid = sites[i, 1] + } else{ + oid = sites[i, 1] + sites[i, 1] = cid + ind = samples[,3] == oid + samples[ind, 3] = cid + } + } + sites = sites[!duplicated(sites[,1]),] + return(list(sites, samples)) + } + + finishedSheets <- reactive({ + ss <- sitesSheet() + sms <- sampleSheet() + s <- conSites(ss, sms) + }) + + output$ss <- renderTable(finishedSheets()) + + #downloads sheets + output$d1 <- downloadHandler( + filename = function() {"new1.xlsx"}, + content = function(file) { + writexl::write_xlsx(sheets_list(), file)} + ) + + #takes in user input of table + data <- reactive({ + file1 <- input$file + if(is.null(file1)){return()} + file.rename(file1$datapath, + paste(file1$datapath, ".xlsx", sep="")) + if(input$one == TRUE && input$header == TRUE){ + read_excel(paste(file1$datapath, ".xlsx", sep=""), 1, col_names = input$header, ) + } + else{ + shinyalert::shinyalert("Error", "Please reformat file so that all data is stored on one sheet and header is in the first row.", type= "error") + } + + }) + + #siteid column chosen by user. if "N/A" then we will assign siteids. 
+    siteid <- reactive({
+      sid <- input$siteID
+      df <- data()
+      pn <- input$pname
+      if(sid == "N/A"){
+        sn = seq_len(nrow(df))
+        sn = formatC(sn, width = 4, format = "d", flag = "0")
+        SITEID <- data.frame(paste0(pn, "_site_", sn))
+      }
+      else{
+        SITEID <- df[, which(colnames(df) == sid)]
+      }
+    })
+
+    #longitude input
+    longitude <- reactive({
+      df <- data()
+      LONG <- df[, which(colnames(df) == input$long)]
+    })
+
+    #latitude input
+    latitude <- reactive({
+      df <- data()
+      LAT <- df[, which(colnames(df) == input$lat)]
+    })
+
+    #renders table
+    output$table <- renderTable({
+      if(is.null(data()))
+      {return ()}
+      data()
+    })
+
+    #function that checks that latitude is in correct range; NAs are skipped
+    checkLat = function(dataframe, id){
+      vals = suppressWarnings(as.numeric(unlist(dataframe[, which(colnames(dataframe) == id)])))
+      for(i in seq_len(nrow(dataframe))){
+        if(!is.na(vals[i]) && (vals[i] > 90 || vals[i] < -90)){
+          return(FALSE)
+        }
+      }
+      return(TRUE)
+    }
+
+    #function that checks that longitude is in correct range; NAs are skipped
+    checkLong = function(dataframe, id){
+      vals = suppressWarnings(as.numeric(unlist(dataframe[, which(colnames(dataframe) == id)])))
+      for(i in seq_len(nrow(dataframe))){
+        if(!is.na(vals[i]) && (vals[i] > 180 || vals[i] < -180)){
+          return(FALSE)
+        }
+      }
+      return(TRUE)
+    }
+
+    #function that checks if lat/long field are numeric; entries that
+    #cannot be coerced become NA in the returned vector
+    checkNumeric = function(dataframe, id){
+      L <- unlist(dataframe[, which(colnames(dataframe) == id)])
+      L <- na.omit(L)
+      L <- suppressWarnings(as.numeric(L))
+      return(L)
+    }
+
+    #site sheet
+    observeEvent(input$done, {
+      df <- data()
+
+      #checks if lat/long are numeric
+      LAT <- checkNumeric(df, input$lat)
+      LONG <- checkNumeric(df, input$long)
+
+      if(!anyNA(LAT) && !anyNA(LONG)){
+        if(checkLat(df, input$lat) == FALSE){
+          shinyalert::shinyalert(title = "Error",
+            "Some values in your latitude column are outside range [-90,90].", type = "error")
+        }
+        else if(checkLong(df, input$long) == FALSE){
+          shinyalert::shinyalert(title = "Error",
+            "Some values in your longitude column are outside range [-180,180].", type = "error")
+        }
+        else{
+          showModal(modalDialog(
+            title = "Warning",
+            "Any rows without longitude or latitude values will be deleted.",
+            footer = actionButton("dismiss", label = "OK")
+          ))
+        }
+      }
+      else{
+        shinyalert::shinyalert(title = "Error",
+          "The column you selected for latitude/longitude is not numeric.",
+          type = "error")
+      }
+
+    })
+
+    sitesSheet <- eventReactive(input$dismiss, {
+      removeModal()
+      if(!is.null(input$file)){
+        df <- data()
+        SID <- siteid()
+
+        #matches user input to column in inputted dataframe
+        SITENAME <- df[, which(colnames(df) == input$sn)]
+        LAT <- df[, which(colnames(df) == input$lat)]
+        LONG <- df[, which(colnames(df) == input$long)]
+        ELEV <- df[, which(colnames(df) == input$elv)]
+        ADDY <- df[, which(colnames(df) == input$addy)]
+        CITY <- df[, which(colnames(df) == input$cityy)]
+        STATE <- df[, which(colnames(df) == input$stateProvince)]
+        COUNTRY <- df[, which(colnames(df) == input$cc)]
+        COMMENT <- df[, which(colnames(df) == input$sc)]
+
+        if(input$sn == "N/A"){
+          SITENAME <- rep(NA)
+        }
+        if(input$elv == "N/A"){
+          ELEV <- rep(NA)
+        }
+        if(input$addy == "N/A"){
+          ADDY <- rep(NA)
+        }
+        if(input$cityy == "N/A"){
+          CITY <- rep(NA)
+        }
+        if(input$stateProvince == "N/A"){
+          STATE <- rep(NA)
+        }
+        if(input$cc == "N/A"){
+          COUNTRY <- rep(NA)
+        }
+        if(input$sc == "N/A"){
+          COMMENT <- rep(NA)
+        }
+
+        #creates sites sheet
+        siteHeader <- c("Site_ID", "Site_Name", "Latitude", "Longitude", "Elevation_mabsl",
+                        "Address", "City", "State_or_Province",
"Country", "Site_Comments") + dff <- data.frame(SID, SITENAME, LAT, longitude(), ELEV, + ADDY, CITY, STATE, COUNTRY, COMMENT) + names(dff) <- siteHeader + + #removes rows without long/lat. needs to display a pop up dialogue. + dfff <- subset(dff, !is.na(Longitude) | !is.na(Latitude), ) + } + + + }) + + #renders site table + output$siteTable <- renderTable(sitesSheet(), rownames = TRUE) + + + #samples sheet + observeEvent(input$done1, { + showModal(modalDialog( + title = "Warning", "Any rows without a Type value will be deleted.", footer = actionButton("dismiss1", label = "OK") + )) + }) + + sampleSheet <- eventReactive(input$dismiss1, { + removeModal() + if(!is.null(input$file)){ + dframe <- data() + sid <- siteid() + + #matches user input with column in inputted dataframe + SAMPLEID <- dframe[, which(colnames(dframe) == input$ssid)] + TYPE <- dframe[, which(colnames(dframe) == input$tt)] + STARTDATE <- dframe[, which(colnames(dframe) == input$sd)] + STARTTZ <- dframe[, which(colnames(dframe) == input$stz)] + COLD <- dframe[, which(colnames(dframe) == input$cd)] + COLTZ <- dframe[, which(colnames(dframe) == input$ctz)] + SAMPLEVOL <- dframe[, which(colnames(dframe) == input$sml)] + COLTYPE <- dframe[, which(colnames(dframe) == input$ct)] + PHASE <- dframe[, which(colnames(dframe) == input$ph)] + DEPTH <- dframe[, which(colnames(dframe) == input$dm)] + SAMPLESOURCE <- dframe[, which(colnames(dframe) == input$sampS)] + SAMPLEIGNORE <- dframe[, which(colnames(dframe) == input$sampI)] + SAMPLECOM <- dframe[, which(colnames(dframe) == input$sampC)] + PROJECTID <- dframe[, which(colnames(dframe) == input$pID)] + + if(input$ssid == "N/A"){ + SAMPLEID <- rep(NA) + } + if(input$tt == "N/A"){ + TYPE <- rep(NA) + } + if(input$sd == "N/A"){ + STARTDATE <- rep(NA) + } + if(input$stz == "N/A"){ + STARTTZ <- rep(NA) + } + if(input$cd == "N/A"){ + COLD <- rep(NA) + } + if(input$ctz == "N/A"){ + COLTZ <- rep(NA) + } + if(input$sml == "N/A"){ + SAMPLEVOL <- rep(NA) + } + if(input$ct == "N/A"){ + COLTYPE <- rep(NA) + } + if(input$ph == "N/A"){ + PHASE <- rep(NA) + } + if(input$dm == "N/A"){ + DEPTH <- rep(NA) + } + if(input$sampS == "N/A"){ + SAMPLESOURCE <- rep(NA) + } + if(input$sampI == "N/A"){ + SAMPLEIGNORE <- rep(NA) + } + if(input$sampC == "N/A"){ + SAMPLECOM <- rep(NA) + } + if(input$pID == "N/A"){ + PROJECTID <- rep(NA) + } + + #throwing error "object cannot be coerced to type double" + #STARTDATE <- convertToDate(STARTDATE) + #COLD <- convertToDate(COLD) + + #create samples sheet + sampleHeader <- c("SampleID", "Sample_ID_2", "Site_ID", "Type", "Start_Date", "Start_Time_Zone", + "Collection_Date", "Collection_Time_Zone", "Sample_Volume_ml", "Collector_type", + "Phase", "Depth_meters", "Sample_Source", "Sample_Ignore", "Sample_Comment", "Project_ID") + d <- data.frame(SAMPLEID, SAMPLEID, sid, TYPE, STARTDATE, STARTTZ, + COLD, COLTZ, SAMPLEVOL, COLTYPE, PHASE, DEPTH, SAMPLESOURCE, + SAMPLEIGNORE, SAMPLECOM, PROJECTID) + names(d) <- sampleHeader + + #removes rows without a type + dd <- subset(d, !is.na(Type), ) + } + + }) + + #renders sample table + output$sampleTable <- renderTable(sampleSheet(), rownames = TRUE) + + + #function that rearranges names vector if column that might be field is found + fieldMatch = function(d, list){ + if(!is.null(d)){ + cn <- names(d) + for(i in 1:length(cn)){ + if(tolower(cn[i]) %in% list){ + l = cn[i] + cn <- cn[cn != cn[i]] + cn <- append(cn, l, after = 0) + } + } + return( cn <- as.list(cn)) + } + } + + #function that rearranges names vector if column that might be 
+    #column, if any, is listed first
+    fieldMatchNA = function(d, list){
+      if(!is.null(d)){
+        cn <- names(d)
+        m <- cn[tolower(cn) %in% list]
+        cn <- c(m, "N/A", cn[!(cn %in% m)])
+        return(as.list(cn))
+      }
+    }
+
+#field matching
+    #site fields
+    siteidd <- reactive({
+      sn = c("site_id", "site id", "id")
+      fieldMatchNA(data(), sn)
+    })
+    #rearranges names vector if column that might be sitename is found
+    siteName <- reactive({
+      sn = c("site_name", "name", "sitename")
+      fieldMatchNA(data(), sn)
+    })
+
+    #rearranges names vector if column that might be latitude is found
+    lat <- reactive({
+      latitude = c("latitude", "lat")
+      fieldMatch(data(), latitude)
+    })
+
+    #rearranges names vector if column that might be longitude is found
+    long <- reactive({
+      longitude = c("longitude", "long", "lon")
+      fieldMatch(data(), longitude)
+    })
+
+    #rearranges names vector if column that might be country is found
+    country <- reactive({
+      Country = c("country")
+      fieldMatchNA(data(), Country)
+    })
+
+    #rearranges names vector if column that might be elevation is found
+    elev <- reactive({
+      elevation = c("elevation", "elev", "elevation_mabsl")
+      fieldMatchNA(data(), elevation)
+    })
+
+    #rearranges names vector if column that might be state/province is found
+    state <- reactive({
+      states = c("state","province", "state_or_province")
+      fieldMatchNA(data(), states)
+    })
+
+    #rearranges names vector if column that might be city is found
+    city <- reactive({
+      City = c("city", "municipality")
+      fieldMatchNA(data(), City)
+    })
+
+    #rearranges names vector if column that might be address is found
+    address <- reactive({
+      a = c("address", "location")
+      fieldMatchNA(data(), a)
+    })
+
+    #rearranges names vector if column that might be site comment is found
+    sitecomment <- reactive({
+      d = c("site_comment", "sitecomment", "site comment", "comment", "comments", "site_comments", "site comments")
+      fieldMatchNA(data(), d)
+    })
+
+    #sample fields
+    #rearranges names vector if column that might be type is found
+    typee <- reactive({
+      type = c("type", "source", "water source", "water_source", "water_type", "water type", "sample type", "sample_type")
+      fieldMatch(data(), type)
+    })
+
+    #rearranges names vector if column that might be phase is found
+    phase <- reactive({
+      Phase = c("phase", "water phase")
+      fieldMatchNA(data(), Phase)
+    })
+
+    #rearranges names vector if column that might be collection date is found
+    collectionDate <- reactive({
+      cd = c("collection date", "date collected", "date", "collection_date", "date_collected")
+      fieldMatchNA(data(), cd)
+    })
+
+    #rearranges names vector if column that might be start date is found
+    startDate <- reactive({
+      sd = c("start date", "date", "start", "start_date")
+      fieldMatchNA(data(), sd)
+    })
+
+    #rearranges names vector if column that might be depth is found
+    depth <- reactive({
+      d = c("depth", "depth_meters", "depth meters")
+      fieldMatchNA(data(), d)
+    })
+
+    #rearranges names vector if column that might be start time zone is found
+    startTimezone <- reactive({
+      d = c("timezone", "start_time_zone", "start", "start_timezone", "start timezone")
+      fieldMatchNA(data(), d)
+    })
+
+    #rearranges names vector if column that might be collection time zone is found
+    collectionTimezone <- reactive({
+      d = c("timezone", "collection_time_zone", "collection", "collection_timezone", "collection timezone")
fieldMatchNA(data(), d) + }) + + #rearranges names vector if column that might be sample_Vol is found + samplevolume <- reactive({ + d = c("samplevolume", "sample_volume", "sample volume", "sample_volume_ml", "volume", "ml") + fieldMatchNA(data(), d) + }) + + #rearranges names vector if column that might be collector type is found + collectortype <- reactive({ + d = c("collector", "collector_type", "collector type") + fieldMatchNA(data(), d) + }) + + #rearranges names vector if column that might be sample source is found + samplesource <- reactive({ + d = c("sample_source", "sample source", "source") + fieldMatchNA(data(), d) + }) + + #rearranges names vector if column that might be sample ignore is found + sampleignore <- reactive({ + d = c("sample ignore", "ignore", "sample_ignore") + fieldMatchNA(data(), d) + }) + + #rearranges names vector if column that might be sample comment is found + samplecomment <- reactive({ + d = c("comment", "sample comment", "sample_comment", "comments", "sample comments", "sample_comments") + fieldMatchNA(data(), d) + }) + + #rearranges names vector if column that might be project ID is found + projectid <- reactive({ + d = c("project_id", "project", "id", "project id") + fieldMatchNA(data(), d) + }) + + +#Sites drop down lists + + output$projectname <- renderUI({ + textInput('pname', 'ID Prefix',) + }) + output$id <- renderUI({ + selectInput('siteID', 'Site_ID', choices = c(siteidd())) + }) + output$sitename <- renderUI({ + selectInput('sn', 'Site_Name', choices = c(siteName())) + }) + output$latitude <- renderUI({ + selectInput('lat', 'Latitude', choices = c(lat())) + }) + output$longitude <- renderUI({ + selectInput('long', 'Longitude', choices = c(long())) + }) + output$elevation <- renderUI({ + selectInput('elv', 'Elevation_mabsl', choices = c(elev())) + }) + output$address <- renderUI({ + selectInput('addy', 'Address', choices = c(address())) + }) + output$city <- renderUI({ + selectInput('cityy', 'City', choices = c(city())) + }) + output$state <- renderUI({ + selectInput('stateProvince', 'State_or_Province', choices = c(state())) + }) + output$country <- renderUI({ + selectInput('cc', 'Country', choices = c(country())) + }) + output$scomment <- renderUI({ + selectInput('sc', 'Site_Comment', choices = c(sitecomment())) + }) + + +#Samples drop down lists + + output$sid <- renderUI({ + selectInput('ssid', 'Sample_ID', choices = c(names(data()))) + }) + output$type <- renderUI({ + selectInput('tt', 'Type', choices = c(typee())) + }) + output$startD <- renderUI({ + selectInput('sd', 'Start_Date', choices = c(startDate())) + }) + output$startTZ <- renderUI({ + selectInput('stz', 'Start_Time_Zone', choices = c(startTimezone())) + }) + output$collectionD <- renderUI({ + selectInput('cd', 'Collection_Date', choices = c(collectionDate())) + }) + output$collectionTZ <- renderUI({ + selectInput('ctz', 'Collection_Time_Zone', choices = c(collectionTimezone())) + }) + output$sampleVol <- renderUI({ + selectInput('sml', 'Sample_Volume_ml', choices = c(samplevolume())) + }) + output$collectT <- renderUI({ + selectInput('ct', 'Collector_type', choices = c(collectortype())) + }) + output$phase <- renderUI({ + selectInput('ph', 'Phase', choices = c(phase())) + }) + output$depth <- renderUI({ + selectInput('dm', 'Depth_meters', choices = c(depth())) + }) + output$sampleSource <- renderUI({ + selectInput('sampS', 'Sample_Source', choices = c(samplesource())) + }) + output$sampIgnore <- renderUI({ + selectInput('sampI', 'Sample_Ignore', choices = c(sampleignore())) + }) 
+    output$sampCom <- renderUI({
+      selectInput('sampC', 'Sample_Comments', choices = c(samplecomment()))
+    })
+    output$projectID <- renderUI({
+      selectInput('pID', 'Project_ID', choices = c(projectid()))
+    })
+
+    }
+  )
+)
diff --git a/docs/index.html b/docs/index.html
index 0df7741..12cc48c 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -8,30 +8,13 @@
+isoWater
@@ -154,44 +135,111 @@

Setup

-

isoWater uses Bayesian estimation to infer information about the origin of a water sample based on its stable isotope ratios. The package can be installed from the GitHub repository:

-
library(devtools)
-install_github("SPATIAL-Lab/isoWater@*release")
-
library(isoWater)
+

isoWater includes two sets of tools. The first supports queries to the Waterisotopes Database (wiDB) to identify and download data. The second uses Bayesian estimation to infer information about the origin of a water sample based on its stable isotope ratios. The package can be installed from the GitHub repository:

+
library(devtools)
+install_github("SPATIAL-Lab/isoWater@*release")
+
library(isoWater)
+
+
+

wiDB Queries

+

Three functions are available to streamline queries to the wiDB. wiDB_values is designed to expose the list of terms used in the database’s categorical metadata fields, helping the user design other queries:

+
wiDB_values(c("types", "countries"))
+
## $types
+##  [1] "Beer"            "Bottled"         "Canal"           "Cave_drip"      
+##  [5] "Cloud_or_fog"    "Firn_core"       "Ground"          "Ice_core"       
+##  [9] "Lake"            "Milk"            "Mine"            "Ocean"          
+## [13] "Precipitation"   "Rime"            "River_or_stream" "Snow_pit"       
+## [17] "Soil"            "Spring"          "Sprinkler"       "Stem"           
+## [21] "Tap"             "Vapor"          
+## 
+## $countries
+##   [1] "AD" "AE" "AF" "AL" "AM" "AO" "AQ" "AR" "AT" "AU" "BA" "BB" "BD" "BE" "BF"
+##  [16] "BG" "BH" "BI" "BJ" "BM" "BO" "BR" "BW" "BY" "BZ" "CA" "CD" "CF" "CH" "CL"
+##  [31] "CM" "CN" "CO" "CR" "CU" "CV" "CY" "CZ" "DE" "DJ" "DK" "DO" "DZ" "EC" "EE"
+##  [46] "EG" "ES" "ET" "FI" "FK" "FM" "FO" "FR" "GA" "GB" "GE" "GF" "GH" "GI" "GL"
+##  [61] "GR" "GT" "GU" "GY" "HK" "HN" "HR" "HU" "ID" "IE" "IL" "IN" "IO" "IQ" "IR"
+##  [76] "IS" "IT" "JO" "JP" "KE" "KG" "KH" "KI" "KR" "KW" "KY" "KZ" "LA" "LB" "LK"
+##  [91] "LS" "LT" "LU" "LV" "LY" "MA" "MC" "MD" "MG" "MK" "ML" "MM" "MN" "MT" "MU"
+## [106] "MX" "MY" "MZ" "NA" "NE" "NG" "NI" "NL" "NO" "NP" "NZ" "OM" "PA" "PE" "PF"
+## [121] "PG" "PH" "PK" "PL" "PR" "PS" "PT" "PW" "PY" "QA" "RE" "RO" "RS" "RU" "SA"
+## [136] "SC" "SD" "SE" "SG" "SH" "SI" "SJ" "SK" "SN" "ST" "SV" "SY" "SZ" "TD" "TH"
+## [151] "TJ" "TN" "TR" "TW" "TZ" "UA" "UG" "UK" "UM" "US" "UY" "UZ" "VE" "VN" "WS"
+## [166] "ZA" "ZM" "ZW"
+

wiDB_sites queries the database and returns a data.frame containing the latitude and longitude coordinates for all sites matching the provided query criteria. This supports quick screening of data availability, for example:

+
ls = wiDB_sites(countries = "US", types = "Lake")
+ps = wiDB_sites(countries = "US", types = "Precipitation")
+par(mar = c(4, 4, 1, 1))
+plot(ls[, 2:1], xlim = c(-125, -68), ylim = c(25, 50))
+points(ps[, 2:1], col = "red")
+

+

Now we can refine our query and request isotope data using the function wiDB_data. We will focus on the area around Ames, Iowa, where the sites query showed both lake and precipitation data.

+
ld = wiDB_data(minLat = 41, maxLat = 42, minLong = -94, maxLong = -93, types = "Lake")
+pd = wiDB_data(minLat = 41, maxLat = 42, minLong = -94, maxLong = -93, types = "Precipitation")
+ld$data
+
##         Site_ID         Site_Name Latitude Longitude Elevation Sample_ID Type
+## 1 NLA06608-2036 South Banner Lake 41.43839 -93.55097    238.24    710256 Lake
+## 2 NLA06608-0008      Lake Ahquabi 41.29211 -93.59040    261.56    710288 Lake
+## 3 NLA06608-0008      Lake Ahquabi 41.29211 -93.59040    261.56    710971 Lake
+## 4 NLA06608-2056 Badger Creek Lake 41.46950 -93.92037    278.75    710972 Lake
+##   Start_Date Start_Time_Zone     Collection_Date Collection_Time_Zone  Phase
+## 1         NA              NA 2007-07-11 00:00:00                   NA Liquid
+## 2         NA              NA 2007-07-12 00:00:00                   NA Liquid
+## 3         NA              NA 2007-08-22 00:00:00                   NA Liquid
+## 4         NA              NA 2007-08-22 00:00:00                   NA Liquid
+##   Depth_meters Sample_Comments       d2H       d18O D17O d2H_Analytical_SD
+## 1            0              NA -15.56189 -0.5534326   NA                NA
+## 2            0              NA -25.35876 -2.0486228   NA                NA
+## 3            0              NA -20.81503 -1.9292234   NA                NA
+## 4            0              NA -27.77034 -3.3816347   NA                NA
+##   d18O_Analytical_SD D17O_Analytical_SD WI_Analysis_Source Project_ID
+## 1                 NA                 NA      EPA_Corvallis         10
+## 2                 NA                 NA      EPA_Corvallis         10
+## 3                 NA                 NA      EPA_Corvallis         10
+## 4                 NA                 NA      EPA_Corvallis         10
+
pd$projects
+
##   Project_ID Contact_Name Contact_Email
+## 1         31           NA            NA
+##                                                                                                                               Citation
+## 1 W.W. Simpkins, Isotopic composition of precipitation in central Iowa, Journal of Hydrology, Volume 172, Issue 1, 1995, Pages 185-207
+##                                            URL                Project_Name
+## 1 https://doi.org/10.1016/0022-1694(95)02694-K Iqbal, 2008; Simpkins, 1995
+

Notice that the object returned by the data function is a named list that includes the requested data and a summary of the projects (data sources) from which those data are derived. See the documentation for this function for details, and always remember to cite any data you use!
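
For example, the citations for those data sources can be pulled directly from the projects element (a quick sketch using the fields shown in the output above):

#each row of the projects table describes one data source; these
#columns identify the material to cite
pd$projects$Citation
pd$projects$URL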

+

Let’s look at the data a little more closely:

+
par(mar = c(5, 5, 1, 1))
+plot(pd$data$d18O, pd$data$d2H, xlab = expression(delta^{18}*"O"),
+     ylab = expression(delta^{2}*"H"))
+abline(10, 8)
+points(ld$data$d18O, ld$data$d2H, col = "red")
+

+

The precipitation values vary widely and cluster around the Global Meteoric Water Line, while the lake values are relatively high and plot somewhat below the GMWL, suggesting that they may be evaporatively enriched.
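
One simple way to quantify this offset is deuterium excess (d-excess = d2H - 8 * d18O), which is reduced in evaporated waters relative to local precipitation; a minimal sketch using the data retrieved above:

#mean d-excess for precipitation and lake samples; lower values for
#the lakes are consistent with evaporative enrichment
mean(pd$data$d2H - 8 * pd$data$d18O, na.rm = TRUE)
mean(ld$data$d2H - 8 * ld$data$d18O, na.rm = TRUE)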

Meteoric Water Line source

-

Based on one or more isotopic measurements of a water sample and parameters describing a local meteoric water line (MWL), estimate the isotope ratios of the sample's source prior to evaporation. If you have data you'd like to use to constrain the MWL, the mwl function will produce a data object describing the line:

-
#hypothetical MWL samples
-O = runif(10, -15, -2)
-H = O * 8 + 10 + rnorm(10, 0, 6)
-HO = data.frame(H, O)
-MWL = mwl(HO)
-

+

What if we wanted to 'correct' for the isotopic effects of evaporation, giving sample values that could be compared directly with one or more unevaporated sources? In other words, based on isotopic data for a water sample and parameters describing a local meteoric water line (MWL), we want to estimate the isotope ratios of the sample's source prior to evaporation. If you have data you'd like to use to constrain the MWL, as we do here, the mwl function will produce a data object describing the line:

+
#extract the precipitation H and O isotope values
+HO = data.frame(pd$data$d2H, pd$data$d18O)
+MWL = mwl(HO)
+
## Warning in mwl(HO): Missing values removed from HO
+

If you don’t have local data for a MWL, the function mwlSource (see below) has parameters for the Global Meteoric Water Line embedded as a default.
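
In that case the MWL argument can simply be omitted, so the call demonstrated below could be reduced to something like this sketch (obs and slope are defined in the next steps):

#uses the embedded GMWL parameters instead of a locally fitted MWL
mwls_gmwl = mwlSource(obs, slope = slope, ngens = 5e3)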

Now bundle the data for your water sample as an “iso” data object using the function iso. Note that isoWater uses a bivariate normal distribution to describe all water sample values; thus the parameters in an iso object include the H and O isotope values, their 1 standard deviation uncertainties, and the error covariance for H and O. For measured sample values the covariance might be expected to be zero (the default value in iso), but for many values (e.g., potential sample or source values estimated from multiple measurements) the H and O error covariance will be substantial.
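For example, a source value estimated from multiple measurements might be bundled with a nonzero covariance term; the numbers here are invented purely for illustration:

#hypothetical source: d2H, d18O, their 1 sd uncertainties, H-O error covariance
src = iso(-60, -8, 2, 0.4, 0.7)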

-
#hypothetical sample value
-obs = iso(-60, -6, 0.5, 0.1)
-

Lastly, we need to provide a prior distribution for the evaporation line slope, which is represented as a univariate normal distribution with mean and standard deviation parameters:

-
slope = c(4.2, 0.3)
+
#we will analyze one of our lake water samples, and assume realistic
+#values of analytical uncertainty and no error covariance
+obs = iso(ld$data$d2H[1], ld$data$d18O[1], 0.5, 0.1)
+

Lastly, we need to provide a prior distribution for the evaporation line slope, which is represented as a univariate normal distribution with mean and standard deviation parameters. These can be tricky to estimate, but there are a number of papers out there that provide guidance, as well as some tools and data products that might help.

+
#assumed value that is reasonable for lake water evaporation in this
+#location/climate
+slope = c(5.2, 0.3)

With our inputs prepared, we can run the analysis using mwlSource.

-
mwls1 = mwlSource(obs, MWL, slope, ngens = 1e4)
+
mwls1 = mwlSource(obs, MWL, slope, ngens = 5e3)
## module glm loaded
## Compiling model graph
 ##    Resolving undeclared variables
@@ -202,26 +250,26 @@ Meteoric Water Line source
 ## Total graph size: 46
 ## 
 ## Initializing model
-
mwls1$summary
+
mwls1$summary
##                    mean        sd       2.5%        25%         50%         75%
-## E             5.0371161 0.6377203   3.905672   4.586623   5.0048616   5.4210569
-## S             4.1807452 0.2813550   3.634429   3.985830   4.1802854   4.3713444
-## deviance     -0.3397317 1.9839564  -2.265972  -1.753102  -0.9386017   0.4156804
-## source_d18O -11.0405728 0.6201790 -12.369577 -11.405866 -11.0069921 -10.6054573
-## source_d2H  -81.0997281 3.4740644 -88.486883 -83.267829 -80.9373641 -78.7066467
+## E             7.7598533 0.5301221   6.736386   7.375787   7.7482958   8.1303217
+## S             5.2036123 0.1796153   4.846007   5.080064   5.2065274   5.3295832
+## deviance     -0.3821556 1.9319265  -2.271778  -1.774488  -0.9633777   0.3638208
+## source_d18O  -8.3167671 0.5114846  -9.328130  -8.683358  -8.3004191  -7.9468922
+## source_d2H  -55.9984330 3.8348138 -63.608858 -58.800298 -55.8311367 -53.2439812
 ##                  97.5%     Rhat n.eff
-## E             6.387945 1.009561   340
-## S             4.735228 1.001830  1900
-## deviance      5.094030 1.001201  5000
-## source_d18O  -9.956552 1.013695   280
-## source_d2H  -74.898616 1.009765   330
-

The summary object returned by the function provides two statistics that can be used to assess convergence (Rhat and effective sample size), in addition to summary statistics for the posterior distribution. The function’s output also includes an object containing all posterior samples (isoWater functions thin the posterior to 2500 samples per chain by default), which we can plot as a trace plot to visually assess burn-in and convergence:

-
plot(mwls1$results$source_d2H[1:2500], type = "l", ylim = range(mwls1$results$source_d2H))
-lines(mwls1$results$source_d2H[2501:5000], col=2)
-lines(mwls1$results$source_d2H[5001:7500], col=3)
-

-

The MWL model uses one of two line statistics to constrain the source water prior distribution. The default, shown above, uses the MWL confidence interval as the prior, appropriate for samples where the source is best represented as a mixture of many samples of the type used to define the MWL. Alternatively, the argument stype = 2 can be provided to use the prediction interval as the prior constraint, appropriate if the source is best represetned as a single sample of the type used to define the MWL:

-
mwls2 = mwlSource(obs, MWL, slope, stype = 2, ngens = 1e4)
+## E             8.788437 1.015032   410
+## S             5.542221 1.007349  1500
+## deviance      4.918002 1.001162  5500
+## source_d18O  -7.313070 1.012854   510
+## source_d2H  -48.507818 1.012971   570
+

The summary object returned by the function provides two statistics that can be used to assess convergence (Rhat and effective sample size), in addition to summary statistics for the posterior distribution. For the relatively small chain lengths run in these examples we don’t expect strong convergence. The function’s output also includes an object containing all posterior samples (isoWater functions thin the posterior to 2500 samples per chain by default), which we can plot as a trace plot to visually assess burn-in and convergence:

+
plot(mwls1$results$source_d2H[1:2500], type = "l", ylim = range(mwls1$results$source_d2H))
+lines(mwls1$results$source_d2H[2501:5000], col=2)
+lines(mwls1$results$source_d2H[5001:7500], col=3)
+

+

The MWL model uses one of two line statistics to constrain the source water prior distribution. The default, shown above, uses the MWL confidence interval as the prior, appropriate for samples where the source is best represented as a mixture of many samples of the type used to define the MWL. Alternatively, the argument stype = 2 can be provided to use the prediction interval as the prior constraint, appropriate if the source is best represented as a single sample of the type used to define the MWL. You can see that this makes a substantial difference in the resulting posterior distribution, so make sure you’re using the right statistics for your problem:

+
mwls2 = mwlSource(obs, MWL, slope, stype = 2, ngens = 5e3)
## Compiling model graph
 ##    Resolving undeclared variables
 ##    Allocating nodes
@@ -231,78 +279,77 @@ Meteoric Water Line source
 ## Total graph size: 46
 ## 
 ## Initializing model
-
plot(O, H, xlim = c(-20, 0), ylim = c(-140, 0))
-abline(MWL[2], MWL[1])
-points(obs[2:1], col = "red")
-points(mwls2$results$source_d18O, mwls2$results$source_d2H, col = "blue")
-points(mwls1$results$source_d18O, mwls1$results$source_d2H, col = "green")
-

+
par(mar = c(5, 5, 1, 1))
+plot(HO[, 2:1], xlim = c(-20, 0), ylim = c(-140, 0),
+     xlab = expression(delta^{18}*"O"),
+     ylab = expression(delta^{2}*"H"))
+abline(MWL[2], MWL[1])
+points(obs[2:1], col = "red")
+points(mwls2$results$source_d18O, mwls2$results$source_d2H, col = "blue")
+points(mwls1$results$source_d18O, mwls1$results$source_d2H, col = "green")
+

Mixture source

-

Based on isotope values for two or more potential water sources, what is the mixture of sources that contributed to a water sample? The mixSource function works similarly to mwlSource, but requires different arguments to define the mixing model source priors. Values for all sources are provided in a single iso object:

-
#hypothetical sources
-sourcesO = runif(3, -15, -3)
-sources = iso(sourcesO * 8 + 10 + rnorm(3, 0, 2), sourcesO, 0.5, 0.2, 0.05)
-

Now we can run the analysis; let’s use the parallel option (available in mwlSource also) to speed this up:

-
mixs1 = mixSource(obs, sources, slope, ngens = 1e4, ncores = 3)
-mixs1$summary
-
##                     mean         sd         2.5%          25%         50%
-## E              5.0719653 0.24379898   4.56157835   4.90733841   5.0824975
-## S              3.6397372 0.17059298   3.30829479   3.52678188   3.6388870
-## deviance       0.2004979 2.43983444  -2.24412390  -1.57349698  -0.5324379
-## fracs[1]       0.4983870 0.18736501   0.12501891   0.36396825   0.5096548
-## fracs[2]       0.1553955 0.09785948   0.01854418   0.08078849   0.1375399
-## fracs[3]       0.3462175 0.18804824   0.05557436   0.19354517   0.3247232
-## mixture_d18O -11.1202838 0.22262610 -11.51853135 -11.28048857 -11.1309527
-## mixture_d2H  -78.1425290 1.18611969 -79.95446029 -79.02332246 -78.3360380
+

What if instead of just back-correcting our sample values for evaporation, we wanted to explicitly assess the relative contribution of different water sources to the sample? In other words, based on isotope values for two or more potential water sources, what is the mixture of sources that contributed to the sample? The mixSource function works similarly to mwlSource, but requires different arguments to define the mixing model source priors. Values for all sources are provided in a single iso object:

+
#prep our data - we'll average the precipitation values by quarter
+q = quarters(as.POSIXct(pd$data$Collection_Date))
+qu = sort(unique(q))
+ql = length(qu)
+
+pd_q = data.frame("H" = numeric(ql), "O" = numeric(ql),
+                  "Hsd" = numeric(ql), "Osd" = numeric(ql),
+                  "HOc" = numeric(ql))
+for(i in seq_along(qu)){
+  pd_q$H[i] = mean(pd$data$d2H[q == qu[i]], na.rm = TRUE)
+  pd_q$O[i] = mean(pd$data$d18O[q == qu[i]], na.rm = TRUE)
+  pd_q$Hsd[i] = sd(pd$data$d2H[q == qu[i]], na.rm = TRUE)
+  pd_q$Osd[i] = sd(pd$data$d18O[q == qu[i]], na.rm = TRUE)
+  pd_q$HOc[i] = cov(pd$data$d18O[q == qu[i]], pd$data$d2H[q == qu[i]],
+                    use = "pairwise.complete.obs")
+}
+pd_q
+
##            H          O      Hsd      Osd       HOc
+## 1 -111.36000 -15.634667 59.49320 7.434212 439.68889
+## 2  -32.91821  -4.784643 24.80235 3.257371  77.32559
+## 3  -26.05879  -4.285000 19.06960 2.374869  44.77920
+## 4  -54.53600  -9.051429 36.48143 4.546960 163.92950
+
#make the iso object, providing the stats calculated above
+sources = iso(pd_q$H, pd_q$O, pd_q$Hsd, pd_q$Osd, pd_q$HOc)
+
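As an aside, the same quarterly statistics could be computed more compactly with split and lapply; this base R sketch simply reproduces the loop above:

#equivalent to the loop: split records by quarter, summarize each subset
pd_q2 = do.call(rbind, lapply(split(pd$data, q), function(d){
  data.frame(H = mean(d$d2H, na.rm = TRUE), O = mean(d$d18O, na.rm = TRUE),
             Hsd = sd(d$d2H, na.rm = TRUE), Osd = sd(d$d18O, na.rm = TRUE),
             HOc = cov(d$d18O, d$d2H, use = "pairwise.complete.obs"))
}))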

Now we can run the analysis; let’s use the parallel option (also available in mwlSource) to speed this up and run slightly longer chains:

+
mixs1 = mixSource(obs, sources, slope, ngens = 2e4, ncores = 2)
+mixs1$summary
+
##                     mean        sd         2.5%          25%         50%
+## E              7.6351405 1.2718417   5.44214534   6.82455149   7.5571544
+## S              5.1734262 0.2590821   4.65975563   5.00138203   5.1806913
+## deviance      -0.3803168 1.9535409  -2.27569549  -1.76331817  -1.0059190
+## fracs[1]       0.2713393 0.1672278   0.03870795   0.15546638   0.2325109
+## fracs[2]       0.2449456 0.1477532   0.02350190   0.12792479   0.2263658
+## fracs[3]       0.2800011 0.1897541   0.00179698   0.11214497   0.2734973
+## fracs[4]       0.2037140 0.1633617   0.01212153   0.06693956   0.1694183
+## mixture_d18O  -8.1931155 1.2600775 -11.33635599  -8.82904447  -8.1190980
+## mixture_d2H  -55.1417772 7.5068726 -74.36367430 -59.04583621 -54.6642996
 ##                      75%       97.5%     Rhat n.eff
-## E              5.2435505   5.5164708 1.001816  2300
-## S              3.7541672   3.9750185 1.001241  4500
-## deviance       1.1893195   6.7204173 1.000988  7500
-## fracs[1]       0.6362520   0.8311226 1.004114  7500
-## fracs[2]       0.2145508   0.3825191 1.003812  1100
-## fracs[3]       0.4784215   0.7487126 1.001366  3500
-## mixture_d18O -10.9728238 -10.6503637 1.002056  3000
-## mixture_d2H  -77.4088262 -75.4526929 1.003731  1500
-

The default options use a prior that weights each source evenly, but if we have other information we can use the arguments prior and shp to prescribe different distributions. Values in prior are relative, so for example if we think source 2 is likely to have contributed twice as much as the others…this may or may not have a strong impact on the results, depending on the strength of the constraints provided by the isotopic data:

-
mixs2 = mixSource(obs, sources, slope, prior = c(1, 2, 1), ngens = 1e4, ncores = 3)
-boxplot(mixs1$results[, 3:5], main = "Default")
-boxplot(mixs2$results[, 3:5], main = "Informed prior", add = TRUE, col = "red")
-

-
-
-
-
-
+## E              8.2824655  10.7888601 1.035682   150
+## S              5.3514409   5.6686235 1.005108   560
+## deviance       0.3821712   4.7688608 1.000895  5000
+## fracs[1]       0.3403372   0.6836020 1.055780    46
+## fracs[2]       0.3447688   0.5682283 1.004810   380
+## fracs[3]       0.4282478   0.6315143 1.383992     9
+## fracs[4]       0.2978753   0.5879382 1.309282     9
+## mixture_d18O  -7.3874747  -5.9903675 1.050280    97
+## mixture_d2H  -50.2024168 -42.5523837 1.047276   160
+

Again, we’re a long way from having a strongly converged simulation in which the posterior is likely to be a representative sample of the population parameters, but the initial ‘hint’ is that the lake sample values are consistent with a sub-equal source contribution from precipitation in each calendar quarter. Notice, too, that the evaporation-corrected sample values (‘mixture_d18O’ and ‘mixture_d2H’) are broadly similar to the equivalent values we obtained with the MWL model, above.
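One quick way to make that comparison is to pull the posterior means from the two summary tables; this assumes the row and column names shown in the printed summaries above:

#evaporation-corrected sample values from the mixture model
mixs1$summary[c("mixture_d2H", "mixture_d18O"), "mean"]
#source values from the MWL model, for comparison
mwls1$summary[c("source_d2H", "source_d18O"), "mean"]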

+

The default options use a prior that weights each source evenly, but if we have other information we can use the arguments prior and shp to prescribe different distributions. Values in prior are relative, so for example if we think Q1 is likely to have contributed twice as much as the other seasons we can give it twice the weight. This may or may not have a strong impact on the results, depending on the strength of the constraints provided by the isotopic data:

+
mixs2 = mixSource(obs, sources, slope, prior = c(2, 1, 1, 1), ngens = 2e4, ncores = 2)
+par(mar = c(4, 1, 1, 1))
+boxplot(mixs1$results[, 3:6], outline = FALSE)
+boxplot(mixs2$results[, 3:6], add = TRUE, col = rgb(1, 0, 0, 0.5),
+        outline = FALSE)
+
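The shp argument is not exercised in this example. Based on its role as a shape parameter for the prior on source fractions, larger values should concentrate the prior more tightly around the relative weights given in prior; treat that reading as an assumption and check ?mixSource. A sketch:

#same informed prior, stated with more confidence (assumed behavior of shp)
mixs3 = mixSource(obs, sources, slope, prior = c(2, 1, 1, 1), shp = 3,
                  ngens = 2e4, ncores = 2)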

-
-
-
-
-
diff --git a/man/isoWater.Rd b/man/isoWater.Rd
new file mode 100644
index 0000000..4a99730
--- /dev/null
+++ b/man/isoWater.Rd
@@ -0,0 +1,19 @@
+\name{isoWater}
+
+\alias{isoWater}
+
+\title{Discovery, Retrieval, and Analysis of Water Isotope Data}
+
+\description{
+wiDB_ functions provide interface to the public API of
+  the wiDB: build, check and submit queries, and receive and
+  unpack responses. Data analysis functions support Bayesian
+  inference of the source and source isotope composition of water
+  samples that may have experienced evaporation. Algorithms
+  adapted from Bowen et al. (2018) \href{https://doi.org/10.1007/s00442-018-4192-5}{doi:10.1007/s00442-018-4192-5}.
+}
+
+\author{
+\strong{Maintainer}: Gabriel Bowen \email{gabe.bowen@utah.edu}
+\strong{Authors}: Gabriel Bowen
+}
\ No newline at end of file
diff --git a/man/mixSource.Rd b/man/mixSource.Rd
index ba83de9..0f74af4 100644
--- a/man/mixSource.Rd
+++ b/man/mixSource.Rd
@@ -11,7 +11,7 @@ Given isotopic compositions of two or more potential sources, generate a posteri
 }
 
 \usage{
-mixSource(obs, sources, slope, prior = rep(1,nrow(sources)), shp = 2,
+mixSource(obs, sources, slope, prior = rep(1,nrow(sources)), shp = 1,
   ngens = 1e5, ncores = 1)
 }
 
diff --git a/man/wiDB_data.Rd b/man/wiDB_data.Rd
new file mode 100644
index 0000000..7fdfaf9
--- /dev/null
+++ b/man/wiDB_data.Rd
@@ -0,0 +1,88 @@
+\name{wiDB_data}
+
+\alias{wiDB_data}
+
+\title{
+Obtain data from wiDB using a query
+}
+
+\description{
+Obtain data from wiDB using a query
+}
+
+\usage{
+wiDB_data(minLat = NULL, maxLat = NULL, minLong = NULL,
+  maxLong = NULL, minElev = NULL, maxElev = NULL, minDate = NULL,
+  maxDate = NULL, countries = NULL, states = NULL, types = NULL,
+  projects = NULL, fields = NULL, tmpdir = tempdir(), clean = TRUE)
+}
+
+\arguments{
+  \item{minLat}{
+  numeric. Minimum latitude for query region, in decimal degrees. South negative.
+  }
+  \item{maxLat}{
+  numeric. Maximum latitude for query region, in decimal degrees. South negative.
+  }
+  \item{minLong}{
+  numeric. Minimum longitude for query region, in decimal degrees. West negative.
+  }
+  \item{maxLong}{
+  numeric. Maximum longitude for query region, in decimal degrees. West negative.
+  }
+  \item{minElev}{
+  numeric. Minimum elevation for query. Meters.
+  }
+  \item{maxElev}{
+  numeric. Maximum elevation for query. Meters.
+  }
+  \item{minDate}{
+  character. Minimum date for query. Format: "YYYY-MM-DD"
+  }
+  \item{maxDate}{
+  character. Maximum date for query. Format: "YYYY-MM-DD"
+  }
+  \item{countries}{
+  character. Vector of one or more two-letter country codes for query.
+  }
+  \item{states}{
+  character. Vector of one or more two-letter state or province codes for query.
+  }
+  \item{types}{
+  character. Vector of one or more sample types for query. See vocabulary in the \href{http://wateriso.utah.edu/waterisotopes/pages/spatial_db/contribute.html}{wiDB template}.
+  }
+  \item{projects}{
+  character. Vector of one or more project codes for query.
+  }
+  \item{fields}{
+  character. Vector of one or more data fields to return from database. If omitted, returns all default fields. See \href{https://github.com/SPATIAL-Lab/wiDButil/blob/master/Protocol.md}{here} for details.
+  }
+  \item{tmpdir}{
+  character. Directory path to use for unpacking data object.
+  }
+  \item{clean}{
+  logical. Remove working files after data object is unpacked?
+  }
+}
+
+\value{
+named list. See \href{https://github.com/SPATIAL-Lab/wiDButil/blob/master/Protocol.md}{here} for details.
+\item{data}{dataframe. Data records for isotope samples returned by query.}
+\item{projects}{dataframe. Provenance information associated with samples returned by query.}
+Note that some data are embargoed or have been shared under a license that prohibits redistribution. In dataframe \emph{data} values of 9999 indicate that a measurement is available but can't be obtained directly from the wiDB. Project information in dataframe \emph{projects} can be used to contact or visit the primary data source to learn about access to these data. Values of -9999 in \emph{data} indicate no measurement.
+}
+
+\details{
+One or more arguments must be provided.
+}
+
+\examples{
+#Download data for US precipitation in the 1990s
+vals = wiDB_data(minDate = "1990-01-01", maxDate = "2000-01-01",
+  countries = "US", types = "Precipitation")
+
+#Download data for US Rivers and streams, requesting a subset of data fields
+vals = wiDB_data(minDate = "1980-01-01", maxDate = "2000-01-01",
+  countries = "US", types = "River_or_stream",
+  fields = "Site_Name,Latitude,Longitude,d2H")
+}
diff --git a/man/wiDB_sites.Rd b/man/wiDB_sites.Rd
new file mode 100644
index 0000000..2b60ac9
--- /dev/null
+++ b/man/wiDB_sites.Rd
@@ -0,0 +1,69 @@
+\name{wiDB_sites}
+
+\alias{wiDB_sites}
+
+\title{
+Obtain information on wiDB sites using a query
+}
+
+\description{
+Obtain information on wiDB sites using a query
+}
+
+\usage{
+wiDB_sites(minLat = NULL, maxLat = NULL, minLong = NULL, maxLong = NULL,
+  minElev = NULL, maxElev = NULL, minDate = NULL, maxDate = NULL,
+  countries = NULL, states = NULL, types = NULL, projects = NULL)
+}
+
+\arguments{
+  \item{minLat}{
+  numeric. Minimum latitude for query region, in decimal degrees. South negative.
+  }
+  \item{maxLat}{
+  numeric. Maximum latitude for query region, in decimal degrees. South negative.
+  }
+  \item{minLong}{
+  numeric. Minimum longitude for query region, in decimal degrees. West negative.
+  }
+  \item{maxLong}{
+  numeric. Maximum longitude for query region, in decimal degrees. West negative.
+  }
+  \item{minElev}{
+  numeric. Minimum elevation for query. Meters.
+  }
+  \item{maxElev}{
+  numeric. Maximum elevation for query. Meters.
+  }
+  \item{minDate}{
+  character. Minimum date for query. Format: "YYYY-MM-DD"
+  }
+  \item{maxDate}{
+  character. Maximum date for query. Format: "YYYY-MM-DD"
+  }
+  \item{countries}{
+  character. Vector of one or more two-letter country codes for query.
+  }
+  \item{states}{
+  character. Vector of one or more two-letter state or province codes for query.
+  }
+  \item{types}{
+  character. Vector of one or more sample types for query. See vocabulary in the \href{http://wateriso.utah.edu/waterisotopes/pages/spatial_db/contribute.html}{wiDB template}.
+  }
+  \item{projects}{
+  character. Vector of one or more project codes for query.
+  }
+}
+
+\value{
+dataframe. Contains location and summary information for all wiDB sites returned by query. See \href{https://github.com/SPATIAL-Lab/wiDButil/blob/master/Protocol.md}{here} for details.
+}
+
+\details{
+One or more arguments must be provided.
+}
+
+\examples{
+#Find all sites with tap water data since September, 2019
+sites = wiDB_sites(minDate = "2019-09-01", types = "Tap")
+}
diff --git a/man/wiDB_values.Rd b/man/wiDB_values.Rd
new file mode 100644
index 0000000..82bb222
--- /dev/null
+++ b/man/wiDB_values.Rd
@@ -0,0 +1,30 @@
+\name{wiDB_values}
+
+\alias{wiDB_values}
+
+\title{
+Obtain value lists for categorical fields in wiDB
+}
+
+\description{
+Obtain value lists for categorical fields in wiDB
+}
+
+\usage{
+wiDB_values(fields)
+}
+
+\arguments{
+  \item{fields}{
+  character. One or more field names for which to obtain value lists. Limited to: "countries", "states", "types", and "projects".
+  }
+}
+
+\value{
+named list. Each element is a vector or dataframe containing values for the named field.
+}
+
+\examples{
+#List all projects in the wiDB
+wiDB_values("projects")
+}
diff --git a/vignettes/isoWater.Rmd b/vignettes/isoWater.Rmd
index 47b4efc..73fd78a 100644
--- a/vignettes/isoWater.Rmd
+++ b/vignettes/isoWater.Rmd
@@ -1,6 +1,10 @@
 ---
 title: "isoWater"
-output: html_document
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{isoWater}
+  %\VignetteEngine{knitr::rmarkdown}
+  \usepackage[utf8]{inputenc}
 ---
 
 ```{r setup, include=FALSE}
@@ -9,7 +13,7 @@ knitr::opts_chunk$set(echo = TRUE)
 
 ## Setup
 
-isoWater uses Bayesian estimation to infer information about the origin of a water sample based on its stable isotope ratios. The package can be installed from the GitHub repository:
+isoWater includes two sets of tools. The first supports queries to the Waterisotopes Database ([wiDB](https://waterisotopesDB.org)) to identify and download data. The second uses Bayesian estimation to infer information about the origin of a water sample based on its stable isotope ratios. The package can be installed from the GitHub repository:
 
 ```{r install, eval=FALSE}
 library(devtools)
@@ -19,15 +23,54 @@ install_github("SPATIAL-Lab/isoWater@*release")
 library(isoWater)
 ```
 
+## wiDB Queries
+
+Three functions are available to streamline queries to the wiDB. `wiDB_values` is designed to expose the list of terms used in the database's categorical metadata fields, helping the user design other queries:
+
+```{r values}
+wiDB_values(c("types", "countries"))
+```
+
+`wiDB_sites` queries the database and returns a data.frame containing the latitude and longitude coordinates for all sites matching the provided query criteria. This supports quick screening of data availability, for example:
+
+```{r sites, fig.width=5, fig.asp=0.6}
+ls = wiDB_sites(countries = "US", types = "Lake")
+ps = wiDB_sites(countries = "US", types = "Precipitation")
+par(mar = c(4, 4, 1, 1))
+plot(ls[, 2:1], xlim = c(-125, -68), ylim = c(25, 50))
+points(ps[, 2:1], col = "red")
+```
+
+Now we can refine our query and request isotope data using the function `wiDB_data`. We will focus in on the area around Ames, Iowa, where the sites query showed both lake and precipitation data.
+
+```{r data}
+ld = wiDB_data(minLat = 41, maxLat = 42, minLong = -94, maxLong = -93, types = "Lake")
+pd = wiDB_data(minLat = 41, maxLat = 42, minLong = -94, maxLong = -93, types = "Precipitation")
+ld$data
+pd$projects
+```
+
+Notice that the object returned by the data function is a named list that includes the requested data and a summary of the projects (data sources) from which those data are derived. See the documentation for this function for details, and always remember to cite any data you use!
+
+Let's look at the data a little more closely:
+
+```{r plot, fig.width=5, fig.asp=0.8}
+par(mar = c(5, 5, 1, 1))
+plot(pd$data$d18O, pd$data$d2H, xlab = expression(delta^{18}*"O"),
+     ylab = expression(delta^{2}*"H"))
+abline(10, 8)
+points(ld$data$d18O, ld$data$d2H, col = "red")
+```
+
+It appears that the precipitation values vary widely and cluster around the Global Meteoric Water Line, and the lake values are relatively high and fall somewhat below the GMWL, suggesting that they may be evaporatively enriched.
+
 ## Meteoric Water Line source
 
-Based one one or more isotopic measurements of a water sample and parameters describing a local meteoric water line (MWL), estimate the isotope ratios of the sample's source prior to evaporation. If you have data you'd like to use to constrain the MWL, the `mwl` function will produce a data object describing the line:
+What if we wanted to 'correct' for the isotopic effects of evaporation, giving sample values that could be compared directly with one or more unevaporated sources? In other words, based on isotopic data for a water sample and parameters describing a local meteoric water line (MWL), estimate the isotope ratios of the sample's source prior to evaporation. If you have data you'd like to use to constrain the MWL, like we do here, the `mwl` function will produce a data object describing the line:
 
 ```{r mwl, fig.width=5, fig.asp=0.8}
-#hypothetical MWL samples
-O = runif(10, -15, -2)
-H = O * 8 + 10 + rnorm(10, 0, 6)
-HO = data.frame(H, O)
+#extract the precipitation H and O isotope values
+HO = data.frame(pd$data$d2H, pd$data$d18O)
 MWL = mwl(HO)
 ```
 
@@ -36,24 +79,27 @@ If you don't have local data for a MWL, the function `mwlSource` (see below) has
 
 Now bundle the data for your water sample as an "iso" data object using the function `iso`. Note that isoWater uses a bivariate normal distribution to describe all water sample values; thus the parameters in an iso object include the H and O isotope values, their 1 standard deviation uncertainties, and the error covariance for H and O. For measured sample values the covariance might be expected to be zero (the default value in `iso`), but for many values (e.g., potential sample or source values estimated from multiple measurements) the H and O error covariance will be substantial.
 
 ```{r iso}
-#hypothetical sample value
-obs = iso(-60, -6, 0.5, 0.1)
+#we will analyze one of our lake water samples, and assume realistic
+#values of analytical uncertainty and no error covariance
+obs = iso(ld$data$d2H[1], ld$data$d18O[1], 0.5, 0.1)
 ```
 
-Lastly, we need to provide a prior distribution for the evaporation line slope, which is represented as a univariate normal distribution with mean and standard deviation parameters:
+Lastly, we need to provide a prior distribution for the evaporation line slope, which is represented as a univariate normal distribution with mean and standard deviation parameters. These can be tricky to estimate, but there are a number of papers out there that provide guidance, as well as some tools and data products that might help.
 
 ```{r ELslope}
-slope = c(4.2, 0.3)
+#assumed value that is reasonable for lake water evaporation in this
+#location/climate
+slope = c(5.2, 0.3)
 ```
 
 With our inputs prepared, we can run the analysis using `mwlSource`.
 
 ```{r mwlSource1}
-mwls1 = mwlSource(obs, MWL, slope, ngens = 1e4)
+mwls1 = mwlSource(obs, MWL, slope, ngens = 5e3)
 mwls1$summary
 ```
 
-The summary object returned by the function provides two statistics that can be used to assess convergence (Rhat and effective sample size), in addition to summary statistics for the posterior distribution. The function's output also includes an object containing all posterior samples (isoWater functions thin the posterior to 2500 samples per chain by default), which we can plot as a trace plot to visually assess burn-in and convergence:
+The summary object returned by the function provides two statistics that can be used to assess convergence (Rhat and effective sample size), in addition to summary statistics for the posterior distribution. For the relatively small chain lengths run in these examples we don't expect strong convergence. The function's output also includes an object containing all posterior samples (isoWater functions thin the posterior to 2500 samples per chain by default), which we can plot as a trace plot to visually assess burn-in and convergence:
 
 ```{r traceplot, fig.width=5, fig.asp=0.8}
 plot(mwls1$results$source_d2H[1:2500], type = "l", ylim = range(mwls1$results$source_d2H))
@@ -61,11 +107,14 @@ lines(mwls1$results$source_d2H[2501:5000], col=2)
 lines(mwls1$results$source_d2H[5001:7500], col=3)
 ```
 
-The MWL model uses one of two line statistics to constrain the source water prior distribution. The default, shown above, uses the MWL confidence interval as the prior, appropriate for samples where the source is best represented as a mixture of many samples of the type used to define the MWL. Alternatively, the argument `stype = 2` can be provided to use the prediction interval as the prior constraint, appropriate if the source is best represetned as a single sample of the type used to define the MWL:
+The MWL model uses one of two line statistics to constrain the source water prior distribution. The default, shown above, uses the MWL confidence interval as the prior, appropriate for samples where the source is best represented as a mixture of many samples of the type used to define the MWL. Alternatively, the argument `stype = 2` can be provided to use the prediction interval as the prior constraint, appropriate if the source is best represented as a single sample of the type used to define the MWL. You can see that this makes a substantial difference in the resulting posterior distribution, so make sure you're using the right statistics for your problem:
 
 ```{r mwlSource2, fig.width=5, fig.asp=0.8}
-mwls2 = mwlSource(obs, MWL, slope, stype = 2, ngens = 1e4)
-plot(O, H, xlim = c(-20, 0), ylim = c(-140, 0))
+mwls2 = mwlSource(obs, MWL, slope, stype = 2, ngens = 5e3)
+par(mar = c(5, 5, 1, 1))
+plot(HO[, 2:1], xlim = c(-20, 0), ylim = c(-140, 0),
+     xlab = expression(delta^{18}*"O"),
+     ylab = expression(delta^{2}*"H"))
 abline(MWL[2], MWL[1])
 points(obs[2:1], col = "red")
 points(mwls2$results$source_d18O, mwls2$results$source_d2H, col = "blue")
@@ -74,26 +123,47 @@ points(mwls1$results$source_d18O, mwls1$results$source_d2H, col = "green")
 
 ## Mixture source
 
-Based on isotope values for two or more potential water sources, what is the mixture of sources that contributed to a water sample? The `mixSource` function works similarly to `mwlSource`, but requires different arguments to define the mixing model source priors. Values for all sources are provided in a single iso object:
+What if instead of just back-correcting our sample values for evaporation, we wanted to explicitly assess the relative contribution of different water sources to the sample? In other words, based on isotope values for two or more potential water sources, what is the mixture of sources that contributed to the sample? The `mixSource` function works similarly to `mwlSource`, but requires different arguments to define the mixing model source priors. Values for all sources are provided in a single iso object:
 
 ```{r sources}
-#hypothetical sources
-sourcesO = runif(3, -15, -3)
-sources = iso(sourcesO * 8 + 10 + rnorm(3, 0, 2), sourcesO, 0.5, 0.2, 0.05)
+#prep our data - we'll average the precipitation values by quarter
+q = quarters(as.POSIXct(pd$data$Collection_Date))
+qu = sort(unique(q))
+ql = length(qu)
+
+pd_q = data.frame("H" = numeric(ql), "O" = numeric(ql),
+                  "Hsd" = numeric(ql), "Osd" = numeric(ql),
+                  "HOc" = numeric(ql))
+for(i in seq_along(qu)){
+  pd_q$H[i] = mean(pd$data$d2H[q == qu[i]], na.rm = TRUE)
+  pd_q$O[i] = mean(pd$data$d18O[q == qu[i]], na.rm = TRUE)
+  pd_q$Hsd[i] = sd(pd$data$d2H[q == qu[i]], na.rm = TRUE)
+  pd_q$Osd[i] = sd(pd$data$d18O[q == qu[i]], na.rm = TRUE)
+  pd_q$HOc[i] = cov(pd$data$d18O[q == qu[i]], pd$data$d2H[q == qu[i]],
+                    use = "pairwise.complete.obs")
+}
+pd_q
+
+#make the iso object, providing the stats calculated above
+sources = iso(pd_q$H, pd_q$O, pd_q$Hsd, pd_q$Osd, pd_q$HOc)
 ```
 
-Now we can run the analysis; let's use the parallel option (available in `mwlSource` also) to speed this up:
+Now we can run the analysis; let's use the parallel option (also available in `mwlSource`) to speed this up and run slightly longer chains:
 
 ```{r mixSource1}
-mixs1 = mixSource(obs, sources, slope, ngens = 1e4, ncores = 3)
+mixs1 = mixSource(obs, sources, slope, ngens = 2e4, ncores = 2)
 mixs1$summary
 ```
 
-The default options use a prior that weights each source evenly, but if we have other information we can use the arguments `prior` and `shp` to prescribe different distributions. Values in `prior` are relative, so for example if we think source 2 is likely to have contributed twice as much as the others...this may or may not have a strong impact on the results, depending on the strength of the constraints provided by the isotopic data:
+Again, we're a long way from having a strongly converged simulation in which the posterior is likely to be a representative sample of the population parameters, but the initial 'hint' is that the lake sample values are consistent with a sub-equal source contribution from precipitation in each calendar quarter. Notice, too, that the evaporation-corrected sample values ('mixture_d18O' and 'mixture_d2H') are broadly similar to the equivalent values we obtained with the MWL model, above.
+
+The default options use a prior that weights each source evenly, but if we have other information we can use the arguments `prior` and `shp` to prescribe different distributions. Values in `prior` are relative, so for example if we think Q1 is likely to have contributed twice as much as the other seasons we can give it twice the weight. This may or may not have a strong impact on the results, depending on the strength of the constraints provided by the isotopic data:
 
-```{r mixSource2, fig.width=5, fig.asp=0.8}
-mixs2 = mixSource(obs, sources, slope, prior = c(1, 2, 1), ngens = 1e4, ncores = 3)
-boxplot(mixs1$results[, 3:5], main = "Default")
-boxplot(mixs2$results[, 3:5], main = "Informed prior", add = TRUE, col = "red")
+```{r mixSource2, fig.width=6, fig.asp=0.7}
+mixs2 = mixSource(obs, sources, slope, prior = c(2, 1, 1, 1), ngens = 2e4, ncores = 2)
+par(mar = c(4, 1, 1, 1))
+boxplot(mixs1$results[, 3:6], outline = FALSE)
+boxplot(mixs2$results[, 3:6], add = TRUE, col = rgb(1, 0, 0, 0.5),
+        outline = FALSE)
 ```