Merged
44 changes: 27 additions & 17 deletions 01_fetch.R
@@ -1,29 +1,39 @@

-# This file includes the target recipes for targets that download data
+# This file includes the target recipes for targets that load local data
 
 # Load all `src` files for this phase
-source('01_fetch/src/download_wqp_physchem_data.R')
+source('01_fetch/src/load_wqp_file.R')
 
 p1 <- list(
 
-  # Fetch data from the portal using a function defined in the pipeline
+  # Declare an input file as a target
   tar_target(
-    p1_dataset,
-    download_wqp_physchem_data(state = state,
-                               county = county,
-                               start_date = start_date,
-                               end_date = end_date)
+    p1_dataset_csv,
+    "01_fetch/in/wqp_brown_county_wi_data.csv",
+    # Adding this argument tracks the file *contents* not the name
+    format = "file"
   ),
 
-  # Save raw data as a csv to be able to share outside of the pipeline.
-  # Note that target name is the same as the data *except* for the
+  # Load the raw data from the csv file.
+  # Note that target name is the same as the file *except* for the
   # suffix `_csv`, denoting the file type.
-  tar_target(p1_dataset_csv, {
-    out_file <- "01_fetch/out/p1_data_file.csv"
-    write_csv(p1_dataset, out_file)
-    return(out_file) # File targets *must* return the filepath at the end
-  },
-  format = "file"
+  tar_target(
+    p1_dataset,
+    load_wqp_file(p1_dataset_csv)
   ),
Collaborator:
I get the following warning from this target:

Warning message: One or more parsing issues, call `problems()` on your data frame for details, e.g.: dat <- vroom(...); problems(dat)

I followed the suggestion, running test <- vroom::vroom(p1_dataset_csv) and then vroom::problems(test). All of the warnings involve columns that are almost entirely NA but sparsely populated with something (e.g. "Not Detected", "in", "0.5", etc.). In some cases (and I did not realize this!), read_csv() will actually convert non-logical values to NA. For example, there are a few numeric values in the ActivityDepthHeightMeasure.MeasureValue column in the csv, but those get converted to NA in the pipeline. Similarly, MeasureQualifierCode has a handful of "J" values, but these are converted to NA because the automatic column type detection decides the column should be logical.

My convention has been to read in all columns as character and then convert the applicable columns to numeric. Should we do that here? Or not worry about it? If we'd rather not get into it, it might make sense to suppressWarnings(), but open to ideas.
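For reference, the all-character convention described above might look something like this (a sketch, not part of this PR; exactly which columns to convert back to numeric is an assumption):

```r
library(readr)
library(dplyr)

# Read every column as character so no values are silently coerced to NA,
# then convert only the columns we know should be numeric
wqp_raw <- read_csv("01_fetch/in/wqp_brown_county_wi_data.csv",
                    col_types = cols(.default = col_character()))

wqp_typed <- wqp_raw %>%
  mutate(ActivityDepthHeightMeasure.MeasureValue =
           as.numeric(ActivityDepthHeightMeasure.MeasureValue))
```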

Collaborator (Author):
Thanks for investigating this! I had not noticed it. The fastest fix in the current situation is to add guess_max = 5000, which increases the number of rows read_csv() inspects before deciding each column's type. I don't really want to spend time explaining the different argument options, so I added a wrapper function that sets this. Commit coming soon!
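To see why `guess_max` matters, here is a standalone illustration (not part of the PR; the inline data is made up):

```r
library(readr)

# A column that is NA for the first 1500 rows and numeric afterwards.
# With a small guess_max, readr only inspects the leading NAs, guesses
# the column is logical, and coerces the later number to NA.
csv_text <- paste0("depth\n",
                   paste(rep("NA", 1500), collapse = "\n"),
                   "\n0.5\n")

small_guess <- suppressWarnings(
  read_csv(I(csv_text), col_types = cols(), guess_max = 1000)
)
class(small_guess$depth)  # logical: the 0.5 was lost

big_guess <- read_csv(I(csv_text), col_types = cols(), guess_max = 5000)
class(big_guess$depth)    # numeric: the 0.5 survives
```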


+  # Now do the same but for the site information CSV file
+  tar_target(
+    p1_metadata_csv,
+    "01_fetch/in/wqp_brown_county_wi_siteInfo.csv",
+    # Adding this argument tracks the file *contents* not the name
+    format = "file"
+  ),
+
+  # Load the site info data from the csv file.
+  tar_target(
+    p1_metadata,
+    read_csv(p1_metadata_csv, col_types = cols())
+  )
 
 )
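The `format = "file"` pattern used above generalizes; a minimal standalone `_targets.R` sketch (the file name here is a placeholder, not from this repo):

```r
library(targets)

# _targets.R: a file target paired with a downstream reader target.
# Because format = "file" hashes the file *contents*, editing data.csv
# invalidates both targets on the next tar_make(), even though the
# path string itself never changes.
tar_option_set(packages = "readr")

list(
  tar_target(data_csv, "data.csv", format = "file"),
  tar_target(data, read_csv(data_csv, col_types = cols()))
)
```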
4,713 changes: 4,713 additions & 0 deletions 01_fetch/in/wqp_brown_county_wi_data.csv
42 changes: 42 additions & 0 deletions 01_fetch/in/wqp_brown_county_wi_siteInfo.csv
@@ -0,0 +1,42 @@
MonitoringLocationIdentifier,MonitoringLocationName,LatitudeMeasure,LongitudeMeasure
USGS-04072076,"SILVER CREEK AT FLORIST DRIVE AT ONEIDA, WI",44.49,-88.1811111
USGS-04072150,"DUCK CREEK NEAR HOWARD, WI",44.53338889,-88.1296944
USGS-04084911,"PLUM CREEK NEAR WRIGHTSTOWN, WI",44.3055449,-88.1712146
USGS-04085068,"ASHWAUBENON CREEK NEAR LITTLE RAPIDS, WI",44.4141571,-88.1270474
USGS-040850684,"ASHWAUBENON CREEK AT GRANT ST NR WEST DEPERE, WI",44.445,-88.09888889
USGS-04085074,"DUTCHMAN CREEK AT CYRUS ST NR ASHWAUBENON, WI",44.4663792,-88.1431588
USGS-04085078,"DUTCHMAN CREEK AT HANSEN RD AT ASHWAUBENON, WI",44.4835,-88.087
USGS-04085108,"EAST RIVER @ CNTY TRUNK HIGHWAY ZZ NR GREENLEAF,WI",44.37194444,-88.0922222
USGS-04085138,"EAST RIVER AT GREEN BAY, WI",44.5091593,-87.9917702
USGS-040851385,"FOX RIVER AT OIL TANK DEPOT AT GREEN BAY, WI",44.5286111,-88.01
USGS-441520088045001,"GLRI EAST RIVER WATERWAY 3 NEAR GREENLEAF, WI",44.25569444,-88.0805278
USGS-441520088045002,"GLRI EAST RIVER TILE 1 NEAR GREENLEAF, WI",44.25563889,-88.0805833
USGS-441546088082001,"GLRI EAST RIVER WATERWAY NUMBER 2 NR GREENLEAF, WI",44.2628333,-88.13875
USGS-442114088085701,"GLRI WATERWAY NUMBER 5 NR WRIGHTSTOWN, WI",44.3539722,-88.1491944
USGS-442119088085501,"GLRI WATERWAY NUMBER 4 NR WRIGHTSTOWN, WI",44.35530556,-88.1486667
ONEIDA_WQX-DCCL,Dutchman Creek at Cyrus Lane,44.4664,-88.1434
ONEIDA_WQX-DCPP,Duck Creek at Pamperin Park,44.5448,-88.103
ONEIDA_WQX-LBN,Lancaster Brook At Navajo,44.5551,-88.1284
ONEIDA_WQX-OL,Osnuhsa Lake,44.5273,-88.1329
ONEIDA_WQX-QL,Quarry Lake,44.5178,-88.1661
ONEIDA_WQX-SLV,Silver Creek at Hwy 54,44.5089,-88.1575
ONEIDA_WQX-TCBW,Trout Creek at Brookwood Drive,44.5445,-88.154
ONEIDA_WQX-TCFF,Trout Cr. at HWY FF,44.5356,-88.1298
ONEIDA_WQX-TCU,Trout Creek at CH U,44.5511,-88.1905
ONEIDA_WQX-THCC,Thornberry Cr. at Crooked Creek Dr.,44.5564,-88.1376
WIDNR_WQX-053210,Fox River - Above De Pere Dam,44.444934,-88.062507
WIDNR_WQX-053222,Fox River at Green Bay Yacht Club,44.536792,-88.004698
WIDNR_WQX-053508,EAST RIVER - MALLARD ROAD,44.335457,-88.11203
WIDNR_WQX-053675,EAST RIVER - HWY G,44.435503,-88.024565
WIDNR_WQX-053683,Baird Creek at Preble WI,44.507416,-87.967544
WIDNR_WQX-053684,Apple Creek - Rosin Rd,44.348511,-88.161183
WIDNR_WQX-10009445,BOWER CREEK (1) 50M UPSTREAM OF HWY GV,44.45179,-87.99543
WIDNR_WQX-10010769,Wequiok Creek 1-Nicolet Rd/Cth A,44.57843,-87.89308
WIDNR_WQX-10015851,Dutchmans Creek - Oneida Street,44.478602,-88.072319
WIDNR_WQX-10016494,TRIBUTARY TO PLUM CREEK - DOWNSTREAM OF COUNTY LINE ROAD,44.301832,-88.190164
WIDNR_WQX-10016502,ASHWAUBENON CREEK - GRANT STREET,44.445086,-88.098749
WIDNR_WQX-10034510,Unnamed Trib (410000) at Lakeview Dr. (175m US),44.565834,-88.06471
WIDNR_WQX-10038644,Duck Creek-Pamperin Park,44.544773,-88.10285
WIDNR_WQX-10043279,East River at Harold Lewis Trail off Main Street,44.51633,-88.005878
WIDNR_WQX-10046999,Plum Creek at VandeHey Farm crossing,44.315404,-88.171541
WIDNR_WQX-10057468,FA1.3_FY23_GB_01,44.568609,-87.950251
26 changes: 0 additions & 26 deletions 01_fetch/src/download_wqp_physchem_data.R

This file was deleted.

21 changes: 21 additions & 0 deletions 01_fetch/src/load_wqp_file.R
@@ -0,0 +1,21 @@

#' @title Small wrapper function to read in CSV data from WQP
#' @description This is a tiny function that wraps the `readr::read_csv()` function
#' in order to change some of the default arguments to settings that are
#' useful when reading in WQP data, e.g. `col_types` and `guess_max`.
#'
#' @param in_file character string of the filepath to load
#'
#' @returns a tibble of WQP data
#'
load_wqp_file <- function(in_file) {
  read_csv(in_file,
           # Suppress console messages about what it
           # chose for default column types
           col_types = cols(),
           # Increase number of rows it looks at to
           # choose column types by default. This
           # makes it more likely to find an actual
           # value and not treat a column as logical.
           guess_max = 5000)
}
8 changes: 5 additions & 3 deletions 02_prep.R
@@ -10,7 +10,9 @@ p2 <- list(
   # columns using a custom fxn defined in `02_prep/src/process_wqp_data.R`
   tar_target(
     p2_refined_dataset,
-    refine_wqp_data(p1_dataset, characteristic, fraction)
+    # Could also use "Total" as the fraction, but note that Nitrate doesn't
+    # have any values for that fraction in this dataset
+    refine_wqp_data(p1_dataset, characteristic, "Dissolved")
   ),
 
   # Get list of site IDs that appear in the refined data
@@ -25,9 +27,9 @@ p2 <- list(
     summarize_wqp_data_by_site(p2_refined_dataset)
   ),
 
-  # Extract the site metadata from the refined dataset
+  # Filter the site metadata from the refined dataset
   tar_target(
     p2_site_metadata,
-    extract_wqp_site_info(p1_dataset, p2_sites)
+    filter_wqp_site_info(p1_metadata, p2_sites)
   )
 )
13 changes: 4 additions & 9 deletions 02_prep/src/process_wqp_data.R
@@ -77,23 +77,18 @@ summarize_wqp_data_by_year <- function(wqp_data_refined) {
   )
 }
 
-#' @title Isolate just the site metadata from the full dataset
+#' @title Filter metadata to just the sites that stay in the refined data
 #' @description WQP downloaded data via `dataRetrieval` comes with a data.frame
 #' attribute called `siteInfo`, containing metadata for the sites present in the
 #' downloaded dataset. It includes things like site type, site full name, and
 #' location. Any filtering done to the data after it was downloaded, such as
 #' in `refine_wqp_data()`, will *not* filter the siteInfo attribute unless
 #' specifically added as custom code.
 #'
-#' @param wqp_data_raw a data.frame with downloaded data from WQP, such as from
-#' the pipeline function `download_wqp_physchem_data()`.
+#' @param wqp_metadata a data.frame with downloaded data from WQP with site info
 #' @param sites a vector of the sites to retain
 #'
-extract_wqp_site_info <- function(wqp_data_raw, sites) {
-  attr(wqp_data_raw, "siteInfo") |>
-    select(MonitoringLocationIdentifier, MonitoringLocationName,
-           LatitudeMeasure, LongitudeMeasure) %>%
-    # Filter to just the sites that stay in the refined data
-    # (the cleaning steps do not impact `siteInfo` attribute)
+filter_wqp_site_info <- function(wqp_metadata, sites) {
+  wqp_metadata %>%
     filter(MonitoringLocationIdentifier %in% sites)
 }
3 changes: 3 additions & 0 deletions 03_summarize.R
@@ -7,6 +7,9 @@ source('03_summarize/src/map_function.R')
 
 p3 <- list(
 
+  # File targets *must* return the filepath at the end, so the functions called
+  # by file targets should have `return(out_file)` as the final step.
+
   # next can add a timeseries plot for the data by year and then maybe an accompanying map?
   # would love to create a very simple shiny app for this.
   tar_target(
10 changes: 1 addition & 9 deletions _targets.R
@@ -5,7 +5,6 @@ tar_option_set(
   packages = c(
     'dplyr',
     'ggplot2',
-    'dataRetrieval', # may comment out for actual workshop
     'readr',
     'lubridate',
     'leaflet',
@@ -14,15 +13,8 @@
   )
 )
 
-# Set 01_fetch pipeline configurations
-start_date <- "2020-10-01" # Date samples begin
-end_date <- "2023-09-30" # Date samples end
-state <- 'Wisconsin'
-county <- 'Brown'
-
 # Set 02_prep pipeline configurations
-characteristic <- "Chloride" # Phosphorus, Nitrate
-fraction <- "Dissolved"
+characteristic <- "Phosphorus" # Other choices: Chloride, Nitrate
 
 source('01_fetch.R')
 source('02_prep.R')