## Looking at hydrographs and event delineations from [Kincaid et al., 2020](https://agupubs.onlinelibrary.wiley.com/doi/10.1029/2020WR027361). 

- ### Data publicly available here on HydroShare: https://www.hydroshare.org/resource/85fa32a11fbb49779033934a135f54ef/

- ### This larger dataset includes the 2014-2015 discharge and nitrate data from Vaughan, M. (2017). Vermont NEWRnet stations: 2014-2015 high-frequency DOC, nitrate, and discharge data, HydroShare, http://www.hydroshare.org/resource/faac1672244c407e9c9c8644c8211fd6.

- ### I downloaded on 05.02.24 and put it here in this directory /home/millieginty/OneDrive/git-repos/cQ_analysis/millar2021_R_partition_hysteresis

- ### The raw data file has discharge (q m3s), NO3, and SRP with timestamp and event start/end times for each watershed..

## I use the Kincaid 2020 events as delineated using HydRun with manual interventions.
 
 - ### Data were copied to this repo from the BREE OneDrive directory. One csv for each watershed, 2014-2018.
 - #### There was a storm (my storm #22) in the Kincaid Potash file that had an incorrect end year (7/25/2015 21:45 but was 7/25/2016 21:45). I changed the year.

### TO DO

- [ ] use multipeak tag from Kincaid event input

In [3]:
#################
# LOAD PACKAGES #
#################

library(tidyverse)
library(viridis)
library(dplyr)
library(lubridate)
library(glue)

###################
# SET DIRECTORIES #
###################

# Define the input and output directories

# For Kincaid data, input and output in separate directory
input_dir <- "/home/millieginty/OneDrive/git-repos/cQ_analysis/baseflow-rules-determinaton"
output_dir <- "/home/millieginty/OneDrive/git-repos/cQ_analysis/baseflow-rules-determinaton/output"

#################
# SET SITE INFO #
#################

# Set site name
Site = "Potash"

# Set year if doing yearly
#Year = 2015

# Set constituent
Analyte = "NO3"

# Set catchment area based on Site
if (Site == "Hungerford") {
  Area <- 48.1
} else if (Site == "Potash") {
  Area <- 18.4
} else if (Site == "Wade") {
  Area <- 16.7
} else {
  Area <- NA  # or any default value if Site is not one of the specified values
}

# Set stormflow thresholds 
# In this case, based on Kincaid values above in table. Can use a range in other cases (see cell below).
if (Site == "Hungerford") {
  candidateSfThresh <- 0.1
} else if (Site == "Potash") {
  candidateSfThresh <- 0.12
} else if (Site == "Wade") {
  candidateSfThresh <- 0.05
} else {
  candidateSfThresh <- NA  # or any default value if Site is not one of the specified values
}

# Print the Area and SFT to check
print(Area)
print(candidateSfThresh)

############################
# READ IN, TIDY, JOIN DATA #
############################

# Read in raw Hydroshare data csv from Kincaid et al 2020 found at https://www.hydroshare.org/resource/85fa32a11fbb49779033934a135f54ef/
# Downloaded on 05.02.24
allInputData15Min <- read.csv(file.path(input_dir, "hydroshare_rawData.csv"))

# Rename the 'timestamp' column to 'datetime' to conform with Millar script
names(allInputData15Min)[names(allInputData15Min) == "timestamp"] <- "datetime"

# Construct the file name for event delineation based on Site definition
events_file <- paste("Events", Site, "2014to2018.csv", sep = "_")

# Read in the event delineation csv file
customEventDel <- read.csv(file.path(input_dir, "Event_delineations_2014-2018", events_file)) %>%
  # Add a storm ID
  mutate(storm_id = glue("storm_{row_number()}")) %>%
  # Select and rename columns
  select(storm_id, rainfall.start, start = HydRun.start, end = HydRun.end) %>%
  # Convert start and end datetimes to POSIXct
  mutate(start = as.POSIXct(start, format = "%m/%d/%Y %H:%M", tz = "EST"),
         end = as.POSIXct(end, format = "%m/%d/%Y %H:%M", tz = "EST"))

# Filter the data for just the site and for the year/time range you want
# Memory issues if you try to process all the Kincaid 2014-2018 data at once, sometimes
# Remove rows with missing values
Site_input <- allInputData15Min %>%
  filter(site == Site) %>%
  drop_na(q_cms, NO3_mgNL) %>%
  select(datetime, q_cms, conc = NO3_mgNL) %>%
  mutate(datetime = as.POSIXct(datetime, format = "%Y-%m-%d %H:%M:%S", tz = "EST"))

# Create a list of data frames for each storm event
storm_data_list <- customEventDel %>%
  rowwise() %>%
  mutate(data = list(Site_input %>% filter(datetime >= start & datetime <= end))) %>%
  ungroup() %>% # Ungroup to prevent grouped data issues
  select(storm_id, data) %>%
  group_split(storm_id) # Split by storm_id to create a list of data frames

# Convert to a named list of dataframes
storm_data_list <- setNames(lapply(storm_data_list, function(x) x$data[[1]]), customEventDel$storm_id)

# Verify that the list consists of dataframes
print(paste("number of storm events:", length(storm_data_list))) # Should be 153 storms for Hungerford in Kincaid dataset

# Check the structure of the first storm dataframe
print(head(storm_data_list[[1]]))
                                   
eventInputs = storm_data_list

# Set time same timestep as the automatic RDF
timestep_min = 15
                                   
#####################
# SET OUTPUT NAMING #
#####################

# Specify constituent in data set name
dataSetName = paste(Site,"_",Analyte,"_","2014-2018", sep="")

# Chose constitution for plot axes labels (NO3, TOC, or turbidity)
constit <- Analyte

Site_input$datetime <- as.POSIXct(Site_input$datetime,format("%Y-%m-%d %H:%M:%S"),tz="EST")

# Rescale the data
Site_input <- Site_input %>% 
  mutate(rescaled_conc = ((conc-min(conc))/(max(conc)-min(conc))*max(q_cms)))

[1] 18.4
[1] 0.12
[1] "number of storm events: 197"
             datetime q_cms  conc
1 2014-07-27 09:00:00 0.039 0.520
2 2014-07-27 09:15:00 0.042 0.527
3 2014-07-27 09:30:00 0.475 0.534
4 2014-07-27 09:45:00 0.279 0.370
5 2014-07-27 10:00:00 0.214 0.350
6 2014-07-27 10:15:00 0.176 0.327


In [7]:
#######################
# PLOT THE HYDROGRAPH #
#######################

hydrograph_plot <- ggplot() +
  geom_line(data = allInputData15Min, aes(x = datetime, y = q_cms, color = "Hydrograph")) +
  theme_minimal() +
  labs(title = paste(Site, "Brook event hydrographs", sep = " "),
       x = "Datetime",
       y = "Discharge (cms)",
       color = "Data Type") +
  scale_x_datetime(date_labels = "%Y-%m-%d %H:%M", date_breaks = "1 year") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(strip.text = element_text(size = 8)) +
  scale_color_manual(values = c("Hydrograph" = "blue")) 

# Print the modified plot
print(hydrograph_plot)


ERROR: Error: Invalid input: time_trans works with objects of class POSIXct only
