## In this R notebook I use Millar et al's workflow for automatic event delineation/hysteresis index calcs using the s::can and discharge data from [Kincaid et al., 2020](https://agupubs.onlinelibrary.wiley.com/doi/10.1029/2020WR027361). 

## Here I adjust the Millar workflow to accept manual event delineations input and calculate event water yields.

- ### Data publicly available here on HydroShare: https://www.hydroshare.org/resource/85fa32a11fbb49779033934a135f54ef/

- ### This larger dataset includes the 2014-2015 discharge and nitrate data from Vaughan, M. (2017). Vermont NEWRnet stations: 2014-2015 high-frequency DOC, nitrate, and discharge data, HydroShare, http://www.hydroshare.org/resource/faac1672244c407e9c9c8644c8211fd6.

- ### Note that there is a Hungerford data gap in 2016 to adjust this code for

- ### I downloaded on 05.02.24 and put it here in this directory /home/millieginty/OneDrive/git-repos/cQ_analysis/millar2021_R_partition_hysteresis

- ### The raw data file has discharge (q m3s), NO3, and SRP with timestamp and event start/end times for each watershed. The Millar code takes just timestamp, q, and C input csvs so I separate this raw data file into just those parameters for each site over the entire time period (>400 events from 2014 to 2018, no winter events).

## I use the Kincaid 2020 events as delineated using HydRun with manual interventions.
 
 - ### Data were copied to this repo from the BREE OneDrive directory. One csv for each watershed, 2014-2018.

In [66]:
#################
# LOAD PACKAGES #
#################

library(tidyverse)
library(viridis)
library(dplyr)
library(lubridate)
library(glue)

###################
# SET DIRECTORIES #
###################

# Define the input and output directories

# For Kincaid data, input and output in separate directory
input_dir <- "/home/millieginty/OneDrive/git-repos/cQ_analysis/millar2021_R_separation_hysteresis/kincaid2020_hydroshare/"
output_dir <- "/home/millieginty/OneDrive/git-repos/cQ_analysis/millar2021_R_separation_hysteresis/kincaid2020_hydroshare/output/"

# functions script in main millar directory
millar_input_dir <- "/home/millieginty/OneDrive/git-repos/cQ_analysis/millar2021_R_separation_hysteresis/"

#####################
# READ IN FUNCTIONS #
#####################

# 2024-07-08 MED note: I made a new version of the Millar functions script with my modifications
source(file.path(input_dir,"cQ_functions_MED_custom_delineations.R"))

#################
# SET SITE INFO #
#################

# Set site name
Site = "Hungerford"

# Set year if doing yearly
#Year = 2015

# Set constituent
Analyte = "NO3"

# Set catchment area based on Site
if (Site == "Hungerford") {
  Area <- 48.1
} else if (Site == "Potash") {
  Area <- 18.4
} else if (Site == "Wade") {
  Area <- 16.7
} else {
  Area <- NA  # or any default value if Site is not one of the specified values
}

# Set stormflow thresholds 
# In this case, based on Kincaid values above in table. Can use a range in other cases (see cell below).
if (Site == "Hungerford") {
  candidateSfThresh <- 0.1
} else if (Site == "Potash") {
  candidateSfThresh <- 0.12
} else if (Site == "Wade") {
  candidateSfThresh <- 0.05
} else {
  candidateSfThresh <- NA  # or any default value if Site is not one of the specified values
}

# Print the Area and SFT to check
print(Area)
print(candidateSfThresh)

############################
# READ IN, TIDY, JOIN DATA #
############################

# Read in raw Hydroshare data csv from Kincaid et al 2020 found at https://www.hydroshare.org/resource/85fa32a11fbb49779033934a135f54ef/
# Downloaded on 05.02.24
allInputData15Min <- read.csv(file.path(input_dir,"hydroshare_rawData.csv"))

# Rename the 'timestamp' column to 'datetime' to conform with Millar script
names(allInputData15Min)[names(allInputData15Min) == "timestamp"] <- "datetime"

# Construct the file name for event delineation based on Site definition
events_file <- paste("Events", Site, "2014to2018.csv", sep = "_")

# Read in the event delineation csv file
customEventDel <- read.csv(file.path(input_dir, "Event_delineations_2014-2018", events_file)) %>%
  # Add a storm ID
  mutate(storm_id = glue("storm_{row_number()}")) %>%
  # Select and rename columns
  select(storm_id, rainfall.start, start = HydRun.start, end = HydRun.end) %>%
  # Convert start and end datetimes to POSIXct
  mutate(start = as.POSIXct(start, format = "%m/%d/%Y %H:%M", tz = "EST"),
         end = as.POSIXct(end, format = "%m/%d/%Y %H:%M", tz = "EST"))

# Filter the data for just the site and for the year/time range you want
# Memory issues if you try to process all the Kincaid 2014-2018 data at once, sometimes
# Remove rows with missing values
Site_input <- allInputData15Min %>%
  filter(site == Site) %>%
  drop_na(q_cms, NO3_mgNL) %>%
  select(datetime, q_cms, conc = NO3_mgNL) %>%
  mutate(datetime = as.POSIXct(datetime, format = "%Y-%m-%d %H:%M:%S", tz = "EST"))

# Create a list of data frames for each storm event
storm_data_list <- customEventDel %>%
  rowwise() %>%
  mutate(data = list(Site_input %>%
    filter(datetime >= start & datetime <= end))) %>%
  select(storm_id, data) %>%
  group_by(storm_id) %>%
  summarise(data = list(data))

# Convert to a named list of data frames
storm_data_list <- setNames(storm_data_list$data, storm_data_list$storm_id)

# Print the first few elements of the list to verify
print(storm_data_list[1:3])
#####################
# SET OUTPUT NAMING #
#####################

# Specify constituent in data set name
dataSetName = paste(Site,"_",Analyte,"_","2014-2018", sep="")

# Chose constitution for plot axes labels (NO3, TOC, or turbidity)
constit <- Analyte

Site_input$datetime <- as.POSIXct(Site_input$datetime,format("%Y-%m-%d %H:%M:%S"),tz="EST")

# Rescale the data
Site_input <- Site_input %>% 
  mutate(rescaled_conc = ((conc-min(conc))/(max(conc)-min(conc))*max(q_cms)))

[1] 48.1
[1] 0.1
$storm_1
$storm_1[[1]]
               datetime q_cms  conc
1   2014-06-24 23:15:00 0.117 2.362
2   2014-06-24 23:30:00 0.123 2.350
3   2014-06-24 23:45:00 0.135 2.350
4   2014-06-25 00:00:00 0.135 2.351
5   2014-06-25 00:15:00 0.135 2.342
6   2014-06-25 00:30:00 0.135 2.330
7   2014-06-25 00:45:00 0.141 2.316
8   2014-06-25 01:00:00 0.141 2.302
9   2014-06-25 01:15:00 0.141 2.297
10  2014-06-25 01:30:00 0.141 2.293
11  2014-06-25 01:45:00 0.141 2.290
12  2014-06-25 02:00:00 0.147 2.287
13  2014-06-25 02:15:00 0.147 2.281
14  2014-06-25 02:30:00 0.147 2.275
15  2014-06-25 02:45:00 0.154 2.268
16  2014-06-25 03:00:00 0.154 2.261
17  2014-06-25 03:15:00 0.154 2.259
18  2014-06-25 03:30:00 0.161 2.257
19  2014-06-25 03:45:00 0.161 2.253
20  2014-06-25 04:00:00 0.161 2.249
21  2014-06-25 04:15:00 0.161 2.245
22  2014-06-25 04:30:00 0.161 2.242
23  2014-06-25 04:45:00 0.167 2.235
24  2014-06-25 05:00:00 0.167 2.229
25  2014-06-25 05:15:00 0.174 2.225
26  2014-06-25 05:30:00 

## Running Millar water yield calc and HI/FI calcs with custon event start and end datetimes

- ### 2024-07-15 MED note: I saved cQ_functions_MED_custom_delineations from cQ_functions_MED.R
    - #### This version allows for water and constituent event yield calcs
- ### I wanted the `allEventDTs` dataframe created in Millar's Function 4 (`processStormEventsWithConc`) to be supplanted with an analagous dataframe that has the following for columns:





In [10]:
###################
# SET RDF PARAMS #
##################

# Vector containing candidate baseflow separation filter values
candidateFilterPara <- c(0.996,0.98) # Kincaid 2020 used 0.996 for all catchments

# Vector containing candidate stormflow threshold values
#candidateSfThresh <- c(0.098,0.1,0.12) #See cell above for Kincaid comparison use case

# Vector with interpolation intervals used for calculating HI
interp <- seq(0,1,0.01)

###############################
# RUN ANALYSIS TO GET EVENTS #
###############################

batchRun1 <- batchRunBfAndEvSepForCQ(qInputs = Site_input,
                                     bfSepPasses = 3, # orig 3
                                     filterParam = candidateFilterPara,
                                     sfSmoothPasses = 4, # orig 4
                                     sfThresh = candidateSfThresh,
                                     cInputs = Site_input,
                                     timeStep = 15,
                                     minDuration = 4, # Kincaid 2020 uses 4 hrs for Hungerford
                                     maxDuration = 200,
                                     eventInputs = customEventDel) # MED addition

eventsDataAll1 <- getAllStormEvents(batchRun = batchRun1,
                                    timestep_min = 15)

batchRunFlowsLF1 <- batchRunflowCompare(qData = Site_input,
                                         bfSepPasses = 4, # orig 4
                                         filterPara = candidateFilterPara,
                                         sfSmoothPasses = 4) # orig 4

eventsData1 <- stormEventCalcs(batchRun = batchRun1,
                               timestep_min = 15)

eventsData1$filter_para <- as.numeric(eventsData1$filter_para)

# Add water yield column (in mm) using catchment area
eventsData1 <- eventsData1 %>%
  mutate(
    water_yield_mm = tot_q_m3 / (Area * 10^6) * 1000,
  )

# Add constituent yield column (in mm) using catchment area
eventsData1 <- eventsData1 %>%
  mutate(
    constit_yield_mm = (tot_constit_mgN) / (Area * 10^6),
  )

stormCounts1 <- stormCounts(batchRun1)

# Not dealing with HI calc in this workflow
#hysteresisData1 <- getHysteresisIndices(batchRun = batchRun1,
                                        #xForInterp = interp,
                                        #eventsData = eventsData1)