## This notebook documents my practice runs with Millar et al.'s R code for automatic event delineation and hysteresis calculations, using the s::can and discharge data from [Kincaid et al., 2020](https://agupubs.onlinelibrary.wiley.com/doi/10.1029/2020WR027361)

- ### Data publicly available here on HydroShare: https://www.hydroshare.org/resource/85fa32a11fbb49779033934a135f54ef/

- ### This larger dataset includes the 2014-2015 discharge and nitrate data from Vaughan, M. (2017). Vermont NEWRnet stations: 2014-2015 high-frequency DOC, nitrate, and discharge data, HydroShare, http://www.hydroshare.org/resource/faac1672244c407e9c9c8644c8211fd6.

- ### I downloaded the data on 05.02.24 and put it in this directory: /home/millieginty/Documents/git-repos/cQ_analysis/millar2021_R_partition_hysteresis

- ### The raw data file has discharge (q, m³/s), NO3, and SRP with timestamps and event start/end times for each watershed. The Millar code takes only timestamp, q, and C as input CSVs, so I'll split this raw data file into just those parameters for each site over the entire time period (>400 events from 2014 to 2018, no winter events).

In [22]:
#################
# LOAD PACKAGES #
#################

library(tidyverse)
library(viridis)

###################
# SET DIRECTORIES #
###################

# Define the input and output directories

# For the Kincaid data, input and output live in their own sub-directory
input_dir <- "/home/millieginty/Documents/git-repos/cQ_analysis/millar2021_R_partition_hysteresis/kincaid2020_hydroshare/"
output_dir <- "/home/millieginty/Documents/git-repos/cQ_analysis/millar2021_R_partition_hysteresis/kincaid2020_hydroshare/output/"

# The functions script sits in the main Millar directory
millar_input_dir <- "/home/millieginty/Documents/git-repos/cQ_analysis/millar2021_R_partition_hysteresis/"

#####################
# READ IN FUNCTIONS #
#####################

# MED note: I haven't altered anything in this functions script
source(file.path(millar_input_dir, "cQ_functions.R"))

################
# READ IN DATA #
################

# Read in raw Hydroshare data csv from Kincaid et al 2020 found at https://www.hydroshare.org/resource/85fa32a11fbb49779033934a135f54ef/
# Downloaded on 05.02.24
allInputData15Min <- read.csv(file.path(input_dir, "hydroshare_rawData.csv"))

# Filter the data for just Hungerford Brook
# Rename whatever constituent is being analysed to 'conc'
Hford <- allInputData15Min %>%
  filter(site == "Hungerford") %>%
  select(datetime = timestamp, q_cms, conc = NO3_mgNL)

# Specify constituent in data set name (used as the prefix of exported files)
dataSetName <- "HF_NO3"

# Choose constituent for plot axes labels (NO3, TOC, or turbidity)
constit <- "NO3"

# MED note that I had to change the column in the Kincaid dataset to 'datetime' from 'timestamp' and also added seconds
# `format` must be passed as a named argument: the previous positional
# `format("...")` call only worked by accident of argument matching.
Hford$datetime <- as.POSIXct(Hford$datetime,
                             format = "%m/%d/%Y %H:%M:%S",
                             tz = "EST")

# as.POSIXct() returns NA (silently) for timestamps that do not match `format`,
# so surface that here instead of letting it fail somewhere downstream.
if (anyNA(Hford$datetime)) {
  warning(sum(is.na(Hford$datetime)),
          " timestamp(s) could not be parsed with format",
          " '%m/%d/%Y %H:%M:%S' and are NA.",
          call. = FALSE)
}

# Report gaps in the discharge / concentration series.
# NOTE(review): NA values here are a likely (unconfirmed) cause of the
# "missing value where TRUE/FALSE needed" error raised by the event
# delineation code - confirm against cQ_functions.R.
if (anyNA(Hford$q_cms) || anyNA(Hford$conc)) {
  warning("Hford contains missing values: ",
          sum(is.na(Hford$q_cms)), " in q_cms, ",
          sum(is.na(Hford$conc)), " in conc.",
          call. = FALSE)
}

# Rescale the data: min-max normalise conc to [0, 1], then stretch it to the
# peak discharge. na.rm = TRUE keeps a single missing observation from turning
# the whole rescaled column into NA.
Hford <- Hford %>%
  mutate(
    rescaled_conc = (conc - min(conc, na.rm = TRUE)) /
      (max(conc, na.rm = TRUE) - min(conc, na.rm = TRUE)) *
      max(q_cms, na.rm = TRUE)
  )

# Vector containing candidate baseflow separation filter values
candidateFilterPara <- c(0.99, 0.98)

# Vector containing candidate stormflow threshold values
#candidateSfThresh <- c(0.01,0.003,0.05) # Millar et al 2021 values
candidateSfThresh <- c(1.5, 3.0, 4.8) # MED HF guess values

# Vector with interpolation intervals used for calculating HI
interp <- seq(0, 1, 0.01)

### Now, running the Millar code to get hysteresis indices

In [23]:
##########################################
# RUN ANALYSIS TO GET HYSTERESIS INDICES #
##########################################

# All functions below come from the sourced cQ_functions.R script.
# NOTE(review): in the recorded run this cell stopped with
# "missing value where TRUE/FALSE needed", so the objects created here
# may not all exist afterwards.

# Batch run over the candidate filter parameters and stormflow thresholds.
# The same Hford data frame supplies both the discharge and concentration
# inputs. timeStep = 15 matches the 15-minute input data; the units of
# minDuration / maxDuration are presumably hours - TODO confirm.
batchRun1 <- batchRunBfAndEvSepForCQ(
  qInputs = Hford,
  bfSepPasses = 3,
  filterParam = candidateFilterPara,
  sfSmoothPasses = 4,
  sfThresh = candidateSfThresh,
  cInputs = Hford,
  timeStep = 15,
  minDuration = 2,
  maxDuration = 200
)

# Storm event c-Q data collected from the batch run
eventsDataAll1 <- getAllStormEvents(
  batchRun = batchRun1,
  timestep_min = 15
)

# Flow comparison across the candidate filter parameters.
# Note this call uses 4 baseflow-separation passes (the batch run uses 3).
batchRunFlowsLF1 <- batchRunflowCompare(
  qData = Hford,
  bfSepPasses = 4,
  filterPara = candidateFilterPara,
  sfSmoothPasses = 4
)

# Per-event summary calculations
eventsData1 <- stormEventCalcs(
  batchRun = batchRun1,
  timestep_min = 15
)

# Storm counts from the batch run
stormCounts1 <- stormCounts(batchRun1)

# Hysteresis indices, evaluated at the interpolation points in `interp`
hysteresisData1 <- getHysteresisIndices(
  batchRun = batchRun1,
  xForInterp = interp,
  eventsData = eventsData1
)


ERROR: Error in if (flowData$storm_y_n[dt] == "yes" & flowData$storm_y_n[dt - : missing value where TRUE/FALSE needed


### Export the results

In [24]:
######################
# EXPORT OUTPUT DATA #
######################

# Write each result table to output_dir as "<dataSetName>_<suffix>.csv".
# These objects are created by the analysis cell; if that cell failed,
# the writes below fail with "object ... not found".

# Make sure the output directory exists so write.csv() does not fail on a
# fresh checkout; this is a no-op when the directory is already there.
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

write.csv(eventsData1,
          file = file.path(output_dir, paste0(dataSetName, "_StormEventSummaryData.csv")))
write.csv(batchRunFlowsLF1,
          file = file.path(output_dir, paste0(dataSetName, "_DischargeData.csv")))
write.csv(hysteresisData1,
          file = file.path(output_dir, paste0(dataSetName, "_HysteresisData.csv")))
write.csv(eventsDataAll1,
          file = file.path(output_dir, paste0(dataSetName, "_AllCQData.csv")))
write.csv(stormCounts1,
          file = file.path(output_dir, paste0(dataSetName, "_StormCounts.csv")))

ERROR: Error in is.data.frame(x): object 'eventsData1' not found
