diff --git a/README.md b/README.md index 3e9c4d4e..f8f343bd 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ This package includes implementations of the following: - GR4J - HyMod - IHACRES -- SYMHYD +- SIMHYD Performance is expected to be similar to implementations in C and Fortran. @@ -26,7 +26,7 @@ Naive timings (using `@time`) for an example dataset spanning 1963-07-05 - 2014- 0.016502 seconds (469.25 k allocations: 12.902 MiB) - GR4JNode \ 0.015274 seconds (224.75 k allocations: 5.584 MiB) -- SYMHYDNode \ +- SIMHYDNode \ 0.039540 seconds (638.01 k allocations: 15.190 MiB, 46.99% gc time) - IHACRESBilinearNode \ 0.021734 seconds (675.63 k allocations: 17.773 MiB) @@ -92,11 +92,11 @@ using CSV, DataFrames, YAML using StatsPlots using Streamfall -example_data_dir = joinpath(dirname(dirname(pathof(Streamfall))), "test/data") +data_dir = joinpath(dirname(dirname(pathof(Streamfall))), "test/data") # Load data file which holds observed streamflow, precipitation and PET data obs_data = CSV.read( - joinpath(example_data_dir, "cotter/climate/CAMELS-AUS_410730.csv"), + joinpath(data_dir, "cotter/climate/CAMELS-AUS_410730.csv"), DataFrame; comment="#" ) @@ -225,12 +225,12 @@ using Streamfall # Load a network from a file, providing a name for the network and the file path. # Creates a graph representation of the stream with associated metadata. -example_data_dir = joinpath(dirname(dirname(pathof(Streamfall))), "test/data") -sn = load_network("Example Network", joinpath(example_data_dir, "campaspe/campaspe_network.yml")) +data_dir = joinpath(dirname(dirname(pathof(Streamfall))), "test/data") +sn = load_network("Example Network", joinpath(data_dir, "campaspe/campaspe_network.yml")) # Load climate data, in this case from a CSV file with data for all nodes. # Indicate which columns are precipitation and evaporation data based on partial identifiers -climate = Climate(joinpath(example_data_dir, "campaspe/climate/climate.csv"), "_rain", "_evap") +climate = Climate(joinpath(data_dir, "campaspe/climate/climate.csv"), "_rain", "_evap") # This runs an entire stream network @info "Running an example stream..." diff --git a/docs/make.jl b/docs/make.jl index ba25bc87..04789b51 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,4 +1,4 @@ -push!(LOAD_PATH,"../src/") +push!(LOAD_PATH, "../src/") # using Pkg @@ -8,10 +8,10 @@ using Documenter, Streamfall makedocs(sitename="Streamfall Documentation", - format = Documenter.HTML( - prettyurls = get(ENV, "CI", nothing) == "true" + format=Documenter.HTML( + prettyurls=get(ENV, "CI", nothing) == "true" ), - pages = [ + pages=[ "index.md", "primer.md", "expected_data_formats.md", @@ -19,17 +19,18 @@ makedocs(sitename="Streamfall Documentation", "examples/examples.md", "examples/node_creation.md", "examples/network_loading.md", + "Model evaluation" => [ + "examples/evaluation/simple_showcase.md", + "examples/evaluation/model_comparison.md", + "examples/evaluation/simple_multisystem.md", + ], "Calibration" => [ # "examples/calibration_setup.md", - "examples/calibration.md", - ], - "Model evaluation" => [ - "examples/simple_showcase.md", - "examples/model_comparison.md", - "examples/simple_multisystem.md", + "examples/calibration/calibration.md", + "examples/calibration/custom_calibration.md", ], "Ensemble modeling" => [ - "examples/weighted_ensembles.md" + "examples/ensembles/weighted_ensembles.md" ] ], "API" => [ @@ -39,7 +40,7 @@ makedocs(sitename="Streamfall Documentation", "API/nodes/IHACRES.md", "API/nodes/HyMod.md", "API/nodes/GR4J.md", - "API/nodes/SYMHYD.md", + "API/nodes/SIMHYD.md", "API/nodes/Dam.md" ], "API/plotting.md", @@ -51,8 +52,8 @@ makedocs(sitename="Streamfall Documentation", ) deploydocs( - repo = "github.com/ConnectedSystems/Streamfall.jl.git", - devbranch = "main", + repo="github.com/ConnectedSystems/Streamfall.jl.git", + devbranch="main", target="build", deps=nothing, make=nothing diff --git a/docs/src/API/nodes/SYMHYD.md b/docs/src/API/nodes/SIMHYD.md similarity index 58% rename from docs/src/API/nodes/SYMHYD.md rename to docs/src/API/nodes/SIMHYD.md index 4ee01e8f..789e42cb 100644 --- a/docs/src/API/nodes/SYMHYD.md +++ b/docs/src/API/nodes/SIMHYD.md @@ -1,7 +1,7 @@ -# SYMHYD +# SIMHYD ```@autodocs Modules = [Streamfall] Order = [:function, :type] -Pages = ["Nodes/SYMHYD/SYMHYDNode.jl"] +Pages = ["Nodes/SIMHYD/SIMHYDNode.jl"] ``` diff --git a/docs/src/assets/custom_calibration_ensemble.png b/docs/src/assets/custom_calibration_ensemble.png new file mode 100644 index 00000000..09e5bdf4 Binary files /dev/null and b/docs/src/assets/custom_calibration_ensemble.png differ diff --git a/docs/src/assets/custom_calibration_gr4j.png b/docs/src/assets/custom_calibration_gr4j.png new file mode 100644 index 00000000..61cde8c8 Binary files /dev/null and b/docs/src/assets/custom_calibration_gr4j.png differ diff --git a/docs/src/assets/default_calibration_gr4j.png b/docs/src/assets/default_calibration_gr4j.png new file mode 100644 index 00000000..8c8ec0d3 Binary files /dev/null and b/docs/src/assets/default_calibration_gr4j.png differ diff --git a/docs/src/examples/calibration.md b/docs/src/examples/calibration/calibration.md similarity index 82% rename from docs/src/examples/calibration.md rename to docs/src/examples/calibration/calibration.md index 9da7ec3b..b1f03f82 100644 --- a/docs/src/examples/calibration.md +++ b/docs/src/examples/calibration/calibration.md @@ -9,18 +9,18 @@ Data is prepped with the script `campaspe_data_prep.jl` in the `test/data/campas directory. """ -using OrderedCollections -using Glob - using Statistics -using CSV, DataFrames, YAML +using CSV, YAML, DataFrames using Streamfall # Import visualization packages to compile extensions -using Plots, GraphPlot - +using StatsPlots, GraphPlot -sn = load_network("Example Network", "../test/data/campaspe/campaspe_network.yml") +example_data_dir = joinpath(dirname(dirname(pathof(Streamfall))), "test/data") +sn = load_network( + "Example Network", + joinpath(example_data_dir, "campaspe/campaspe_network.yml") +) # The Campaspe catchment is represented as a network of eight nodes, including one dam. # All nodes use the IHACRES_CMD rainfall-runoff model. @@ -28,17 +28,22 @@ plot_network(sn) # Load climate data - in this case from a CSV file with data for all nodes. # Indicate which columns are precipitation and evaporation data based on partial identifiers -climate = Climate("../test/data/campaspe/climate/climate.csv", "_rain", "_evap") +example_data_dir = joinpath(dirname(dirname(pathof(Streamfall))), "test/data/campaspe") +climate = Climate(joinpath(example_data_dir, "climate/climate.csv"), "_rain", "_evap") # Historic flows and dam level data calib_data = CSV.read( - "../test/data/campaspe/gauges/outflow_and_level.csv", + joinpath(example_data_dir, "gauges/outflow_and_level.csv"), DataFrame; comment="#" ) # Historic extractions from the dam -extraction_data = CSV.read("../test/data/campaspe/gauges/dam_extraction.csv", DataFrame; comment="#") +extraction_data = CSV.read( + joinpath(example_data_dir, "gauges/dam_extraction.csv"), + DataFrame; + comment="#" +) # We now have a dataset for calibration (`calib_data`) and a dataset indicating the # historic dam extractions (`extraction_data`). @@ -79,8 +84,9 @@ calibrate!( MaxTime=60.0 ); -# Could calibrate a specific node, assuming all nodes upstream have already been calibrated -# Set `calibrate_all=true` to calibrate all upstream nodes as well. +# A specific node can also be calibrated, assuming all nodes upstream have already been +# calibrated. +# Otherwise, set `calibrate_all=true` to calibrate all upstream nodes as well. # To produce the results shown below, the node upstream from the dam was calibrated an # additional 2 hours. # calibrate!( @@ -102,11 +108,11 @@ Streamfall.NSE(dam_obs[366:end], dam_sim[366:end]) Streamfall.mKGE(dam_obs[366:end], dam_sim[366:end]) # Plot results -f = quickplot(dam_obs, dam_sim, climate, "Modelled - 406000", false; burn_in=366) -savefig(f, "example_dam_level.png") +f = quickplot(dam_obs, dam_sim, climate; label="Modelled - 406000", log=false, burn_in=366) +# savefig(f, "example_dam_level.png") # Save calibrated network to a file -save_network(sn, "example_network_calibrated.yml") +# save_network(sn, "example_network_calibrated.yml") # Illustrating that the re-loaded network reproduces the results as above sn2 = load_network("Calibrated Example", "example_network_calibrated.yml") @@ -128,13 +134,13 @@ temporal_cross_section(sim_dates, calib_data[:, "406000"], sn2[3].level) The last two lines produces the plots below - + The `quickplot()` function creates the figure displayed above which shows dam levels on the left (observed and modelled) with a [Q-Q plot](https://en.wikipedia.org/wiki/Q%E2%80%93Q_plot) on the right. - + The above shows a "cross-section" of model predictions for each month-day across simulation time. It is useful to gain an understanding on when models may underperform and give a diff --git a/docs/src/examples/calibration_setup.md b/docs/src/examples/calibration/calibration_setup.md similarity index 83% rename from docs/src/examples/calibration_setup.md rename to docs/src/examples/calibration/calibration_setup.md index 5d5a068f..fb289884 100644 --- a/docs/src/examples/calibration_setup.md +++ b/docs/src/examples/calibration/calibration_setup.md @@ -4,6 +4,9 @@ The calibration examples all rely on the functions shown here. List of metrics provided by Streamfall can be found in [Included metrics](@ref) +The example here assumes the data has been installed or copied locally. +Alternatively, download the `test/data` directory from the project repository and +change the `DATA_PATH` variable below accordingly. ## Importing shared/common packages @@ -12,19 +15,16 @@ List of metrics provided by Streamfall can be found in [Included metrics](@ref) using Statistics, DataFrames, CSV using Distributed, BlackBoxOptim -using ModelParameters -using Graphs, MetaGraphs -using YAML, Plots +using YAML +using StatsPlots, GraphPlot using Streamfall ``` - ## Load network specification -Note that the `DATA_PATH` is pointing to the `test/data/campaspe/` directory. - ```julia # Load and generate stream network +DATA_PATH = joinpath(dirname(dirname(pathof(Streamfall))), "test/data/campaspe/") network = YAML.load_file(joinpath(DATA_PATH, "campaspe_network.yml")) sn = create_network("Example Network", network) ``` @@ -34,14 +34,15 @@ sn = create_network("Example Network", network) ```julia # Load climate data date_format = "YYYY-mm-dd" -climate_data = CSV.File(joinpath(data_path, "climate/climate_historic.csv"), +climate_data = CSV.read(joinpath(DATA_PATH, "climate/climate_historic.csv"), comment="#", - dateformat=date_format) |> DataFrame + dateformat=date_format, DataFrame) + +dam_level_fn = joinpath(DATA_PATH, "gauges/406000_historic_levels_for_fit.csv") +hist_dam_levels = CSV.read(dam_level_fn, dateformat=date_format, DataFrame) -dam_level_fn = joinpath(data_path, "dam/historic_levels_for_fit.csv") -dam_releases_fn = joinpath(data_path, "dam/historic_releases.csv") -hist_dam_levels = CSV.File(dam_level_fn, dateformat=date_format) |> DataFrame -hist_dam_releases = CSV.File(dam_releases_fn, dateformat=date_format) |> DataFrame +dam_releases_fn = joinpath(DATA_PATH, "gauges/406000_historic_outflow.csv") +hist_dam_releases = CSV.read(dam_releases_fn, dateformat=date_format, DataFrame) # Subset to same range climate_data, hist_dam_levels, hist_dam_releases = Streamfall.align_time_frame(climate_data, diff --git a/docs/src/examples/calibration/custom_calibration.md b/docs/src/examples/calibration/custom_calibration.md new file mode 100644 index 00000000..a785dbe2 --- /dev/null +++ b/docs/src/examples/calibration/custom_calibration.md @@ -0,0 +1,292 @@ +# Customized calibration + +Streamfall provides built-in methods for model calibration, but custom metrics +and objective functions can be defined to better suit specific calibration needs. This +example demonstrates how to create custom metrics that combine multiple performance metrics +or apply specific weightings to different aspects of model performance. + +## Basic Structure + +Streamfall adopts a two-pronged approach to handling objective functions. + +The first is a "default" method assigned to each node (which can be overwritten/replaced). + +A custom objective function for calibration typically accepts: +1. The parameter values to be evaluated +2. Climate data for the simulation +3. The network or node being calibrated +4. Observational data for comparison +5. The metric used to assess performance +6. Inputs to account for additional inflow, extraction and groundwater flux + +The function then returns a single scalar value representing the optimization target +(to be minimized). + +Below is an example of the default implementation, copied and defined outside of Streamfall. + +```julia +function custom_obj_func( + params, climate::Streamfall.Climate, node::Streamfall.NetworkNode, calib_data::Array; + metric::F, inflow=nothing, extraction=nothing, exchange=nothing +) where {F} + update_params!(node, params...) + + metric_func = (sim, obs) -> handle_missing(metric, sim, obs; handle_missing=:skip) + + run_node!(node, climate; inflow=inflow, extraction=extraction, exchange=exchange) + score = metric_func(node.outflow, calib_data) + + # Reset to clear stored values + reset!(node) + + return score +end + +# Example usage: create a node +ihacres_node = create_node(IHACRESBilinearNode, "410730_ihacres", 129.2) + +# Replace default method with the custom objective function +ihacres_node.obj_func = custom_obj_func + +# Then proceed with calibration as normal +# calibrate!(...); +``` + +The above allows a finer level of control, allowing the objective function to account for +climatic conditions as well as other external forcings (such as groundwater exchange). + +Alternatively, performance metrics can be defined for specific nodes, or node instances +that make up an ensemble. + +Lets begin with a default calibration for comparison + +```julia +using Statistics +using CSV, DataFrames +using Streamfall +using StatsPlots # To activate visualization extensions + +# Set up data +data_dir = joinpath( + dirname(dirname(pathof(Streamfall))), + "test/data" +) + +# Historic flows +obs_data = CSV.read( + joinpath(data_dir, "cotter/climate/CAMELS-AUS_410730.csv"), + DataFrame; + comment="#" +) + +Qo = extract_flow(obs_data, "410730") +climate = extract_climate(obs_data) + +# Create a node +gr4j_node = create_node(GR4JNode, "410730", 129.2) + +# Calibrate the model using NmKGE +calibrate!( + gr4j_node, climate, Qo, (obs, sim) -> 1.0 - Streamfall.NmKGE(obs, sim); + extraction=extraction_data, weighting=0.0, + MaxTime=300.0 +); + +# Visualize model performance (using a 1-year burn-in period) +burn_in = 366 +burn_obs = Qo[burn_in:end, "410730"] +run_node!(gr4j_node, climate) +gr4j_qp = quickplot(burn_obs, gr4j_node.outflow[burn_in:end], climate; label="GR4J", log=true) +# savefig(gr4j_qp, "default_calibration_gr4j.png") +``` + +Below, the calibration is repeated using a custom metric which targets low-flow conditions. + +```julia +# Create a node +gr4j_node = create_node(GR4JNode, "410730", 129.2) + +# Define a custom objective function that combines multiple metrics to better account for +# low-flow periods +function custom_low_flow_objective(obs, sim) + # Filter for low flow periods (e.g., flow < 10th percentile) + low_flow_threshold = quantile(obs, 0.1) + low_flow_indices = findall(obs .<= low_flow_threshold) + + # Calculate metrics for low-flow periods + if !isempty(low_flow_indices) + # Apply log transform to emphasize low flow performance + log_obs = log.(obs[low_flow_indices] .+ 1e-6) + log_sim = log.(sim[low_flow_indices] .+ 1e-6) + + # Note: we are using Normalized versions of the usual metrics + # so the returned values are 0 - 1. + # We then take the complement as the optimizer seeks to + # minimize error. + kge_low = 1.0 - Streamfall.NmKGE(log_obs, log_sim) + + # Calculate metrics for all flows for balance + kge_all = 1.0 - Streamfall.NmKGE(obs, sim) + + # Combined score - weighting more heavily toward low flows + score = (0.7 * kge_low) + (0.3 * kge_all) + else + # Fallback if no low flows found + score = 1.0 - Streamfall.NmKGE(obs, sim) + end + + return score +end + +# Calibrate the model +calibrate!( + gr4j_node, climate, Qo, custom_low_flow_objective; + extraction=extraction_data, weighting=0.0, + MaxTime=300.0 +); + +# Visualize model performance (using a 1-year burn-in period) +burn_in = 366 +burn_obs = Qo[burn_in:end, "410730"] +run_node!(gr4j_node, climate) +gr4j_qp = quickplot(burn_obs, gr4j_node.outflow[burn_in:end], climate; label="Weighted Ensemble", log=true) +# savefig(gr4j_qp, "custom_calibration_gr4j.png") +``` + + + + +The results show improved performance under low-flow conditions, at the expense of +mid-to-high flow conditions. + +Below is an example of defining a performance metric for specific nodes. +For further detail on ensemble modeling, see the section on [Weighted Ensemble Modeling](@ref). + +```julia +# Reload data (just in case modifications were made) +obs_data = CSV.read( + joinpath(data_dir, "cotter/climate/CAMELS-AUS_410730.csv"), + DataFrame; + comment="#" +) + +Qo = extract_flow(obs_data, "410730") +climate = extract_climate(obs_data) + +# Create one instance each of IHACRES_CMD, GR4J and SIMHYD +ihacres_node = create_node(IHACRESBilinearNode, "410730_ihacres", 129.2) +gr4j_node = create_node(GR4JNode, "410730_gr4j", 129.2) +simhyd_node = create_node(SIMHYDNode, "410730_simhyd", 129.2) + +# Create a weighted ensemble with equal weights +# The default behavior is to combine component predictions with a normalized weighted sum. +ensemble = create_node( + WeightedEnsembleNode, + [ihacres_node, gr4j_node, simhyd_node], + [0.5, 0.5, 0.5] +) + +# Define a custom objective function that combines multiple metrics +function custom_low_flow_objective(obs, sim) + # Filter for low flow periods (e.g., flow < 10th percentile) + low_flow_threshold = quantile(obs, 0.1) + low_flow_indices = findall(obs .<= low_flow_threshold) + + # Calculate metrics for all flows for balance + kge_all = 1.0 - Streamfall.NmKGE(obs, sim) + + # Calculate metrics for low-flow periods + if !isempty(low_flow_indices) + # Apply log transform to emphasize low flow performance + log_obs = log.(obs[low_flow_indices] .+ 1e-6) + log_sim = log.(sim[low_flow_indices] .+ 1e-6) + + # Note: we are using Normalized versions of the usual metrics + # so the returned values are 0 - 1. + # We then take the complement as the optimizer seeks to + # minimize error. + kge_low = 1.0 - Streamfall.NmKGE(log_obs, log_sim) + + # Combined score - weighting more heavily toward low flows + score = (0.8 * kge_low) + (0.2 * kge_all) + else + # Fallback if no low flows found + score = kge_all + end + + return score +end + +# Assign different metrics to individual nodes +# Make GR4J +custom_metrics = Dict( + "410730_ihacres" => (obs, sim) -> 1.0 - Streamfall.NmKGE(obs, sim), + "410730_gr4j" => custom_low_flow_objective, + "410730_simhyd" => (obs, sim) -> 1.0 - Streamfall.NmKGE(obs, sim) +) + +# Copy flow data (these can be nodes in a network) +Qo[:, "410730_ihacres"] = Qo[:, "410730"] +Qo[:, "410730_gr4j"] = Qo[:, "410730"] +Qo[:, "410730_simhyd"] = Qo[:, "410730"] + +# Copy climate data for each node (these can be nodes in a network) +insertcols!( + climate.climate_data, + "410730_ihacres_P" => climate.climate_data[:, "410730_P"], + "410730_ihacres_PET" => climate.climate_data[:, "410730_PET"], + "410730_ihacres_Q" => climate.climate_data[:, "410730_Q"], + "410730_gr4j_P" => climate.climate_data[:, "410730_P"], + "410730_gr4j_PET" => climate.climate_data[:, "410730_PET"], + "410730_gr4j_Q" => climate.climate_data[:, "410730_Q"], + "410730_simhyd_P" => climate.climate_data[:, "410730_P"], + "410730_simhyd_PET" => climate.climate_data[:, "410730_PET"], + "410730_simhyd_Q" => climate.climate_data[:, "410730_Q"] +) + +# Use the custom objective function in calibration +calibrate_instances!( + ensemble, + climate, + Qo, + custom_metrics; + MaxTime=300 +) + +burn_in = 366 +burn_obs = Qo[burn_in:end, "410730"] +run_node!(ensemble, climate) +ensemble_qp = quickplot(burn_obs, ensemble.outflow[burn_in:end], climate; label="Weighted Ensemble", log=true) +# savefig(ensemble_qp, "custom_calibration_ensemble.png") +``` + + + +Here we see the weighted ensemble retains much of the performance characteristics under +a variety of conditions. + +## Tips for Custom Objective Functions + +1. **Normalization**: Ensure different metrics are on comparable scales. Consider normalizing values to [0,1] range. + +2. **Complementary Metrics**: Different metrics capture different aspects of performance. Combining KGE (overall performance) with RMSE (high flows) and log-transformed metrics (low flows) provides balanced calibration. + +3. **Weighting**: Adjust weights based on your modeling priorities. Higher weights lead to more emphasis on specific aspects. + +4. **Error Handling**: Include error checking for edge cases (e.g., all zero flows, missing data). + +5. **Time Efficiency**: Keep objective functions computationally efficient as they'll be called many times during calibration. + +## Further Reading + +For theoretical background on objective functions and performance metrics: + +1. Fowler, K., Peel, M., Western, A., Zhang, L., 2018. \ + Improved Rainfall-Runoff Calibration for Drying Climate: Choice of Objective Function. \ + Water Resources Research 54, 3392–3408. \ + https://doi.org/10.1029/2017WR022466 + +2. Garcia, F., Folton, N., Oudin, L., 2017. \ + Which objective function to calibrate rainfall–runoff models for low-flow index simulations? \ + Hydrological Sciences Journal 62, 1149–1166. \ + https://doi.org/10.1080/02626667.2017.1308511 \ No newline at end of file diff --git a/docs/src/examples/weighted_ensembles.md b/docs/src/examples/ensembles/weighted_ensembles.md similarity index 86% rename from docs/src/examples/weighted_ensembles.md rename to docs/src/examples/ensembles/weighted_ensembles.md index e796d593..e4173e18 100644 --- a/docs/src/examples/weighted_ensembles.md +++ b/docs/src/examples/ensembles/weighted_ensembles.md @@ -6,17 +6,19 @@ as the ensemble constituents. The default ensemble is a normalized weighted sum. The usual setup process is shown here, detailed in previous sections of this guide. ```julia -using Plots +using StatsPlots using CSV, DataFrames using Statistics using Streamfall - -climate = Climate("../test/data/campaspe/climate/climate.csv", "_rain", "_evap") +data_dir = joinpath( + dirname(dirname(pathof(Streamfall))), + "test/data" +) # Historic flows and dam level data obs_data = CSV.read( - "../test/data/cotter/climate/CAMELS-AUS_410730.csv", + joinpath(data_dir, "cotter/climate/CAMELS-AUS_410730.csv"), DataFrame; comment="#" ) @@ -68,9 +70,9 @@ burn_in = 365 burn_dates = timesteps(climate)[burn_in:end] burn_obs = Qo[burn_in:end, "410730"] -ihacres_qp = quickplot(burn_obs, ihacres_node.outflow[burn_in:end], climate, "IHACRES", true) -gr4j_qp = quickplot(burn_obs, gr4j_node.outflow[burn_in:end], climate, "GR4J", true) -ensemble_qp = quickplot(burn_obs, ensemble.outflow[burn_in:end], climate, "Weighted Ensemble", true) +ihacres_qp = quickplot(burn_obs, ihacres_node.outflow[burn_in:end], climate; label="IHACRES", log=true) +gr4j_qp = quickplot(burn_obs, gr4j_node.outflow[burn_in:end], climate; label="GR4J", log=true) +ensemble_qp = quickplot(burn_obs, ensemble.outflow[burn_in:end], climate; label="Weighted Ensemble", log=true) plot(ihacres_qp, gr4j_qp, ensemble_qp; layout=(3, 1), size=(800, 1200)) ``` @@ -82,7 +84,7 @@ flows and high flows, whereas GR4J had a tendency to overestimate low flows. The weighted ensemble combined characteristics of both, with a tendency to overestimate low flows as with GR4J. - + Comparing the temporal cross section: @@ -96,7 +98,7 @@ plot(ihacres_xs, gr4j_xs, ensemble_xs; layout=(3, 1), size=(800, 1200)) A reduction in the median error can be seen with extreme errors reduced somewhat. - + The median error can then be applied to modelled streamflow (on a month-day basis) as a form of bias correction. @@ -104,7 +106,7 @@ form of bias correction. ```julia q_star = Streamfall.apply_temporal_correction(ensemble, climate, Qo[:, "410730"]) -bc_ensemble_qp = quickplot(burn_obs, q_star[burn_in:end], climate, "Bias Corrected Ensemble", true) +bc_ensemble_qp = quickplot(burn_obs, q_star[burn_in:end], climate; label="Bias Corrected Ensemble", log=true) bias_corrected_xs = temporal_cross_section( burn_dates, @@ -121,4 +123,9 @@ While the median error has increased, its variance has reduced significantly. At time, performance at the 75 and 95% CI remain steady relative to the original weighted ensemble results. - + + +This ensemble approach may be improved further by: + +- Using a rolling window to smooth ensemble predictions +- Defining a custom objective function to emphasize model performance diff --git a/docs/src/examples/model_comparison.md b/docs/src/examples/evaluation/model_comparison.md similarity index 80% rename from docs/src/examples/model_comparison.md rename to docs/src/examples/evaluation/model_comparison.md index b44f68c5..2fcc91b5 100644 --- a/docs/src/examples/model_comparison.md +++ b/docs/src/examples/evaluation/model_comparison.md @@ -6,7 +6,7 @@ Here, the Gingera catchment along the Cotter River is examined. ```julia using Distributed -using Plots +using StatsPlots N = 4 if nworkers() < N @@ -17,8 +17,8 @@ end using CSV, DataFrames using Streamfall - HERE = @__DIR__ - DATA_PATH = joinpath(HERE, "../test/data/cotter/") + HERE = dirname(dirname(pathof(Streamfall))) + DATA_PATH = joinpath(HERE, "test/data/cotter/") # Load observations date_format = @@ -45,13 +45,13 @@ end # Create individual nodes hymod_node = create_node(SimpleHyModNode, "410730", 129.2) gr4j_node = create_node(GR4JNode, "410730", 129.2) -symhyd_node = create_node(SYMHYDNode, "410730", 129.2) +simhyd_node = create_node(SIMHYDNode, "410730", 129.2) ihacres_node = create_node(IHACRESBilinearNode, "410730", 129.2) # Calibrate each node separately using multiprocessing -node_types = ["HyMod", "GR4J", "SYMHYD", "IHACRES"] -node_list = [hymod_node, gr4j_node, symhyd_node, ihacres_node] +node_types = ["HyMod", "GR4J", "SIMHYD", "IHACRES"] +node_list = [hymod_node, gr4j_node, simhyd_node, ihacres_node] result = pmap(opt_func, node_list) # Create comparison plot @@ -67,7 +67,7 @@ for ((res, opt), node, n_name) in zip(result, node_list, node_types) node_burn = node.outflow[burn_in:end] # Plot log scale - res_plot = quickplot(Qo, node, climate, n_name, true; burn_in=366) + res_plot = quickplot(Qo, node, climate; label=n_name, log=true, burn_in=366) push!(res_plots, res_plot) end @@ -79,7 +79,7 @@ combined_plot = plot( display(combined_plot) -# savefig("multi_model_comparison.png") +savefig("multi_model_comparison.png") ``` - \ No newline at end of file + \ No newline at end of file diff --git a/docs/src/examples/multisystem_showcase.md b/docs/src/examples/evaluation/multisystem_showcase.md similarity index 100% rename from docs/src/examples/multisystem_showcase.md rename to docs/src/examples/evaluation/multisystem_showcase.md diff --git a/docs/src/examples/simple_multisystem.md b/docs/src/examples/evaluation/simple_multisystem.md similarity index 90% rename from docs/src/examples/simple_multisystem.md rename to docs/src/examples/evaluation/simple_multisystem.md index 2074a045..760d9307 100644 --- a/docs/src/examples/simple_multisystem.md +++ b/docs/src/examples/evaluation/simple_multisystem.md @@ -23,18 +23,24 @@ In practice, water requirements are provided by another model. ```julia using CSV, DataFrames, YAML using Dates -using Plots +using StatsPlots using Streamfall +example_data_dir = joinpath(dirname(dirname(pathof(Streamfall))), "test/data/campaspe") + # Load climate data - in this case from a CSV file with data for all nodes. # Indicate which columns are precipitation and evaporation data based on partial identifiers -climate = Climate("../test/data/campaspe/climate/climate.csv", "_rain", "_evap") +climate = Climate(joinpath(example_data_dir, "climate/climate.csv"), "_rain", "_evap") # Historic extractions from the dam -extraction_data = CSV.read("../test/data/campaspe/gauges/dam_extraction.csv", DataFrame; comment="#") +extraction_data = CSV.read( + joinpath(example_data_dir, "gauges/dam_extraction.csv"), + DataFrame; + comment="#" +) # Load the example network -sn = load_network("Example Network", "../test/data/campaspe/two_node_network.yml") +sn = load_network("Example Network", joinpath(example_data_dir, "two_node_network.yml")) # Run the model for the basin to obtain baseline values run_basin!(sn, climate; extraction=extraction_data) @@ -119,4 +125,4 @@ display(f) Streamfall.temporal_cross_section(sim_dates, calib_data[:, "406000"], sn[2].level) ``` - + diff --git a/docs/src/examples/simple_showcase.md b/docs/src/examples/evaluation/simple_showcase.md similarity index 81% rename from docs/src/examples/simple_showcase.md rename to docs/src/examples/evaluation/simple_showcase.md index b80b1d55..7a0bbeb5 100644 --- a/docs/src/examples/simple_showcase.md +++ b/docs/src/examples/evaluation/simple_showcase.md @@ -20,24 +20,26 @@ This script is run in the `examples` directory. """ using CSV, DataFrames, YAML -using Plots +using StatsPlots using Streamfall +data_dir = joinpath(dirname(dirname(pathof(Streamfall))), "test/data/campaspe") + # Load climate data - in this case from a CSV file with data for all nodes. # Indicate which columns are precipitation and evaporation data based on partial identifiers -climate = Climate("../test/data/campaspe/climate/climate.csv", "_rain", "_evap") +climate = Climate(joinpath(data_dir, "climate/climate.csv"), "_rain", "_evap") calib_data = CSV.read( - "../test/data/campaspe/gauges/outflow_and_level.csv", + joinpath(data_dir, "gauges/outflow_and_level.csv"), DataFrame; comment="#" ) # Historic extractions from the dam -extraction_data = CSV.read("../test/data/campaspe/gauges/dam_extraction.csv", DataFrame; comment="#") +extraction_data = CSV.read(joinpath(data_dir, "gauges/dam_extraction.csv"), DataFrame; comment="#") # Load the two-node example network -sn = load_network("Example Network", "calibration/lake_eppalock.yml") +sn = load_network("Example Network", joinpath(data_dir, "two_node_network.yml")) # Run the dam node and above dam_id, dam_node = sn["406000"] @@ -58,14 +60,14 @@ nse = round(nse_score, digits=4) @info "Scores:" rmse nnse nse # Results of model run -quickplot(dam_obs, dam_sim, climate, "IHACRES", false; burn_in=366) +quickplot(dam_obs, dam_sim, climate; label="IHACRES", log=false, burn_in=366) ``` The `quickplot()` function creates the figure displayed above which shows dam levels on the left (observed and modelled) with a [Q-Q plot](https://en.wikipedia.org/wiki/Q%E2%80%93Q_plot) on the right. - + ```julia sim_dates = Streamfall.timesteps(climate) @@ -84,4 +86,4 @@ Here, we see that while performance is generally good (mean of Median Error is n the model can under-estimate dam levels in late-April to May and displays a tendency to over-estimate dam levels between January and June, relative to other times. - + diff --git a/docs/src/examples/network_loading.md b/docs/src/examples/network_loading.md index 90930807..5c0dbe85 100644 --- a/docs/src/examples/network_loading.md +++ b/docs/src/examples/network_loading.md @@ -4,11 +4,15 @@ Loading a pre-defined network from a YAML file. ```julia using YAML -using Plots, GraphPlot +using StatsPlots, GraphPlot using Streamfall +network_file = joinpath( + dirname(dirname(pathof(Streamfall))), + "test/data/campaspe/campaspe_network.yml" +) -sn = load_network("Example Network", "../test/data/campaspe/campaspe_network.yml") +sn = load_network("Example Network", network_file) # Find all inlets and outlets inlets, outlets = find_inlets_and_outlets(sn) diff --git a/docs/src/examples/node_creation.md b/docs/src/examples/node_creation.md index a85b3dda..62b73e53 100644 --- a/docs/src/examples/node_creation.md +++ b/docs/src/examples/node_creation.md @@ -8,13 +8,13 @@ using Streamfall hymod_node = create_node(SimpleHyModNode, "410730", 129.2) -# Hymod parameters -Sm_max = 250.0 -B = 1.0 -alpha = 0.2 -Kf = 0.5 -Ks = 0.05 +# Hymod parameters ("hy_" prefix is simply to avoid any variable name conflicts) +hy_Sm_max = 250.0 +hy_B = 1.0 +hy_alpha = 0.2 +hy_Kf = 0.5 +hy_Ks = 0.05 # Update parameters -update_params!(hymod_node, Sm_max, B, alpha, Kf, Ks) +update_params!(hymod_node, hy_Sm_max, hy_B, hy_alpha, hy_Kf, hy_Ks) ``` diff --git a/docs/src/index.md b/docs/src/index.md index 2f2c5c09..469fc369 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -12,7 +12,7 @@ This package includes implementations of the following: - GR4J - HyMod - IHACRES -- SYMHYD +- SIMHYD Performance is expected to be similar to implementations in C and Fortran. @@ -59,7 +59,7 @@ more "hands-on" overview.