-
Notifications
You must be signed in to change notification settings - Fork 31
/
calibrate_sim.R
191 lines (166 loc) · 8 KB
/
calibrate_sim.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#'@title Calibrates GLM-AED2 variables to improve fit between observed and simulated data
#'
#'@description Starts a calibration run with multiple iterations using a GLM-AED2 setup. At the end a output csv.-file of all iterations and a
#'diagnostic plot are created.
#'@param var Character vector of valid variable names (see \code{\link{sim_vars}})
#'@param path String with the path to the GLM setup
#'@param field_file CSV or TSV field data (see \link{resample_to_field}for format)
#'@param nml_file String of the glm-namelist file, default is 'glm3.nml'
#'@param calib_setup Data frame containing information regarding the calibration (see \link{get_calib_setup})
#'@param glmcmd String containing the desired glm run command, default is GLM3r
#'@param first.attempt Boolean, if TRUE a nml-template will be created, set it to FALSE after your first calibration run; default is TRUE
#'@param period List that provides a start and a stop date for the simulation
#'@param scaling Boolean, if TRUE variable values will be scaled on the space (0,10), recommended for CMA-ES, default is TRUE
#'@param verbose should operations and output of GLM be shown. Default is TRUE.
#'@param method String of the optimization method, default is 'CMA-ES' (Hansen 2009), alternatively you can also use 'Nelder-Mead'
#'@param metric String of the calibration fit metric, default is RMSE
#'@param target.fit Double of your preferred fit, calibration will stop after reaching that; default is 1.5
#'@param target.iter Double of maximum amount of iterations, default is 150
#'@param plotting Boolean, if TRUE plots all results as heat maps
#'@param output Character array of the output folder path
#'@keywords methods
#'@seealso \code{\link{get_calib_setup}}, \code{\link{get_calib_periods}}, \code{\link{get_calib_init_validation}}
#'@author
#'Robert Ladwig, Tadhg Moore
#'@examples
#'calib_setup <- get_calib_setup()
#'print(calib_setup)
#'
#'#Example calibration
#'#Copy files into temporary directory
#'sim_folder <- tempdir() #simulation path
#'glmtools_folder = system.file('extdata', package = 'glmtools')
#'
#'file.copy(list.files(glmtools_folder,full.names = TRUE), sim_folder, overwrite = TRUE)
#'
#'field_file <- file.path(sim_folder, 'LakeMendota_field_data_hours.csv')
#'nml_file <- file.path(sim_folder, 'glm3.nml')
#'driver_file <- file.path(sim_folder, 'LakeMendota_NLDAS.csv')
#'period = get_calib_periods(nml_file = nml_file, ratio = 1)
#'output = file.path(sim_folder, 'output/output.nc')
#'
#'var = 'temp' # variable to apply the calibration procedure
#'\dontrun{
#'calibrate_sim(var = var, path = sim_folder, field_file = field_file,
#' nml_file = nml_file, calib_setup = calib_setup,
#' glmcmd = NULL,
#' first.attempt = TRUE, period = period, method = 'CMA-ES',
#' scaling = TRUE, #scaling should be TRUE for CMA-ES
#' verbose = FALSE,
#' metric = 'RMSE',plotting = FALSE,
#' target.fit = 1.5,
#' target.iter = 50, output = output)
#' }
#'@import adagio
#'@import GLM3r
#'@import ggplot2
#'@export
calibrate_sim <- function(var = 'temp',
path,
field_file,
nml_file = 'glm3.nml',
calib_setup = NULL,
glmcmd = NULL,
first.attempt = TRUE,
period = NULL,
scaling = TRUE,
verbose = TRUE,
method = 'CMA-ES',
metric = 'RMSE',
target.fit = 1.5,
target.iter = 100,
plotting = TRUE,
output){
# Development message
message('Calibration functions are under development, and are likely to change with future package updates.')
if (first.attempt){
if (file.exists(paste0(path,'/calib_results_',metric,'_',var,'.csv'))){
file.remove(paste0(path,'/calib_results_',metric,'_',var,'.csv'))
}
if (file.exists(paste0(path,'/calib_par_',var,'.csv'))){
file.remove(paste0(path,'/calib_par_',var,'.csv'))
}
}
if(!file.exists('glm4.nml')){
file.copy(nml_file, paste0(path,'/glm4.nml'))
} else if (first.attempt){
file.copy(paste0(path,'/glm4.nml'), nml_file, overwrite = TRUE)
}
if (is.null(calib_setup)){
calib_setup <- get_calib_setup()
}
pars <- as.character(calib_setup$pars)
ub <- calib_setup$ub
lb <- calib_setup$lb
variable <- var
if (scaling){
init.val <- (calib_setup$x0 - lb) *10 /(ub-lb)
}
if (!is.null(period)){
nml <- read_nml(nml_file)
nml <- set_nml(nml, arg_list = period$calibration)
write_nml(nml,nml_file)
}
obs <- read_field_obs(field_file)
calib_GLM(var, ub, lb, init.val, obs, method, glmcmd,
metric, target.fit, target.iter, nml_file, path, scaling, verbose, pars)
# loads all iterations
results <- read.csv(paste0(path,'/calib_results_RMSE_temp.csv'))
results$DateTime <- as.POSIXct(results$DateTime)
g1 <- ggplot(results,
aes(nrow(results):1, .data$RMSE)) +
geom_point() +
geom_smooth(se = FALSE, method = "gam", formula = y ~ s(x), color = 'lightblue4') +
theme_bw() + xlab('Iterations') +
theme(text = element_text(size = 10), axis.text.x = element_text(angle = 90, hjust = 1))
if (plotting == TRUE) {
ggsave(filename = paste0(path,'/optim_',method,'_',var,'.png'), g1, dpi = 300,width = 384,height = 216, units = 'mm')
}
print(g1)
# Compare simulated with observed data
temp_rmse1 <- compare_to_field(output, field_file = field_file,
metric = 'water.temperature', as_value = FALSE, precision= 'hours')
plot.heat = plot_var_compare(nc_file = output, field_file = field_file,var_name = 'temp', precision = 'hours') +
labs(title = 'Calibration Period')
print(plot.heat)
if (plotting == TRUE){
ggsave(plot = plot.heat, paste0(path,'/calib_',method,'_',var,'_',metric,round(temp_rmse1,2),'.png'))
}
# Check the model fit during the validation period
init.temps <- read_nml(nml_file)$init_profiles$the_temps
get_calib_init_validation(nml_file= nml_file, output = output)
nml <- read_nml(nml_file)
nml <- set_nml(nml, arg_list = period$validation)
write_nml(nml,nml_file)
run_glmcmd(glmcmd, path, verbose)
temp_rmse2 <- compare_to_field(output, field_file = field_file,
metric = 'water.temperature', as_value = FALSE, precision= 'hours')
plot.heat = plot_var_compare(nc_file = output, field_file = field_file,var_name = 'temp', precision = 'hours') +
labs(title = 'Validation Period')
print(plot.heat)
if (plotting == TRUE){
ggsave(plot = plot.heat, paste0(path,'/valid_',method,'_',var,'_',metric,round(temp_rmse2,2),'.png'))
}
# check the model fit during the whole time period
nml <- read_nml(nml_file)
total.list <- period$total
total.list[['the_temps']] <- init.temps
nml <- set_nml(nml, arg_list =total.list)
write_nml(nml,nml_file)
run_glmcmd(glmcmd, path, verbose)
temp_rmse3 <- compare_to_field(output, field_file = field_file,
metric = 'water.temperature', as_value = FALSE, precision= 'hours')
plot.heat = plot_var_compare(nc_file = output, field_file = field_file,var_name = 'temp', precision = 'hours') +
labs(title = 'Total Time Period')
print(plot.heat)
if (plotting == TRUE){
ggsave(plot = plot.heat, paste0(path,'/total_',method,'_',var,'_',metric,round(temp_rmse3,2),'.png'))
}
# print a matrix of our constrained variable space, the initial value and the calibrated value
calibrated_results <- cbind(calib_setup, 'calibrated' = as.numeric(round(results[1,2:(ncol(results)-1)],4)))
print(paste('calibration:',round(temp_rmse1,2),'deg C RMSE'))
print(paste('validation:',round(temp_rmse2,2),'deg C RMSE'))
print(paste('total time period:',round(temp_rmse3,2),'deg C RMSE'))
print(calibrated_results)
return(calibrated_results)
}