In [1]:


import os, sys, subprocess, time, toml
import pandas as pd
from MOASMO_parameters import generate_initial_parameter_sets, surrogate_model_train_and_pareto_points, surrogate_model_train_and_pareto_points_experiment, surrogate_model_train_and_pareto_points_oneobjfunc
import run_multiple_paramsets_Derecho

In [2]:
########################################################################################################################
# Load the MO-ASMO calibration configuration and the input locations it names.

# Alternative (template) configuration file, kept for reference:
#config_file = '/glade/u/home/guoqiang/CTSM_repos/CTSM_calibration/src/config_templates/_example.MO_ASMO.config_MOASMO.toml'
config_file = '/glade/work/guoqiang/CTSM_CAMELS/Calib_HH_emulator/configuration/_level1-0_config_MOASMO.toml'
config = toml.load(config_file)

# Outputs of iterations 0 .. iter_end-1 are used to generate the new parameters for
# iteration iter_end (e.g., iter_end=2 means iter0 and iter1 are used to build iter 2).
iter_end = 1

# Objective function name. Options listed by the author: twoerror, kge, norm2err, kge_200iter0
# objfunc = 'kge_200iter0'
objfunc = 'kge'
print('Objective function is ', objfunc)

# Inputs taken from the configuration
file_parameter_list = config['file_calib_param']
path_CTSM_base = config['path_CTSM_case']
path_script_MOASMO = config['path_script_MOASMO']
path_CTSM_source = config['path_CTSM_source']
ref_streamflow = config['file_Qobs']

# Optional entry: 'NA' is the placeholder used when the key is absent
add_flow_file = config.get('add_flow_file', 'NA')

# Helper scripts, located relative to the configured directories
script_singlerun = f'{path_script_MOASMO}/run_one_paramset_Derecho.py'
script_clone = f'{path_CTSM_source}/cime/scripts/create_clone'


# outputs
# Root directory for all calibration products: derived from the base case path when the
# configuration says 'NA', otherwise the explicitly configured location.
if config['path_calib'] == 'NA':
    path_MOASMOcalib = f'{path_CTSM_base}_calib'
else:
    path_MOASMOcalib = config['path_calib']

# Each objective function keeps its parameter sets, run directories and archived outputs
# in sibling folders that differ only by this suffix.
_objfunc_path_suffix = {
    'twoerror': '',
    'kge': '_SSEnormKGE',
    'norm2err': '_norm2err',
}

# Fail early with a clear message. Without this check any other objfunc (including
# 'kge_200iter0', for which no folder layout is defined here) would leave the three
# paths below undefined and only surface later as a NameError.
if objfunc not in _objfunc_path_suffix:
    sys.exit(f'objfunc must be twoerror, kge or norm2err. {objfunc} is not accepted.')

_path_suffix = _objfunc_path_suffix[objfunc]
path_paramset = f'{path_MOASMOcalib}/param_sets{_path_suffix}'
path_submit = f'{path_MOASMOcalib}/run_model{_path_suffix}'
path_archive = f'{path_MOASMOcalib}/ctsm_outputs{_path_suffix}'

os.makedirs(path_MOASMOcalib, exist_ok=True)

# MO-ASMO parameters
sampling_method = config['sampling_method']
num_init = config['num_init']  # initial number of samples
num_per_iter = config['num_per_iter']  # number of selected pareto parameter sets for each iteration
num_iter = config['num_iter']  # including the initial iteration

# Evaluation period settings
RUN_STARTDATE = config['RUN_STARTDATE']
ignore_month = config['ignore_month']
STOP_OPTION = config['STOP_OPTION']
STOP_N = config['STOP_N']

# Optional entry: 'NA' is the placeholder used when the key is absent
nonstandard_evaluation = config.get('nonstandard_evaluation', 'NA')

# HPC job settings
job_mode = config['job_mode']
job_CTSMiteration = config['job_CTSMiteration']
# job_controlMOASMO = config['job_controlMOASMO'] # not needed here

# The evaluation window starts `ignore_month` months after the run start date,
# so that leading part of the simulation is excluded when evaluating the model.
run_start = pd.Timestamp(RUN_STARTDATE)
date_start = (run_start + pd.offsets.DateOffset(months=ignore_month)).strftime('%Y-%m-%d')

# The window ends STOP_N years or months after the run start date, depending on STOP_OPTION.
if STOP_OPTION not in ('nyears', 'nmonths'):
    sys.exit(f'STOP_OPTION must be nyears or nmonths. {STOP_OPTION} is not accepted.')
if STOP_OPTION == 'nyears':
    run_length = pd.offsets.DateOffset(years=STOP_N)
else:
    run_length = pd.offsets.DateOffset(months=STOP_N)
date_end = (run_start + run_length).strftime('%Y-%m-%d')



Objective function is  kge


In [3]:
########################################################################################################################
# MO-ASMO main: gather the metric and parameter files of iterations 0 .. iter_end-1.

# The names `it` and `iterflag` set inside the loop are read again by the later cells
# (surrogate training and the final timing message), so at least one iteration must be
# processed. Reject other values here instead of failing later with a NameError.
if iter_end < 1:
    sys.exit(f'iter_end must be at least 1. {iter_end} is not accepted.')

file_metric_all = []
file_param_all = []

t1 = time.time()  # start of the timing that is reported once the submission commands are generated
for it in range(iter_end):
    print('#'*50)
    print(f'Start iterattion {it}. Total iteration number: {num_iter}')

    iterflag = it

    # Iteration 0 holds the initial samples; every later iteration holds num_per_iter samples.
    sample_num = num_init if it == 0 else num_per_iter

    # Returns one metric file and one parameter file for this iteration; presumably it also
    # verifies that all `sample_num` runs are complete -- TODO confirm in run_multiple_paramsets_Derecho.
    file_metric_iter, file_param_iter = run_multiple_paramsets_Derecho.check_if_all_runs_are_finsihed(path_archive, iterflag, sample_num)
    file_metric_all.append(file_metric_iter)
    file_param_all.append(file_param_iter)

##################################################
Start iterattion 0. Total iteration number: 9
Write all metrics for 200 trials in iteration 0 to /glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_emulator/level1_0_calib/ctsm_outputs_SSEnormKGE/iter0_all_metric.csv
Write all parameters (mean value) for 200 trials in iteration 0 to /glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_emulator/level1_0_calib/ctsm_outputs_SSEnormKGE/iter0_all_meanparam.csv


In [4]:
# Train a surrogate model and select pareto parameter sets for the next iteration.
# The routine and its options depend on the chosen objective function.
if objfunc == 'twoerror':
    surrogate_model_train_and_pareto_points(file_parameter_list, file_param_all, file_metric_all, path_paramset, iterflag, num_per_iter, path_CTSM_base)
elif objfunc == 'norm2err':
    surrogate_model_train_and_pareto_points(file_parameter_list, file_param_all, file_metric_all, path_paramset, iterflag, num_per_iter, path_CTSM_base, normalize_y=True)
elif objfunc in ('kge', 'kge_200iter0'):
    # ad-hoc change: read the 'many_metrics_mizuroute_s-1.csv' file of each iteration instead of
    # its 'all_metric.csv' file. The replacement is a no-op on already-renamed paths, so
    # re-running this cell is safe.
    file_metric_all = [metric_file.replace('all_metric.csv', 'many_metrics_mizuroute_s-1.csv') for metric_file in file_metric_all]
    surrogate_model_train_and_pareto_points_oneobjfunc(file_parameter_list, file_param_all, file_metric_all, path_paramset, iterflag, num_per_iter, path_CTSM_base)
else:
    # Previously an unsupported name skipped training silently, and the following cell
    # would still go on to generate submission commands.
    sys.exit(f'objfunc must be twoerror, norm2err, kge or kge_200iter0. {objfunc} is not accepted.')


GPR CV KGE Score for metric1/metric2:
   Fold     rmse1
0     1  0.152494
1     2  0.176299
2     3  0.155010
3     4  0.149145
4     5  0.166130
5  mean  0.159816
RF CV KGE Score for metric1/metric2:
   Fold     rmse1
0     1  0.150780
1     2  0.137293
2     3  0.099626
3     4  0.088773
4     5  0.113314
5  mean  0.117957
Use RF model
Load default parameter values from: /glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_emulator/level1_0_calib/param_sets_SSEnormKGE/all_default_parameters.pkl
Generating parameter file: /glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_emulator/level1_0_calib/param_sets_SSEnormKGE/paramset_iter1_trial0.pkl
Generating parameter file: /glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_emulator/level1_0_calib/param_sets_SSEnormKGE/paramset_iter1_trial1.pkl
Generating parameter file: /glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_emulator/level1_0_calib/param_sets_SSEnormKGE/paramset_iter1_t

In [5]:
# Generate the submission commands for iteration `iter_end`
# (original author's note: this won't submit a real job on Derecho).
run_multiple_paramsets_Derecho.generate_and_submit_multi_CTSM_runs(
    iter_end, path_submit, path_paramset, path_CTSM_base,
    path_archive, script_singlerun, script_clone,
    date_start, date_end, ref_streamflow, add_flow_file,
    job_CTSMiteration, job_mode,
)

# Report the wall-clock time elapsed since `t1`, which was recorded before the
# iteration loop; `it` is the last index of that loop.
t2 = time.time()
elapsed_seconds = t2 - t1
print(f'Iteration {it} is complete. Time cost (s) is {elapsed_seconds}')


Iteration 0 is complete. Time cost (s) is 409.1104898452759
