In [2]:
import sys, os, glob, pickle, toml, json
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.cm as cm
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import xarray as xr

In [68]:
# Iteration count used by the per-iteration loops in the cells below, and the
# root folder under which the level1_<basin>_calib output folders are read.
# NOTE(review): the saved output of the final `iternum` cell shows 9, not 10 —
# confirm which value the stored results were produced with.
iternum = 10
inpath_moasmo = '/glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_emulator/'

# Basin information table; its length defines the number of basins that the
# test/train split is built from.
infile_basin_info = "/glade/work/guoqiang/CTSM_CAMELS/data_mesh_surf/HillslopeHydrology/CAMELS_level1_basin_info.csv"
df_info = pd.read_csv(filepath_or_buffer=infile_basin_info)

In [17]:
# Hold out every fifth row position of df_info (0, 5, 10, ...) as the testing
# set; all remaining row positions form the training set.
all_index = np.arange(len(df_info))
test_index = all_index[::5]
train_index = np.setdiff1d(all_index, test_index)

# Iter-0 and LSE training in the testing basins (627 basin training)

In [7]:
    # LSE metrics
    # Read the metric table of every (testing basin, iteration) pair from the
    # ctsm_outputs_LSEnormKGE folders and stack them into one long table that is
    # tagged with the basin index, the iteration number and the trial number.

    lse_frames = []  # one DataFrame per input file, concatenated after the loop

    for tarbasin in test_index:
        for i in range(iternum):
            infile = f'{inpath_moasmo}/level1_{tarbasin}_calib/ctsm_outputs_LSEnormKGE/iter{i}_many_metrics_mizuroute_s-1.csv'
            dfi = pd.read_csv(infile)
            dfi['basin'] = tarbasin
            dfi['iter'] = i
            dfi['trial'] = np.arange(len(dfi))  # row position within the file
            lse_frames.append(dfi)

    # Concatenate once: calling pd.concat inside the loop re-copies every row
    # loaded so far on each pass. An empty DataFrame (len == 0) is used when
    # nothing was loaded.
    df_lse_metric = pd.concat(lse_frames) if lse_frames else pd.DataFrame()

In [9]:
# statistics-1: best metric of each iteration
# For every testing basin and iteration, summarise the chosen metric over all
# rows loaded for that pair (best / mean / median, NaN-aware).
metname = 'kge'
n_basin = len(test_index)
# Metric of the first row of iteration 0 for each basin (printed as "defa";
# presumably the default-parameter run — TODO confirm).
met_defa = np.full(n_basin, np.nan)
met_stats_lse = np.full([n_basin, iternum, 3], np.nan)  # best; mean; median

# Split the metric column by (basin, iter) in one pass instead of masking the
# whole table once per pair; row order inside each group is preserved.
metric_by_group = {
    key: series.values
    for key, series in df_lse_metric.groupby(['basin', 'iter'], sort=False)[metname]
}

for j, basin in enumerate(test_index):
    for i in range(iternum):
        dij = metric_by_group.get((basin, i))
        if dij is None:
            # No rows were loaded for this basin/iteration (e.g. iternum was
            # changed after loading): keep the NaN placeholders rather than
            # failing on an empty array.
            continue

        met_stats_lse[j, i, 0] = np.nanmax(dij)
        met_stats_lse[j, i, 1] = np.nanmean(dij)
        met_stats_lse[j, i, 2] = np.nanmedian(dij)

        if i == 0:
            met_defa[j] = dij[0]

print('median defa:', np.nanmedian(met_defa))
print('best of each iteration')
print(np.nanmedian(met_stats_lse[:,:,0], axis=0))

median defa: 0.25040086100924586
best of each iteration
[0.49236911 0.54537293]


# Iter-0 and LSE training in the training basins (80% basin training)

In [55]:
    # LSE metrics
    # Read the metric table of every (training basin, iteration) pair from the
    # ctsm_outputs_LSEnormKGECV0 folders and stack them into one long table that
    # is tagged with the basin index, the iteration number and the trial number.

    lse_frames = []  # one DataFrame per input file, concatenated after the loop

    for tarbasin in train_index:
        for i in range(iternum):
            infile = f'{inpath_moasmo}/level1_{tarbasin}_calib/ctsm_outputs_LSEnormKGECV0/iter{i}_many_metrics_mizuroute_s-1.csv'
            dfi = pd.read_csv(infile)
            dfi['basin'] = tarbasin
            dfi['iter'] = i
            dfi['trial'] = np.arange(len(dfi))  # row position within the file
            lse_frames.append(dfi)

    # Concatenate once: calling pd.concat inside the loop re-copies every row
    # loaded so far on each pass. An empty DataFrame (len == 0) is used when
    # nothing was loaded.
    df_lse_metric = pd.concat(lse_frames) if lse_frames else pd.DataFrame()

In [56]:
# statistics-1: best metric of each iteration
# For every training basin and iteration, summarise the chosen metric over all
# rows loaded for that pair (best / mean / median, NaN-aware).
metname = 'kge'
n_basin = len(train_index)
# Metric of the first row of iteration 0 for each basin (printed as "defa";
# presumably the default-parameter run — TODO confirm).
met_defa = np.full(n_basin, np.nan)
met_stats_lse = np.full([n_basin, iternum, 3], np.nan)  # best; mean; median

# Split the metric column by (basin, iter) in one pass instead of masking the
# whole table once per pair; row order inside each group is preserved.
metric_by_group = {
    key: series.values
    for key, series in df_lse_metric.groupby(['basin', 'iter'], sort=False)[metname]
}

for j, basin in enumerate(train_index):
    for i in range(iternum):
        dij = metric_by_group.get((basin, i))
        if dij is None:
            # No rows were loaded for this basin/iteration (e.g. iternum was
            # changed after loading): keep the NaN placeholders rather than
            # failing on an empty array.
            continue

        met_stats_lse[j, i, 0] = np.nanmax(dij)
        met_stats_lse[j, i, 1] = np.nanmean(dij)
        met_stats_lse[j, i, 2] = np.nanmedian(dij)

        if i == 0:
            met_defa[j] = dij[0]

print('median defa:', np.nanmedian(met_defa))
print('best of each iteration')
print(np.nanmedian(met_stats_lse[:,:,0], axis=0))

median defa: 0.2977536428467691
best of each iteration
[0.5092305  0.51526717 0.54888729 0.5930669  0.6047926  0.62022205
 0.6233694  0.64096502]


# LSE CV in the testing basins (80% basin training)

In [69]:
    # LSE metrics
    # Read the evaluation metric table of trial 0 for every (testing basin,
    # iteration) pair from the ctsm_outputs_LSEnormKGECV0test folders and stack
    # them into one long table. Iteration 0 is not read here; files that do not
    # exist are reported and skipped.

    lsecv_frames = []  # one DataFrame per file found, concatenated after the loop

    for tarbasin in test_index:
        for i in range(1, iternum):
            infile = f'{inpath_moasmo}/level1_{tarbasin}_calib/ctsm_outputs_LSEnormKGECV0test/iter{i}_trial0/evaluation_many_metrics_mizuroute_s-1.csv'
            if not os.path.isfile(infile):
                print('file does not exist', infile)
                continue

            dfi = pd.read_csv(infile)
            dfi['basin'] = tarbasin
            dfi['iter'] = i
            dfi['trial'] = np.arange(len(dfi))  # row position within the file
            lsecv_frames.append(dfi)

    # Concatenate once: calling pd.concat inside the loop re-copies every row
    # loaded so far on each pass. An empty DataFrame (len == 0) is used when no
    # file was found.
    df_lsecv_metric = pd.concat(lsecv_frames) if lsecv_frames else pd.DataFrame()

file does not exist /glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_emulator//level1_0_calib/ctsm_outputs_LSEnormKGECV0test/iter9_trial0/evaluation_many_metrics_mizuroute_s-1.csv
file does not exist /glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_emulator//level1_5_calib/ctsm_outputs_LSEnormKGECV0test/iter9_trial0/evaluation_many_metrics_mizuroute_s-1.csv
file does not exist /glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_emulator//level1_10_calib/ctsm_outputs_LSEnormKGECV0test/iter9_trial0/evaluation_many_metrics_mizuroute_s-1.csv
file does not exist /glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_emulator//level1_15_calib/ctsm_outputs_LSEnormKGECV0test/iter9_trial0/evaluation_many_metrics_mizuroute_s-1.csv
file does not exist /glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_emulator//level1_20_calib/ctsm_outputs_LSEnormKGECV0test/iter9_trial0/evaluation_many_metrics_mizuroute_s-1.csv
file does no

In [66]:
# Print, for each iteration from 1 up to iternum - 1, the NaN-aware median of
# the 'kge' column over all loaded CV rows of that iteration. An iteration with
# no rows prints nan.
cv_iter = df_lsecv_metric['iter']
for it in range(1, iternum):
    kge_it = df_lsecv_metric.loc[cv_iter == it, 'kge'].values
    print(np.nanmedian(kge_it))

0.2956390506667323
0.3151840167606052
0.3604315148749162
0.3984709445183466
0.4141417373148615
0.4390827144248037
0.3969689989660876
0.40654761378546034
nan


  return np.nanmean(a, axis, out=out, keepdims=keepdims)


In [62]:
# Inspection cell: the bare expression displays the current value of iternum.
iternum

9