In [1]:
# Read and cache model data (basin info, basin attributes, CTSM parameters, and metrics)

In [2]:

import glob
import os
import sys

import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

sys.path.append("../../MOASMO_support")
from MOASMO_parameters import *

def read_ctsm_default_parameters(param_names, param_sources, path_CTSM_case):
    """Look up one value per parameter from a CTSM case.

    Each name in ``param_names`` is paired with the entry at the same
    position in ``param_sources`` and passed, together with
    ``path_CTSM_case``, to ``get_parameter_value_from_CTSM_case`` (provided
    by ``MOASMO_parameters``).

    Returns:
        list: the looked-up values, in the order of ``param_names``.
    """
    return [
        get_parameter_value_from_CTSM_case(name, param_sources[pos], path_CTSM_case)
        for pos, name in enumerate(param_names)
    ]

# Load data for all basins

In [3]:
# Cache file holding df_basin_info, df_param_info and df_att.
# NOTE: despite the ".pkl" extension this file is an HDF5 store (it is written
# and read through pd.HDFStore), not a pickle. The name is kept unchanged so
# that caches written by earlier runs are still found.
outfile = 'camels_627basin_allinfo.pkl'

if os.path.exists(outfile):
    # Fast path: reload the dataframes saved by a previous run
    with pd.HDFStore(outfile, 'r') as store:
        df_basin_info = store['df_basin_info']
        df_param_info = store['df_param_info']
        df_att = store['df_att']
    print("Dataframes loaded from outfile.")
else:
    # Load basin info
    infile_basin_info = "/glade/work/guoqiang/CTSM_CAMELS/data_mesh_surf/HillslopeHydrology/CAMELS_level1_basin_info.csv"
    df_basin_info = pd.read_csv(infile_basin_info)

    # Note that only some are used for specific basins
    infile_param_info = '/glade/u/home/guoqiang/CTSM_repos/CTSM_calibration/src/parameter/CTSM_CAMELS_SA_param_240202.csv'
    df_param_info = pd.read_csv(infile_param_info)

    # Load basin attributes for this cluster
    attfiles = [
        "/glade/campaign/ral/hap/common/camels/camels_geol.txt",
        "/glade/campaign/ral/hap/common/camels/camels_hydro.txt",
        "/glade/campaign/ral/hap/common/camels/camels_clim.txt",
        "/glade/campaign/ral/hap/common/camels/camels_loc_topo.txt",
        "/glade/campaign/ral/hap/common/camels/camels_soil.txt",
        "/glade/campaign/ral/hap/common/camels/camels_vege.txt",
    ]

    # Join the attribute tables one after another on gauge_id (inner join)
    df_att = pd.read_csv(attfiles[0], delimiter=";")
    for attfile in attfiles[1:]:
        df_att = pd.merge(df_att, pd.read_csv(attfile, delimiter=";"), on="gauge_id")

    # Keep only the basins listed in df_basin_info and renumber the rows
    # 0..n-1. The previous `df_att.sel_index = ...` only set a Python
    # attribute on the DataFrame object and left the row labels untouched.
    df_att = df_att.loc[df_att["gauge_id"].isin(df_basin_info["hru_id"].values)]
    df_att = df_att.reset_index(drop=True)

    # The two tables must list the same basins in the same order.
    # np.array_equal also reports a mismatch when the lengths differ.
    if not np.array_equal(df_att["gauge_id"].values, df_basin_info["hru_id"].values):
        raise ValueError("Mismatch between att and info ids")
    print("att and info ids match")
    df_att["hru_id"] = df_basin_info["hru_id"].values

    # Save dataframes to the outfile
    with pd.HDFStore(outfile, 'w') as store:
        store['df_basin_info'] = df_basin_info
        store['df_param_info'] = df_param_info
        store['df_att'] = df_att
    print("Dataframes saved to outfile.")

Dataframes loaded from outfile.


In [4]:
# Show which attribute columns are available in df_att
print("All columns are", df_att.columns, sep="\n")

All columns are
Index(['gauge_id', 'geol_1st_class', 'glim_1st_class_frac', 'geol_2nd_class',
       'glim_2nd_class_frac', 'carbonate_rocks_frac', 'geol_porostiy',
       'geol_permeability', 'q_mean', 'runoff_ratio', 'slope_fdc',
       'baseflow_index', 'stream_elas', 'q5', 'q95', 'high_q_freq',
       'high_q_dur', 'low_q_freq', 'low_q_dur', 'zero_q_freq', 'hfd_mean',
       'p_mean', 'pet_mean', 'p_seasonality', 'frac_snow', 'aridity',
       'high_prec_freq', 'high_prec_dur', 'high_prec_timing', 'low_prec_freq',
       'low_prec_dur', 'low_prec_timing', 'huc_02', 'gauge_name', 'gauge_lat',
       'gauge_lon', 'mean_elev', 'mean_slope', 'area_gauges2',
       'area_geospa_fabric', 'abs_rel_error_area', 'soil_depth_pelletier',
       'soil_depth_statsgo', 'soil_porosity', 'soil_conductivity',
       'max_water_content', 'sand_frac', 'silt_frac', 'clay_frac',
       'water_frac', 'organic_frac', 'other_frac', 'frac_forest', 'lai_max',
       'lai_diff', 'gvf_max', 'gvf_diff', 'dom_l

# Load MO-ASMO outputs (metrics and parameters)
Iteration-0

In [5]:
# Root directory of the MO-ASMO calibration outputs; the cells below read
# per-basin files from its `level1_{i}_MOASMOcalib` sub-folders.
inpath_moasmo = "/glade/campaign/cgd/tss/people/guoqiang/CTSM_CAMELS_proj/Calib_HH_MOASMO_bigrange"

In [6]:
# Default CTSM parameter values: one row per basin, one column per parameter
param_names = df_param_info['Parameter'].values
param_sources = df_param_info['Source'].values

file_defa_param = 'camels_627basin_ctsm_defa_param.csv'

if not os.path.isfile(file_defa_param):
    # No cache yet: read the defaults from every basin's CTSM case directory
    case_root = '/glade/work/guoqiang/CTSM_CAMELS/Calib_HH_MOASMO_bigrange'
    n_basins = len(df_basin_info)
    defaults = np.full([n_basins, len(df_param_info)], np.nan)

    for basin in range(n_basins):
        case_dir = f'{case_root}/level1_{basin}'
        raw_values = read_ctsm_default_parameters(param_names, param_sources, case_dir)
        # A looked-up value may hold several numbers; store its mean
        defaults[basin, :] = [np.mean(value) for value in raw_values]

    df_param_defa = pd.DataFrame(defaults, columns=param_names)
    df_param_defa.to_csv(file_defa_param, index=False)
else:
    df_param_defa = pd.read_csv(file_defa_param)

In [7]:
# Load the iteration-0 parameter samples of all basins, stacked row-wise into
# df_param (columns follow param_names). The result is cached as gzip CSV.

file_all_param = 'camels_627basin_ctsm_all_param.csv.gz'

if os.path.isfile(file_all_param):
    df_param = pd.read_csv(file_all_param, compression='gzip')

else:
    # Column position of every parameter name (first occurrence wins),
    # built once instead of searching param_names for every column.
    param_col = {}
    for k, name in enumerate(param_names):
        param_col.setdefault(name, k)

    # Collect the per-basin frames and concatenate once at the end;
    # concatenating inside the loop re-copies all earlier rows each time.
    basin_frames = []
    for i in range(len(df_basin_info)):
        file_param = f"{inpath_moasmo}/level1_{i}_MOASMOcalib/ctsm_outputs/iter0_all_meanparam.csv"
        df1 = pd.read_csv(file_param)

        # Start every sample from this basin's default values, then
        # overwrite the columns that are present in the sample file.
        parami = np.tile(df_param_defa.iloc[i].values, (len(df1), 1))
        for col in df1.columns:
            if col in param_col:  # skip binded parameters
                parami[:, param_col[col]] = df1[col].values

        basin_frames.append(pd.DataFrame(parami, columns=param_names))

    if basin_frames:
        # ignore_index renumbers the stacked rows 0..n-1
        df_param = pd.concat(basin_frames, ignore_index=True)
    else:
        # No basins: keep an empty frame instead of failing in pd.concat
        df_param = pd.DataFrame(columns=param_names)

    df_param.to_csv(file_all_param, index=False, compression='gzip')


In [8]:
# Load parameter upper/lower bounds for all basins (one row per basin).
# Parameters without an entry in a basin's bounds file keep that basin's
# default value as both lower and upper bound.

file_param_lb = 'camels_627basin_ctsm_all_param_lb.csv.gz'
file_param_ub = 'camels_627basin_ctsm_all_param_ub.csv.gz'

# Both cache files are read below, so both must exist to take the fast path
# (previously only the lower-bound file was checked).
if os.path.isfile(file_param_lb) and os.path.isfile(file_param_ub):
    df_param_lb = pd.read_csv(file_param_lb, compression='gzip')
    df_param_ub = pd.read_csv(file_param_ub, compression='gzip')

else:

    bound_columns = df_param_defa.columns.values
    param_lb_values = df_param_defa.values.copy()
    param_ub_values = df_param_defa.values.copy()

    # Column position of every parameter name (first occurrence wins)
    bound_col = {}
    for k, name in enumerate(bound_columns):
        bound_col.setdefault(name, k)

    for i in range(len(df_basin_info)):
        file = f"{inpath_moasmo}/level1_{i}_MOASMOcalib/param_sets/all_default_parameters.pkl"
        # NOTE(review): unpickling executes arbitrary code; only load files
        # from a trusted location.
        dfi = pd.read_pickle(file)

        for name, lower, upper in zip(dfi['Parameter'].values, dfi['Lower'].values, dfi['Upper'].values):
            indj = bound_col[name]  # KeyError if the parameter is not a known column
            param_lb_values[i, indj] = lower
            param_ub_values[i, indj] = upper

    df_param_lb = pd.DataFrame(param_lb_values, columns=bound_columns)
    df_param_ub = pd.DataFrame(param_ub_values, columns=bound_columns)

    df_param_lb.to_csv(file_param_lb, index=False, compression='gzip')
    df_param_ub.to_csv(file_param_ub, index=False, compression='gzip')

In [9]:
# Load the iteration-0 metric values of all basins, plus a table that maps
# every metric row to its basin (df_basinid). Both are cached as gzip CSV.

file_all_metric = 'camels_627basin_ctsm_all_metric.csv.gz'
file_all_basinid = 'camels_627basin_ctsm_all_basinid.csv.gz'

# Check the files this cell actually reads. The previous guard tested
# file_all_param, which belongs to the parameter cell.
if os.path.isfile(file_all_metric) and os.path.isfile(file_all_basinid):
    df_metric = pd.read_csv(file_all_metric, compression='gzip')
    df_basinid = pd.read_csv(file_all_basinid, compression='gzip')

else:

    # Collect the per-basin frames and concatenate once at the end
    metric_frames = []
    for i in range(len(df_basin_info)):
        file_metric = f"{inpath_moasmo}/level1_{i}_MOASMOcalib/ctsm_outputs/iter0_many_metric.csv"
        df2 = pd.read_csv(file_metric)

        # Tag every row with its basin. basin_num and basin_id are both the
        # loop position; hru_id comes from df_basin_info.
        df2["basin_num"] = i
        df2["basin_id"] = i
        df2["hru_id"] = df_basin_info["hru_id"].values[i]

        metric_frames.append(df2)

    # ignore_index renumbers the stacked rows 0..n-1
    df_metric = pd.concat(metric_frames, ignore_index=True)

    df_basinid = df_metric[["basin_num", "basin_id", "hru_id"]]
    # Keep only the metrics used downstream
    selected_met =['kge', 'mae', 'n_mae', 'nse', 'cc', 'rmse', 'max_mon_abs_err', 'n_max_mon_abs_err',  'kge_log_q']
    df_metric = df_metric[selected_met]
    df_metric.to_csv(file_all_metric, index=False, compression='gzip')
    df_basinid.to_csv(file_all_basinid, index=False, compression='gzip')