# This notebook converts the parameter config files of the C version of CFE into the format read by the Python version of CFE

Written by Ryoko Araki (San Diego State University & UCSB, raraki8159@sdsu.edu) in 2023 SI 

In [51]:
# %%
import pandas as pd
import numpy as np
import os
import sys
import json
import re 

from omegaconf import DictConfig, OmegaConf
import hydra
import yaml
import warnings
from tqdm import tqdm


## Read the config

In [52]:
# Parse the notebook settings from config.yaml in the working directory
with open('config.yaml', 'r') as f:
    cfg = yaml.safe_load(f)


def _cfg_path(section, key):
    """Return cfg[section][key] with the "${cwd}" placeholder replaced by ".."."""
    return cfg[section][key].replace("${cwd}", "..")


# Directory with the GIUH and soil parameter files
GIUH_soil_dir = _cfg_path('io_dir', 'cfe_c_config_dir')

# Basin list directory and basin list file (the 2022 code used 516 basins)
basin_dir = _cfg_path('io_dir', 'gauch_2020_dir')
basin_filename = _cfg_path('model_settings', 'basin_file')

# CAMELS dataset directory
camels_attr_dir = _cfg_path('io_dir', 'ucar_dir')

# Atmospheric forcing file directory
forcing_path = _cfg_path('io_dir', 'nldas_forcing_dir')

# Directory for the exported JSON files; output goes to a sub-folder named
# after the soil scheme
config_dir = _cfg_path('io_dir', 'config_dir')
soil_scheme = cfg['CFE_config']['soil_scheme']
out_path = os.path.join(config_dir, f'soil_{soil_scheme}')


In [53]:
# Tabular view of the basin file.
# NOTE(review): read without dtype=str, so pandas may parse the IDs as integers
# and drop leading zeros; use `basin_list_str` for lookups by gauge ID.
with open(basin_filename, "r") as f:
    basin_list = pd.read_csv(f, header=None)

# Basin IDs as strings, one per line of the basin file.
with open(basin_filename, 'r') as file:
    # Remove leading/trailing whitespace and newline characters
    lines = [line.strip() for line in file]
# Skip blank lines so an empty ID is never turned into a config file name
basin_list_str = [line for line in lines if line]

In [54]:
# Load the concatenated CAMELS attribute table; gauge IDs are converted to
# strings and zero-padded to 8 characters.
basin_attributes = pd.read_csv(
    os.path.join(camels_attr_dir, 'camels_attributes_concat.csv')
).assign(gauge_id=lambda attrs: attrs['gauge_id'].astype(str).str.zfill(8))

# attribute: area_gages2
#   Description: catchment area (GAGESII estimate)
#   Unit: [km2]
#   Data source: N15-USGSdata
#   Reference: Falcone (2011)
# See https://hess.copernicus.org/preprints/hess-2017-169/hess-2017-169.pdf for the definition
basin_attributes['area_gages2'].head()

0    2252.70
1     573.60
2    3676.17
3     769.05
4     909.10
Name: area_gages2, dtype: float64

## Count the number of parameter files in the C version

In [55]:
# Count the regular files in the CFE-C config directory; the context manager
# closes the directory iterator as soon as counting is done.
with os.scandir(GIUH_soil_dir) as entries:
    file_count = sum(1 for entry in entries if entry.is_file())

# Collect the gauge IDs of files named exactly "<digits>_bmi_config_cfe_pass.txt".
# fullmatch plus the escaped dot rejects look-alikes such as "*.txt.bak".
pattern = r'(\d+)_bmi_config_cfe_pass\.txt'
basin_ids_in_c_config = []
for filename in os.listdir(GIUH_soil_dir):
    match = re.fullmatch(pattern, filename)
    if match:
        basin_id = match.group(1)
        basin_ids_in_c_config.append(basin_id)

# The expected count comes from the basin list itself rather than a
# hard-coded 516, so the check stays valid if the basin file changes.
basins_of_interest = set(basin_list_str)
n_basins = len(basins_of_interest)

print(f"Number of config files that 2022 team received from Luciana: {file_count}")
common_gauges = set(basin_ids_in_c_config) & basins_of_interest
print(f"Number of the gauges matching with our basin of interest: {len(common_gauges)}/{n_basins}")

if len(common_gauges) != n_basins:
    warnings.warn("Config missing in either of the folders")


Number of config files that 2022 team received from Luciana: 520
Number of the gauges matching with our basin of interest: 516/516


## Read and render the config files (C -> BMIpy readable)

In [56]:

def load_config(GIUH_soil_dir, basin_id):
    """Return the raw text of one basin's CFE-C config file.

    The file read is ``<GIUH_soil_dir>/<basin_id>_bmi_config_cfe_pass.txt``
    (the GIUH and soil parameter file).
    """
    config_name = f'{basin_id}_bmi_config_cfe_pass.txt'
    with open(os.path.join(GIUH_soil_dir, config_name), "r") as handle:
        return handle.read()

def render_config(basin_id, text, default_values):
    """Translate the text of a CFE-C config file into a CFE-py parameter dict.

    Parameters
    ----------
    basin_id : str
        Gauge ID. Used to build the forcing file name and to look up the
        catchment area in the module-level ``basin_attributes`` table.
    text : str
        Contents of the CFE-C config file, one ``key=value`` entry per line.
        Anything from the first ``[`` onward in a value is dropped.
    default_values : dict
        Recognised parameter names mapped to fallback values. Entries whose
        name (the part of the key after the last ``.``) is not in this dict
        are ignored.

    Returns
    -------
    dict
        Parameters ready for JSON export. Soil parameters are nested under
        ``"soil_params"`` (with ``b`` renamed to ``bb``); everything else is
        stored at the top level.

    Raises
    ------
    IndexError
        If ``basin_id`` has no row in ``basin_attributes``.

    Notes
    -----
    Reads the module-level names ``forcing_path``, ``basin_attributes`` and
    ``soil_scheme``. Entries whose value is the literal ``NaN`` are left out
    of the result. A scalar value that cannot be parsed as a float is replaced
    by its default and a warning is emitted.
    """
    soil_param_names = ("depth", "satdk", "satpsi", "slop", "smcmax", "wltsmc", "bb")

    # Initialize
    parameters = dict()
    parameters['soil_params'] = dict()

    parameters['forcing_file'] = os.path.join(forcing_path, f'{basin_id}_hourly_nldas.csv')

    area = basin_attributes.loc[basin_attributes['gauge_id'] == basin_id, 'area_gages2']
    if area.empty:
        raise IndexError(f"No 'area_gages2' entry for gauge_id {basin_id!r} in basin_attributes")
    # Plain float so the value serialises to JSON whatever the column dtype is
    parameters['catchment_area_km2'] = float(area.iloc[0])

    # Loop through the lines
    for line in text.strip().split("\n"):
        # Blank lines and lines without "=" carry no parameter
        if "=" not in line:
            continue
        # Split on the first "=" only, so a value containing "=" does not
        # break the unpacking
        key, value = line.split("=", 1)
        key = key.strip()

        if key in ("soil_params.expon", "soil_params.expon_secondary"):
            continue
        param_name = key.split(".")[-1].strip()

        # Drop the bracketed suffix (if any) and surrounding whitespace
        value = value.split("[")[0].strip()

        if param_name not in default_values or value == "NaN":
            continue

        if "," in value:
            value = [float(v) for v in value.split(",")]
        else:
            try:
                value = float(value)
            except ValueError:
                value = default_values[param_name]
                # warnings.warn actually emits the message; instantiating
                # Warning(...) alone is a no-op
                warnings.warn(
                    f"Detected non-numeric value for '{param_name}' in original config file "
                    f"of basin {basin_id}. Replaced with default values."
                )

        if param_name == "b":
            param_name = "bb"

        if param_name in soil_param_names:
            parameters["soil_params"][param_name] = value
        else:
            parameters[param_name] = value

    # Some default CFE-py parameters not generated through Hydrofabric
    parameters['stand_alone'] = 1
    parameters['unit_test'] = 0
    parameters['compare_results_file'] = ""
    parameters['partition_scheme'] = "Schaake"
    parameters['soil_scheme'] = soil_scheme
    parameters['soil_params']["D"] = 2.0
    parameters["soil_params"]["mult"] = 1000.0

    return parameters

def save_rendered_config(basin_id, config_dir, parameters, out_path):
    """Write one basin's parameters to ``cat_<basin_id>_bmi_config_cfe.json``.

    Parameters
    ----------
    basin_id : str
        Gauge ID used in the output file name.
    config_dir : str
        Not used by this function; kept so existing callers keep working.
    parameters : dict
        JSON-serialisable parameter dict to export.
    out_path : str
        Directory that receives the JSON file. It is created, including any
        missing parent directories, if it does not exist yet.
    """
    # makedirs with exist_ok avoids both the missing-parent failure of
    # os.mkdir and the race between an existence check and creation
    os.makedirs(out_path, exist_ok=True)

    # save and export json files
    json_file = os.path.join(out_path, f'cat_{basin_id}_bmi_config_cfe.json')
    with open(json_file, 'w') as file:
        json.dump(parameters, file, indent=4)

In [57]:
# Fallback values keyed by parameter name; render_config only keeps
# parameters whose name appears here.
default_values = dict(
    depth=2.0,
    b=4.05,
    satdk=1e-04,
    satpsi=0.355,
    slop=0.5,
    smcmax=0.439,
    wltsmc=0.02,
    expon=1.0,
    refkdt=3.0,
    max_gw_storage=0.1,
    Cgw=1.8e-05,
    gw_storage=0.05,
    alpha_fc=0.33,
    K_nash=0.03,
    K_lf=0.01,
    nash_storage="0.0,0.0",
    giuh_ordinates="0.1, 0.2, 0.4, 0.2, 0.1",
)

# Load, render and export the config of every basin in the basin list
for basin_id in tqdm(basin_list_str):
    text = load_config(GIUH_soil_dir, basin_id)
    parameters = render_config(basin_id, text, default_values)
    save_rendered_config(basin_id, config_dir, parameters, out_path)


IndentationError: expected an indented block after 'for' statement on line 22 (1148430056.py, line 23)

## Check if all the configs are generated

In [None]:
# Count the regular files in the output directory; the context manager closes
# the directory iterator as soon as counting is done.
with os.scandir(out_path) as entries:
    file_count = sum(1 for entry in entries if entry.is_file())

# Collect the gauge IDs of files named exactly "cat_<digits>_bmi_config_cfe.json".
# fullmatch plus the escaped dot rejects look-alikes such as "*.json.bak".
pattern = r'cat_(\d+)_bmi_config_cfe\.json'
basin_ids_in_py_config = []
for filename in os.listdir(out_path):
    match = re.fullmatch(pattern, filename)
    if match:
        basin_id = match.group(1)
        basin_ids_in_py_config.append(basin_id)

# The expected count comes from the basin list itself rather than a
# hard-coded 516, so the check stays valid if the basin file changes.
basins_of_interest = set(basin_list_str)
n_basins = len(basins_of_interest)

# This cell counts the generated JSON files, not the CFE-C files received
print(f"Number of JSON config files generated in {out_path}: {file_count}")
common_gauges = set(basin_ids_in_py_config) & basins_of_interest
missing_gauges = basins_of_interest - set(basin_ids_in_py_config)
print(f"Number of the gauges matching with our basin of interest: {len(common_gauges)}/{n_basins}")

if len(common_gauges) != n_basins:
    warnings.warn("Config missing in either of the directories")
    # Sorted so the listing is stable between runs
    print(f"missing gauges are: {sorted(missing_gauges)}")


Number of config files that 2022 team received from Luciana: 1
Number of the gauges matching with our basin of interest: 1/516
missing gauges are: {'04127997', '01078000', '07167500', '01181000', '12451000', '12092000', '03076600', '07291000', '01552500', '02198100', '01639500', '01134500', '07315200', '01567500', '06350000', '08269000', '12035000', '03186500', '08023080', '07197000', '01539000', '03460000', '14138900', '14137000', '04040500', '07335700', '01139800', '08200000', '12048000', '01054200', '06853800', '06878000', '14158790', '02143040', '05584500', '02216180', '02349900', '08150800', '11476600', '02363000', '02472500', '02193340', '03237500', '01532000', '04213075', '08050800', '03368000', '02371500', '02069700', '05593575', '03488000', '02096846', '06917000', '01439500', '03463300', '01415000', '01440000', '02479560', '11176400', '01583500', '01137500', '02430085', '02177000', '01170100', '14309500', '05503800', '08086290', '06876700', '07261000', '01123000', '04216418', 

