# Runoff mass balance errors


In [1]:
# auto-format the code in this notebook
%load_ext jupyter_black

## Setup

In [2]:
import pathlib as pl
from pprint import pprint
from shutil import rmtree, copy2

import hvplot.xarray  # noqa
from IPython.display import display
import numpy as np
import pywatershed as pws
import xarray as xr

In [3]:
# Name of the test domain; used below to locate its data directory.
domain_name = "drb_2yr"
pws_root = pws.constants.__pywatershed_root__
# Input files for the domain, located relative to the pywatershed root.
domain_dir = pws_root / f"../test_data/{domain_name}"
# Directory under which this notebook creates its run directories.
nb_output_dir = pl.Path("./runoff_errors")
nb_output_dir.mkdir(exist_ok=True)
# When True, the corresponding run is skipped (and the existing results are
# reused) if its run/output directory already exists.
skip_if_exists_prms_mixed = True
skip_if_exists_prms_double = True
skip_if_exists_pws = True

## Run PRMS in mixed and double precision and convert the output to NetCDF

In [4]:
# Directory holding the PRMS 5.2.1 executables, relative to the pywatershed root.
bin_dir = pws_root / "../prms_src/prms5.2.1/bin/"
# Mixed- and double-precision PRMS executables.
# NOTE(review): the file names suggest platform-specific (macOS) builds; other
# platforms presumably need different binaries here -- confirm.
bin_mixed = bin_dir / "prms_521_mixed_mac_m1_intel"
bin_double = bin_dir / "prms_521_double_mac_m1_intel"

In [5]:
def run_prms(binary: pl.Path, run_dir: pl.Path, skip_if_exists=False):
    """Run a PRMS binary in a new directory and post-process its output.

    The binary and the domain input files are copied from ``domain_dir`` into
    ``run_dir`` and PRMS is executed there, with its output also written to
    ``run.log``. Every CSV file in ``run_dir / "output"`` except ``stats.csv``
    is converted to NetCDF. The following files are then derived from that
    NetCDF output and written alongside it:

    * ``<var>_prev.nc`` and ``<var>_change.nc`` for each storage variable,
    * ``through_rain.nc``,
    * ``infil_hru.nc``.

    Args:
        binary: Path to the PRMS executable to copy and run.
        run_dir: Directory to create for the run. It must not already exist
            unless ``skip_if_exists`` is True.
        skip_if_exists: If True and ``run_dir`` exists, print a message and
            return without running anything.

    Returns:
        None.

    Raises:
        FileExistsError: If ``run_dir`` already exists and ``skip_if_exists``
            is False.
    """
    import subprocess

    from pywatershed import CsvFile

    if skip_if_exists and run_dir.exists():
        print(
            f"Run ({run_dir}) already exists and skip_if_exists=True. Using existing run."
        )
        return None

    # Only load the parameters once we know a run will actually be made.
    params = pws.parameters.PrmsParameters.load(
        domain_dir / "myparam.param"
    ).parameters

    run_dir.mkdir()  # must not exist, on user to delete
    copy2(binary, run_dir / binary.name)
    for ff in [
        "control.test",
        "myparam.param",
        "tmax.cbh",
        "tmin.cbh",
        "prcp.cbh",
        "sf_data",
    ]:
        copy2(domain_dir / ff, run_dir / ff)

    output_dir = run_dir / "output"
    output_dir.mkdir()

    exe_command = f"time ./{binary.name} control.test -MAXDATALNLEN 60000 2>&1 | tee run.log"
    # The return code is not inspected: with the pipe it would be that of
    # `tee`, not of PRMS.
    subprocess.run(
        exe_command,
        shell=True,
        # stdout = subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
        cwd=run_dir,
    )

    # convert to netcdf
    # could make these arguments
    chunking = {
        "time": 0,
        "doy": 0,
        "nhm_id": 100,
        "nhm_seg": 100,
    }

    for cc in output_dir.glob("*.csv"):
        if cc.name in ["stats.csv"]:
            continue
        nc_path = cc.with_suffix(".nc")
        CsvFile(cc).to_netcdf(nc_path, chunk_sizes=chunking)

    # previous and change variables
    for vv in [
        "pk_ice",
        "freeh2o",
        "soil_moist",
        "hru_impervstor",
        "dprst_stor_hru",
    ]:
        # Read into memory and close the file right away so no handle leaks.
        with xr.open_dataset(output_dir / f"{vv}.nc") as ds:
            data = ds[vv].load()

        # Shift the values by one step along the first axis to get the
        # "previous" value at each step.
        prev_da = data.copy()
        prev_da[:] = np.roll(prev_da.values, 1, axis=0)
        assert (prev_da[1:, :].values == data[0:-1, :].values).all()
        # The first step has no previous value (np.roll wrapped the last one
        # around): np.nan better but causes plotting to fail
        prev_da[0, :] = 0.0
        change_da = data - prev_da

        prev_da.rename(f"{vv}_prev").to_dataset().to_netcdf(
            output_dir / f"{vv}_prev.nc"
        )
        change_da.rename(f"{vv}_change").to_dataset().to_netcdf(
            output_dir / f"{vv}_change.nc"
        )

    # through_rain
    dep_vars = [
        "pk_ice_prev",
        "freeh2o_prev",
        "newsnow",
        "pptmix_nopack",
        "net_rain",
    ]
    data = {}
    for vv in dep_vars:
        with xr.open_dataset(output_dir / f"{vv}.nc") as ds:
            data[vv] = ds[vv].load()

    epsilon64 = np.finfo(np.float64).eps

    # net_rain is kept where the previous pack (pk_ice + freeh2o) is at most
    # epsilon64 and newsnow is not 1, or where pptmix_nopack is 1; it is set
    # to zero everywhere else.
    wh_through = (
        ((data["pk_ice_prev"] + data["freeh2o_prev"]) <= epsilon64)
        & ~(data["newsnow"] == 1)
    ) | (data["pptmix_nopack"] == 1)

    through_rain = data["net_rain"].copy()
    through_rain[:] = np.where(wh_through, data["net_rain"], 0.0)
    through_rain.to_dataset(name="through_rain").to_netcdf(
        output_dir / "through_rain.nc"
    )

    # infil_hru: infil scaled by the fraction of the HRU that is neither
    # impervious nor depression storage
    imperv_frac = params["hru_percent_imperv"]
    dprst_frac = params["dprst_frac"]
    perv_frac = 1.0 - imperv_frac - dprst_frac
    with xr.open_dataset(output_dir / "infil.nc") as ds:
        infil_hru = ds["infil"].load().rename("infil_hru")
    infil_hru *= perv_frac
    infil_hru.to_dataset().to_netcdf(output_dir / "infil_hru.nc")

In [6]:
# Mixed-precision PRMS run (skipped when the run directory already exists and
# skip_if_exists_prms_mixed is True).
prms_mixed_run_dir = nb_output_dir / "prms_mixed_run"
run_prms(
    binary=bin_mixed,
    run_dir=prms_mixed_run_dir,
    skip_if_exists=skip_if_exists_prms_mixed,
)




                         U.S. Geological Survey
               Precipitation-Runoff Modeling System (PRMS)
                        Version 5.2.1 02/08/2022

        Process            Available Modules
--------------------------------------------------------------------
  Basin Definition: basin
    Cascading Flow: cascade
  Time Series Data: obs, water_use_read, dynamic_param_read
   Potet Solar Rad: soltab
  Temperature Dist: temp_1sta, temp_laps, temp_dist2, climate_hru,
                    temp_map
       Precip Dist: precip_1sta, precip_laps, precip_dist2,
                    climate_hru, precip_map
Temp & Precip Dist: xyz_dist, ide_dist
    Solar Rad Dist: ccsolrad, ddsolrad, climate_hru
Transpiration Dist: transp_tindex, climate_hru, transp_frost
      Potential ET: potet_hamon, potet_jh, potet_pan, climate_hru,
                    potet_hs, potet_pt, potet_pm, potet_pm_sta
      Interception: intcp
Snow & Glacr Dynam: snowcomp, glacr_melt
    Surface Runoff: srunoff_smidx, s

In [7]:
# %debug

In [8]:
# Double-precision PRMS run; its NetCDF output is what the pywatershed run
# below is forced with.
prms_dbl_run_dir = nb_output_dir / "prms_double_run"
run_prms(
    binary=bin_double,
    run_dir=prms_dbl_run_dir,
    skip_if_exists=skip_if_exists_prms_double,
)




                         U.S. Geological Survey
               Precipitation-Runoff Modeling System (PRMS)
                        Version 5.2.1 02/08/2022

        Process            Available Modules
--------------------------------------------------------------------
  Basin Definition: basin
    Cascading Flow: cascade
  Time Series Data: obs, water_use_read, dynamic_param_read
   Potet Solar Rad: soltab
  Temperature Dist: temp_1sta, temp_laps, temp_dist2, climate_hru,
                    temp_map
       Precip Dist: precip_1sta, precip_laps, precip_dist2,
                    climate_hru, precip_map
Temp & Precip Dist: xyz_dist, ide_dist
    Solar Rad Dist: ccsolrad, ddsolrad, climate_hru
Transpiration Dist: transp_tindex, climate_hru, transp_frost
      Potential ET: potet_hamon, potet_jh, potet_pan, climate_hru,
                    potet_hs, potet_pt, potet_pm, potet_pm_sta
      Interception: intcp
Snow & Glacr Dynam: snowcomp, glacr_melt
    Surface Runoff: srunoff_smidx, s

## Run pywatershed forced with output from the PRMS double-precision run

In [9]:
# The list of processes to run in pywatershed: only the PRMS runoff process.
process = [pws.PRMSRunoff]

# Directory for the pywatershed run and the directory its inputs are staged in.
pws_run_dir = nb_output_dir / "pws_run"
input_dir = pws_run_dir / "pws_input"

In [10]:
# Start from the domain's control file and override the options that are
# specific to this notebook's pywatershed run.
output_dir = pws_run_dir / "output"
control = pws.Control.load(domain_dir / "control.test")
pws_option_overrides = {
    "input_dir": input_dir,
    "budget_type": "warn",
    "calc_method": "numpy",
    "netcdf_output_dir": output_dir,
}
control.options = control.options | pws_option_overrides

params = pws.parameters.PrmsParameters.load(domain_dir / "myparam.param")

In [11]:
# Reuse an existing pywatershed run when asked to; otherwise stage the inputs
# and run the model.
reuse_existing_pws_run = output_dir.exists() and skip_if_exists_pws

if not reuse_existing_pws_run:
    input_dir.mkdir(exist_ok=True, parents=True)

    # Stage every NetCDF file from the double-precision PRMS run (top level
    # first, then its output directory) as pywatershed input.
    for nc_source_dir in (prms_dbl_run_dir, prms_dbl_run_dir / "output"):
        for ff in nc_source_dir.glob("*.nc"):
            copy2(ff, input_dir / ff.name)

    submodel = pws.Model(
        process,
        control=control,
        parameters=params,
    )

    submodel.run(finalize=True)

else:
    print(
        f"Output ({output_dir}) already exists and skip_if_exists=True. Using existing run."
    )

  0%|          | 0/731 [00:00<?, ?it/s]



model.run(): finalizing


In [12]:
# Every variable of the process must have been written by pywatershed; report
# (without failing) the ones that have no PRMS counterpart in input_dir.
for vv in process[0].get_variables():
    print(vv)
    assert (output_dir / f"{vv}.nc").exists()
    # Explicit check instead of an assert inside a bare `except:`, which would
    # also swallow unrelated errors such as KeyboardInterrupt.
    if not (input_dir / f"{vv}.nc").exists():
        print(f"********** {vv} not in input_dir")

contrib_fraction
infil
infil_hru
sroff
sroff_vol
********** sroff_vol not in input_dir
hru_sroffp
hru_sroffi
imperv_stor
********** imperv_stor not in input_dir
imperv_evap
********** imperv_evap not in input_dir
hru_impervevap
hru_impervstor
hru_impervstor_old
********** hru_impervstor_old not in input_dir
hru_impervstor_change
dprst_vol_frac
********** dprst_vol_frac not in input_dir
dprst_vol_clos
********** dprst_vol_clos not in input_dir
dprst_vol_open
********** dprst_vol_open not in input_dir
dprst_vol_clos_frac
********** dprst_vol_clos_frac not in input_dir
dprst_vol_open_frac
********** dprst_vol_open_frac not in input_dir
dprst_area_clos
********** dprst_area_clos not in input_dir
dprst_area_open
********** dprst_area_open not in input_dir
dprst_area_clos_max
********** dprst_area_clos_max not in input_dir
dprst_area_open_max
********** dprst_area_open_max not in input_dir
dprst_sroff_hru
dprst_seep_hru
dprst_evap_hru
dprst_insroff_hru
dprst_stor_hru
dprst_stor_hru_old
*****

## Start by comparing the budget variables

In [13]:
# Mass budget terms declared by the first (and only) process in `process`;
# iterated below as {term name: list of variable names}.
budget_terms = process[0].get_mass_budget_terms()

In [14]:
# additional variables
# Append only what is missing so that re-running this cell does not add
# duplicate entries to the list.
for extra_var in [
    "dprst_insroff_hru",
    "dprst_stor_hru",
]:
    if extra_var not in budget_terms["outputs"]:
        budget_terms["outputs"].append(extra_var)

In [15]:
# For every non-input budget variable, pair the pywatershed result ("pws")
# with the PRMS result ("prms") in one dataset, keyed by variable name.
comparisons = {}
# `term_vars` rather than `vars`: a top-level loop variable named `vars` would
# shadow the builtin for the rest of the notebook session.
for term, term_vars in budget_terms.items():
    if term == "inputs":
        continue
    print(term)
    for vv in term_vars:
        print("    ", vv)

        pws_file = output_dir / f"{vv}.nc"
        assert pws_file.exists(), f"pywatershed output not found: {pws_file}"
        pws_ds = xr.open_dataset(pws_file)[vv].rename("pws")

        prms_file = input_dir / f"{vv}.nc"
        assert prms_file.exists(), f"PRMS output not found: {prms_file}"
        prms_ds = xr.open_dataset(prms_file)[vv].rename("prms")

        comparisons[vv] = xr.merge([pws_ds, prms_ds])

outputs
     hru_sroffi
     hru_sroffp
     dprst_sroff_hru
     infil_hru
     hru_impervevap
     dprst_seep_hru
     dprst_evap_hru
     dprst_insroff_hru
     dprst_stor_hru
storage_changes
     hru_impervstor_change
     dprst_stor_hru_change


In [16]:
# comparisons

In [17]:
def plot_var(var_name, diff=False, nhm_id: list = None):
    """Display an interactive plot of one entry of ``comparisons``.

    Args:
        var_name: Key into ``comparisons``; also used to look up the
            variable's description and units for the axis label.
        diff: If False, plot the ``pws`` and ``prms`` variables. If True, plot
            ``error`` (``pws - prms``) and ``relative_error``
            (``error / prms``) instead.
        nhm_id: Optional collection of nhm_id values to restrict the plot to.
            ``None`` or an empty collection plots every nhm_id.

    Returns:
        None. The plot is shown with ``display``.
    """
    import textwrap

    var_meta = pws.meta.find_variables(var_name)[var_name]
    wrapped_desc = textwrap.fill(var_meta["desc"], 40)
    y_label = f"{wrapped_desc}\n({var_meta['units']})"
    plot_title = var_name
    plot_ds = comparisons[var_name]

    if diff:
        # Swap the two raw series for their difference and relative
        # difference; the dataset stored in `comparisons` is left untouched.
        error = plot_ds["pws"] - plot_ds["prms"]
        plot_ds = plot_ds.assign(
            error=error,
            relative_error=error / plot_ds["prms"],
        ).drop_vars(["pws", "prms"])
        y_label = "Difference PWS - PRMS\n" + y_label
        plot_title = "ERRORS: Difference in " + plot_title

    subset_requested = (nhm_id is not None) and (len(nhm_id) > 0)
    if subset_requested:
        keep = plot_ds.nhm_id.isin(nhm_id)
        plot_ds = plot_ds.where(keep, drop=True)

    figure = plot_ds.hvplot(
        frame_width=700,
        groupby="nhm_id",
        title=plot_title,
        ylabel=y_label,
    )
    display(figure)

In [21]:
def var_close(var_name, rtol=1.0e-6, atol=1.0e-6):
    """Plot a variable, showing errors only if pywatershed and PRMS disagree.

    A value counts as close when its absolute difference is below ``atol`` or
    its difference relative to the magnitude of the PRMS value is below
    ``rtol``. If every value is close, both series are plotted; otherwise the
    errors are plotted for just the nhm_ids that have a value that is not
    close.

    Args:
        var_name: Key into ``comparisons``.
        rtol: Relative tolerance.
        atol: Absolute tolerance.

    Returns:
        None.
    """
    print(var_name)
    var_ds = comparisons[var_name]
    abs_diff = abs(var_ds["pws"] - var_ds["prms"])
    # Divide by the magnitude of the reference: dividing by the signed value
    # gives a negative ratio wherever PRMS is negative, which would always
    # pass the `< rtol` test and hide real differences.
    rel_abs_diff = abs_diff / abs(var_ds["prms"])
    close = (abs_diff < atol) | (rel_abs_diff < rtol)

    if close.all():
        plot_var(var_name, diff=False)
        return

    # Positions along the second axis (indexed against nhm_id below) that
    # have at least one value that is not close, without repeats.
    hru_indices = np.unique(np.where(~close.values)[1])
    nhm_ids = abs_diff.nhm_id[hru_indices]
    plot_var(var_name, diff=True, nhm_id=nhm_ids)

In [22]:
# Check a single variable first: compare pywatershed against PRMS for it.
var_close("hru_impervstor_change")

hru_impervstor_change


  return pd.unique(values)
  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type


In [23]:
# Run the same check for every variable collected in `comparisons`, in
# insertion order.
for var_name in comparisons:
    var_close(var_name)

hru_sroffi


  return pd.unique(values)
  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type


hru_sroffp


  return pd.unique(values)
  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type


dprst_sroff_hru


  return pd.unique(values)
  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type


infil_hru


  return pd.unique(values)
  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type


hru_impervevap


  return pd.unique(values)
  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type


dprst_seep_hru


  return pd.unique(values)
  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type


dprst_evap_hru


  return pd.unique(values)
  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type


dprst_insroff_hru


  return pd.unique(values)
  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type


dprst_stor_hru


  return pd.unique(values)
  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type


hru_impervstor_change


  return pd.unique(values)
  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type


dprst_stor_hru_change


  return pd.unique(values)
  return dataset.data.dtypes[idx].type
  return dataset.data.dtypes[idx].type
