# Model Loop Customization of Output
Until a more sophisticated "model observer" functionality is developed, it's easy enough to customize
output in the `model.run()` loop. One can subset in space (or time) and write to disk at desired
times. Here, we'll grab specific locations of both segments and HRUs at each time step into an xarray
Dataset, which we'll then write out to disk at the end. If we also enable the default NetCDF output
(`model.initialize_netcdf(run_dir)`), we can compare our selected output against the full output.
Turning off the default output, however, results in significant speedups. For a 2 year Delaware River
Basin run and the subset of data specified, the timings are as follows:

| calc_method | model.initialize_netcdf | time (m:ss) |
| ------------|-------------------------|-------------|
| numpy       | True                    | 0:31.6      |
| numpy       | False                   | 0:29.6      |
| numba       | True                    | 0:17.4      |
| numba       | False                   | 0:10.7      |


In [None]:
from itertools import product
import math
import pathlib as pl
import shutil
import time

import numpy as np
import pywatershed as pws
from tqdm import tqdm
import xarray as xr

Set up a full NHM model on the DRB domain.

In [None]:
# Process classes, listed in the order they are handed to the model.
model_components = [
    pws.PRMSSolarGeometry,
    pws.PRMSAtmosphere,
    pws.PRMSCanopy,
    pws.PRMSSnow,
    pws.PRMSRunoff,
    pws.PRMSSoilzone,
    pws.PRMSGroundwater,
    pws.PRMSChannel,
]

# Input data for the domain lives in the repository's test_data directory;
# all output of this notebook goes into a scratch directory next to it.
domain_name = "drb_2yr"
domain_dir = pl.Path("..", "test_data", domain_name)
run_dir = pl.Path("model_loop_custom_output")

# Always start from an empty run directory so stale output from a previous
# session cannot leak into the comparison further down.
if run_dir.exists():
    shutil.rmtree(run_dir)
run_dir.mkdir()

print(f'\nRunning domain "{domain_name}" in {run_dir.resolve()}')

In [None]:
# Single-case defaults. Within this file `model_initialize_netcdf` is not
# read by any later cell, and `calc_method` is rebound by the loop over
# `cases` below.
model_initialize_netcdf: bool = False
calc_method: str = "numba"

In [None]:
# Every (default NetCDF output on/off, calculation method) combination to
# time, with the output flag varying slowest.
cases = [
    (write_default_output, method)
    for write_default_output in (True, False)
    for method in ("numpy", "numba")
]

In [None]:
for model_output_netcdf, calc_method in cases:
    # Time one complete model run per case while collecting a custom subset
    # of the output in an in-memory xarray Dataset that is written to disk
    # once, after the last time step.
    # NOTE: every case writes to the same custom output file, so only the
    # output of the last case remains on disk after the loop.

    start_time = time.time()
    custom_output_file = run_dir / "model_custom_output.nc"

    param_file = domain_dir / "myparam.param"
    params = pws.parameters.PrmsParameters.load(param_file)
    control = pws.Control.load_prms(
        domain_dir / "nhm.control", warn_unused_options=False
    )

    # Control options shared by all cases.
    control.options = control.options | {
        "input_dir": domain_dir,
        "budget_type": None,
        "verbosity": 0,
        "calc_method": calc_method,
    }

    if model_output_netcdf:
        # Default (full-domain) NetCDF output, used later as the reference
        # the custom output is checked against.
        # Other candidates, currently disabled: potet, net_ppt, sroff,
        # ssres_flow, gwres_sink.
        control.options = control.options | {
            "netcdf_output_var_names": [
                "hru_actet",
                "tmaxf",
                "sroff_vol",
                "ssres_flow_vol",
                "gwres_flow_vol",
                "seg_outflow",
                "hru_streamflow_out",
                "recharge",
                "snowcov_area",
                "soil_rechr",
                "net_rain",
                "net_snow",
                "gwres_flow",
                "snowmelt",
            ],
            "netcdf_output_dir": run_dir,
        }
    else:
        control.options = control.options | {
            "netcdf_output_var_names": None,
            "netcdf_output_dir": None,
        }

    # Reuse the component list defined in the setup cell rather than
    # repeating it here.
    model = pws.Model(
        model_components,
        control=control,
        parameters=params,
    )

    # Custom model output at selected spatial locations for all times.
    # Generally, one should be careful with xarray performance, but just
    # writing at the end should be fine.
    # Could move to netcdf4 if performance is a concern.

    # /////////////////////////////////
    # specifications: what we want this to look like to the user

    # Variables to record at every time step. Each name appears once; a
    # repeated name would only be allocated and copied twice.
    # Other candidates, currently disabled: potet, net_ppt, gwres_sink, and
    # sroff / ssres_flow (values in inches for area weighted averaging).
    var_list = [
        "hru_actet",
        "tmaxf",
        "seg_outflow",
        "recharge",
        "snowcov_area",
        "soil_rechr",
        "net_rain",
        "net_snow",
        "gwres_flow",  # values in inches for area weighted averaging
        "snowmelt",
    ]

    # We want seg_outflow just on the poi_gages. The "- 1" is related to the
    # (1-based) indexing in Fortran. The indices are wrapped in a tuple so
    # they look like the return value of np.where.
    wh_gages = (params.parameters["poi_gage_segment"] - 1,)
    spatial_subsets = {
        "poi_gages": {
            "coord_name": "nhm_seg",
            "indices": wh_gages,
            "new_coord": params.parameters["poi_gage_id"],
            # Any other variable on the same coordinate can be listed here,
            # but only names that are also in var_list (or are diagnostic
            # variables) are actually written.
            "variables": ["seg_outflow", "seg_gwflow"],
        },
    }

    # A novel, diagnostic variable. Its inputs do not need to be in var_list.
    def sum_hru_flows(sroff_vol, ssres_flow_vol, gwres_flow_vol):
        """Return the sum of the three flow volumes passed in."""
        return sroff_vol + ssres_flow_vol + gwres_flow_vol

    diagnostic_var_dict = {
        "hru_streamflow_out": {
            "inputs": ["sroff_vol", "ssres_flow_vol", "gwres_flow_vol"],
            "function": sum_hru_flows,
            # Dimension, coordinate and dtype are borrowed from this variable.
            "like_var": "sroff_vol",
            "metadata": {
                "desc": "Total volume to stream network from each HRU",
                "units": "cubic feet",
            },
        },
    }

    # TODO: specify subsets in time
    # TODO: specify different output files

    # /////////////////////////////////
    # code starts here

    out_subset_ds = xr.Dataset()

    # Everything that has to be read from the model: the requested variables
    # plus the inputs of the diagnostic variables.
    needed_vars = var_list + [
        var for val in diagnostic_var_dict.values() for var in val["inputs"]
    ]
    needed_metadata = pws.meta.get_vars(needed_vars)

    subset_vars = [
        var for val in spatial_subsets.values() for var in val["variables"]
    ]

    # Map each spatially subset variable to the name of its subset.
    var_subset_key = {
        var: subkey
        for subkey, subval in spatial_subsets.items()
        for var in subval["variables"]
    }

    diagnostic_vars = list(diagnostic_var_dict.keys())

    # solve the processes for each variable
    var_proc = {
        var: proc_key
        for var in needed_vars
        for proc_key, proc_val in model.processes.items()
        if var in proc_val.get_variables()
    }

    time_coord = np.arange(
        control.start_time,
        control.end_time + control.time_step,
        dtype="datetime64[D]",
    )
    n_time_steps = len(time_coord)
    out_subset_ds["time"] = xr.Variable(["time"], time_coord)
    out_subset_ds = out_subset_ds.set_coords("time")

    # annoying to have to hard-code this
    dim_coord = {"nhru": "nhm_id", "nsegment": "nhm_seg"}

    ##########################################################################
    # declare memory for the outputs
    for var in var_list + diagnostic_vars:
        # Impostor approach: a diagnostic variable takes its process,
        # dimension, coordinate and dtype from its "like_var".
        is_diagnostic = var in diagnostic_vars
        like_var = diagnostic_var_dict[var]["like_var"] if is_diagnostic else var

        proc = model.processes[var_proc[like_var]]
        dim_name = needed_metadata[like_var]["dims"][0]
        dim_len = proc._params.dims[dim_name]
        coord_name = dim_coord[dim_name]
        coord_data = proc._params.coords[coord_name]
        var_type = needed_metadata[like_var]["type"]

        var_meta = {
            kk: vv
            for kk, vv in needed_metadata[like_var].items()
            if kk in ("desc", "units")
        }

        if is_diagnostic:
            # The description of the like_var does not apply to the
            # diagnostic variable; tolerate it being absent.
            var_meta.pop("desc", None)
            if "metadata" in diagnostic_var_dict[var]:
                # Copy so the user's specification is never mutated below.
                var_meta = dict(diagnostic_var_dict[var]["metadata"])
            var_meta.setdefault("desc", "Custom output diagnostic variable")

        if var in subset_vars:
            # Subset variables get their own dimension and coordinate.
            subset_key = var_subset_key[var]
            subset_info = spatial_subsets[subset_key]
            dim_name = f"n{subset_key}"
            coord_name = subset_key
            dim_len = len(subset_info["indices"][0])
            coord_data = subset_info["new_coord"]

        if coord_name not in out_subset_ds.variables:
            out_subset_ds[coord_name] = xr.DataArray(coord_data, dims=[dim_name])
            out_subset_ds = out_subset_ds.set_coords(coord_name)

        # Pre-fill with the fill value for the dtype so that steps which are
        # never written remain recognizable.
        out_subset_ds[var] = xr.Variable(
            ["time", dim_name],
            np.full(
                [n_time_steps, dim_len],
                pws.constants.fill_values_dict[np.dtype(var_type)],
                var_type,
            ),
        )

        out_subset_ds[var].attrs = var_meta

    ##########################################################################
    # run the model, copying the requested data out after every time step
    for istep in range(n_time_steps):
        model.advance()
        model.calculate()

        if model_output_netcdf:
            model.output()

        for var in var_list:
            proc = model.processes[var_proc[var]]
            data = proc[var]
            if isinstance(data, pws.base.timeseries.TimeseriesArray):
                data = data.current
            if var in subset_vars:
                indices = spatial_subsets[var_subset_key[var]]["indices"]
                data = data[indices]
            out_subset_ds[var][istep, :] = data

        # This is where the diagnostic variables are actually calculated,
        # once per time step.
        for diag_key, diag_val in diagnostic_var_dict.items():
            input_dict = {}
            for ii in diag_val["inputs"]:
                proc = model.processes[var_proc[ii]]
                data = proc[ii]
                # Same unwrapping as for the variables in var_list, so that
                # the function always receives the current time step.
                if isinstance(data, pws.base.timeseries.TimeseriesArray):
                    data = data.current
                input_dict[ii] = data

            out_subset_ds[diag_key][istep, :] = diag_val["function"](**input_dict)

    out_subset_ds.to_netcdf(custom_output_file)
    out_subset_ds.close()

    # Drop the references to the model and its data before the next case.
    del proc
    del input_dict
    del model
    del out_subset_ds

    duration = time.time() - start_time
    print(f"({model_output_netcdf=}, {calc_method=}): {duration:.3f}")
    

Check the output

In [None]:
# Compare the custom output file against the default per-variable NetCDF
# files in run_dir. The custom file on disk is the one written by the last
# case of the loop above; the default files are presumably those left in
# run_dir by the cases that ran with default NetCDF output enabled.
#
# The dataset is opened in a `with` block so the file is closed even when an
# assertion fails, which keeps the final clean-up of run_dir from tripping
# over an open file handle.
with xr.open_dataset(custom_output_file) as out_subset_ds:
    for vv in var_list:
        default_output_file = run_dir / f"{vv}.nc"
        print("checking variable: ", vv)
        # load_dataarray reads the data into memory and closes the file, so
        # no explicit close() is needed afterwards.
        answer = xr.load_dataarray(default_output_file)

        result = out_subset_ds[vv]

        if vv in subset_vars:
            # Apply the same spatial subset to the full-domain reference.
            indices = spatial_subsets[var_subset_key[vv]]["indices"]
            answer = answer[:, indices[0]]

        np.testing.assert_allclose(answer, result)

    for diag_key, diag_val in diagnostic_var_dict.items():
        print("checking diagnostic variable: ", diag_key)
        # Recompute the diagnostic variable from the default output of its
        # inputs and compare against what the run loop stored.
        input_dict = {}
        for ii in diag_val["inputs"]:
            default_output_file = run_dir / f"{ii}.nc"
            input_dict[ii] = xr.load_dataarray(default_output_file)

        answer = diag_val["function"](**input_dict)
        result = out_subset_ds[diag_key]

        np.testing.assert_allclose(answer, result)

In [None]:
# Clean up: remove the run directory and everything written into it.
shutil.rmtree(run_dir)