# Preprocess CBH forcing files.

This notebook demonstrates two steps in preprocessing CBH files: 1) conversion to netCDF, and 2) applying parameter adjustments (with and without soltab input files).

In [None]:
import pathlib as pl
from pprint import pprint
import shutil

import numpy as np

## 1. Convert CBH files to netcdf

In [None]:
from pywatershed.parameters import PrmsParameters
from pywatershed.utils.cbh_utils import cbh_files_to_netcdf

# Test domain and the scratch directory that receives the converted files.
dom_name = "drb_2yr"
dom_dir = pl.Path(f"../test_data/{dom_name}")
cbh_nc_dir = pl.Path(".") / f"{dom_name}_cbh_files"

param_file = dom_dir / "myparam.param"
control_file = dom_dir / "control.test"

params = PrmsParameters.load(param_file)

# One CBH text file per forcing variable, keyed by variable name.
cbh_files = {
    var_name: dom_dir / f"{var_name}.cbh"
    for var_name in ("prcp", "tmax", "tmin")
}

# Always start from a fresh directory: it should not exist at this point
# because it is deleted at the end of the notebook, but a previous
# interrupted run may have left it behind.
if cbh_nc_dir.exists():
    shutil.rmtree(cbh_nc_dir)
cbh_nc_dir.mkdir()

# Convert each CBH file on its own so every variable gets a separate
# "<variable>.nc" file.
for var_name, cbh_path in cbh_files.items():
    cbh_files_to_netcdf(
        {var_name: cbh_path}, params, cbh_nc_dir / f"{var_name}.nc"
    )

nc_files = sorted(cbh_nc_dir.glob("*.nc"))
print(nc_files)

## 2. Apply PRMS parameters to input CBH data to get forcings used by the hydrologic components of the model.

When a `PRMSAtmosphere` object is initialized with a `netcdf_output_dir` argument, the adjusted forcings 
are written to this location. Unless one requests specific variables only, all variables are written. 

Typically, the `soltab_potsw.nc` and `soltab_horad_potsw.nc` input files are not available as inputs. 
(These are only output in a fixed width format by a version of PRMS5.2.1 in the pynhm repository
that is translated to netCDF when setting up test data). First, we show how to write the CBH adjustments
to output files using `PRMSSolarGeometry` instead of soltab files. Second, we show how to use soltab
files when they are available.

In [None]:
from pywatershed import Control, PRMSAtmosphere, PRMSSolarGeometry

From `help(PRMSAtmosphere)`:

```
Help on class PRMSAtmosphere in module pywatershed.atmosphere.PRMSAtmosphere:

class PRMSAtmosphere(pywatershed.base.storageUnit.StorageUnit)
 |  PRMSAtmosphere(
        control: pywatershed.base.control.Control, 
        prcp: Union[str, pathlib.Path], 
        tmax: Union[str, pathlib.Path], 
        tmin: Union[str, pathlib.Path], 
        soltab_potsw: Union[str, numpy.ndarray, pywatershed.base.adapter.Adapter], 
        soltab_horad_potsw: Union[str, numpy.ndarray, pywatershed.base.adapter.Adapter], 
        budget_type: str = None, 
        verbose: bool = False, 
        netcdf_output_dir: Union[str, pathlib.Path] = None, 
        netcdf_output_vars: list = None, 
        n_time_chunk: int = -1, 
        load_n_time_batches: int = 1)
```

Thus the required inputs are `control`, `prcp`, `tmax`, `tmin`, `soltab_potsw`, `soltab_horad_potsw`. All but control can be specified as files and the `prcp`, `tmax`, and `tmin` must be specified as files. 

### 2a. If soltab output files are not present
For a test domain, specified above, we can generate a dict to pass as arguments specifying the required files.

In [None]:
# Map every non-soltab PRMSAtmosphere input to its "<variable>.nc" file in
# cbh_nc_dir; the soltab inputs are left out of this dict on purpose.
input_vars = PRMSAtmosphere.get_inputs()
input_files_w_solar_geom = {
    var: cbh_nc_dir / f"{var}.nc" for var in input_vars if "soltab" not in var
}
pprint(input_files_w_solar_geom)

We can query `PRMSAtmosphere` about its outputs. Later we'll use this list to confirm that all of its variables are written as netCDF outputs when `netcdf_output_vars` is `None`.

In [None]:
# Expected netCDF output path for every variable PRMSAtmosphere reports.
output_vars = PRMSAtmosphere.get_variables()
output_dir = pl.Path("preprocess_cbh_adj_files")
output_files = []
for var_name in output_vars:
    output_files.append(output_dir / f"{var_name}.nc")
pprint(output_files)

Establish a file checking function and functions for achieving initialization of `PRMSAtmosphere` specifying a `netcdf_output_dir`.

In [None]:
def check_output_files(netcdf_output_vars):
    """Confirm the expected netCDF outputs exist, deleting each one as checked.

    Args:
        netcdf_output_vars: names of the variables whose "<name>.nc" files
            are expected in the module-level `output_dir`. `None` means
            check every path in the module-level `output_files` list.

    Returns:
        True when every check passes; a failed check raises AssertionError.
    """
    if netcdf_output_vars is not None:
        expected = [output_dir / f"{name}.nc" for name in netcdf_output_vars]
    else:
        expected = output_files

    for nc_file in expected:
        print(f"checking {nc_file} file exists")
        assert nc_file.exists()
        # Remove the file right away and verify that the removal worked.
        nc_file.unlink()
        assert not nc_file.exists()

    return True


def preprocess_w_solar_geom(input_dict, netcdf_output_vars):
    """Write adjusted forcings to netCDF using PRMSSolarGeometry for soltab.

    Initializes `PRMSAtmosphere` with the module-level `control`, the files
    in `input_dict`, and soltab arrays taken from a `PRMSSolarGeometry`
    object, then checks that the requested netCDF files were written to the
    module-level `output_dir` and removes that directory.

    Args:
        input_dict: keyword arguments forwarded to `PRMSAtmosphere`; it must
            not contain the soltab inputs since those are supplied here.
        netcdf_output_vars: list of variable names to write, or `None`,
            passed through to `PRMSAtmosphere` and `check_output_files`.

    Returns:
        None.

    Raises:
        AssertionError: if an expected output file is missing.
    """
    # Start from an empty directory. With mkdir(exist_ok=True), files left
    # behind by an earlier failed run would be reused and could let the
    # existence checks below pass on stale output.
    if output_dir.exists():
        shutil.rmtree(output_dir)
    output_dir.mkdir()

    solar_geom = PRMSSolarGeometry(control)
    atm = PRMSAtmosphere(
        control=control,
        **input_dict,
        soltab_horad_potsw=solar_geom.soltab_horad_potsw,
        soltab_potsw=solar_geom.soltab_potsw,
        budget_type=None,
        netcdf_output_dir=output_dir,
        netcdf_output_vars=netcdf_output_vars,
    )
    # Drop the object before checking for its files.
    # NOTE(review): presumably this finalizes/closes the netCDF outputs --
    # confirm against PRMSAtmosphere.
    del atm
    assert check_output_files(netcdf_output_vars)
    shutil.rmtree(output_dir)
    return None

We may also want to exercise control over the variables output to netCDF files. Here we'll specify two options, including the default, in which all variables are written to netCDF when `None` is used. (An empty list would give the same effect as not specifying a `netcdf_output_dir`.)

In [None]:
# Description -> value passed as `netcdf_output_vars`; None selects the
# default of writing every variable.
netcdf_output_vars_dict = dict(
    [
        ("Reduced output set", ["tmaxc", "tminc", "pptmix"]),
        ("Full/Default output set", None),
    ]
)

Run both output variable sets when only the typical CBH files are available.

In [None]:
# Soltab netCDF files are not available here, so the soltab inputs come from
# PRMSSolarGeometry. `control` is read as a global by the preprocess
# functions, so it must be created before they are called.
control = Control.load(control_file, params=params)
for label in netcdf_output_vars_dict:
    print(f"{label}:")
    preprocess_w_solar_geom(
        input_files_w_solar_geom, netcdf_output_vars_dict[label]
    )
    print("")

### 2b. If soltab output files are present
We repeat the above, dropping the `PRMSSolarGeometry` object as its information is now coming from the soltab files. 

In [None]:
# Map every PRMSAtmosphere input to a netCDF file: soltab inputs live in
# dom_dir/output for the test datasets, everything else is in cbh_nc_dir.
input_vars = PRMSAtmosphere.get_inputs()
input_files = {
    var: (
        dom_dir / f"output/{var}.nc"
        if "soltab" in var
        else cbh_nc_dir / f"{var}.nc"
    )
    for var in input_vars
}

pprint(input_files)


def preprocess_w_soltab(input_dict, netcdf_output_vars):
    """Write adjusted forcings to netCDF with soltab inputs given as files.

    Initializes `PRMSAtmosphere` with the module-level `control` and the
    inputs in `input_dict`, then checks that the requested netCDF files were
    written to the module-level `output_dir` and removes that directory.

    Args:
        input_dict: keyword arguments forwarded to `PRMSAtmosphere`; it is
            expected to include the soltab inputs as well as the forcings.
        netcdf_output_vars: list of variable names to write, or `None`,
            passed through to `PRMSAtmosphere` and `check_output_files`.

    Returns:
        None.

    Raises:
        AssertionError: if an expected output file is missing.
    """
    # Start from an empty directory. With mkdir(exist_ok=True), files left
    # behind by an earlier failed run would be reused and could let the
    # existence checks below pass on stale output.
    if output_dir.exists():
        shutil.rmtree(output_dir)
    output_dir.mkdir()

    atm = PRMSAtmosphere(
        control=control,
        **input_dict,
        budget_type=None,
        netcdf_output_dir=output_dir,
        netcdf_output_vars=netcdf_output_vars,
    )
    # Drop the object before checking for its files.
    # NOTE(review): presumably this finalizes/closes the netCDF outputs --
    # confirm against PRMSAtmosphere.
    del atm
    assert check_output_files(netcdf_output_vars)
    shutil.rmtree(output_dir)
    return None


# Soltab netCDF files are available (not the typical case), so all inputs,
# including soltab, are passed as files.
for label in netcdf_output_vars_dict:
    print(f"{label}:")
    preprocess_w_soltab(input_files, netcdf_output_vars_dict[label])
    print("")

## Clean up

In [None]:
# Remove the directory of CBH netCDF files created in step 1 so the notebook
# leaves no artifacts behind and a later run starts from a clean state.
shutil.rmtree(cbh_nc_dir)