In [None]:
# Import the required packages.

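# Workaround: add the local source tree to the import path so `teehr` can be
# imported; this is unnecessary once TEEHR is installed as a package.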
import sys
sys.path.insert(0, "../../src")

from dask.distributed import Client
from pathlib import Path

import teehr.loading.nwm_grid_data as tlg
import teehr.loading.generate_weights as gw

In [None]:
# Notebook configuration: file locations and NWM run settings used by the cells below.

# --- Geospatial working directory (created if missing) ---
TEMP_GEO_DIR = Path.home() / "temp" / "geo"
TEMP_GEO_DIR.mkdir(parents=True, exist_ok=True)

# --- Weights generation ---
# Template grid file; source:
# "https://storage.googleapis.com/national-water-model/nwm.20220101/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc"
GRID_TEMPLATE_FILE = TEMP_GEO_DIR / "nwm.t00z.short_range.forcing.f001.conus.nc"

# Zone polygons; source:
# "https://nextgen-hydrofabric.s3.amazonaws.com/v1.2/nextgen_03S.gpkg"
ZONE_GEO_FILE = TEMP_GEO_DIR / "nextgen_03S.gpkg"
ZONAL_WEIGHTS_FILEPATH = TEMP_GEO_DIR / "nextgen_03S_weights.parquet"
UNIQUE_ZONE_ID = "id"
ZONE_LOADING_ARGS = dict(layer="divides")

# --- NWM run selection ---
RUN = "forcing_medium_range"  # other options: forcing_short_range, forcing_analysis_assim
OUTPUT_TYPE = "forcing"
VARIABLE_NAME = "RAINRATE"

# --- Ingest window ---
START_DATE = "2020-12-18"
INGEST_DAYS = 1

# --- Output locations ---
OUTPUT_DIR = Path.home() / "temp" / "parquet"
JSON_DIR = OUTPUT_DIR / "jsons"

CONCAT_DIMS = ["time"]  # alternative: "reference_time"
T_MINUS = [0, 1, 2]  # Only used if an assimilation run is selected

In [None]:
# Fetch template file
!wget -O /home/jovyan/temp/geo/nwm.t00z.short_range.forcing.f001.conus.nc \
https://storage.googleapis.com/national-water-model/nwm.20220101/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc

In [None]:
# Fetch example polygons
!wget -O /home/jovyan/temp/geo/nextgen_03S.gpkg https://nextgen-hydrofabric.s3.amazonaws.com/v1.2/nextgen_03S.gpkg

In [None]:
# Generate the zonal weights file from the zone polygons and the template grid.
gw.generate_weights_file(
    # Inputs: template grid and the variable to take from it.
    template_dataset=GRID_TEMPLATE_FILE,
    variable_name=VARIABLE_NAME,
    # Inputs: zone polygons, their ID field, and extra read arguments
    # (here {"layer": "divides"} — presumably forwarded to the polygon reader).
    zone_polygon_filepath=ZONE_GEO_FILE,
    unique_zone_id=UNIQUE_ZONE_ID,
    read_args=ZONE_LOADING_ARGS,
    # Output: passed as a plain string rather than a Path.
    output_weights_filepath=str(ZONAL_WEIGHTS_FILEPATH),
)

In [None]:
# Create a Dask distributed client requesting 16 workers.
# NOTE(review): the worker count is hard-coded — confirm it suits the machine
# this notebook runs on.
client = Client(n_workers=16)
# Leave `client` as the last expression so the notebook displays its summary.
client

In [None]:
%%time
# Run the NWM grid-to-parquet loading step and time the whole cell.
# Arguments are passed positionally, in this order: run name, output type,
# variable name, start date, number of days to ingest, zonal weights file,
# JSON directory, output directory, and the T_MINUS list.
# Per the configuration cell, T_MINUS is only used if an assimilation run
# is selected.
tlg.nwm_grids_to_parquet(RUN,
                         OUTPUT_TYPE,
                         VARIABLE_NAME,
                         START_DATE,
                         INGEST_DAYS,
                         ZONAL_WEIGHTS_FILEPATH,
                         JSON_DIR,
                         OUTPUT_DIR,
                         T_MINUS)