In [None]:
# Import the required packages.

# Workaround: add the local TEEHR source tree to sys.path.
# This is not needed once TEEHR is installed as a package.
import sys
sys.path.insert(0, "../../src")

from dask.distributed import Client
from pathlib import Path

import teehr.loading.nwm_grid_data as tlg
import teehr.loading.generate_weights as gw

In [None]:
# Notebook configuration: file locations and NWM ingest settings used by the
# cells below. Everything a reader might want to tune lives in this cell.

# Scratch directory for geospatial inputs; created up front so the download
# cells have somewhere to write.
TEMP_GEO_DIR = Path(Path.home(), "temp/geo")
TEMP_GEO_DIR.mkdir(exist_ok=True, parents=True)

# Grid template used when generating weights. It can be fetched from:
# https://storage.googleapis.com/national-water-model/nwm.20220101/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc
GRID_TEMPLATE_FILE = Path(TEMP_GEO_DIR, "nwm.t00z.short_range.forcing.f001.conus.nc")

# Zone polygons (input) and the weights file produced from them (output).
# The weights file is derived from TEMP_GEO_DIR so the two cannot drift apart.
ZONE_GEO_FILE = Path(Path.home(), "shared/rti-eval/org/geo/wbdhu10_conus.parquet")
ZONAL_WEIGHTS_FILEPATH = Path(TEMP_GEO_DIR, "wbdhuc10_medium_range_weights.parquet")

# NWM
RUN = "forcing_medium_range"  # forcing_short_range, forcing_analysis_assim
OUTPUT_TYPE = "forcing"
VARIABLE_NAME = "RAINRATE"

START_DATE = "2020-12-18"
INGEST_DAYS = 1

# Directories handed to the ingest call at the end of the notebook.
JSON_DIR = Path(Path.home(), "temp/parquet/jsons/")
OUTPUT_DIR = Path(Path.home(), "temp/parquet")

CONCAT_DIMS = ["time"]  # "reference_time"
T_MINUS = [0, 1, 2]  # Only used if an assimilation run is selected

In [None]:
# Fetch template file
!wget -O /home/jovyan/temp/geo/nwm.t00z.short_range.forcing.f001.conus.nc \
https://storage.googleapis.com/national-water-model/nwm.20220101/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc

In [4]:
# Fetch example polygons
!wget -O /home/jovyan/temp/geo/nextgen_02.gpkg https://nextgen-hydrofabric.s3.amazonaws.com/v1.2/nextgen_02.gpkg

--2023-05-10 12:56:27--  https://nextgen-hydrofabric.s3.amazonaws.com/v1.2/nextgen_02.gpkg
Resolving nextgen-hydrofabric.s3.amazonaws.com (nextgen-hydrofabric.s3.amazonaws.com)... 52.217.122.121, 3.5.29.70, 54.231.226.57, ...
Connecting to nextgen-hydrofabric.s3.amazonaws.com (nextgen-hydrofabric.s3.amazonaws.com)|52.217.122.121|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 139939840 (133M) [binary/octet-stream]
Saving to: ‘/home/sam/temp/nextgen_02.gpkg’


2023-05-10 12:56:32 (34.4 MB/s) - ‘/home/sam/temp/nextgen_02.gpkg’ saved [139939840/139939840]



In [None]:
# Generate the weights file from the zone polygons and the grid template,
# writing the result to ZONAL_WEIGHTS_FILEPATH for the ingest step to use.
# Paths are converted to str before being handed to TEEHR.
gw.generate_weights_file(
    zone_polygon_filepath=str(ZONE_GEO_FILE),
    template_dataset=str(GRID_TEMPLATE_FILE),
    # Use the config constant (not a repeated literal) so the weights and the
    # ingest call always refer to the same variable.
    variable_name=VARIABLE_NAME,
    output_weights_filepath=str(ZONAL_WEIGHTS_FILEPATH),
)

In [None]:
# Start a Dask distributed client with 16 workers. No scheduler address is
# given, so per the dask.distributed docs this should spin up a local cluster.
# Presumably the ingest cell below relies on it for parallelism — TODO confirm;
# lower n_workers on machines with fewer cores.
client = Client(n_workers=16)
# Bare last expression so the notebook displays the client summary.
client

In [None]:
%%time
tlg.nwm_grids_to_parquet(RUN,
                         OUTPUT_TYPE,
                         VARIABLE_NAME,
                         START_DATE,
                         INGEST_DAYS,
                         ZONAL_WEIGHTS_FILEPATH,
                         JSON_DIR,
                         OUTPUT_DIR,
                         T_MINUS)