In [1]:
# Import the required packages.

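# Workaround: put the local TEEHR source tree on sys.path so the imports below resolve.
# This is unnecessary once TEEHR is installed as a package.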
import sys
import os
sys.path.insert(0, "../../src")

from dask.distributed import Client
from pathlib import Path

# import teehr.loading.nwm22.nwm_grid_data as tlg  # For NWM data before 2023-09-19
import teehr.loading.nwm30.nwm_grid_data as tlg  # For NWM data after and including 2023-09-19

import teehr.utilities.generate_weights as gw
from teehr.loading.nwm22.const_nwm import CONUS_NWM_WKT

In [2]:
# Notebook configuration: local file locations and NWM ingest options.

# Working directory for the geospatial inputs; created here (parents included) if absent.
TEMP_GEO_DIR = Path.home() / "temp" / "geo"
TEMP_GEO_DIR.mkdir(parents=True, exist_ok=True)

# --- Inputs/outputs for generating the zonal weights ---
# Grid template, fetched from
# "https://storage.googleapis.com/national-water-model/nwm.20220101/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc"
GRID_TEMPLATE_FILE = TEMP_GEO_DIR / "nwm.t00z.short_range.forcing.f001.conus.nc"

# Zone polygons, fetched from
# "https://lynker-spatial.s3.amazonaws.com/v20/gpkg/nextgen_03S.gpkg"
ZONE_GEO_FILE = TEMP_GEO_DIR / "nextgen_03S.gpkg"
ZONAL_WEIGHTS_FILEPATH = TEMP_GEO_DIR / "nextgen_03S_weights.parquet"
UNIQUE_ZONE_ID = "id"  # Polygon attribute used as the zone identifier

# --- NWM selection ---
# Options: forcing_short_range, forcing_analysis_assim, forcing_medium_range
CONFIGURATION = "forcing_short_range"
OUTPUT_TYPE = "forcing"
VARIABLE_NAME = "RAINRATE"

# NOTE(review): the import cell selects the nwm30 loader, which its comment
# describes as for NWM data on/after 2023-09-19, yet this start date is
# earlier -- confirm the loader/date pairing is intended.
START_DATE = "2020-12-18"
INGEST_DAYS = 1

# --- Output locations ---
JSON_DIR = Path.home() / "temp" / "parquet" / "jsons"
OUTPUT_DIR = Path.home() / "temp" / "parquet"

# --- Processing options ---
# Alternative: "reference_time".
# NOTE(review): CONCAT_DIMS is not passed to any call visible in this notebook.
CONCAT_DIMS = ["time"]
# Only used if an assimilation run is selected.
T_MINUS = [0, 1, 2]
# True: missing NWM file(s) are skipped and processing resumes.
# False: TEEHR fails when a missing NWM file is encountered.
IGNORE_MISSING_FILE = True
# True (default): existing output files are overwritten.
# False: existing files are retained.
OVERWRITE_OUTPUT = True

In [3]:
# Fetch the NWM grid template file used when generating the zonal weights.
# The destination is interpolated from GRID_TEMPLATE_FILE (set in the
# configuration cell) rather than hard-coded, so the file is saved exactly
# where later cells read it, regardless of the user's home directory.
# The path is quoted in case it contains spaces.
!wget -O "{GRID_TEMPLATE_FILE}" \
https://storage.googleapis.com/national-water-model/nwm.20220101/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc

--2023-11-21 16:39:32--  https://storage.googleapis.com/national-water-model/nwm.20220101/forcing_short_range/nwm.t00z.short_range.forcing.f001.conus.nc
Resolving storage.googleapis.com (storage.googleapis.com)... 142.250.80.59, 142.250.80.91, 142.250.80.123, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.250.80.59|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 145568599 (139M) [application/x-netcdf]
Saving to: ‘/home/jovyan/temp/geo/nwm.t00z.short_range.forcing.f001.conus.nc’


2023-11-21 16:39:39 (23.2 MB/s) - ‘/home/jovyan/temp/geo/nwm.t00z.short_range.forcing.f001.conus.nc’ saved [145568599/145568599]



In [4]:
# Fetch the example zone polygons (NextGen 03S geopackage).
# The destination is interpolated from ZONE_GEO_FILE (set in the configuration
# cell) rather than hard-coded, so the file is saved exactly where later cells
# read it, regardless of the user's home directory.
# The path is quoted in case it contains spaces.
!wget -O "{ZONE_GEO_FILE}" https://lynker-spatial.s3.amazonaws.com/v20/gpkg/nextgen_03S.gpkg

--2023-11-21 16:40:04--  https://lynker-spatial.s3.amazonaws.com/v20/gpkg/nextgen_03S.gpkg
Resolving lynker-spatial.s3.amazonaws.com (lynker-spatial.s3.amazonaws.com)... 52.92.196.41, 52.92.234.73, 52.92.194.201, ...
Connecting to lynker-spatial.s3.amazonaws.com (lynker-spatial.s3.amazonaws.com)|52.92.196.41|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 60043264 (57M) [application/octet-stream]
Saving to: ‘/home/jovyan/temp/geo/nextgen_03S.gpkg’


2023-11-21 16:40:09 (15.5 MB/s) - ‘/home/jovyan/temp/geo/nextgen_03S.gpkg’ saved [60043264/60043264]



In [5]:
# Start a local Dask client with one worker per CPU minus one, and never
# fewer than one worker.
# os.cpu_count() returns None when the CPU count cannot be determined;
# fall back to 1 so the subtraction does not raise TypeError.
n_workers = max((os.cpu_count() or 1) - 1, 1)
client = Client(n_workers=n_workers)
client  # Last expression: display the client summary as the cell output

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 7
Total threads: 14,Total memory: 19.52 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:45387,Workers: 7
Dashboard: http://127.0.0.1:8787/status,Total threads: 14
Started: Just now,Total memory: 19.52 GiB

0,1
Comm: tcp://127.0.0.1:34531,Total threads: 2
Dashboard: http://127.0.0.1:41463/status,Memory: 2.79 GiB
Nanny: tcp://127.0.0.1:41143,
Local directory: /tmp/dask-scratch-space/worker-37_9mvjx,Local directory: /tmp/dask-scratch-space/worker-37_9mvjx

0,1
Comm: tcp://127.0.0.1:32985,Total threads: 2
Dashboard: http://127.0.0.1:33749/status,Memory: 2.79 GiB
Nanny: tcp://127.0.0.1:36965,
Local directory: /tmp/dask-scratch-space/worker-e7appw0n,Local directory: /tmp/dask-scratch-space/worker-e7appw0n

0,1
Comm: tcp://127.0.0.1:35403,Total threads: 2
Dashboard: http://127.0.0.1:37301/status,Memory: 2.79 GiB
Nanny: tcp://127.0.0.1:44855,
Local directory: /tmp/dask-scratch-space/worker-9jaz0qkg,Local directory: /tmp/dask-scratch-space/worker-9jaz0qkg

0,1
Comm: tcp://127.0.0.1:46627,Total threads: 2
Dashboard: http://127.0.0.1:40321/status,Memory: 2.79 GiB
Nanny: tcp://127.0.0.1:37105,
Local directory: /tmp/dask-scratch-space/worker-0xgyglpn,Local directory: /tmp/dask-scratch-space/worker-0xgyglpn

0,1
Comm: tcp://127.0.0.1:42419,Total threads: 2
Dashboard: http://127.0.0.1:46817/status,Memory: 2.79 GiB
Nanny: tcp://127.0.0.1:44729,
Local directory: /tmp/dask-scratch-space/worker-1izsws3t,Local directory: /tmp/dask-scratch-space/worker-1izsws3t

0,1
Comm: tcp://127.0.0.1:41241,Total threads: 2
Dashboard: http://127.0.0.1:34981/status,Memory: 2.79 GiB
Nanny: tcp://127.0.0.1:39333,
Local directory: /tmp/dask-scratch-space/worker-o2llbw2f,Local directory: /tmp/dask-scratch-space/worker-o2llbw2f

0,1
Comm: tcp://127.0.0.1:38073,Total threads: 2
Dashboard: http://127.0.0.1:33587/status,Memory: 2.79 GiB
Nanny: tcp://127.0.0.1:42739,
Local directory: /tmp/dask-scratch-space/worker-mgulsgzq,Local directory: /tmp/dask-scratch-space/worker-mgulsgzq


In [6]:
%%time
# Generate the zonal weights file from the grid template and the zone
# polygons; every input comes from the configuration cell.
gw.generate_weights_file(
    # Grid side: template dataset, the variable to use, and the grid CRS (WKT).
    template_dataset=GRID_TEMPLATE_FILE,
    variable_name=VARIABLE_NAME,
    crs_wkt=CONUS_NWM_WKT,
    # Polygon side: source file, the layer to use, and the zone ID field.
    zone_polygon_filepath=ZONE_GEO_FILE,
    layer="divides",
    unique_zone_id=UNIQUE_ZONE_ID,
    # Destination of the weights file.
    output_weights_filepath=ZONAL_WEIGHTS_FILEPATH,
)

  for s, v in _shapes(source, mask, connectivity, transform):


CPU times: user 10.5 s, sys: 2.28 s, total: 12.8 s
Wall time: 23.9 s


In [7]:
%%time
# Run the NWM gridded-data ingest using the settings from the configuration
# cell. Arguments are passed positionally, so their order must be preserved.
tlg.nwm_grids_to_parquet(
    CONFIGURATION,           # NWM configuration, e.g. forcing_short_range
    OUTPUT_TYPE,             # NWM output type
    VARIABLE_NAME,           # Variable to process
    START_DATE,              # Start date of the ingest
    INGEST_DAYS,             # Number of days to ingest
    ZONAL_WEIGHTS_FILEPATH,  # Weights file generated in the previous step
    JSON_DIR,                # Directory for the JSON files
    OUTPUT_DIR,              # Directory for the parquet output
    T_MINUS,                 # Only used if an assimilation run is selected
    IGNORE_MISSING_FILE,     # True: skip missing NWM files; False: fail
    OVERWRITE_OUTPUT,        # True: overwrite existing output; False: retain
)

CPU times: user 1min 17s, sys: 32.1 s, total: 1min 49s
Wall time: 21min 24s
