### dHBV2 MERIT Download script

This notebook downloads the dHBV 2.0 MERIT runoff data and saves it locally for use in routing.

NOTE: a NetCDF (.nc) file is used as an intermediate format because this dataset is stored as zarr v2, whereas DDR uses zarr v3.

Source: https://zenodo.org/records/15784945

In [None]:
# zarr is held below v3 because the source stores are opened as zarr v2 in this notebook;
# s3fs is installed for reading the s3:// store paths used below.
!uv pip install "zarr<3" "s3fs"

In [None]:
import posixpath
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import xarray as xr
import zarr
from tqdm import tqdm

In [None]:
# Location of the MERIT catchment shapefile covering all areas -- edit this for your system.
merit_catchments_file = "/projects/mhpi/data/MERIT/raw/continent/cat_pfaf_7_MERIT_Hydro_v07_Basins_v01_bugfix1.shp"
gdf = gpd.read_file(merit_catchments_file)

# Lookup table: COMID -> "unitarea" attribute of that catchment
# (consumed by the conversion cell below as an area in km²).
unitarea_by_comid = gdf.set_index("COMID")["unitarea"]
id_to_area = unitarea_by_comid.to_dict()

In [None]:
# S3 location of the dHBV 2.0 daily flux stores; each MERIT zone is a zarr v2 group
# under this prefix and is opened anonymously (no credentials needed).
s3_path: str = "s3://psu-diff-water-models/dhbv2.0_40yr_dataset/1980-2020_daily_flux_merit"
MERIT_ZONES = ["71", "72", "73", "74", "75", "77", "78"]

# Value substituted for NaN, +/-inf and exact zeros in the converted runoff (m³/s).
MIN_FLOW_M3_S = 1e-6

# One xarray Dataset per zone; concatenated along "divide_id" in the next cell.
datasets = []

for zone in tqdm(MERIT_ZONES, desc="Loading MERIT zone data"):
    merit_path = posixpath.join(s3_path, zone)
    root = zarr.open_group(merit_path, mode="r", zarr_version=2, storage_options={"anon": True})

    comids = root.COMID[:].astype(int)
    runoff_mm_day = root.Runoff[:]  # (n_comids, n_times), mm/day
    time_indices = root.Time[:]

    # Every COMID must have a catchment area. A missing one is a hard error (still a
    # KeyError, as before), but the message now names the zone and how many are missing
    # instead of only the first offending id.
    comid_list = comids.tolist()
    missing_comids = [comid for comid in comid_list if comid not in id_to_area]
    if missing_comids:
        raise KeyError(
            f"zone {zone}: {len(missing_comids)} COMID(s) have no catchment area, "
            f"e.g. {missing_comids[:5]}"
        )

    # Get areas for each COMID (km²)
    areas_km2 = np.array([id_to_area[comid] for comid in comid_list])

    # Convert mm/day to m³/s
    # mm/day * km² * (1000 m/km)² * (0.001 m/mm) / (86400 s/day)
    # = mm/day * km² * 1e6 m²/km² * 1e-3 m/mm / 86400 s/day
    # = mm/day * km² * 1000 / 86400
    conversion_factor = areas_km2 * 1000 / 86400  # shape: (n_comids,)

    # Apply conversion: (n_comids, n_times) * (n_comids, 1) -> (n_comids, n_times).
    # Downcast to the storage dtype *before* sanitising: values that underflow to 0
    # (or overflow to inf) in the float32 cast would otherwise slip past the
    # replacement below and end up in the saved file.
    qr_m3_s = (runoff_mm_day * conversion_factor[:, np.newaxis]).astype(np.float32)

    # Replace NaN, inf, and 0 with MIN_FLOW_M3_S. copy=False is safe because qr_m3_s
    # is a fresh array created by astype() above, and it avoids another full-size copy.
    qr_m3_s = np.nan_to_num(
        qr_m3_s, copy=False, nan=MIN_FLOW_M3_S, posinf=MIN_FLOW_M3_S, neginf=MIN_FLOW_M3_S
    )
    qr_m3_s[qr_m3_s == 0] = MIN_FLOW_M3_S

    # NOTE(review): assumes root.Time holds values pd.to_datetime parses directly
    # (e.g. date strings or datetime64); plain integer offsets would be read as
    # nanoseconds since the epoch -- confirm against the store.
    time_index = pd.to_datetime(time_indices).normalize()

    ds = xr.Dataset(
        data_vars={
            "Qr": (["divide_id", "time"], qr_m3_s),
        },
        coords={
            "divide_id": (["divide_id"], comids),
            "time": (["time"], time_index),
        },
        attrs={
            "units": "m^3/s",
            "source": "dHBV UH runoff",
            "url": "https://zenodo.org/records/15784945",
        },
    )
    ds["Qr"].attrs["units"] = "m^3/s"
    datasets.append(ds)

In [None]:
print("Concatenating zones...")
# join="exact" makes the concat fail loudly if the zones do not share an identical time
# axis. The default outer join would instead pad the gaps with NaN, silently undoing the
# NaN/zero replacement applied to each zone.
combined_ds = xr.concat(datasets, dim="divide_id", join="exact")

# Create the output directory up front so a missing folder does not abort the save
# after the (slow) download has already completed.
output_path = Path("../../data/merit_dhbv2_UH_retrospective.nc")
output_path.parent.mkdir(parents=True, exist_ok=True)

print("saving to .nc")
combined_ds.to_netcdf(output_path)