In [None]:
import logging
import fsspec
import xarray as xr
from distributed import Client
from kerchunk.combine import auto_dask
from kerchunk.hdf import SingleHdf5ToZarr

In [None]:
# Start a Dask distributed client with eight workers; only messages at
# ERROR level or above are let through by the `silence_logs` setting.
client = Client(
    n_workers=8,
    silence_logs=logging.ERROR,
)
# Leave the client as the cell's last expression so the notebook renders it.
client

In [None]:
# Anonymous S3 filesystem used to list the source files. The instance cache
# is skipped so a fresh filesystem object is created for this notebook.
fs_read = fsspec.filesystem("s3", anon=True, skip_instance_cache=True)

# Every object stored under the 2022-12-31 12Z run directory.
files_paths = fs_read.glob("s3://smn-ar-wrf/DATA/WRF/DET/2022/12/31/12/*")

# Restore the 's3://' scheme (which points to AWS) on each listed path, sort
# the URLs, and drop the final five entries so that only the "01H" data stays.
# NOTE(review): this relies on the non-"01H" files being exactly the last five
# entries after sorting -- confirm against the bucket listing.
file_pattern = sorted(f"s3://{path}" for path in files_paths)[:-5]
print(len(file_pattern))

In [None]:
# Build the combined reference set for all selected files in one call.
# Per the kerchunk documentation, `auto_dask` runs the per-file driver and the
# combine step as Dask tasks, so it uses the client created earlier.
mzz = auto_dask(
    urls=file_pattern,
    # Per-file step: each URL is handed to SingleHdf5ToZarr with these kwargs.
    single_driver=SingleHdf5ToZarr,
    single_kwargs={
        "inline_threshold": 300,
        # Options used to open each source file (anonymous, read-only binary).
        "storage_options": {
            "mode": "rb",
            "anon": True,
            "default_fill_cache": False,
            "default_cache_type": "first",
        },
    },
    # Combine step: concatenate along "time"; "y" and "x" are declared
    # identical across all inputs.
    mzz_kwargs=dict(concat_dims=["time"], identical_dims=["y", "x"]),
    n_batches=20,
    # Where the referenced bytes live and how to reach them.
    remote_protocol="s3",
    remote_options={"anon": True},
)

In [None]:
# Open the in-memory reference set (`mzz`) as an xarray Dataset through
# fsspec's "reference://" filesystem and the zarr engine.
ds = xr.open_dataset(
    "reference://",
    engine="zarr",
    backend_kwargs={
        "storage_options": {
            "fo": mzz,
            # Use the same remote settings that were given to `auto_dask`, so
            # chunk reads go to S3 anonymously rather than depending on
            # protocol inference and locally configured AWS credentials.
            "remote_protocol": "s3",
            "remote_options": {"anon": True},
        },
        # The reference set is read without consolidated zarr metadata.
        "consolidated": False,
    },
)
# Display the dataset summary as the cell output.
ds