This notebook was copied from https://nbviewer.org/gist/rsignell-usgs/78a4ce00360c65bc99764aa3e88a2493 and a description is available at https://discourse.pangeo.io/t/the-national-water-model-reanalysis-zarr-dataset-on-aws/1449

In [None]:
import hvplot.pandas
import hvplot.xarray
import geoviews as gv
from holoviews.operation.datashader import rasterize
import cartopy.crs as ccrs

In [None]:
import xarray as xr
import fsspec
import numpy as np

In [None]:
from dask.distributed import Client, LocalCluster, progress

# Start a local Dask cluster with 16 worker processes and connect a client to
# it, so that subsequent Dask computations are run on this cluster.
cluster = LocalCluster(n_workers=16)
client = Client(cluster)
# Bare expression as the last line of the cell: the notebook displays the
# client's summary.
client

In [None]:
# S3 location of the Zarr store that is opened as `ds` further down.
url = 's3://noaa-nwm-retro-v2-zarr-pds'

In [None]:
%%time
# Open the Zarr store at `url` as an xarray Dataset. `anon=True` requests
# anonymous (credential-free) access; `consolidated=True` reads the store's
# consolidated metadata rather than discovering each array separately.
ds = xr.open_zarr(fsspec.get_mapper(url, anon=True), consolidated=True)

In [None]:
# Name of the dataset variable that every later cell operates on.
var='streamflow'

In [None]:
# Display the selected variable's summary (bare expression at the end of a cell).
ds[var]

In [None]:
# Report the size of the full variable, converting bytes to terabytes (1e12 bytes).
print(f'Variable size: {ds[var].nbytes/1e12:.1f} TB')

In [None]:
%%time
# Position of the largest value of the variable at the 2017-06-01 00:00:00
# time step. `.values` extracts the result of argmax() as a NumPy value so it
# can be used as a positional index afterwards.
imax = ds[var].sel(time='2017-06-01 00:00:00').argmax().values

In [None]:
%%time
# Plot every entry along the first axis at position `imax` of the second axis,
# with grid lines enabled.
# NOTE(review): positional indexing presumes the variable's dimensions are
# ordered (time, feature) -- confirm against the dataset.
ds[var][:,imax].hvplot(grid=True)

In [None]:
# Restrict the variable to the time steps from 2010-04-01 00:00 through
# 2010-04-30 23:00 (label-based slice on the `time` coordinate).
streamflow_April_2010 = ds[var].sel(
    time=slice('2010-04-01 00:00', '2010-04-30 23:00'),
)

In [None]:
# Report the size of the April 2010 subset, converting bytes to gigabytes (1e9 bytes).
print(f'Variable size: {streamflow_April_2010.nbytes/1e9:.1f} GB')

In [None]:
%%time
# Average the April 2010 subset over the `time` dimension; `.compute()`
# evaluates the result and returns it as an in-memory array.
var_mean = streamflow_April_2010.mean(dim='time').compute()

In [None]:
# Convert the time-averaged values to a pandas Series and then to a
# single-column DataFrame.
df = var_mean.to_pandas().to_frame()

In [None]:
# Label the value column explicitly. Its incoming label depends on whether the
# Series behind `df` carried a name (0 when unnamed, otherwise that name), and
# rename() silently ignores labels it cannot find, so a hard-coded 0 can leave
# the column unrenamed and break the later lookup of 'transport'.
# NOTE(review): assumes the value column is the first column of df -- confirm.
df = df.rename(columns={df.columns[0]: "transport"})
# Attach the dataset's latitude and longitude as extra columns so the values
# can be plotted as geographic points.
df = df.assign(latitude=ds['latitude'], longitude=ds['longitude'])

In [None]:
# Scatter the rows of `df` at (longitude, latitude), coloured by the
# 'transport' column, declaring the coordinates as PlateCarree.
p = df.hvplot.points(
    'longitude',
    'latitude',
    crs=ccrs.PlateCarree(),
    c='transport',
    colorbar=True,
    size=14,
)

In [None]:
# Aggregate the points onto a raster using the mean of the values that fall in
# each cell, then style the result: hover tool, equal aspect, logarithmic
# colour scale with a lower colour limit of 1e-2 and the upper limit left as NaN.
g = rasterize(
    p,
    aggregator='mean',
    x_sampling=0.02,
    y_sampling=0.02,
    width=500,
).opts(
    tools=['hover'],
    aspect='equal',
    logz=True,
    cmap='viridis',
    clim=(1e-2, np.nan),
)

In [None]:
# Overlay the rasterized layer with the OpenStreetMap tile source and display
# the combined plot (bare expression at the end of the cell).
g * gv.tile_sources.OSM