# Write National Water Model (NWM) forcing data to Zarr

In [1]:
from dask.distributed import Client, progress, LocalCluster
import pandas as pd
import xarray as xr
import s3fs

In [2]:
# Start a Dask cluster on the local machine and attach a client to it.
# The worker, core and memory figures reported below depend on the machine you are using.
cluster = LocalCluster()
client = Client(cluster)

# Show the client summary (scheduler address, dashboard URL, cluster resources).
client 

0,1
Client  Scheduler: tcp://127.0.0.1:38546  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 2  Cores: 2  Memory: 8.37 GB


In [3]:
# Base URL of the THREDDS `dodsC` (OPeNDAP) endpoint under which the NWM
# short-range forcing files are looked up; per-day subfolders are appended below.
root = 'http://tds.renci.org:8080/thredds/dodsC/nwm/forcing_short_range/'

In [4]:
# Hourly timestamps from 2018-04-01 18:00 through 2018-04-02 04:00, both ends
# included; one remote file is requested per timestamp.
dates = pd.date_range(
    '2018-04-01T18:00',
    '2018-04-02T04:00',
    freq='H',
)

In [5]:
# One URL per timestamp, laid out as
#   <root><YYYYMMDD>/nwm.t<HH>z.short_range.forcing.f001.conus.nc
# i.e. the f001 file of the cycle starting at each hour.
urls = [
    '{}{}/nwm.t{}z.short_range.forcing.f001.conus.nc'.format(
        root,
        stamp.strftime('%Y%m%d'),
        stamp.strftime('%H'),
    )
    for stamp in dates
]

In [6]:
# Root path of the Zarr store on S3 that is written and then re-read below
# (presumably '<bucket>/<prefix>' -- adjust to a location you can write to).
f_zarr = 'rsignell/nwm/test01'

In [7]:
%%time 
# Open all of the remote files as one dataset, concatenated along 'time'.
# The data variables come back as dask arrays (one chunk per file in the repr below).
# NOTE(review): newer xarray releases appear to require combine='nested' when
# concat_dim is given -- confirm against the installed version before upgrading.
ds = xr.open_mfdataset(urls,concat_dim='time')

CPU times: user 488 ms, sys: 48 ms, total: 536 ms
Wall time: 7.22 s


In [8]:
ds

<xarray.Dataset>
Dimensions:                     (nv: 2, reference_time: 11, time: 11, x: 4608, y: 3840)
Coordinates:
  * reference_time              (reference_time) datetime64[ns] 2018-04-01T18:00:00 ...
  * x                           (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 ...
  * y                           (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 ...
  * time                        (time) datetime64[ns] 2018-04-01T19:00:00 ...
Dimensions without coordinates: nv
Data variables:
    time_bounds                 (time, nv) datetime64[ns] dask.array<shape=(11, 2), chunksize=(1, 2)>
    ProjectionCoordinateSystem  (time) |S64 b'' b'' b'' b'' b'' b'' b'' b'' ...
    T2D                         (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>
    LWDOWN                      (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chunksize=(1, 3840, 4608)>
    Q2D                         (time, y, x) float64 dask.array<shape=(11, 3840, 4608), chun

In [9]:
# S3 filesystem handle; anon=False asks for authenticated rather than anonymous
# access (presumably needed for the write below -- credentials are not set here).
fs = s3fs.S3FileSystem(anon=False)

In [10]:
# Mapping view rooted at f_zarr on the S3 filesystem; passed to to_zarr as the store.
d = s3fs.S3Map(f_zarr, s3=fs)

In [11]:
# Write the whole dataset to the S3-backed store as Zarr (about 12 minutes wall
# time in the run recorded below). mode='w' creates the store, overwriting
# anything that already exists at that location.
%time ds.to_zarr(store=d, mode='w')

CPU times: user 24.3 s, sys: 5.58 s, total: 29.9 s
Wall time: 12min 2s


<xarray.backends.zarr.ZarrStore at 0x7f2b3c186b38>

## Test to see if we can read what we wrote

In [12]:
# Fresh mapping onto the same f_zarr location (same construction as `d` above),
# used for reading the store back.
s3map = s3fs.S3Map(f_zarr, s3=fs)

In [13]:
# Reading the store back succeeds when auto_chunk=False; the same call with
# auto_chunk=True fails (see the ValueError at the end of the notebook).
ds2 = xr.open_zarr(s3map, auto_chunk=False)

In [14]:
ds2

<xarray.Dataset>
Dimensions:                     (nv: 2, reference_time: 11, time: 11, x: 4608, y: 3840)
Coordinates:
  * reference_time              (reference_time) datetime64[ns] 2018-04-01T18:00:00 ...
  * time                        (time) datetime64[ns] 2018-04-01T19:00:00 ...
  * x                           (x) float64 -2.304e+06 -2.303e+06 -2.302e+06 ...
  * y                           (y) float64 -1.92e+06 -1.919e+06 -1.918e+06 ...
Dimensions without coordinates: nv
Data variables:
    LWDOWN                      (time, y, x) float64 ...
    PSFC                        (time, y, x) float64 ...
    ProjectionCoordinateSystem  (time) |S64 ...
    Q2D                         (time, y, x) float64 ...
    RAINRATE                    (time, y, x) float32 ...
    SWDOWN                      (time, y, x) float64 ...
    T2D                         (time, y, x) float64 ...
    U2D                         (time, y, x) float64 ...
    V2D                         (time, y, x) float64 ...


In [15]:
# Known failure, kept as a demonstration: with auto_chunk=True the open raises
# ValueError for this store. Catch and print it so the notebook still runs end
# to end; ds3 is only defined if the call succeeds.
# NOTE(review): the chunks=(11, 64) vs shape=(11,) mismatch presumably comes from
# the |S64 ProjectionCoordinateSystem variable -- confirm.
try:
    ds3 = xr.open_zarr(s3map, auto_chunk=True)
except ValueError as err:
    print('ValueError: {}'.format(err))

ValueError: Chunks and shape must be of the same length/dimension. Got chunks=(11, 64), shape=(11,)