In [None]:
from dask.distributed import Client
# Start a local Dask client: 4 workers with 4 threads each; the memory
# limit of 4GB is passed through to the workers.
client = Client(
    memory_limit='4GB',
    threads_per_worker=4,
    n_workers=4,
)
# Show the URL of the Dask dashboard so progress can be monitored in a browser.
print(client.dashboard_link)

import xarray as xr
import zarr
import numpy as np
import os
import glob

In [None]:
# Location of the input NetCDF files (placeholder, replace with the real path).
# NOTE(review): the value already ends in a '*.nc' wildcard although the name
# says "dir" - confirm whether a directory or a glob pattern is intended here.
netcdf_dir = r'path/to/data/*.nc'
# Target path of the Zarr store that the dataset is written to
# (placeholder, replace with the real path).
zarr_dir = r'path/to/data.zarr'

In [None]:
# Collect the input files. `netcdf_dir` may be either a directory or an
# already complete glob pattern; '*.nc' is appended only for a plain directory
# (blindly joining '*.nc' onto a pattern such as '.../*.nc' matches no files).
if any(wildcard in netcdf_dir for wildcard in '*?['):
    nc_pattern = netcdf_dir
else:
    nc_pattern = os.path.join(netcdf_dir, "*.nc")
# Sorted so the file order is deterministic across runs and platforms.
filelist = sorted(glob.glob(nc_pattern))
if not filelist:
    # Fail early with the pattern that was searched, instead of a less
    # specific error from open_mfdataset.
    raise FileNotFoundError(f"no NetCDF files match {nc_pattern!r}")

# Open all files lazily as one dataset, combined by their coordinate values.
# parallel = True is problematic
# NOTE(review): the chunk keys 'latitude'/'longitude' differ from the
# 'lat'/'lon' names used when re-chunking later - confirm the real dimension
# names of the data.
ds = xr.open_mfdataset(filelist, combine='by_coords',
                       chunks={'latitude' : 720, 'longitude': 1440})

In [None]:
# Per-variable write settings: every data variable gets its own Blosc
# compressor instance (zstd codec, compression level 5).
encoding = dict(
    (name, {'compressor': zarr.Blosc(cname='zstd', clevel=5)})
    for name in ds.data_vars
)

# Record the conversion in the dataset's global 'history' attribute
# (this replaces any value the attribute had before).
ds.attrs['history'] = 'converted to zarr by Martin Reinhardt, RSC4Earth, University of Leipzig'

In [None]:
# Chunking and _FillValue here
# NOTE(review): 'var', 'lat' and 'lon' look like placeholders and differ from
# the 'latitude'/'longitude' keys used when opening the files - confirm the
# real variable and dimension names before running.
ds['var'] = ds['var'].chunk(chunks={'time' : 100, 'lat' : 720, 'lon': 1440})

# Declare NaN as the fill value through the `encoding` mapping that is handed
# to to_zarr, because _FillValue is a serialization setting rather than a
# plain attribute.
# The former chained assignment (`ds['var'] = ds['var'].attrs[...] = np.nan`)
# first rebound ds['var'] to a scalar NaN and thereby discarded the data.
encoding.setdefault('var', {})['_FillValue'] = np.nan

In [None]:
# Write the dataset to the Zarr store at `zarr_dir` using the per-variable
# `encoding`. mode='w' requests a fresh write, consolidated=True asks for
# consolidated metadata, and compute=True runs the write immediately.
ds.to_zarr(
    zarr_dir,
    mode='w',
    encoding=encoding,
    consolidated=True,
    compute=True,
)

In [None]:
# Close the Dask client created at the top of the notebook once the
# conversion has finished.
client.close()