In [15]:
from kerchunk.netCDF3 import NetCDF3ToZarr
from kerchunk.combine import MultiZarrToZarr
import fsspec
import os
import ujson
from pathlib import Path
import xarray as xr
import numpy as np

In [16]:
# Local folder (a relative path) that receives the kerchunk reference JSON files.
# NOTE(review): this name shadows the builtin `dir`; kept because other cells use it.
dir = "ne_pacific_10km/json"
# Handle to the S3 filesystem used to discover the source NetCDF files.
fs = fsspec.filesystem("s3")

In [17]:
# Find every NetCDF file under the bucket prefix, then display the listing.
nc_pattern = "s3://noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/*.nc"
flist = fs.glob(nc_pattern)
flist

['noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/nep_revised_hind_moave_2020_01.nc',
 'noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/nep_revised_hind_moave_2020_02.nc',
 'noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/nep_revised_hind_moave_2020_03.nc',
 'noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/nep_revised_hind_moave_2020_04.nc',
 'noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/nep_revised_hind_moave_2020_05.nc',
 'noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/nep_revised_hind_moave_2020_06.nc',
 'noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/nep_revised_hind_moave_2020_07.nc',
 'noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/nep_revised_hind_moave_2020_08.nc',
 'noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/nep_revised_hind_moave_2020_09.nc',
 'noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/nep_revised_hind_moave_2020_10.nc',
 'noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/nep_revised_hind_moave_2020_11.nc',
 'noaa3100-oar-pmel-dev-cefi/ne_pacific_10km/nep_revised_hind_moave_2020_12.nc',
 'noaa3100-oar-pmel-dev-cefi

In [18]:
# Make sure the JSON output folder exists (missing parents are created; an
# already-existing folder is not an error).
Path(dir).mkdir(parents=True, exist_ok=True)

In [22]:
# Local filesystem handle used to write the reference JSON files.
fs2 = fsspec.filesystem("file")
# NOTE(review): `so` is not consumed anywhere in this cell. It is kept because
# other cells may read it; if these options were meant for the S3 reads, they
# probably belong in NetCDF3ToZarr(..., storage_options=so) — confirm.
so = dict(default_fill_cache=False, default_cache_type='first')

# Build one kerchunk reference JSON per NetCDF file found on S3.
for s3_key in flist:
    # The glob listing shows entries without the protocol, so add it back.
    u = f's3://{s3_key}'
    dachunks = NetCDF3ToZarr(u, max_chunk_size=10000)
    # The source file name without its extension gives each JSON a unique name.
    fstem = Path(u).stem
    outf = f'{dir}/{fstem}.json'
    print(outf)
    with fs2.open(outf, 'wb') as f:
        f.write(ujson.dumps(dachunks.translate()).encode())

ne_pacific_10km/json/nep_revised_hind_moave_2020_01.json
ne_pacific_10km/json/nep_revised_hind_moave_2020_02.json
ne_pacific_10km/json/nep_revised_hind_moave_2020_03.json
ne_pacific_10km/json/nep_revised_hind_moave_2020_04.json
ne_pacific_10km/json/nep_revised_hind_moave_2020_05.json
ne_pacific_10km/json/nep_revised_hind_moave_2020_06.json
ne_pacific_10km/json/nep_revised_hind_moave_2020_07.json
ne_pacific_10km/json/nep_revised_hind_moave_2020_08.json
ne_pacific_10km/json/nep_revised_hind_moave_2020_09.json
ne_pacific_10km/json/nep_revised_hind_moave_2020_10.json
ne_pacific_10km/json/nep_revised_hind_moave_2020_11.json
ne_pacific_10km/json/nep_revised_hind_moave_2020_12.json
ne_pacific_10km/json/roms_grd_nep.json


In [23]:
# Collect the per-file reference JSONs written above. The pattern only matches
# names starting with "nep", so roms_grd_nep.json is not part of this list.
json_pattern = f"{dir}/nep*.json"
json_list = fs2.glob(json_pattern)
json_list

['/home/rhs/ne_pacific_10km/json/nep_revised_hind_moave_2020_01.json',
 '/home/rhs/ne_pacific_10km/json/nep_revised_hind_moave_2020_02.json',
 '/home/rhs/ne_pacific_10km/json/nep_revised_hind_moave_2020_03.json',
 '/home/rhs/ne_pacific_10km/json/nep_revised_hind_moave_2020_04.json',
 '/home/rhs/ne_pacific_10km/json/nep_revised_hind_moave_2020_05.json',
 '/home/rhs/ne_pacific_10km/json/nep_revised_hind_moave_2020_06.json',
 '/home/rhs/ne_pacific_10km/json/nep_revised_hind_moave_2020_07.json',
 '/home/rhs/ne_pacific_10km/json/nep_revised_hind_moave_2020_08.json',
 '/home/rhs/ne_pacific_10km/json/nep_revised_hind_moave_2020_09.json',
 '/home/rhs/ne_pacific_10km/json/nep_revised_hind_moave_2020_10.json',
 '/home/rhs/ne_pacific_10km/json/nep_revised_hind_moave_2020_11.json',
 '/home/rhs/ne_pacific_10km/json/nep_revised_hind_moave_2020_12.json']

I am pretty much following the example from this blog post and notebook:
https://medium.com/pangeo/accessing-netcdf-and-grib-file-collections-as-cloud-native-virtual-datasets-using-kerchunk-625a2d0a9191
https://nbviewer.org/github/peterm790/ERA5_Kerchunk_tutorial/blob/master/ERA5_tutorial.ipynb

N.B. It seems that `hc` and `Cs_r` do not vary from file to file, so they belong in the `identical_dims` list (note that the cell below does not currently list them).

In [24]:
%%time
# Combine the per-file references into one virtual dataset and save it as JSON.
# NOTE(review): the notebook text says hc and Cs_r are the same in every file,
# but they are not listed in identical_dims here — confirm which is intended.
mzz = MultiZarrToZarr(
    json_list,
    # Variables passed as identical_dims; presumably the same in every file — verify.
    identical_dims=["lat_rho", "lon_rho", "s_rho", "s_w"],
    # Dimension along which the individual files are merged.
    concat_dims=["ocean_time"],
)
d = mzz.translate()
combined_path = f"{dir}/combined.json"
with fs2.open(combined_path, "wb") as f:
    f.write(ujson.dumps(d).encode())

CPU times: user 511 ms, sys: 68.8 ms, total: 580 ms
Wall time: 2.61 s
