In [1]:
from kerchunk.hdf import SingleHdf5ToZarr
from kerchunk.netCDF3 import NetCDF3ToZarr
from kerchunk.combine import MultiZarrToZarr
import fsspec
import os
import ujson
from pathlib import Path
import xarray as xr
import numpy as np

In [2]:
# S3 filesystem handle; used further down to glob each dataset directory in the
# source bucket for .nc files.
# NOTE(review): no storage options are passed, so S3 access presumably relies on
# whatever credentials/config the environment provides — confirm.
fs = fsspec.filesystem('s3')
# Dataset directories (relative to the bucket root) to build reference files for.
# Each (product, frequency, release) combination is listed in both its 'raw' and
# 'regrid' variants, raw first, and every entry ends with a trailing slash.
dirs = [
    f'northwest_atlantic/full_domain/{product}/{frequency}/{grid}/{release}/'
    for product, frequency, release in (
        ('hindcast', 'daily', 'r20230520'),
        ('hindcast', 'monthly', 'r20230520'),
        ('seasonal_forecast', 'monthly', 'r20250212'),
        ('seasonal_reforecast', 'monthly', 'r20250212'),
    )
    for grid in ('raw', 'regrid')
]

In [3]:
def write_chunks(u):
    """Write a kerchunk reference JSON for one remote netCDF file.

    The output is named ``<stem of u>.json`` and placed in the directory named
    by the notebook-level variable ``tdir``; it is written through the
    notebook-level filesystem object ``fs2``. Both globals must be set before
    this function is called. Files that already have a non-empty JSON are
    skipped, so an interrupted run can simply be restarted.

    Parameters
    ----------
    u : str
        Path of the source file without the ``s3://`` scheme; the scheme is
        prepended here.

    Returns
    -------
    None

    Raises
    ------
    Exception
        Whatever ``NetCDF3ToZarr`` or ``translate()`` raises when the file
        cannot be translated. No output file is created in that case.
    """
    u = 's3://' + u
    outf = f'{tdir}/{Path(u).stem}.json'

    # Already done: the JSON exists and has content. A zero-byte file is
    # treated as missing so that it gets regenerated.
    if os.path.exists(outf) and os.path.getsize(outf) > 0:
        return

    try:
        dachunks = SingleHdf5ToZarr(u)
    except Exception:
        # Some inputs (the static files) are classic netCDF 3 rather than
        # HDF5-based, so fall back to the netCDF 3 translator.
        print(f'Trying netCDF 3 on {u}')
        dachunks = NetCDF3ToZarr(u)

    # Serialise before opening the output so that a failing translate() does
    # not leave an empty JSON file behind.
    payload = ujson.dumps(dachunks.translate()).encode()
    with fs2.open(outf, 'wb') as f:
        f.write(payload)

In [4]:
# Local filesystem that write_chunks() uses (through the global ``fs2``) to write
# the JSON files. It does not depend on the directory being processed, so it is
# created once instead of on every iteration.
fs2 = fsspec.filesystem("file")
# NOTE(review): ``so`` is not used by any code visible in these cells; it is kept
# in case a later cell reads it — confirm and remove if not.
so = dict(default_fill_cache=False, default_cache_type='first')

for tdir in dirs:
    # ``tdir`` is deliberately a global loop variable: write_chunks() reads it to
    # decide which local directory each JSON file goes into.
    glob = f's3://noaa-oar-cefi-regional-mom6-pds/{tdir}*.nc'
    flist = fs.glob(glob)
    # Mirror the bucket layout locally so outputs from different datasets stay apart.
    os.makedirs(tdir, exist_ok=True)
    print(f'Making chunks for {tdir} with {len(flist)} files')
    for u in flist:
        write_chunks(u)

Making chunks for northwest_atlantic/full_domain/hindcast/daily/raw/r20230520/ with 24 files
Making chunks for northwest_atlantic/full_domain/hindcast/daily/regrid/r20230520/ with 20 files
Making chunks for northwest_atlantic/full_domain/hindcast/monthly/raw/r20230520/ with 492 files
Making chunks for northwest_atlantic/full_domain/hindcast/monthly/regrid/r20230520/ with 452 files
Making chunks for northwest_atlantic/full_domain/seasonal_forecast/monthly/raw/r20250212/ with 4 files
Making chunks for northwest_atlantic/full_domain/seasonal_forecast/monthly/regrid/r20250212/ with 3 files
Making chunks for northwest_atlantic/full_domain/seasonal_reforecast/monthly/raw/r20250212/ with 361 files
Making chunks for northwest_atlantic/full_domain/seasonal_reforecast/monthly/regrid/r20250212/ with 360 files
