To install kerchunk, run:
```
!pip install git+https://github.com/fsspec/kerchunk

```

In [1]:
import fsspec
import kerchunk
from kerchunk.hdf import SingleHdf5ToZarr
import ujson
import xarray as xr

In [2]:
# List the protocol names in fsspec's registry of known implementations.
# 's3' and 'reference', both used further down, should appear in the output.
from fsspec.registry import known_implementations
known_implementations.keys()

dict_keys(['abfs', 'adl', 'arrow_hdfs', 'asynclocal', 'az', 'blockcache', 'box', 'cached', 'dask', 'data', 'dbfs', 'dir', 'dropbox', 'dvc', 'file', 'filecache', 'ftp', 'gcs', 'gdrive', 'generic', 'git', 'github', 'gs', 'hdfs', 'hf', 'http', 'https', 'jlab', 'jupyter', 'lakefs', 'libarchive', 'local', 'memory', 'oci', 'ocilake', 'oss', 'reference', 'root', 's3', 's3a', 'sftp', 'simplecache', 'smb', 'ssh', 'tar', 'wandb', 'webdav', 'webhdfs', 'zip'])

### Locate S3 endpoints

In [3]:
# S3 filesystem handle created with anon=True (anonymous access, no credentials supplied).
fs = fsspec.filesystem('s3', anon=True)

In [4]:
# Collect every .nc object one directory level below the 2020/210 prefix of the
# noaa-goes16 ABI-L2-SSTF product (presumably year/day-of-year/hour -- confirm).
flist = fs.glob("s3://noaa-goes16/ABI-L2-SSTF/2020/210/*/*.nc")
# The paths displayed below come back without the "s3://" scheme.
flist

['noaa-goes16/ABI-L2-SSTF/2020/210/00/OR_ABI-L2-SSTF-M6_G16_s20202100000205_e20202100059513_c20202100105456.nc',
 'noaa-goes16/ABI-L2-SSTF/2020/210/01/OR_ABI-L2-SSTF-M6_G16_s20202100100205_e20202100159512_c20202100205423.nc',
 'noaa-goes16/ABI-L2-SSTF/2020/210/02/OR_ABI-L2-SSTF-M6_G16_s20202100200204_e20202100259512_c20202100305437.nc',
 'noaa-goes16/ABI-L2-SSTF/2020/210/03/OR_ABI-L2-SSTF-M6_G16_s20202100300204_e20202100359512_c20202100405457.nc',
 'noaa-goes16/ABI-L2-SSTF/2020/210/04/OR_ABI-L2-SSTF-M6_G16_s20202100400204_e20202100459511_c20202100505540.nc',
 'noaa-goes16/ABI-L2-SSTF/2020/210/05/OR_ABI-L2-SSTF-M6_G16_s20202100500203_e20202100559513_c20202100605464.nc',
 'noaa-goes16/ABI-L2-SSTF/2020/210/06/OR_ABI-L2-SSTF-M6_G16_s20202100600205_e20202100659513_c20202100705486.nc',
 'noaa-goes16/ABI-L2-SSTF/2020/210/07/OR_ABI-L2-SSTF-M6_G16_s20202100700205_e20202100759512_c20202100805387.nc',
 'noaa-goes16/ABI-L2-SSTF/2020/210/08/OR_ABI-L2-SSTF-M6_G16_s20202100800204_e20202100859512_c202

### Create single kerchunk file

In [5]:
# glob() returned scheme-less paths; put the "s3://" prefix back so each entry
# is a full URL. The startswith() guard keeps this cell idempotent: re-running
# it no longer produces "s3://s3://..." entries.
flist = [f if f.startswith('s3://') else 's3://' + f for f in flist]

# Use the first file as the worked example for the rest of the notebook.
u = flist[0]
u

's3://noaa-goes16/ABI-L2-SSTF/2020/210/00/OR_ABI-L2-SSTF-M6_G16_s20202100000205_e20202100059513_c20202100105456.nc'

In [6]:
%%time
# Open the remote file anonymously in binary mode and let kerchunk scan it.
with fsspec.open(u, mode="rb", anon=True) as infile:
    # translate() yields the reference set (a plain dict, as checked in the next cell).
    # NOTE(review): inline_threshold=100 presumably inlines chunks smaller than
    # 100 bytes directly into the references -- confirm against the kerchunk docs.
    reference = SingleHdf5ToZarr(infile, u, inline_threshold=100).translate()

CPU times: user 2.03 s, sys: 624 ms, total: 2.65 s
Wall time: 38 s


In [7]:
# Check what kind of object translate() handed back.
type(reference)

dict

In [8]:
!mkdir './example_jsons/'
!mkdir './example_jsons/individual/'

In [9]:
# Name the JSON after the source object: take the last path component of the
# URL and swap '.nc' for '.json'.
name = './example_jsons/individual/' + u.rsplit('/', 1)[-1].replace('.nc', '.json')

# Serialise the reference dict straight into the output file.
with open(name, 'w') as outf:
    ujson.dump(reference, outf)

### Open dataset using kerchunk file

In [12]:
%%time

ds = xr.open_dataset(
    "reference://", engine="zarr",
    backend_kwargs={
        "storage_options": {
            "fo": reference,
            "remote_protocol": "s3",
            "remote_options": {"anon": True}
        },
        "consolidated": False
    }
)

CPU times: user 45.3 ms, sys: 7.78 ms, total: 53.1 ms
Wall time: 679 ms


In [11]:
# Display the dataset that was opened from the kerchunk references.
ds