In [None]:
# Install kerchunk, which provides the SingleHdf5ToZarr and MultiZarrToZarr helpers imported below.
!mamba install kerchunk -y --quiet

In [None]:
# Pin h5py to 3.2: per the note on the command below, the environment's default version does not work here.
!mamba install h5py=3.2 -y --quiet # Default version in this environment does not work. Must update

In [1]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import s3fs
import datetime as dt
import logging
import fsspec
import ujson
import requests
from tqdm import tqdm
from glob import glob
import os

In [2]:
from kerchunk.hdf import SingleHdf5ToZarr
from kerchunk.combine import MultiZarrToZarr

In [3]:
# Per-DAAC endpoints used to request temporary S3 credentials, keyed by DAAC short name.
# Every endpoint has the form https://<host>/s3credentials, so only the hosts are listed.
s3_cred_endpoint = {
    daac: f'https://{host}/s3credentials'
    for daac, host in (
        ('podaac', 'archive.podaac.earthdata.nasa.gov'),
        ('lpdaac', 'data.lpdaac.earthdatacloud.nasa.gov'),
        ('ornldaac', 'data.ornldaac.earthdata.nasa.gov'),
        ('gesdisc', 'data.gesdisc.earthdata.nasa.gov'),
    )
}

In [4]:
def get_temp_creds(provider='podaac'):
    """Request temporary S3 credentials from a DAAC's ``s3credentials`` endpoint.

    Parameters
    ----------
    provider : str, optional
        Key into ``s3_cred_endpoint`` selecting which endpoint to query.
        Defaults to ``'podaac'``, the endpoint this function always used
        before the parameter existed.

    Returns
    -------
    dict
        The decoded JSON body of the response. Later cells read its
        ``accessKeyId``, ``secretAccessKey`` and ``sessionToken`` entries.

    Raises
    ------
    ValueError
        If ``provider`` is not a key of ``s3_cred_endpoint``.
    requests.HTTPError
        If the endpoint answers with a 4xx/5xx status.
    """
    try:
        temp_creds_url = s3_cred_endpoint[provider]
    except KeyError:
        known = ', '.join(sorted(s3_cred_endpoint))
        raise ValueError(
            f"Unknown provider {provider!r}; expected one of: {known}"
        ) from None
    # A timeout keeps the cell from hanging indefinitely on a stalled connection.
    response = requests.get(temp_creds_url, timeout=30)
    # Surface a rejected request as an HTTP error instead of an opaque
    # JSON decoding failure on the error page.
    response.raise_for_status()
    return response.json()

In [5]:
# Fetch the temporary credentials once; their accessKeyId / secretAccessKey /
# sessionToken entries are read when the S3 filesystem and r_opts are built below.
temp_creds_req = get_temp_creds()

In [6]:
# S3 filesystem handle built from the temporary credentials fetched above.
# anon=False: use the supplied key/secret/token rather than anonymous access.
fs = s3fs.S3FileSystem(
    anon=False,
    key=temp_creds_req['accessKeyId'],
    secret=temp_creds_req['secretAccessKey'],
    token=temp_creds_req['sessionToken']
)

In [7]:
# A single granule (the 2015-01 monthly-mean file) that is opened directly in the next cells.
url = 's3://podaac-ops-cumulus-protected/ECCO_L4_SSH_05DEG_MONTHLY_V4R4/SEA_SURFACE_HEIGHT_mon_mean_2015-01_ECCO_V4r4_latlon_0p50deg.nc'

In [8]:
# Open the granule as a binary, read-only file-like object for xarray to read from.
# NOTE(review): this handle is not closed anywhere in the visible notebook.
s3_file_obj = fs.open(url, mode='rb')

In [9]:
# Open the granule with the h5netcdf engine. chunks='auto' requests chunked
# arrays, which is why the repr below reports tasks and chunks.
xr_ds = xr.open_dataset(s3_file_obj, chunks='auto', engine='h5netcdf')
xr_ds

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(1, 2)","(1, 2)"
Count,2 Tasks,1 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 16 B 16 B Shape (1, 2) (1, 2) Count 2 Tasks 1 Chunks Type datetime64[ns] numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(1, 2)","(1, 2)"
Count,2 Tasks,1 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.81 kiB,2.81 kiB
Shape,"(360, 2)","(360, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.81 kiB 2.81 kiB Shape (360, 2) (360, 2) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2  360,

Unnamed: 0,Array,Chunk
Bytes,2.81 kiB,2.81 kiB
Shape,"(360, 2)","(360, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.62 kiB,5.62 kiB
Shape,"(720, 2)","(720, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.62 kiB 5.62 kiB Shape (720, 2) (720, 2) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2  720,

Unnamed: 0,Array,Chunk
Bytes,5.62 kiB,5.62 kiB
Shape,"(720, 2)","(720, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(1, 360, 720)","(1, 360, 720)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 0.99 MiB 0.99 MiB Shape (1, 360, 720) (1, 360, 720) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",720  360  1,

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(1, 360, 720)","(1, 360, 720)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(1, 360, 720)","(1, 360, 720)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 0.99 MiB 0.99 MiB Shape (1, 360, 720) (1, 360, 720) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",720  360  1,

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(1, 360, 720)","(1, 360, 720)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(1, 360, 720)","(1, 360, 720)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 0.99 MiB 0.99 MiB Shape (1, 360, 720) (1, 360, 720) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",720  360  1,

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(1, 360, 720)","(1, 360, 720)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray


---

In [None]:
# def gen_json(u):
#     so = dict(
#         mode="rb", 
#         anon=False, 
#         default_fill_cache=False,
#         default_cache_type="none"
#     )
#     with fs.open(u, **so) as infile:
#         #print(u)
#         h5chunks = SingleHdf5ToZarr(infile, u, inline_threshold=300)
#         with open(f"jsons/{u.split('/')[-1]}.json", 'wb') as outfile:
#            outfile.write(ujson.dumps(h5chunks.translate()).encode())
#            #outfile.write(ujson.dumps(h5chunks.translate()))

In [10]:
# Monthly granules from 2014-12 through 2015-12 (13 files).
# The paths differ only in their YYYY-MM stamp, so they are generated from it.
urls = [
    's3://podaac-ops-cumulus-protected/ECCO_L4_SSH_05DEG_MONTHLY_V4R4/'
    f'SEA_SURFACE_HEIGHT_mon_mean_{year_month}_ECCO_V4r4_latlon_0p50deg.nc'
    for year_month in ['2014-12'] + [f'2015-{month:02d}' for month in range(1, 13)]
]

In [11]:
# Keyword arguments forwarded to fs.open() for every granule.
# NOTE(review): anon / default_fill_cache / default_cache_type are spelled like
# S3FileSystem constructor options -- confirm they have any effect on fs.open().
so = dict(
    mode="rb",
    anon=False,
    default_fill_cache=False,
    default_cache_type="none"
)

# Directory that receives one reference file per granule. Create it up front so
# the loop does not fail with FileNotFoundError on a fresh checkout.
os.makedirs("jsons", exist_ok=True)

for u in urls:
    with fs.open(u, **so) as infile:
        # Scan the remote file and build its reference set.
        h5chunks = SingleHdf5ToZarr(infile, u, inline_threshold=300)
        translate = h5chunks.translate()
    # Open the output only after translation succeeded, so a failure never
    # leaves an empty .json behind for the later directory listing to pick up.
    with open(f"jsons/{u.split('/')[-1]}.json", 'wb') as outfile:
        outfile.write(ujson.dumps(translate).encode())

In [None]:
#gen_json(urls[0])

In [None]:
#for u in urls:
#    gen_json(u)

In [15]:
# Local-filesystem handle, used below to list the reference JSON files in jsons/.
fs_ref_list = fsspec.filesystem('file')

In [16]:
# Collect the per-granule reference files in name order.
# Match on the file extension: ls() returns full paths here, so a plain
# substring test ('.json' in path) would also accept names such as
# 'x.json.bak' or any path with '.json' in one of its directory names.
reference_list = sorted(
    path
    for path in fs_ref_list.ls('jsons', detail=False)
    if path.endswith('.json')
)
reference_list

['/home/jovyan/earthdata-cloud-cookbook/examples/PODAAC/jsons/SEA_SURFACE_HEIGHT_mon_mean_2014-12_ECCO_V4r4_latlon_0p50deg.nc.json',
 '/home/jovyan/earthdata-cloud-cookbook/examples/PODAAC/jsons/SEA_SURFACE_HEIGHT_mon_mean_2015-01_ECCO_V4r4_latlon_0p50deg.nc.json',
 '/home/jovyan/earthdata-cloud-cookbook/examples/PODAAC/jsons/SEA_SURFACE_HEIGHT_mon_mean_2015-02_ECCO_V4r4_latlon_0p50deg.nc.json',
 '/home/jovyan/earthdata-cloud-cookbook/examples/PODAAC/jsons/SEA_SURFACE_HEIGHT_mon_mean_2015-03_ECCO_V4r4_latlon_0p50deg.nc.json',
 '/home/jovyan/earthdata-cloud-cookbook/examples/PODAAC/jsons/SEA_SURFACE_HEIGHT_mon_mean_2015-04_ECCO_V4r4_latlon_0p50deg.nc.json',
 '/home/jovyan/earthdata-cloud-cookbook/examples/PODAAC/jsons/SEA_SURFACE_HEIGHT_mon_mean_2015-05_ECCO_V4r4_latlon_0p50deg.nc.json',
 '/home/jovyan/earthdata-cloud-cookbook/examples/PODAAC/jsons/SEA_SURFACE_HEIGHT_mon_mean_2015-06_ECCO_V4r4_latlon_0p50deg.nc.json',
 '/home/jovyan/earthdata-cloud-cookbook/examples/PODAAC/jsons/SEA_SUR

In [17]:
# Load the first entry of the sorted reference list (the 2014-12 granule).
with open(reference_list[0]) as ref_file:
    reference = ujson.load(ref_file)

In [15]:
#reference

In [18]:
# Options passed as ref_storage_args (storage of the reference JSON).
s_opts = dict(skip_instance_cache=True)

# Options passed as remote_options (the S3 filesystem holding the NetCDF files):
# the same temporary credentials that were used to build `fs`.
r_opts = dict(
    anon=False,
    key=temp_creds_req['accessKeyId'],
    secret=temp_creds_req['secretAccessKey'],
    token=temp_creds_req['sessionToken'],
)

In [19]:
# Build a "reference" filesystem from the single-granule reference loaded above.
# remote_protocol / remote_options say how to reach the files the references
# point to: S3, using the temporary credentials held in r_opts.
fs_single = fsspec.filesystem("reference", 
                       fo=reference, 
                       ref_storage_args=s_opts,
                       remote_protocol='s3', 
                       remote_options=r_opts)

In [20]:
# Map the root of the reference filesystem and open it with the zarr engine.
# chunks={} keeps the arrays chunked (the repr below reports tasks and chunks).
m = fs_single.get_mapper("")
ds_single = xr.open_dataset(m, engine="zarr", backend_kwargs={'consolidated':False}, chunks={})

In [21]:
ds_single

Unnamed: 0,Array,Chunk
Bytes,2.81 kiB,2.81 kiB
Shape,"(360, 2)","(360, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.81 kiB 2.81 kiB Shape (360, 2) (360, 2) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2  360,

Unnamed: 0,Array,Chunk
Bytes,2.81 kiB,2.81 kiB
Shape,"(360, 2)","(360, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.62 kiB,5.62 kiB
Shape,"(720, 2)","(720, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.62 kiB 5.62 kiB Shape (720, 2) (720, 2) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",2  720,

Unnamed: 0,Array,Chunk
Bytes,5.62 kiB,5.62 kiB
Shape,"(720, 2)","(720, 2)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(1, 2)","(1, 2)"
Count,2 Tasks,1 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 16 B 16 B Shape (1, 2) (1, 2) Count 2 Tasks 1 Chunks Type datetime64[ns] numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(1, 2)","(1, 2)"
Count,2 Tasks,1 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(1, 360, 720)","(1, 360, 720)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 0.99 MiB 0.99 MiB Shape (1, 360, 720) (1, 360, 720) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",720  360  1,

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(1, 360, 720)","(1, 360, 720)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(1, 360, 720)","(1, 360, 720)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 0.99 MiB 0.99 MiB Shape (1, 360, 720) (1, 360, 720) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",720  360  1,

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(1, 360, 720)","(1, 360, 720)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(1, 360, 720)","(1, 360, 720)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 0.99 MiB 0.99 MiB Shape (1, 360, 720) (1, 360, 720) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",720  360  1,

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(1, 360, 720)","(1, 360, 720)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray


---

Combine the individual per-file reference files created above into one time-series reference.

In [23]:
# Merge the per-granule reference files into one reference set, concatenating
# along "time".
# NOTE(review): in the ds_multi repr below, the arrays that are (360, 2) and
# (720, 2) in a single file appear as (13, 360, 2) and (13, 720, 2), i.e. they
# were stacked along time as well. If that is not intended, MultiZarrToZarr's
# identical_dims option may be the fix -- confirm against the kerchunk docs.
mzz = MultiZarrToZarr(
    reference_list,
    remote_protocol="s3",
    remote_options=r_opts,
    concat_dims=["time"]
)

# Combined, in-memory reference set covering every file in reference_list.
out = mzz.translate()

In [24]:
# Reference filesystem over the combined reference set `out`, configured the
# same way as fs_single (S3 as the remote protocol, credentials from r_opts).
fs_mzz = fsspec.filesystem("reference",
                           fo=out,
                           ref_storage_args=s_opts,
                           remote_protocol='s3',
                           remote_options=r_opts)

In [25]:
# Open the combined references as one dataset through the zarr engine.
# NOTE: `m` is rebound here; it previously held the single-granule mapper.
m = fs_mzz.get_mapper("")
ds_multi = xr.open_dataset(m, engine="zarr", backend_kwargs={'consolidated':False}, chunks={})

In [26]:
ds_multi

Unnamed: 0,Array,Chunk
Bytes,36.56 kiB,2.81 kiB
Shape,"(13, 360, 2)","(1, 360, 2)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 36.56 kiB 2.81 kiB Shape (13, 360, 2) (1, 360, 2) Count 14 Tasks 13 Chunks Type float32 numpy.ndarray",2  360  13,

Unnamed: 0,Array,Chunk
Bytes,36.56 kiB,2.81 kiB
Shape,"(13, 360, 2)","(1, 360, 2)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,73.12 kiB,5.62 kiB
Shape,"(13, 720, 2)","(1, 720, 2)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 73.12 kiB 5.62 kiB Shape (13, 720, 2) (1, 720, 2) Count 14 Tasks 13 Chunks Type float32 numpy.ndarray",2  720  13,

Unnamed: 0,Array,Chunk
Bytes,73.12 kiB,5.62 kiB
Shape,"(13, 720, 2)","(1, 720, 2)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,208 B,16 B
Shape,"(13, 2)","(1, 2)"
Count,14 Tasks,13 Chunks
Type,datetime64[ns],numpy.ndarray
"Array Chunk Bytes 208 B 16 B Shape (13, 2) (1, 2) Count 14 Tasks 13 Chunks Type datetime64[ns] numpy.ndarray",2  13,

Unnamed: 0,Array,Chunk
Bytes,208 B,16 B
Shape,"(13, 2)","(1, 2)"
Count,14 Tasks,13 Chunks
Type,datetime64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,12.85 MiB,0.99 MiB
Shape,"(13, 360, 720)","(1, 360, 720)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 12.85 MiB 0.99 MiB Shape (13, 360, 720) (1, 360, 720) Count 14 Tasks 13 Chunks Type float32 numpy.ndarray",720  360  13,

Unnamed: 0,Array,Chunk
Bytes,12.85 MiB,0.99 MiB
Shape,"(13, 360, 720)","(1, 360, 720)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,12.85 MiB,0.99 MiB
Shape,"(13, 360, 720)","(1, 360, 720)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 12.85 MiB 0.99 MiB Shape (13, 360, 720) (1, 360, 720) Count 14 Tasks 13 Chunks Type float32 numpy.ndarray",720  360  13,

Unnamed: 0,Array,Chunk
Bytes,12.85 MiB,0.99 MiB
Shape,"(13, 360, 720)","(1, 360, 720)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,12.85 MiB,0.99 MiB
Shape,"(13, 360, 720)","(1, 360, 720)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 12.85 MiB 0.99 MiB Shape (13, 360, 720) (1, 360, 720) Count 14 Tasks 13 Chunks Type float32 numpy.ndarray",720  360  13,

Unnamed: 0,Array,Chunk
Bytes,12.85 MiB,0.99 MiB
Shape,"(13, 360, 720)","(1, 360, 720)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray


In [27]:
ds_multi['SSH']

Unnamed: 0,Array,Chunk
Bytes,12.85 MiB,0.99 MiB
Shape,"(13, 360, 720)","(1, 360, 720)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 12.85 MiB 0.99 MiB Shape (13, 360, 720) (1, 360, 720) Count 14 Tasks 13 Chunks Type float32 numpy.ndarray",720  360  13,

Unnamed: 0,Array,Chunk
Bytes,12.85 MiB,0.99 MiB
Shape,"(13, 360, 720)","(1, 360, 720)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray
