# h5py (direct)

In [1]:
import h5py
import fsspec
import earthaccess

In [2]:
# S3 filesystem session for the NSIDC DAAC, obtained through earthaccess.
# Later cells open the granule with `s3.open(...)`.
# NOTE(review): presumably requires valid Earthdata credentials to already be
# available to earthaccess (no explicit login call is visible here) — confirm.
s3 = earthaccess.get_s3fs_session(
    daac="NSIDC",
)

In [3]:
# S3 location of the single ATL06 HDF5 granule used by every benchmark below.
# Adjacent string literals are concatenated into one URL.
url = (
    's3://nsidc-cumulus-prod-protected/'
    'ATLAS/ATL06/006/2019/12/02/'
    'ATL06_20191202203649_10220511_006_01.h5'
)

In [6]:
# Keyword arguments forwarded to `s3.open(...)`: block-based read cache with
# 8 MiB blocks.
fsspec_kwargs = dict(
    cache_type="blockcache",
    block_size=8 * 2**20,  # 8 MiB
)

# Keyword arguments forwarded to `h5py.File(...)`.
h5py_kwargs = dict(
    # page_buf_size=16 * 2**20,  # 16 MiB; left disabled by the author
    rdcc_nbytes=4 * 2**20,  # 4 MiB
)

In [10]:
%%timeit -r 5

with s3.open(url, 'rb', **fsspec_kwargs) as fo:
    with h5py.File(fo, **h5py_kwargs) as f:
        data0 = f['gt2l']['land_ice_segments']['h_li'][:]
        data1 = f['gt2l']['land_ice_segments']['latitude'][:]
        data2 = f['gt2l']['land_ice_segments']['longitude'][:]
        data3 = f['gt2l']['land_ice_segments']['delta_time'][:]

777 ms ± 22 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)


In [12]:
# Total in-memory size of the four arrays read via h5py, in MB (1e6 bytes).
sum(arr.nbytes for arr in (data0, data1, data2, data3)) / 1e6

3.159772

### xarray + h5py (via the h5netcdf engine)

In [14]:
import xarray as xr

In [16]:
%%timeit -r 5

with s3.open(url, 'rb', **fsspec_kwargs) as fo:
    ds = xr.open_dataset(
        fo, engine='h5netcdf', group='/gt2l/land_ice_segments', driver_kwds={"rdcc_nbytes": 1024*1024}
    )
    ds.h_li.load()
    ds.latitude.load()
    ds.longitude.load()
    ds.delta_time.load()

1.02 s ± 10.7 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)


In [18]:
# Size of the xarray dataset in MB (1e6 bytes).
# NOTE(review): `ds.nbytes` covers every variable in the opened group, not
# only the four that were loaded, so this figure is presumably not directly
# comparable with the h5py total reported earlier — confirm.
ds.nbytes / 1_000_000

5.078205