
# HDF5 streamable version


In [None]:
import os,sys,time
import h5py
import numpy as np
import xarray as xr

# import openvisus
# When a local development build exists on this machine, make it importable
# before the import below runs.
_OPENVISUS_BUILD_DIR = r"C:\projects\OpenVisus\build\RelWithDebInfo"
if os.path.isdir(_OPENVISUS_BUILD_DIR):
    sys.path.append(_OPENVISUS_BUILD_DIR)

import OpenVisus as ov

# NOTE(review): presumably turns off OpenVisus write locking - confirm against the OpenVisus docs
os.environ["VISUS_DISABLE_WRITE_LOCK"] = "1"

from create_streamable import Streamable
from xarray_backend import OpenVisusBackendEntrypoint

# NEEDED
#   OpenVisus needs credentials, which it extracts from the S3 config file:
#   you need to have a `~/.aws/config` file that contains the profile
# Fail early, with an explicit message, when the config file is missing.
assert os.path.isfile(os.path.expanduser("~/.aws/config")), (
    "missing ~/.aws/config: OpenVisus reads the S3 credentials/profile from this file"
)

# *** CHANGE AS NEEDED ****
#   NOTE: it is always better to keep all the H5 and OpenVisus files in one directory;
#   for this reason the URL templates below are derived from the path of the H5 file itself

# original file
h5_filename = './reconstructed_data.nxs'
expression = '/shanks-3731-a/data/reconstructed_data'

# xarray needs to read one level up (i.e. at group level)
group, fieldname = expression.rsplit("/", maxsplit=1)

# create streamable local version, where each 3d field will be an OpenVisus dataset
local_url = "./streamable/hdf5/reconstructed_data/visus.nxs"

# upload to S3
profile = "sealstorage"
endpoint_url = "https://maritime.sealstorage.io/api/v0/s3"

# this is where to get the file from the network
# -   NOTE: OpenVisus server does not support serving files such as HDF5 directly, we need a solution on apache
remote_url = "https://maritime.sealstorage.io/api/v0/s3/utah/streamable/hdf5/reconstructed_data/visus.nxs?profile=" + profile

# {name} is the internal HDF5 expression to reach the data
idx_urls = {

    # alias to a dict item that will be used for the `public` access
    "default": "remote",

    # this is needed to generate the internal local dataset
    "local": os.path.splitext(local_url)[0] + "/{name}/visus.idx",

    # network S3 storage
    # The query string is removed before splitting off the extension, so a dot
    # inside the profile name cannot be mistaken for the file extension.
    "remote": os.path.splitext(remote_url.split("?", 1)[0])[0] + "/{name}/visus.idx?cached=arco&profile=" + profile,

    # **TODO** this is missing the {name} in case of multiple fields inside the H5
    # `cached=arco` is passed once, as a regular `&` query parameter.
    "remote-atlantis": "https://atlantis.sci.utah.edu/mod_visus?action=readdataset&dataset=reconstructed_data&cached=arco",
}

from pprint import pprint
pprint(idx_urls)

# Read from original HDF5

In [None]:
# Open the group that holds the field in the original file, then pick the field
ds = xr.open_dataset(h5_filename, group=group)
field = ds[fieldname]

# Pull the whole field into memory and report its basic statistics
data = field[...].values
print(
    "Got data",
    "type", type(data),
    "shape", data.shape,
    "dtype", data.dtype,
    "min", data.min(),
    "max", data.max(),
)

You can also inspect the file with H5Glance
- Execute `!{sys.executable} -m pip install --quiet h5glance` if it is not installed yet

In [None]:
# Inspect the structure of the original file with the third-party h5glance package
from h5glance import H5Glance
# Kept as the last bare expression of the cell so the notebook can display the returned object
H5Glance(h5_filename)

# Create streamable version

In [None]:
# Conversion settings forwarded to Streamable.Create
arco = "2mb"
compression = "zip"

# Build the streamable version of the original file at `local_url`
Streamable.Create(
    h5_filename,
    local_url,
    arco=arco,
    compression=compression,
    idx_urls=idx_urls,
)

In [None]:
# Print the streamable file that was just created at `local_url`
Streamable.Print(local_url)

# Read local

In [None]:
# Open the streamable file through the OpenVisus backend, asking for the "local" urls
ds = xr.open_dataset(
    local_url,
    group=group,
    engine=OpenVisusBackendEntrypoint,
    prefer="local",
)
field = ds[fieldname]

# NOTE(review): the trailing index is presumably the OpenVisus resolution level - confirm in xarray_backend
timestep = 0
res = 27
data = field[timestep, ..., res].values
print(
    "Got data",
    "type", type(data),
    "shape", data.shape,
    "dtype", data.dtype,
    "min", data.min(),
    "max", data.max(),
)

# Show the slice at index 100 of the first axis
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
im = ax.imshow(data[100, ...])
plt.colorbar(im)

# Upload the whole folder (H5 files and IDX data) to S3

It is important to keep everything in a single folder to simplify the upload
- **TODO** the OpenVisus server would need a modification to its `visus.config` file, so it is not easy to make the upload automatic

In [None]:
# Install the AWS CLI endpoint plugin quietly.
# NOTE(review): the sync below already passes --endpoint-url explicitly - confirm the plugin is still needed
!{sys.executable} -m pip install --quiet awscli-plugin-endpoint
# Sync the directory containing `local_url` to S3.
# The destination is derived from `remote_url`: removing the `endpoint_url` prefix from its
# directory leaves "/<bucket>/<path>", which the leading "s3:/" turns into "s3://<bucket>/<path>/".
!aws s3 sync --no-progress --endpoint-url {endpoint_url} --profile {profile} --size-only {os.path.dirname(local_url)}/ s3:/{os.path.dirname(remote_url)[len(endpoint_url):]}/

#  Read from S3

- the streamable file already contains `cached=arco`, so it should cache the data automatically
- check your `~/visus/` directory for the cache

In [None]:
# directly opening the stream using f3fs is causing some problems with `xr.open_dataset` so I am saving the file locally first
# Opening the remote stream directly (via s3fs) causes problems with `xr.open_dataset`,
# so the remote file is saved locally first and opened from there.
temp_local_url = Streamable.SaveRemoteToLocal(
    remote_url,
    profile=profile,
    endpoint_url=endpoint_url,
)

ds = xr.open_dataset(
    temp_local_url,
    group=group,
    engine=OpenVisusBackendEntrypoint,
    prefer="remote",
)
field = ds[fieldname]

# NOTE(review): the trailing index is presumably the OpenVisus resolution level - confirm in xarray_backend
timestep = 0
res = 27
data = field[timestep, ..., res].values
print(
    "Got data",
    "type", type(data),
    "shape", data.shape,
    "dtype", data.dtype,
    "min", data.min(),
    "max", data.max(),
)

# Show the slice at index 100 of the first axis
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
im = ax.imshow(data[100, ...])
plt.colorbar(im)

# Read from atlantis

In [None]:
# OpenVisus server does not support serving any file such as streamable HDF5, so I need another place
# The OpenVisus server cannot serve arbitrary files such as the streamable HDF5 itself,
# so the file is opened from `local_url` and only the "remote-atlantis" urls are requested.
ds = xr.open_dataset(
    local_url,
    group=group,
    engine=OpenVisusBackendEntrypoint,
    prefer="remote-atlantis",
)
field = ds[fieldname]

# NOTE(review): the trailing index is presumably the OpenVisus resolution level - confirm in xarray_backend
timestep = 0
res = 27
data = field[timestep, ..., res].values
print(
    "Got data",
    "type", type(data),
    "shape", data.shape,
    "dtype", data.dtype,
    "min", data.min(),
    "max", data.max(),
)

# Show the slice at index 100 of the first axis
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
im = ax.imshow(data[100, ...])
plt.colorbar(im)

# TODO

- support of direct HDF5  (i.e. using `h5py` with `HDF5_PLUGIN_PATH`)
- support of direct NEXUS (i.e. ?)