# 1 Introduction and REST API

Objectives
 * Verify HDF Lab is working correctly
 * Learn how to make REST requests to the HSDS HDF Server

In [7]:
# Base URL of the HSDS server used throughout this notebook.
# Every request below is built by appending a path (e.g. "/about") to it.
hs_endpoint = "http://34.211.29.9:5101"

In [1]:
# requests is a popular python library for making http requests
import requests
import json
# numpy is a popular array library
import numpy as np

In [8]:
# ask the server for its status information via the "/about" route
rsp = requests.get(f"{hs_endpoint}/about")

In [9]:
# HTTP status code of the "/about" request made in the previous cell
rsp.status_code  # should be 200

200

In [10]:
# The response includes the server state (hopefully 'READY')
# and other information about the service, decoded here from JSON
rsp.json()

{'start_time': 1692378163,
 'state': 'READY',
 'hsds_version': '0.8.1',
 'name': 'nasacloud',
 'greeting': 'Welcome to HSDS!',
 'about': 'HSDS is a webservice for HDF data',
 'node_count': 8,
 'dn_urls': ['http://172.19.0.10:6101',
  'http://172.19.0.3:6101',
  'http://172.19.0.4:6101',
  'http://172.19.0.5:6101',
  'http://172.19.0.6:6101',
  'http://172.19.0.7:6101',
  'http://172.19.0.8:6101',
  'http://172.19.0.9:6101'],
 'dn_ids': ['dn-d86bd',
  'dn-8db8a',
  'dn-9d623',
  'dn-220a7',
  'dn-46aab',
  'dn-876a3',
  'dn-a210e',
  'dn-bd4d1'],
 'username': 'anonymous',
 'isadmin': False}

Problem: "/info" is another request supported by the server.  What does it return?

In [12]:
# look up an HDF5 "file" (a domain) on the server by passing its path
# as the "domain" query parameter
params = {"domain": "/home/test_user1/tall.h5"}
rsp = requests.get(f"{hs_endpoint}/", params=params)
rsp.status_code

200

In [13]:
# decode the JSON body of the domain response and show it;
# rsp_json is read again below to find the root group
rsp_json = rsp.json()
rsp_json

{'root': 'g-b049eefc-00d15168-38c1-667488-59d9e0',
 'class': 'domain',
 'owner': 'test_user1',
 'created': 1681805947.0473416,
 'limits': {'min_chunk_size': 1048576,
  'max_chunk_size': 4194304,
  'max_request_size': 104857600},
 'compressors': ['blosclz', 'lz4', 'lz4hc', 'gzip', 'zstd', 'deflate'],
 'version': '0.8.1',
 'lastModified': 1692380086.5855336,
 'hrefs': [{'rel': 'self',
   'href': 'http://hsds.hdf.test:5101/?domain=/home/test_user1/tall.h5'},
  {'rel': 'database',
   'href': 'http://hsds.hdf.test:5101/datasets?domain=/home/test_user1/tall.h5'},
  {'rel': 'groupbase',
   'href': 'http://hsds.hdf.test:5101/groups?domain=/home/test_user1/tall.h5'},
  {'rel': 'typebase',
   'href': 'http://hsds.hdf.test:5101/datatypes?domain=/home/test_user1/tall.h5'},
  {'rel': 'root',
   'href': 'http://hsds.hdf.test:5101/groups/g-b049eefc-00d15168-38c1-667488-59d9e0?domain=/home/test_user1/tall.h5'},
  {'rel': 'acls',
   'href': 'http://hsds.hdf.test:5101/acls?domain=/home/test_user1/tall.h

In [14]:
# The domain response includes a "root" key holding the id of the root group
root_id = rsp_json["root"]
root_id

'g-b049eefc-00d15168-38c1-667488-59d9e0'

In [15]:
# get info about the root group, addressing it by its id
rsp = requests.get(f"{hs_endpoint}/groups/{root_id}", params=params)
rsp.status_code

200

In [16]:
# decode the root group's JSON description (id, link and attribute counts,
# owning domain, and related hrefs)
root_json = rsp.json()
root_json

{'id': 'g-b049eefc-00d15168-38c1-667488-59d9e0',
 'root': 'g-b049eefc-00d15168-38c1-667488-59d9e0',
 'created': 1681805946.9946604,
 'lastModified': 1681805948.021308,
 'linkCount': 2,
 'attributeCount': 2,
 'domain': '/home/test_user1/tall.h5',
 'bucket': 'icesat2thgtest',
 'hrefs': [{'rel': 'self',
   'href': 'http://hsds.hdf.test:5101/groups/g-b049eefc-00d15168-38c1-667488-59d9e0?domain=/home/test_user1/tall.h5'},
  {'rel': 'links',
   'href': 'http://hsds.hdf.test:5101/groups/g-b049eefc-00d15168-38c1-667488-59d9e0/links?domain=/home/test_user1/tall.h5'},
  {'rel': 'root',
   'href': 'http://hsds.hdf.test:5101/groups/g-b049eefc-00d15168-38c1-667488-59d9e0?domain=/home/test_user1/tall.h5'},
  {'rel': 'home',
   'href': 'http://hsds.hdf.test:5101/?domain=/home/test_user1/tall.h5'},
  {'rel': 'attributes',
   'href': 'http://hsds.hdf.test:5101/groups/g-b049eefc-00d15168-38c1-667488-59d9e0/attributes?domain=/home/test_user1/tall.h5'}]}

Problem: replace the root_id in the last request with "g-1234".  What status code do you get?

In [17]:
# fetch the dataset stored at /g1/g1.1/dset1.1.1
# rather than supplying a dataset id, pass the object's path inside
# the HDF file through the "h5path" query parameter
params["h5path"] = "/g1/g1.1/dset1.1.1"
rsp = requests.get(f"{hs_endpoint}/datasets/", params=params)
rsp.status_code

200

In [18]:
# decode the dataset's JSON description: its id, shape, type and layout
dset_json = rsp.json()
dset_json

{'id': 'd-b049eefc-00d15168-c9bd-391ddd-c496fc',
 'root': 'g-b049eefc-00d15168-38c1-667488-59d9e0',
 'shape': {'class': 'H5S_SIMPLE', 'dims': [10, 10], 'maxdims': [10, 10]},
 'type': {'class': 'H5T_INTEGER', 'base': 'H5T_STD_I32BE'},
 'creationProperties': {'layout': {'class': 'H5D_CHUNKED', 'dims': [10, 10]},
  'fillTime': 'H5D_FILL_TIME_ALLOC'},
 'layout': {'class': 'H5D_CHUNKED', 'dims': [10, 10]},
 'attributeCount': 2,
 'created': 1681805947,
 'lastModified': 1681805947,
 'domain': '/home/test_user1/tall.h5',
 'hrefs': [{'rel': 'self',
   'href': 'http://hsds.hdf.test:5101/datasets/d-b049eefc-00d15168-c9bd-391ddd-c496fc?domain=/home/test_user1/tall.h5'},
  {'rel': 'root',
   'href': 'http://hsds.hdf.test:5101/groups/g-b049eefc-00d15168-38c1-667488-59d9e0?domain=/home/test_user1/tall.h5'},
  {'rel': 'home',
   'href': 'http://hsds.hdf.test:5101/?domain=/home/test_user1/tall.h5'},
  {'rel': 'attributes',
   'href': 'http://hsds.hdf.test:5101/datasets/d-b049eefc-00d15168-c9bd-391ddd-c

In [19]:
# read all the values from the dataset, addressing it by its id
dset_id = dset_json["id"]
# h5path is not needed now; pop() with a default (rather than del) keeps
# this cell re-runnable, since the key is already gone on a second run
params.pop("h5path", None)
rsp = requests.get(hs_endpoint + "/datasets/" + dset_id + "/value", params=params)
rsp.status_code

200

In [20]:
# the JSON body carries the dataset contents under the "value" key,
# as a list of rows
value_json = rsp.json()
value_json["value"]

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 [0, 2, 4, 6, 8, 10, 12, 14, 16, 18],
 [0, 3, 6, 9, 12, 15, 18, 21, 24, 27],
 [0, 4, 8, 12, 16, 20, 24, 28, 32, 36],
 [0, 5, 10, 15, 20, 25, 30, 35, 40, 45],
 [0, 6, 12, 18, 24, 30, 36, 42, 48, 54],
 [0, 7, 14, 21, 28, 35, 42, 49, 56, 63],
 [0, 8, 16, 24, 32, 40, 48, 56, 64, 72],
 [0, 9, 18, 27, 36, 45, 54, 63, 72, 81]]

In [15]:
# the same data can be requested as raw bytes instead of JSON;
# for large selections this is more efficient
headers = {"accept": "application/octet-stream"}
rsp = requests.get(
    f"{hs_endpoint}/datasets/{dset_id}/value",
    params=params,
    headers=headers,
)
rsp.status_code

200

In [16]:
# The Content-Type response header shows how the request was actually
# fulfilled; it should be "application/octet-stream" for a binary reply
rsp.headers['Content-Type']

'application/octet-stream'

In [17]:
# size of the raw body in bytes
len(rsp.content) # should get 400 bytes back: 10 x 10 elements x len(int32)

400

In [18]:
# we can use these bytes to create a numpy array using the type
# info we already know from dset_json
# note: np.frombuffer yields a flat (1-D) array; call
# .reshape(dset_json["shape"]["dims"]) to recover the 10x10 layout
arr = np.frombuffer(rsp.content, dtype='>i4')
arr

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  6,
        7,  8,  9,  0,  2,  4,  6,  8, 10, 12, 14, 16, 18,  0,  3,  6,  9,
       12, 15, 18, 21, 24, 27,  0,  4,  8, 12, 16, 20, 24, 28, 32, 36,  0,
        5, 10, 15, 20, 25, 30, 35, 40, 45,  0,  6, 12, 18, 24, 30, 36, 42,
       48, 54,  0,  7, 14, 21, 28, 35, 42, 49, 56, 63,  0,  8, 16, 24, 32,
       40, 48, 56, 64, 72,  0,  9, 18, 27, 36, 45, 54, 63, 72, 81],
      dtype=int32)

Problem: what would you guess the '>i4' means? What happens if you use '<i4' instead?

In [19]:
# fetch only the upper-left 3x4 corner of the dataset
# the "select" query param takes a numpy-like slice expression
# describing the sub-region to return
params["select"] = "[0:3, 0:4]"
rsp = requests.get(f"{hs_endpoint}/datasets/{dset_id}/value", params=params)
rsp.status_code

200

In [20]:
# only the selected 3x4 sub-region comes back under "value"
subarray_json = rsp.json()
subarray_json["value"]

[[0, 0, 0, 0], [0, 1, 2, 3], [0, 2, 4, 6]]

In [21]:
# Read elements (i,i) for i in range 0-9.
# This is a point selection where we are explicitly
# giving the indices of the elements we want.
# Since the number of points could be quite large
# we use POST with the indices provided in the body of
# the request, rather than GET
points = [[i, i] for i in range(10)]
body = {"points": points}
# remove select so we will read across the entire dataset; pop() with a
# default (rather than del) keeps this cell re-runnable, since the key
# is already gone on a second run
params.pop("select", None)
rsp = requests.post(hs_endpoint + "/datasets/" + dset_id + "/value", params=params, data=json.dumps(body))
rsp.status_code

200

In [22]:
# The returned array is always one-dimensional:
# one value per requested point
points_json = rsp.json()
points_json["value"]

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]