# 5 Groups and Links

Objectives:
 * Create groups and subgroups
 * List contents of a group
 * Iterate through all objects in a file

In [1]:
import os.path as op

# Backend switch for the whole notebook:
#   True  -> h5py  (files are written to the local directory)
#   False -> h5pyd (HSDS), imported under the name h5py so later cells run unchanged
USE_H5PY = True
if not USE_H5PY:
    import h5pyd as h5py
    WORK_DIR = "hdf5://home/test_user1/"
else:
    import h5py
    WORK_DIR = "."  # this directory

In [2]:
# Build the location of the tutorial file inside WORK_DIR and open it in 'w' mode
filepath = op.join(WORK_DIR, "05.h5")
print(f"creating HDF5 file here: {filepath}")
f = h5py.File(filepath, mode='w')
# the last expression of the cell is displayed: the id of the file's root object
f.id.id

creating HDF5 file here: ./05.h5


72057594037927936

In [3]:
# A File object can be used wherever a Group is expected: it is the root group
root_is_group = isinstance(f, h5py.Group)
print("f is a Group instance" if root_is_group else "this is unexpected!")

f is a Group instance


In [4]:
# For h5py, the id is an integer.
# For h5pyd, the id is a UUID.  The "g-" prefix of the id indicates that this is a group.
# (Same expression as the one displayed when the file was created.)

f.id.id

72057594037927936

In [5]:
# iterating a group yields the names of its members;
# nothing has been created so far, hence the empty list
list(f)  # no groups in root (yet)!

[]

In [6]:
# create two groups under the root, skipping any name that is already present
for group_name in ("g1", "g2"):
    if group_name not in f:
        f.create_group(group_name)

# fetch both groups by name and show their ids
g1, g2 = f["g1"], f["g2"]
print("g1 id:", g1.id.id)
print("g2 id:", g2.id.id)

g1 id: 144115188075855874
g2 id: 144115188075855875


In [7]:
# the root group now lists the two groups
list(f)

['g1', 'g2']

Problem: Use hsls (HSDS) or h5ls (HDF5 library) to list the contents of the file.

In [8]:
# each object has a name property that gives the h5path of the object,
# i.e. its path written from the root group "/"
g1.name

'/g1'

In [9]:
# create a scalar (just one element) dataset directly under the root group;
# its shape and element type are derived from the data value
f.create_dataset("dset1", data=42)

<HDF5 dataset "dset1": shape (), type "<i8">

In [10]:
# The root group now holds two sub-groups and one dataset.
# Iterating a group yields member names; index the group with a name to get the object.
for member_name in f:
    print(member_name, f[member_name].id.id)

dset1 360287970189639681
g1 144115188075855878
g2 144115188075855879


In [11]:
# the member list now includes the dataset as well as the two groups
list(f)

['dset1', 'g1', 'g2']

In [12]:
# You can treat groups like Python dictionaries.
# Look up an item by providing the name
# (here the scalar dataset, to inspect its element type)
f["dset1"].dtype

dtype('int64')

In [13]:
# every object has a file property that will lead you back to the file
# it belongs to (in case you got lost); the repr also shows the file's mode
g1.file

<HDF5 file "05.h5" (mode r+)>

In [14]:
# groups can have sub-groups: calling create_group on g1 creates the
# new group beneath it, giving the h5path /g1/g1.1
g1.create_group("g1.1")

<HDF5 group "/g1/g1.1" (0 members)>

In [15]:
# you can address it by name from the parent group (a path relative to g1)
g1["g1.1"].id.id

144115188075855882

In [16]:
# or by an h5path from root (absolute path, starting with "/")
f["/g1/g1.1"].id.id

144115188075855883

In [17]:
# You can create links to existing objects: assigning an object to a new key
# in a group links that object under the new name, so the sub-group becomes
# reachable as /g2/g1.1 in addition to /g1/g1.1
g1_1 = g1["g1.1"]
g2 = f["g2"]
g2["g1.1"] = g1_1

Problem: Run hsls or h5ls on this file with the -r option

In [18]:
# soft links create a link by name that may or may not refer to an actual object
# (nothing named /g2/dset has been created in this notebook so far)
f["softlink"] = h5py.SoftLink("/g2/dset")

In [19]:
# the links show up in a list of the group's members, next to the
# groups and the dataset
list(f)

['dset1', 'g1', 'g2', 'softlink']

In [20]:
# Following the soft link fails because its target path does not exist;
# the lookup raises KeyError, which is caught and reported instead of aborting the cell
try:
    link_target = f["softlink"]
    print("softlink:", link_target)
except KeyError as lookup_error:
    print("error:", lookup_error)

error: 'Unable to open object (component not found)'


In [21]:
# external links can point to an object in another file:
# describe the target (file + h5path inside that file), then store it under a name
link_filepath = op.join(WORK_DIR, "tall.h5")
external_target = h5py.ExternalLink(link_filepath, "/g1/g1.1/dset1.1.1")
f["external_link"] = external_target
print(link_filepath)

./tall.h5


In [22]:
# now you can reference it as if it was a part of the file
# NOTE(review): this needs tall.h5 to exist in WORK_DIR and to contain the
# linked dataset; presumably it is supplied with the tutorial material
print("external shape:", f["external_link"].shape)

external shape: (10, 10)


In [23]:
# To iterate through all the members of a group recursively use the visit method.
# visit calls the given function with the path of each object, relative to the group.
# Note what the output leaves out: "softlink" and "external_link" are not listed,
# and the sub-group linked from both g1 and g2 appears only once.
def printname(name: str) -> None:
    """Print the path of one visited object."""
    print(name)
f.visit(printname)

dset1
g1
g1/g1.1
g2


In [24]:
# you can create an anonymous HDF object - i.e. an object that has no links to it -
# by passing None as the name
anon_dset = f.create_dataset(None, data="abc")

In [25]:
# if you have a reference to it, you can use it in the usual way
# (indexing with an empty tuple reads the value of this scalar dataset)
anon_dset[()]

b'abc'

In [26]:
# they don't have a name though
# (the cell displays nothing; presumably the name property is None here)
anon_dset.name

In [27]:
# remember the object's id so it can be looked up again after the file is re-opened
anon_dset_id = anon_dset.id.id

In [28]:
# In the HDF5 library, anonymous datasets will not be persisted.
# In HSDS though, they will, and can be accessed using the uuid.
# Close the file and re-open it to find out which case applies.  The mode is
# given explicitly because this cell only reads and the default mode is not
# the same across h5py versions.
f.close()
f = h5py.File(filepath, "r")

try:
    anon_dset = f[f"datasets/{anon_dset_id}"]
    # A bare expression inside a try block is not echoed by the notebook,
    # so print the value to make the successful (HSDS) case visible.
    print(anon_dset[()])
except KeyError:
    # the lookup by id found nothing: the anonymous dataset was not kept
    print("this trick doesn't work with the HDF5 library")



this trick doesn't work with the HDF5 library
