# 5 Groups and Links

Objectives:
 * Create groups and subgroups
 * List contents of a group
 * Iterate through all objects in a file

In [1]:
# Backend switch: a true value imports h5py, 0 imports h5pyd (HDF Server) under the same name
USE_H5PY=1  # set to 0 to use HDF Server instead

In [2]:
if USE_H5PY:
    import h5py
else:
    import h5pyd as h5py
import os

In [3]:
#
# Get folder/directory for HDF files we create  
#
def getMyFolder():
    """Return the folder (with a trailing slash) used for the tutorial's HDF files.

    When USE_H5PY is set, the folder is "<HOME>/HDFLabTutorial/" on the local
    disk.  Otherwise it is the "HDFLabTutorial/" subfolder of the server folder
    under "/home/" whose owner equals the JUPYTERHUB_USER environment variable.
    In both cases the folder is created when it does not exist yet.

    Returns:
        The folder path as a string, or None when it cannot be determined
        (JUPYTERHUB_USER unset, no owned home folder found, or an unexpected
        server error while probing the subfolder).
    """
    DIR_NAME = "HDFLabTutorial/"

    if USE_H5PY:
        # os.getenv returns None when HOME is unset; fall back to expanduser
        home_dir = os.getenv("HOME") or os.path.expanduser("~")
        myfolder = home_dir + "/" + DIR_NAME
        if not os.path.isdir(myfolder):
            # create a directory on the local disk if needed;
            # report it only after the creation actually succeeded
            os.makedirs(myfolder, exist_ok=True)
            print("created folder:", myfolder)
        return myfolder

    username = os.getenv("JUPYTERHUB_USER")
    if not username:
        print("JUPYTERHUB_USER is not set; cannot locate a home folder")
        return None

    myfolder = None
    home_root = h5py.Folder('/home/')  # get folder object for root
    try:
        for name in home_root:
            # check any folders where the name matches at least part of the username
            # e.g. folder: "/home/bob/" for username "bob@acme.com"
            if not username.startswith(name):
                continue
            path = '/home/' + name + '/'
            candidate = h5py.Folder(path)
            try:
                if candidate.owner == username:
                    myfolder = path
            finally:
                candidate.close()
            if myfolder:
                break
    finally:
        home_root.close()

    if myfolder is None:
        print("no folder under /home/ is owned by", username)
        return None

    # create a workshop subfolder if not already present
    myfolder += DIR_NAME
    try:
        existing = h5py.Folder(myfolder)
    except IOError as ioe:
        if ioe.errno != 404:
            return None  # unexpected error
        # not present - create it now, and release the handle right away
        h5py.Folder(myfolder, mode='x').close()
        print("created folder:", myfolder)
    else:
        # the folder was only opened to check that it exists
        existing.close()

    return myfolder

In [4]:
# Get your home folder
# will be a POSIX directory if USE_H5PY is 1, or a server folder if 0
home = getMyFolder()
home  # this is the folder where you have permission to write to

'/home/jovyan/HDFLabTutorial/'

In [5]:
# create a file on the disk, or a domain on the server (based on USE_H5PY)
# `home` already ends with "/", so the file name can simply be appended
filename = home + "05.h5"
f = h5py.File(filename, 'w')

In [6]:
# the file handle doubles as the root group, so it should be a Group instance
is_root_group = isinstance(f, h5py.Group)
print("f is a Group instance" if is_root_group else "this is unexpected!")

f is a Group instance


In [7]:
# For h5py, the id is an integer.
# For h5pyd, the id is a UUID.  The "g-" prefix of the id indicates that this is a group
# (the value shown below is an integer because this run used USE_H5PY=1)

f.id.id

72057594037927936

In [8]:
# listing a group gives the names of its members
list(f)  # no groups in root (yet)!

[]

In [9]:
# create two groups under the root, skipping any name that is already present
for group_name in ("g1", "g2"):
    if group_name not in f:
        f.create_group(group_name)

# look the groups up by name and show their ids
g1, g2 = f["g1"], f["g2"]
print("g1 id:", g1.id.id)
print("g2 id:", g2.id.id)

g1 id: 144115188075855874
g2 id: 144115188075855875


In [10]:
# the root group now lists the two groups created above
list(f)

['g1', 'g2']

Problem: Use hsls or h5ls to list the contents of the file

In [11]:
# each object has a name property that gives the h5path of the object
# (for g1 this is the path starting at the root group: '/g1')
g1.name

'/g1'

In [12]:
# create a scalar (just one element) dataset
# passing a plain Python int as data gives a dataset with shape ()
f.create_dataset("dset1", data=42)

<HDF5 dataset "dset1": shape (), type "<i8">

In [13]:
# now the root group has two sub-groups and one dataset
# we can do a typical Python iteration
# iterating yields the member names; index the group with a name to get the object
for x in f:
    print(x, f[x].id.id)

dset1 360287970189639681
g1 144115188075855878
g2 144115188075855879


In [14]:
# You can treat groups like Python dictionaries.
# Look up an item by providing the name
# (here the item is the dataset created earlier, so it has a dtype)
f["dset1"].dtype

dtype('int64')

In [15]:
# every object has a file property that will lead you back to the file
# it belongs to (in case you got lost)
# for g1 this is the file object for 05.h5
g1.file

<HDF5 file "05.h5" (mode r+)>

In [16]:
# groups can have sub-groups
# create_group on g1 places the new group beneath it, at /g1/g1.1
g1.create_group("g1.1")

<HDF5 group "/g1/g1.1" (0 members)>

In [17]:
# you can address it by name from the parent group
# (the name is resolved relative to g1)
g1["g1.1"].id.id

144115188075855881

In [18]:
# or by an h5path from root
f["/g1/g1.1"].id.id

144115188075855882

In [19]:
g2 = f["g2"]
g1_1 = g1["g1.1"]
# You can create links to existing objects
# after this assignment the same group is reachable as /g1/g1.1 and as /g2/g1.1
g2["g1.1"] = g1_1

Problem: Run hsls or h5ls on this file with the -r option

In [20]:
# soft links create a link by name that may or may not refer to an actual object
# note: nothing has been created at /g2/dset in this notebook, so this link dangles
f["softlink"] = h5py.SoftLink("/g2/dset")

In [21]:
# the links show up in a list
# (a soft link is listed by name even when its target does not exist)
list(f)

['dset1', 'g1', 'g2', 'softlink']

In [22]:
# but in this case it doesn't refer to anything:
# resolving the link raises KeyError, which is reported instead of propagated
try:
    link_target = f["softlink"]
except KeyError as ke:
    print("error:", ke)
else:
    print("softlink:", link_target)

error: 'Unable to open object (component not found)'


In [23]:
# external links can point to an object in another file
# (This link won't be valid for h5py, but works with h5pyd)
# arguments: the other file ("/shared/tall.h5"), then the h5path of the object inside it
f["external_link"] = h5py.ExternalLink("/shared/tall.h5", "/g1/g1.1/dset1.1.1")

In [24]:
# now you can reference it as if it was a part of the file
# (skipped when using h5py, where the external link above is not valid)
if not USE_H5PY:
    print("external shape:", f["external_link"].shape)

In [25]:
# To iterate through all the members of a group use the visit method
# visit calls the given function with the path of each object, relative to the group
def printname(name):
    """Print the object path handed over by visit."""
    print(name)
f.visit(printname)

dset1
g1
g1/g1.1
g2
