# Types

Objectives
 * Learn more about the HDF5 type system
 * Create fixed-length string datasets
 * Create compound type datasets

In [2]:
USE_H5PY=1  # Set to 0 to use HDF Server
if USE_H5PY:
    import h5py
else:
    # h5pyd is bound to the name h5py so the remaining cells can refer to
    # "h5py" regardless of which backend was selected above
    import h5pyd as h5py
import numpy as np
import os

In [3]:
#
# Get folder/directory for HDF files we create  
#
def getMyFolder():
    """Return the folder used for the HDF files created by this tutorial.

    When USE_H5PY is set, the folder is a directory on the local disk under
    the user's home directory.  Otherwise it is a folder on the HDF Server,
    found by scanning '/home/' for a folder whose owner is the user named in
    the JUPYTERHUB_USER environment variable.  In both cases the
    "HDFLabTutorial/" subfolder is created if it is not already present.

    Returns:
        The folder path (ending in '/'), or None if the server folder could
        not be determined or an unexpected server error occurred.
    """
    DIR_NAME = "HDFLabTutorial/"
    if USE_H5PY:
        # expanduser honors $HOME when it is set, and still yields a usable
        # path when it is not (os.getenv("HOME") would give None and make the
        # concatenation below fail)
        myfolder = os.path.expanduser("~") + "/" + DIR_NAME
        if not os.path.isdir(myfolder):
            # create a directory on the local disk if needed; report it only
            # after the creation actually succeeded
            os.makedirs(myfolder, exist_ok=True)
            print("created folder:", myfolder)
        return myfolder

    username = os.getenv("JUPYTERHUB_USER")
    if not username:
        # without a username there is nothing to match folder owners against
        print("JUPYTERHUB_USER is not set - unable to locate a server folder")
        return None

    myfolder = None
    home_dir = h5py.Folder('/home/')  # get folder object for root
    try:
        for name in home_dir:
            # check any folders where the name matches at least part of the username
            # e.g. folder: "/home/bob/" for username "bob@acme.com"
            if not username.startswith(name):
                continue
            path = '/home/' + name + '/'
            candidate = h5py.Folder(path)
            try:
                if candidate.owner == username:
                    myfolder = path
            finally:
                candidate.close()
            if myfolder:
                break
    finally:
        # release the folder handle even if the scan raised
        home_dir.close()

    if myfolder is None:
        # no match - bail out instead of failing on None + str below
        print("no folder under /home/ is owned by:", username)
        return None

    # create a workshop subfolder if not already present
    myfolder += DIR_NAME
    try:
        # opening the folder is only an existence check; close it right away
        h5py.Folder(myfolder).close()
    except IOError as ioe:
        if ioe.errno != 404:
            return None  # unexpected error
        # not present - create it now
        h5py.Folder(myfolder, mode='x').close()
        print("created folder:", myfolder)

    return myfolder

In [4]:
# Get your home folder
# will be a posix directory if USE_H5PY is 1, or a server folder if 0
home = getMyFolder()
home  # this is the folder where you have permission to write to

'/home/jovyan/HDFLabTutorial/'

In [5]:
# create a file on the disk, or a domain on the server (based on USE_H5PY)
# the handle f is reused by the cells below to create datasets
filename = home + "07.h5"
f = h5py.File(filename, 'w')

In [6]:
# Datasets with fixed-width strings are efficient to index
# Create them using the numpy Snn type (where nn is the width of the string),
# e.g. "S10" below gives each of the 4 elements a width of 10
dt = np.dtype("S10")
dset = f.create_dataset("fixed_str", (4,), dtype=dt)

In [7]:
# read back the whole dataset - nothing has been written yet, so every element is empty
dset[...]

array([b'', b'', b'', b''], dtype='|S10')

In [9]:
# you can store up to 10 character long strings (exclusive of a null terminator)
# any string that is too long will be silently truncated
# Note: use the b prefix to denote byte strings as opposed to Unicode
dset[0] = b"0123456789"  # exactly 10 characters - fits without truncation
dset[1] = b"ABCD"
dset[2] = b"hi, bye"
dset[3] = b"This string is too long to fit"  # only the first 10 characters are kept

In [10]:
# read the values back - note that the last element was truncated to 10 characters
dset[...]

array([b'0123456789', b'ABCD', b'hi, bye', b'This strin'], dtype='|S10')

TODO: Example for variable length strings

In [11]:
# Rather than just primitive types (floats, ints, and strings), sometimes
# it is nice to bundle together multiple types as with a C struct.
# We can do that using compound types.
# Note: explicit fixed-width numpy types are used here - the old np.float and
# np.int aliases are no longer available in current numpy releases
dt = np.dtype([("pressure", np.float64), ("count", np.int64)])
dset = f.create_dataset("compound", (10,), dtype=dt)
 

In [12]:
# The type is an array of other types (possibly compound themselves!)
# The type is a list of named fields, each with its own type
# (a field's type may itself be compound!)
dset.dtype

dtype([('pressure', '<f8'), ('count', '<i8')])

In [13]:
# assign all fields of one element at once with a tuple: (pressure, count)
dset[0] = (32.8, 12)

In [14]:
# element 0 holds the values just written; the remaining elements are still zero
dset[...]

array([(32.8, 12), ( 0. ,  0), ( 0. ,  0), ( 0. ,  0), ( 0. ,  0),
       ( 0. ,  0), ( 0. ,  0), ( 0. ,  0), ( 0. ,  0), ( 0. ,  0)],
      dtype=[('pressure', '<f8'), ('count', '<i8')])

Problem: Add a third field to the compound type above and create a new dataset that uses it.