# Types

Objectives
 * Learn more about the HDF5 type system
 * Create fixed-length string datasets
 * Create compound type datasets

In [None]:
import os.path as op

import numpy as np

# Backend switch: leave True to use h5py, set to False to use HSDS (via h5pyd) instead.
USE_H5PY = True
if not USE_H5PY:
    # h5pyd is imported under the name h5py so the remaining cells need no changes.
    import h5pyd as h5py
    WORK_DIR = "hdf5://home/test_user1/"
else:
    import h5py
    WORK_DIR = "."  # this directory

In [None]:
# Build the location of this tutorial's file inside WORK_DIR.
filepath = op.join(WORK_DIR, "07.h5")
print(f"creating HDF5 file here: {filepath}")
# Open with mode 'w' (per the h5py docs: create the file, truncating it if it
# already exists). The handle `f` is reused by the dataset-creating cells below.
f = h5py.File(filepath, 'w')
# Last expression in the cell: the notebook displays the file's low-level id.
f.id.id

In [None]:
# Fixed-width string datasets are efficient to index.
# The numpy dtype "Snn" describes a byte string that is nn wide; here nn = 10.
dt = np.dtype("S10")
# One-dimensional dataset holding four such strings.
dset = f.create_dataset("fixed_str", shape=(4,), dtype=dt)

In [None]:
# Read the whole dataset back for display; nothing has been written to it yet.
dset[...]

In [None]:
# An S10 element holds at most 10 characters (not counting a null terminator);
# anything longer is silently truncated.
# Note: the b prefix makes these byte strings rather than Unicode strings.
samples = (
    b"0123456789",
    b"ABCD",
    b"hi, bye",
    b"This string is too long to fit",  # deliberately longer than 10
)
# Write the elements one at a time, in index order.
for index, value in enumerate(samples):
    dset[index] = value

In [None]:
# Read the whole dataset again to see what was actually stored.
dset[...]

You can use a special variable-length string type to create datasets (or attributes)
where each element can be a string of arbitrary length.

In [None]:
# special_dtype(vlen=str) gives a dtype whose elements are variable-length strings.
dt = h5py.special_dtype(vlen=str)
dset = f.create_dataset("vlen_str", shape=(4,), dtype=dt)
# Values of very different lengths go into the same dataset.
dset[0], dset[1] = b"hi", b"bye"
dset[2], dset[3] = b"3.14", b"3.141592653589793"
# Last expression in the cell: read everything back for display.
dset[...]

In [None]:
# Beyond primitive types (floats, ints, and strings), it is sometimes handy
# to bundle several typed fields into one record, much like a C struct.
# Compound types do exactly that: each entry below is a (field name, type) pair.
dt = np.dtype(
    [
        ("pressure", float),
        ("count", int),
    ]
)
# One-dimensional dataset of ten such records.
dset = f.create_dataset("compound", shape=(10,), dtype=dt)
 

In [None]:
# A compound type is a sequence of named fields, each with its own type
# (a field's type may itself be compound!).
# Last expression in the cell: the notebook displays the dataset's dtype.
dset.dtype

In [None]:
# Write element 0 as a tuple with one value per field of the compound type.
dset[0] = (32.8, 12)

In [None]:
# Read the whole dataset back for display; only element 0 has been assigned.
dset[...]

Problem: Add a third field to the compound type above and create a new dataset that uses it.