In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell is executed.
%reload_ext autoreload
%autoreload 2

# HDF functionalities

This module provides a common interface to access HDF files. It can be imported as follows:

In [None]:
import alphabase.io.hdf

# Other packages used to demonstrate functionality
import numpy as np
import pandas as pd
import os

Instead of relying directly on the `h5py` interface, we use an HDF wrapper file to provide consistent access to only those specific HDF features we want. Since the components of an HDF file come in three kinds (`datasets`, `groups` and `attributes`), we first define a generic HDF wrapper object to handle these components. Once this is done, the HDF wrapper file can be treated as such an object, with additional features to open and close the initial connection.

In [None]:
#| hide

import tempfile

In [None]:
#| hide

# Write-path smoke test: create an HDF file in the system temp directory and
# store attributes, an array, nested groups and a dataframe through the
# wrapper. Statement order matters: every write is followed by a check that
# the file on disk has grown and/or that the value reads back unchanged.
TEMPDIR = tempfile.gettempdir()

hdf_file_name = os.path.join(TEMPDIR, "sandbox.hdf")
# Open for writing. NOTE(review): `truncate` / `delete_existing` presumably
# allow overwriting existing entries / replace a pre-existing file - confirm
# against `HDF_File`.
hdf_file = alphabase.io.hdf.HDF_File(
    hdf_file_name,
    read_only=False,
    truncate=True,
    delete_existing=True
)
# The file object is expected to report a length of zero at this point.
np.testing.assert_equal(len(hdf_file), 0)
# Baseline size on disk; each subsequent write must make the file larger.
file_size = os.path.getsize(hdf_file_name)

# 1) Scalar assigned directly on the file object.
hdf_file.attr1 = 1
np.testing.assert_equal(hdf_file.attr1, 1)
# Refresh `file_size` while remembering the previous value for comparison.
file_size, old_file_size = os.path.getsize(hdf_file_name), file_size
assert file_size > old_file_size, "Filesize not increased"

# 2) Numpy array; the fixed seed makes the data reproducible.
np.random.seed(42)
array = np.random.rand(10)
hdf_file.array = array
# Read back in full (`.values`), as a slice, and via `.shape`.
np.testing.assert_equal(array, hdf_file.array.values)
np.testing.assert_equal(array[:3], hdf_file.array[:3])
np.testing.assert_equal((10,), hdf_file.array.shape)
file_size, old_file_size = os.path.getsize(hdf_file_name), file_size
assert file_size > old_file_size, "Filesize not increased"

# 3) Attribute attached to the stored array.
hdf_file.array.array_attr = "some attr"
np.testing.assert_equal(hdf_file.array.array_attr, "some attr")
file_size, old_file_size = os.path.getsize(hdf_file_name), file_size
assert file_size > old_file_size, "Filesize not increased"

# 4) Nested dict assigned in one go; nested entries are then reachable by
#    dotted access (see the `subgroup1.a_bool` check below).
group = {
    "subgroup1": {
        "subsubgroup": {},
        "same_array": array,
        "a_bool": True
    },
    "subgroup2": {}
}
hdf_file.group = group
file_size, old_file_size = os.path.getsize(hdf_file_name), file_size
assert file_size > old_file_size, "Filesize not increased"
np.testing.assert_equal(hdf_file.group.subgroup1.a_bool, True)
# The dict has three entries but the expected length is 2.
# NOTE(review): presumably `a_bool` is stored as an attribute and is not
# counted by `len` - confirm.
np.testing.assert_equal(len(hdf_file.group.subgroup1), 2)

# 5) Dataframe with an integer and a string column, stored under `dfs`.
df = pd.DataFrame(
    {
        "col2": np.arange(3),
        "col_str": ["str", "i", "ngs"],
    }
)
# Assign an empty dict first so that `hdf_file.dfs` exists as a container.
hdf_file.dfs = {}
hdf_file.dfs.df_df = df
# Extra attributes set on the stored dataframe.
hdf_file.dfs.df_df.hash_seed = 1337
hdf_file.dfs.df_df.data_from = "colleagues"
file_size, old_file_size = os.path.getsize(hdf_file_name), file_size
assert file_size > old_file_size, "Filesize not increased"
# `.values` must compare equal to the original dataframe.
assert hdf_file.dfs.df_df.values.equals(df)

# Store the same dataframe under the name `df`; a TypeError, if raised, is
# printed instead of failing the cell.
# NOTE(review): if no TypeError is raised this passes silently - confirm
# whether an error is actually expected here.
try:
    hdf_file.dfs.df = df
except TypeError as e:
    print(str(e))

In [None]:
#| hide
# Read-back test: reopen the same "sandbox.hdf" path with default arguments
# and check that the values asserted during writing are still present.
# Relies on `TEMPDIR` and on the file contents produced earlier in this
# notebook.
hdf_file_name = os.path.join(TEMPDIR, "sandbox.hdf")
hdf_file = alphabase.io.hdf.HDF_File(
    hdf_file_name,
)
np.testing.assert_equal(hdf_file.attr1, 1)
# Re-seeding with 42 regenerates the identical array for comparison.
np.random.seed(42)
array = np.random.rand(10)
np.testing.assert_equal(array, hdf_file.array.values)
np.testing.assert_equal(array[:3], hdf_file.array[:3])
np.testing.assert_equal((10,), hdf_file.array.shape)
np.testing.assert_equal(hdf_file.array.array_attr, "some attr")
np.testing.assert_equal(hdf_file.group.subgroup1.a_bool, True)
np.testing.assert_equal(len(hdf_file.group.subgroup1), 2)
# Rebuild the reference dataframe to compare against the stored one.
df = pd.DataFrame(
    {
        "col2": np.arange(3),
        "col_str": ["str", "i", "ngs"],
    }
)
assert hdf_file.dfs.df_df.hash_seed == 1337
assert hdf_file.dfs.df_df.data_from == "colleagues"
assert hdf_file.dfs.df_df.values.equals(df)
# Same lookups as the dotted access above, spelled as explicit
# `__getattribute__` calls (one level deep, then two levels deep).
assert hdf_file.dfs.__getattribute__("df_df").values.equals(df)
assert hdf_file.__getattribute__('dfs').__getattribute__("df_df").values.equals(df)
