In [1]:
%reload_ext autoreload
%autoreload 2

# HDF

This module provides a common interface to access HDF files. It can be imported as follows:

In [2]:
import alphabase.io.hdf

# Other packages used to demonstrate functionality
import numpy as np
import pandas as pd
import re
import os
import contextlib

Instead of relying directly on the `h5py` interface, we will use an HDF wrapper file to provide consistent access to only those specific HDF features we want. Since the components of an HDF file come in three shapes (`datasets`, `groups` and `attributes`), we will first define a generic HDF wrapper object to handle these components. Once this is done, the HDF wrapper file can be treated as such an object, with additional features to open and close the initial connection.

In [3]:
#hide

import tempfile
# System temp directory; the tests in this notebook place their shared
# "sandbox.hdf" scratch file here.
TEMPDIR = tempfile.gettempdir()

def test_HDF_creation():
    """Create the sandbox HDF file and exercise each kind of write.

    A scalar, an array (plus a value attached to that array), a nested
    dict and a DataFrame are assigned to the file one after another.
    Values are read back where checked, and after every write the file
    on disk must be larger than it was before.
    """
    sandbox_path = os.path.join(TEMPDIR, "sandbox.hdf")
    hdf_file = alphabase.io.hdf.HDF_File(
        sandbox_path,
        read_only=False,
        truncate=True,
        delete_existing=True,
    )
    # The file must start out without any components.
    np.testing.assert_equal(len(hdf_file), 0)

    # History of on-disk sizes; the last entry is the most recent reading.
    size_log = [os.path.getsize(sandbox_path)]

    def assert_file_grew():
        # Take a new reading and compare it against the previous one.
        size_log.append(os.path.getsize(sandbox_path))
        assert size_log[-1] > size_log[-2], "Filesize not increased"

    # --- scalar assigned directly on the file ---
    hdf_file.attr1 = 1
    np.testing.assert_equal(hdf_file.attr1, 1)
    assert_file_grew()

    # --- array built from a fixed seed so it can be regenerated later ---
    np.random.seed(42)
    expected_array = np.random.rand(10)
    hdf_file.array = expected_array
    np.testing.assert_equal(expected_array, hdf_file.array.values)
    np.testing.assert_equal(expected_array[:3], hdf_file.array[:3])
    np.testing.assert_equal((10,), hdf_file.array.shape)
    assert_file_grew()

    # --- value attached to the stored array ---
    hdf_file.array.array_attr = "some attr"
    np.testing.assert_equal(hdf_file.array.array_attr, "some attr")
    assert_file_grew()

    # --- nested dict assigned as a group ---
    nested_group = {
        "subgroup1": {
            "subsubgroup": {},
            "same_array": expected_array,
            "a_bool": True,
        },
        "subgroup2": {},
    }
    hdf_file.group = nested_group
    assert_file_grew()
    np.testing.assert_equal(hdf_file.group.subgroup1.a_bool, True)
    # Three entries were assigned but len() is expected to be 2 --
    # presumably the boolean is kept as an attribute and not counted
    # (TODO confirm against HDF_File).
    np.testing.assert_equal(len(hdf_file.group.subgroup1), 2)

    # --- DataFrame with an integer and a string column ---
    expected_df = pd.DataFrame(
        {
            "col2": np.arange(3),
            "col_str": ["str", "i", "ngs"],
        }
    )
    hdf_file.df = expected_df
    assert_file_grew()
    assert hdf_file.df.values.equals(expected_df)
    
test_HDF_creation()

  with h5py.File(file_name, mode, swmr=True):


In [4]:
#hide

def test_HDF_reading():
    """Re-open the sandbox HDF file and verify its stored contents.

    The file is opened with only its path, i.e. with the default options
    of ``HDF_File``. The expected values mirror what ``test_HDF_creation``
    assigns, so that test has to have produced the file beforehand.
    """
    sandbox_path = os.path.join(TEMPDIR, "sandbox.hdf")
    hdf_file = alphabase.io.hdf.HDF_File(sandbox_path)

    # Scalar stored directly on the file.
    np.testing.assert_equal(hdf_file.attr1, 1)

    # Regenerate the reference array from the same seed used when writing.
    np.random.seed(42)
    expected_array = np.random.rand(10)
    np.testing.assert_equal(expected_array, hdf_file.array.values)
    np.testing.assert_equal(expected_array[:3], hdf_file.array[:3])
    np.testing.assert_equal((10,), hdf_file.array.shape)
    np.testing.assert_equal(hdf_file.array.array_attr, "some attr")

    # Nested group contents.
    np.testing.assert_equal(hdf_file.group.subgroup1.a_bool, True)
    np.testing.assert_equal(len(hdf_file.group.subgroup1), 2)

    # Stored DataFrame must match a freshly built reference frame.
    expected_df = pd.DataFrame(
        {
            "col2": np.arange(3),
            "col_str": ["str", "i", "ngs"],
        }
    )
    assert hdf_file.df.values.equals(expected_df)
    
    
test_HDF_reading()

Basic usage:
> ```
> #Create a new hdf file and write data to it
> hdf_file = alphabase.io.hdf.HDF_File('test.hdf', read_only = False)
> hdf_file.a = np.array([1,2,3])
> hdf_file.b = np.array([4,5,6])
> 
> #Read data, show components and access data:
> hdf_file = alphabase.io.hdf.HDF_File('test.hdf')
> print(hdf_file.components)
> print(hdf_file.a.values)
> ```