# HDF5 I/O with h5io

This notebook demonstrates how to save and load NumPy arrays and pandas DataFrames with `neural_analysis.utils.h5io`, and how to load only selected `(item_i, item_j)` pairs by passing `filter_pairs` to `load_hdf5`.

In [1]:
# Imports and helpers
from pathlib import Path
import tempfile
import numpy as np
import pandas as pd
from neural_analysis.utils import h5io
from neural_analysis.utils.io import load_hdf5

# Create a throwaway directory for the demo files. The TemporaryDirectory
# object is kept in `tmpdir` because the directory is removed once that object
# is cleaned up (explicitly via tmpdir.cleanup() or when it is finalized).
tmpdir = tempfile.TemporaryDirectory()
# Path form of the directory name; the examples below build their file paths from it.
base = Path(tmpdir.name)
print("Using temp dir:", base)

Using temp dir: /tmp/tmpl6wbuqn_


In [2]:
# Example 1: Array roundtrip with labels and attrs
path = base / 'array_demo.h5'
# Draw from a seeded Generator rather than the legacy, unseeded np.random.randn
# so that Restart & Run All writes the same array every time.
data = np.random.default_rng(0).standard_normal((100, 10)).astype(np.float32)
# One string label per row of `data`.
labels = np.array([f'sample_{i}' for i in range(data.shape[0])])
# Attributes passed to the save call; mixes a string, an int and a nested dict.
attrs = {
    'description': 'random normal features',
    'version': 1,
    'metadata': {'source': 'synthetic', 'dims': list(data.shape)}
}

# Save
h5io(path, task='save', data=data, labels=labels, attrs=attrs)
print('Saved to', path)

# Load: with task='load' the result unpacks as a (data, labels) pair.
# Note that `attrs` are written above but are not read back in this cell.
loaded_data, loaded_labels = h5io(path, task='load')

# Validate roundtrip
assert isinstance(loaded_data, np.ndarray)
np.testing.assert_allclose(loaded_data, data)
assert list(loaded_labels) == list(labels)
print('Array roundtrip OK:', loaded_data.shape)

Saved to /tmp/tmpl6wbuqn_/array_demo.h5
Array roundtrip OK: (100, 10)


In [3]:
# Example 2: DataFrame roundtrip
path_df = base / 'df_demo.h5'
df = pd.DataFrame({
    'neuron_id': [f'n{i}' for i in range(5)],
    # Seeded Generator instead of the legacy, unseeded np.random.rand so the
    # saved frame is identical on every Restart & Run All.
    'firing_rate': np.random.default_rng(0).random(5),
    'condition': ['A', 'B', 'A', 'B', 'A'],
})
# One label per DataFrame row.
labels_df = ['trial_1', 'trial_2', 'trial_3', 'trial_4', 'trial_5']

# Save, then load straight back; task='load' unpacks as (data, labels).
h5io(path_df, task='save', data=df, labels=labels_df)
loaded_df, loaded_labels_df = h5io(path_df, task='load')

# Validate
assert isinstance(loaded_df, pd.DataFrame)
# Both indexes are reset so that only column names, dtypes and values are compared.
pd.testing.assert_frame_equal(loaded_df.reset_index(drop=True), df.reset_index(drop=True))
assert list(loaded_labels_df) == labels_df
print('DataFrame roundtrip OK:', loaded_df.shape)

DataFrame roundtrip OK: (5, 3)


In [4]:
# Example 3: Filtering pairs on load (via load_hdf5)
path_pairs = base / 'pairs_demo.h5'
# Four distinct (item_i, item_j) pairs, each with a score.
pairs_df = pd.DataFrame({
    'item_i': ['A', 'A', 'B', 'C'],
    'item_j': ['B', 'C', 'C', 'D'],
    'score': [0.1, 0.8, 0.5, 0.9],
})
h5io(path_pairs, task='save', data=pairs_df, labels=None)

# Only rows whose (item_i, item_j) is in this list should come back.
wanted = [('A','C'), ('B','C')]
# With return_attrs=True the result unpacks as ((data, labels), attrs).
# The labels slot goes to a named throwaway instead of `_`: assigning `_` at
# notebook top level overrides IPython's `_` last-output shortcut.
# NOTE: this rebinds `attrs`, the dict that Example 1 passed to its save call.
(loaded_filtered, _unused_labels), attrs = load_hdf5(path_pairs, filter_pairs=wanted, return_attrs=True)
print('Filtered rows:')
display(loaded_filtered)

# Validate only desired pairs
loaded_pairs = list(zip(loaded_filtered['item_i'], loaded_filtered['item_j']))
assert set(loaded_pairs) == set(wanted)
# Every wanted pair occurs exactly once in pairs_df, so the row count must match
# too; the set comparison alone would not notice duplicated rows.
assert len(loaded_pairs) == len(wanted)
print('Filter pairs OK')

Filtered rows:


Unnamed: 0,item_i,item_j,score
0,A,C,0.8
1,B,C,0.5


Filter pairs OK
