In [None]:
import h5py
import numpy as np
import pandas as pd

def save_h5(data_dict, filename):
    """
    Save a dictionary to an HDF5 file, handling:
    - Nested dictionaries (each becomes an HDF5 group)
    - DataFrames (numeric, string, mixed)
    - Lists (including jagged)
    - NumPy arrays and scalars

    All object/string data is stored as UTF-8 variable-length strings.

    Storage layout, per value type:
    - dict: a group named ``key``; entries are saved recursively inside it.
    - DataFrame: a group named ``key`` holding a 2-D string dataset ``data``
      (every cell passed through ``str``) and a ``columns`` attribute with the
      column names as UTF-8 encoded bytes. The index is NOT written.
    - list: a 1-D string dataset; every element is passed through ``str``,
      so numbers and nested lists are stored as their text form.
    - ndarray: object and unicode (``'U'``) arrays are stored as string
      datasets of the same shape; any other dtype is handed to h5py as-is.
    - anything else: handed to h5py directly as a scalar dataset.

    Parameters
    ----------
    data_dict : dict
        Mapping of dataset/group names to the values described above.
    filename : str or path-like
        Destination file. It is opened in ``"w"`` mode, so an existing file
        at this path is overwritten.

    Raises
    ------
    TypeError
        Propagated from h5py when a value falls through to the generic branch
        and has no HDF5 representation.
    """
    # A single variable-length UTF-8 string type shared by all text datasets.
    utf8 = h5py.string_dtype(encoding='utf-8')

    def as_text(values, shape):
        # Build from a flat list of str and reshape afterwards: this keeps the
        # exact shape even when a dimension is zero (a nested-list build would
        # collapse a zero-row table to shape (0,)).
        return np.array([str(v) for v in values], dtype=object).reshape(shape)

    def save_item(group, key, item):
        if isinstance(item, dict):
            subgroup = group.create_group(key)
            for subkey, subitem in item.items():
                save_item(subgroup, subkey, subitem)

        elif isinstance(item, pd.DataFrame):
            subgroup = group.create_group(key)
            # Encode names explicitly: NumPy's implicit str -> 'S' conversion
            # is ASCII-only and raises UnicodeEncodeError on names such as 'é'.
            subgroup.attrs['columns'] = np.array(
                [str(col).encode('utf-8') for col in item.columns], dtype='S'
            )
            # Convert all cells to strings for HDF5, including numeric values.
            cells = as_text(item.values.flat, item.shape)
            subgroup.create_dataset('data', data=cells, dtype=utf8)

        elif isinstance(item, list):
            # Stringifying each element makes mixed and jagged lists storable
            # as one homogeneous 1-D dataset.
            group.create_dataset(key, data=as_text(item, (len(item),)), dtype=utf8)

        elif isinstance(item, np.ndarray):
            # h5py cannot store NumPy unicode ('U') arrays directly, so they
            # take the same string path as object arrays.
            if item.dtype.kind in ('O', 'U'):
                group.create_dataset(key, data=as_text(item.flat, item.shape), dtype=utf8)
            else:
                group.create_dataset(key, data=item)

        else:
            # Scalars (and anything else h5py knows how to convert itself).
            group.create_dataset(key, data=item)

    with h5py.File(filename, "w") as f:
        for key, value in data_dict.items():
            save_item(f, key, value)
