# In [7]:
import h5py
import os
import json
import numpy as np

# Destination folder under which the HDF5 hierarchy is rebuilt on disk
output_directory = "Recreated_Directory"

# Source HDF5 file that is opened for reading
hdf5_file = "HDF5_data.h5"

# Function to create nested directories based on group structure
def create_nested_directories(path):
    """Create ``path`` together with any missing parent directories.

    Calling this for a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first, and still fails loudly when a regular file
    # is in the way instead of silently leaving it in place.
    os.makedirs(path, exist_ok=True)

# Recursive function to handle groups and datasets
def _dataset_bytes(value):
    """Return ``value`` in a form a binary-mode ``write`` accepts.

    ``str`` is encoded as UTF-8, ``bytes``/``bytearray`` pass through
    unchanged, and anything exposing ``tobytes()`` (NumPy arrays and
    scalars do) is converted with it. Any other value is returned as-is so
    that ``write`` raises its own ``TypeError``.
    """
    if isinstance(value, str):
        return value.encode("utf-8")
    if isinstance(value, (bytes, bytearray)):
        return value
    to_bytes = getattr(value, "tobytes", None)
    return to_bytes() if callable(to_bytes) else value


def process_group(group, group_path):
    """Recreate ``group`` on disk as a directory tree rooted at ``group_path``.

    The directory ``group_path`` is created, every sub-group becomes a
    sub-directory (handled recursively), and every dataset is read in full
    and written as a file named after the dataset.

    Args:
        group: Object whose ``items()`` yields ``(name, item)`` pairs where
            each item is an ``h5py.Group`` or ``h5py.Dataset``.
        group_path: Directory that receives the contents of ``group``.
    """
    create_nested_directories(group_path)

    for name, item in group.items():
        target_path = os.path.join(group_path, name)

        if isinstance(item, h5py.Group):
            process_group(item, target_path)
        elif isinstance(item, h5py.Dataset):
            # Every dataset is dumped as raw bytes regardless of its file
            # extension. Normalising the payload first means a bytes value
            # no longer fails on a missing .tobytes() and a str value no
            # longer fails when written to a binary-mode file.
            with open(target_path, "wb") as file:
                file.write(_dataset_bytes(item[()]))

# Open HDF5 file in read mode
with h5py.File(hdf5_file, "r") as f:
    # Retrieve the metadata JSON from the attribute. A missing attribute
    # falls back to an empty mapping; parsing an empty-string default would
    # always raise json.JSONDecodeError.
    metadata_str = f.attrs.get("Schema_json")
    metadata = {} if metadata_str is None else json.loads(metadata_str)

    # Recreate the directory structure. The file object is itself the root
    # group, so handing it to process_group also writes out datasets stored
    # directly at the root instead of trying to iterate them as groups.
    process_group(f, output_directory)