This notebook processes a folder containing multiple HDF5 (.h5) files, extracts the 2D arrays from the 4D array stored in each file, and saves them as NumPy (.npy) files whose names include the original HDF5 filename.

Select the Python Interpreter:

Press F1 to open the Command Palette.
Type `Python: Select Interpreter` and select it.
Choose the interpreter from your virtual environment (`myenv/bin/python`).

In [None]:
!pip install h5py numpy



In [None]:
import h5py
import numpy as np
import os
import glob

def save_2d_arrays_from_hdf5_folder(hdf5_folder_path, output_dir,
                                    dataset_name='image_tensor'):
    """Split the 4D dataset of every .h5 file in a folder into 2D .npy files.

    For each ``*.h5`` file directly inside ``hdf5_folder_path`` the dataset
    called ``dataset_name`` is read, and every ``data[i, j, :, :]`` slice is
    written to ``<output_dir>/<h5 base name>_array_<i>_<j>.npy``.

    Files that lack the dataset, or whose dataset is not 4-dimensional, are
    reported on stdout and skipped instead of aborting the whole run.

    Args:
        hdf5_folder_path: Folder that is searched (non-recursively) for
            ``*.h5`` files.
        output_dir: Folder that receives the ``.npy`` files. It is created
            if it does not exist.
        dataset_name: Name of the dataset to read from each file.

    Returns:
        None. Progress is reported via ``print``.
    """
    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # glob returns matches in arbitrary order; sort so that the processing
    # order (and therefore the log output) is reproducible.
    hdf5_filepaths = sorted(glob.glob(os.path.join(hdf5_folder_path, '*.h5')))

    # Check if any .h5 files were found
    if not hdf5_filepaths:
        print(f"No .h5 files found in {hdf5_folder_path}")
        return

    for hdf5_filepath in hdf5_filepaths:
        # Base name of the HDF5 file (without the extension), used as the
        # prefix of every output filename
        base_name = os.path.splitext(os.path.basename(hdf5_filepath))[0]

        with h5py.File(hdf5_filepath, 'r') as h5_file:
            # .get() yields None for a missing name; the isinstance check
            # also rejects a group that happens to carry the same name.
            dataset = h5_file.get(dataset_name)
            if not isinstance(dataset, h5py.Dataset):
                print(f"Dataset '{dataset_name}' not found in {hdf5_filepath}")
                continue  # Skip to the next file

            # Validate the rank before loading anything into memory: the
            # slicing below is only meaningful for a 4D array.
            if dataset.ndim != 4:
                print(
                    f"Skipping {hdf5_filepath}: expected a 4D array, "
                    f"got shape {dataset.shape}"
                )
                continue

            # Load the full array so the file can be closed before writing
            data = dataset[:]

        print(f"Processing {hdf5_filepath}, shape of the 4D array: {data.shape}")

        # Walk the first two dimensions; each (i, j) pair selects one 2D array
        for i, j in np.ndindex(*data.shape[:2]):
            array_filename = os.path.join(
                output_dir, f'{base_name}_array_{i}_{j}.npy'
            )
            np.save(array_filename, data[i, j, :, :])
            print(f"Saved {array_filename}")

    print("All arrays have been saved.")




Processing /content/drive/MyDrive/Test_h5/SW_PMTEST_001.h5, shape of the 4D array: (2, 25, 288, 288)
Saved extracted_arrays/SW_PMTEST_001_array_0_0.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_1.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_2.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_3.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_4.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_5.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_6.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_7.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_8.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_9.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_10.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_11.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_12.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_13.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_14.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_15.npy
Saved extracted_arrays/SW_PMTEST_001_array_0_16.npy
Saved extracted_arrays/SW

In [None]:
# Folder that is scanned for *.h5 input files.
hdf5_folder_path = '/home/shirlyn/shuo/Test_h5'
# Destination folder for the extracted .npy files (created if missing).
output_dir = '/home/shirlyn/shuo/extracted_arrays'
# Save every 2D slice of each file as <h5 name>_array_<i>_<j>.npy.
save_2d_arrays_from_hdf5_folder(hdf5_folder_path, output_dir)

Processing /content/drive/MyDrive/Test_h5/SW_PMTEST_001.h5, shape of the 4D array: (2, 25, 288, 288)
Saved /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_0.npy
Saved /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_1.npy
Saved /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_2.npy
Saved /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_3.npy
Saved /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_4.npy
Saved /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_5.npy
Saved /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_6.npy
Saved /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_7.npy
Saved /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_8.npy
Saved /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_9.npy
Saved /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_10.npy
Saved /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_11.npy
Saved /co

1. Check out https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix
2. Format the data as aligned_dataset.
3. Use the pix2pix model to train; refer to scripts/train_pix2pix.py