In [1]:
import os
import h5py
from shutil import copyfile

def compress_and_copy_h5(source_path, dest_path, compression='gzip', compression_opts=9, source_root=None):
    """Write a compressed copy of one HDF5 file, mirroring its relative location.

    The copy is written to
    ``dest_path/<directory of source_path relative to source_root>/<basename>``.
    Groups are copied recursively, and the attributes of the file root, of
    every group and of every dataset are carried over. Each dataset is read
    fully into memory before it is written.

    Parameters
    ----------
    source_path : str
        Path of the HDF5 file to read.
    dest_path : str
        Root directory under which the compressed copy is created.
    compression : str
        Compression filter passed to ``create_dataset``.
    compression_opts : int
        Filter options passed to ``create_dataset`` (for gzip, the level).
    source_root : str, optional
        Directory that ``source_path`` is taken relative to when building the
        destination sub-directory. When omitted, the module-level
        ``source_directory`` is used, which is what this function did before
        the parameter existed.

    Raises
    ------
    NameError
        If ``source_root`` is omitted and no global ``source_directory`` exists.
    ValueError
        If the destination path resolves to the source file itself.
    """
    if source_root is None:
        # Backward-compatible fallback: older callers rely on the global.
        source_root = source_directory

    relative_dir = os.path.relpath(os.path.dirname(source_path), source_root)
    dest_directory = os.path.join(dest_path, relative_dir)
    dest_file_path = os.path.join(dest_directory, os.path.basename(source_path))

    # Opening the destination in 'w' mode truncates it, so never let it be
    # the very file we are about to read from.
    if os.path.abspath(dest_file_path) == os.path.abspath(source_path):
        raise ValueError(
            "Destination path is the same as the source file: %s" % source_path
        )

    def _copy_attrs(src_obj, dst_obj):
        # Copy every attribute of one HDF5 object onto another.
        for attr_name, attr_value in src_obj.attrs.items():
            dst_obj.attrs[attr_name] = attr_value

    def _copy_members(src_group, dst_group):
        # Recursively copy the members of src_group into dst_group.
        for member_name, member in src_group.items():
            if isinstance(member, h5py.Group):
                new_group = dst_group.create_group(member_name)
                _copy_attrs(member, new_group)
                _copy_members(member, new_group)
            elif isinstance(member, h5py.Dataset):
                if member.shape:
                    new_dataset = dst_group.create_dataset(
                        name=member_name,
                        data=member[...],  # read the entire dataset
                        compression=compression,
                        compression_opts=compression_opts,
                    )
                else:
                    # Scalar (shape ()) and empty (shape None) datasets
                    # cannot be chunked, so h5py rejects filter options for
                    # them; store them as-is.
                    new_dataset = dst_group.create_dataset(
                        name=member_name,
                        data=member[()],
                    )
                _copy_attrs(member, new_dataset)
            else:
                # Anything else (e.g. a named datatype) is copied verbatim.
                src_group.copy(member_name, dst_group, name=member_name)

    with h5py.File(source_path, 'r') as h5file_source:
        # Create the destination directory if it doesn't exist
        os.makedirs(dest_directory, exist_ok=True)

        with h5py.File(dest_file_path, 'w') as h5file_dest:
            _copy_attrs(h5file_source, h5file_dest)
            _copy_members(h5file_source, h5file_dest)

def find_h5_files(directory):
    """Return the paths of all ``.h5`` files found under ``directory``.

    The tree is traversed with ``os.walk``; every file whose name ends with
    ``".h5"`` is reported as ``os.path.join(<containing folder>, <file name>)``,
    in traversal order.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subfolders, filenames in os.walk(directory)
        for filename in filenames
        if filename.endswith(".h5")
    ]

# Compression filter and level handed to compress_and_copy_h5 (change as needed).
compression_algorithm = 'gzip'
compression_level = 1

# Root folder scanned for HDF5 inputs. compress_and_copy_h5 also reads this
# module-level name to work out each file's relative sub-directory, so it
# must be set before the loop below runs.
source_directory = "/data02/gkim/stem_cell_jwshin/data/23_SEC1H5_wider_v3_allh_onRA"

# Root folder that receives the compressed copies (same sub-directory layout).
destination_directory = "/data02/gkim/stem_cell_jwshin/data/23_SEC1H5_wider_v3_allh_onRA_gzip1"

# Collect every .h5 file below the source root, sub-directories included.
h5_files_to_copy = find_h5_files(source_directory)

# Write a compressed copy of each file under the destination root.
for h5_file in h5_files_to_copy:
    compress_and_copy_h5(
        h5_file,
        destination_directory,
        compression=compression_algorithm,
        compression_opts=compression_level,
    )

print("Compression and copy completed.")

In [None]:
import os
import h5py
from shutil import copyfile

def compress_and_copy_h5(source_path, dest_path, compression='gzip', compression_opts=9, source_root=None):
    """Write a compressed copy of one HDF5 file, mirroring its relative location.

    The copy is written to
    ``dest_path/<directory of source_path relative to source_root>/<basename>``.
    Groups are copied recursively, and the attributes of the file root, of
    every group and of every dataset are carried over. Each dataset is read
    fully into memory before it is written.

    Parameters
    ----------
    source_path : str
        Path of the HDF5 file to read.
    dest_path : str
        Root directory under which the compressed copy is created.
    compression : str
        Compression filter passed to ``create_dataset``.
    compression_opts : int
        Filter options passed to ``create_dataset`` (for gzip, the level).
    source_root : str, optional
        Directory that ``source_path`` is taken relative to when building the
        destination sub-directory. When omitted, the module-level
        ``source_directory`` is used, which is what this function did before
        the parameter existed.

    Raises
    ------
    NameError
        If ``source_root`` is omitted and no global ``source_directory`` exists.
    ValueError
        If the destination path resolves to the source file itself.
    """
    if source_root is None:
        # Backward-compatible fallback: older callers rely on the global.
        source_root = source_directory

    relative_dir = os.path.relpath(os.path.dirname(source_path), source_root)
    dest_directory = os.path.join(dest_path, relative_dir)
    dest_file_path = os.path.join(dest_directory, os.path.basename(source_path))

    # Opening the destination in 'w' mode truncates it, so never let it be
    # the very file we are about to read from.
    if os.path.abspath(dest_file_path) == os.path.abspath(source_path):
        raise ValueError(
            "Destination path is the same as the source file: %s" % source_path
        )

    def _copy_attrs(src_obj, dst_obj):
        # Copy every attribute of one HDF5 object onto another.
        for attr_name, attr_value in src_obj.attrs.items():
            dst_obj.attrs[attr_name] = attr_value

    def _copy_members(src_group, dst_group):
        # Recursively copy the members of src_group into dst_group.
        for member_name, member in src_group.items():
            if isinstance(member, h5py.Group):
                new_group = dst_group.create_group(member_name)
                _copy_attrs(member, new_group)
                _copy_members(member, new_group)
            elif isinstance(member, h5py.Dataset):
                if member.shape:
                    new_dataset = dst_group.create_dataset(
                        name=member_name,
                        data=member[...],  # read the entire dataset
                        compression=compression,
                        compression_opts=compression_opts,
                    )
                else:
                    # Scalar (shape ()) and empty (shape None) datasets
                    # cannot be chunked, so h5py rejects filter options for
                    # them; store them as-is.
                    new_dataset = dst_group.create_dataset(
                        name=member_name,
                        data=member[()],
                    )
                _copy_attrs(member, new_dataset)
            else:
                # Anything else (e.g. a named datatype) is copied verbatim.
                src_group.copy(member_name, dst_group, name=member_name)

    with h5py.File(source_path, 'r') as h5file_source:
        # Create the destination directory if it doesn't exist
        os.makedirs(dest_directory, exist_ok=True)

        with h5py.File(dest_file_path, 'w') as h5file_dest:
            _copy_attrs(h5file_source, h5file_dest)
            _copy_members(h5file_source, h5file_dest)

def find_h5_files(directory):
    """Return the paths of all ``.h5`` files found under ``directory``.

    The tree is traversed with ``os.walk``; every file whose name ends with
    ``".h5"`` is reported as ``os.path.join(<containing folder>, <file name>)``,
    in traversal order.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subfolders, filenames in os.walk(directory)
        for filename in filenames
        if filename.endswith(".h5")
    ]

# Compression filter and level handed to compress_and_copy_h5 (change as needed).
compression_algorithm = 'gzip'
compression_level = 1

# Root folder scanned for HDF5 inputs. compress_and_copy_h5 also reads this
# module-level name to work out each file's relative sub-directory, so it
# must be set before the loop below runs.
source_directory = "/data02/gkim/stem_cell_jwshin/data/23_SEC1H5_wider_v3_testiPSC"

# Root folder that receives the compressed copies (same sub-directory layout).
# NOTE(review): the source is the "testiPSC" folder while this destination is
# named "..._allh_RA_gzip1" -- confirm this pairing is intended.
destination_directory = "/data02/gkim/stem_cell_jwshin/data/23_SEC1H5_wider_v3_allh_RA_gzip1"

# Collect every .h5 file below the source root, sub-directories included.
h5_files_to_copy = find_h5_files(source_directory)

# Write a compressed copy of each file under the destination root.
for h5_file in h5_files_to_copy:
    compress_and_copy_h5(
        h5_file,
        destination_directory,
        compression=compression_algorithm,
        compression_opts=compression_level,
    )

print("Compression and copy completed.")