# Opening HDF5 Files
This notebook tests opening HDF5 files with `h5py` and walking their contents with loops and recursion. It also converts the contents of each HDF5 file to a nested dictionary and writes a summary of that dictionary to a file.

In [1]:
import h5py

In [2]:
# Alternative selection: all six sample granules (GEDI01_B, GEDI02_A and GEDI04_A).
# Uncomment this list, and comment out the one below, to process every sample file.
# filenames = ['GEDI_sample_files/GEDI01_B_2022004042652_O17343_04_T10772_02_005_02_V002.h5',
#              'GEDI_sample_files/GEDI01_B_2022207041426_O20491_04_T09293_02_005_03_V002.h5',
#              'GEDI_sample_files/GEDI02_A_2021050140102_O12405_02_T10912_02_003_02_V002.h5',
#              'GEDI_sample_files/GEDI02_A_2021086153349_O12964_03_T08275_02_003_02_V002.h5',
#              'GEDI_sample_files/GEDI04_A_2021009022644_O11762_03_T01637_02_002_02_V002.h5',
#              'GEDI_sample_files/GEDI04_A_2022106075705_O18927_04_T10647_02_003_01_V002.h5']

# Active selection: only the two GEDI02_A granules. Every later cell loops over this list.
filenames = [
    'GEDI_sample_files/GEDI02_A_2021050140102_O12405_02_T10912_02_003_02_V002.h5',
    'GEDI_sample_files/GEDI02_A_2021086153349_O12964_03_T08275_02_003_02_V002.h5',
]

In [3]:
def print_group_contents(group, indent=0):
    """
    Recursively prints the layout of an h5 group to stdout.

    Each member is printed as "<name>: Group" or "<name>: Dataset", indented by
    two spaces per nesting level. Groups are descended into; datasets get one
    extra line with their shape and dtype.

    Parameters
    ----------
    group : mapping of member names to h5py groups/datasets (e.g. an h5py.Group)
    indent : nesting level used for the leading indentation of this group's members
    """
    pad = "  " * indent
    child_pad = "  " * (indent + 1)
    for name in group:
        node = group[name]
        if isinstance(node, h5py.Group):
            print(f"{pad}{name}: Group")
            print_group_contents(node, indent + 1)
        else:
            print(f"{pad}{name}: Dataset")
            print(f"{child_pad}Shape: {node.shape}, Type: {node.dtype}")

def save_group_contents(group, indent=0, file=None):
    """
    Recursively saves the contents of an h5 group to a file.

    Each member is written as "<name>: Group" or "<name>: Dataset", indented by
    two spaces per nesting level. Groups are descended into. Datasets get their
    shape and dtype plus a preview of their data.

    Parameters
    ----------
    group : mapping of member names to h5py groups/datasets (e.g. an h5py.Group)
    indent : nesting level used for the leading indentation of this group's members
    file : writable text stream; when None the output goes to sys.stdout
        (this is the behavior of print(..., file=None))

    Notes
    -----
    A dataset whose shape is empty (a scalar, shape ()) cannot be sliced with
    [:10]; h5py rejects slicing on a scalar dataspace. Such datasets are read
    with [()] and reported on a "Value:" line instead of "First 10 values:".
    """
    pad = "  " * indent
    child_pad = "  " * (indent + 1)
    for key in group:
        item = group[key]
        if isinstance(item, h5py.Group):
            print(f"{pad}{key}: Group", file=file)
            save_group_contents(item, indent + 1, file)
            continue

        print(f"{pad}{key}: Dataset", file=file)
        print(f"{child_pad}Shape: {item.shape}, Type: {item.dtype}", file=file)
        if item.shape:
            # At least one dimension: slicing the first axis is valid.
            print(f"{child_pad}First 10 values: {item[:10]}", file=file)
        else:
            # Scalar dataset (shape ()); presumably also covers h5py "empty"
            # datasets whose shape is None -- TODO confirm against real files.
            print(f"{child_pad}Value: {item[()]}", file=file)
            
# def save_rh_contents(group, indent=0, file=None):
#     """
#     Recursively saves the contents of an h5 group to a file.
#     """
#     for key in group:
#         item = group[key]
#         line = "  " * indent + f"{key}: {'Group' if isinstance(item, h5py.Group) else 'Dataset'}\n"
#         file.write(line)
#         if isinstance(item, h5py.Group):
#             save_group_contents(item, indent + 1, file)
#         else:
#             file.write("  " * (indent + 1) + f"Shape: {item.shape}, Type: {item.dtype}\n")
#             if key == 'rh':
#                 rh_98 = item[98]
#                 rh_50 = item[50]
#                 file.write("  " * (indent + 1) + f"RH98 values: {rh_98}\n" + f"RH50 values: {rh_50}\n")

In [4]:
# Destination of the text dump. The commented-out path is an alternative output name.
# output_file_path = 'GEDI_outputs/printed_sample_contents.txt'
output_file_path = 'GEDI_outputs/printed_rh_contents.txt'

with open(output_file_path, 'w') as output_file:
    for filename in filenames:
        with h5py.File(filename, 'r') as f:
            # Materialize the key view so a plain list is printed rather than
            # the "<KeysViewHDF5 [...]>" repr.
            top_level_keys = list(f.keys())

            # Progress line on the console; same header at the top of this file's section.
            print(f"File: {filename}, Keys: {top_level_keys}")
            output_file.write(f"File: {filename}\nKeys: {top_level_keys}\n")

            for name in top_level_keys:
                to_save = f[name]
                if isinstance(to_save, h5py.Group):
                    output_file.write(f"Group: {name}\n")
                    save_group_contents(to_save, indent=1, file=output_file)
                else:
                    # save_group_contents looks members up by key, so it only
                    # makes sense for groups; describe a root-level dataset directly.
                    output_file.write(f"Dataset: {name}\n")
                    output_file.write(f"  Shape: {to_save.shape}, Type: {to_save.dtype}\n")

File: GEDI_sample_files/GEDI02_A_2021050140102_O12405_02_T10912_02_003_02_V002.h5Keys: <KeysViewHDF5 ['BEAM0000', 'BEAM0001', 'BEAM0010', 'BEAM0011', 'BEAM0101', 'BEAM0110', 'BEAM1000', 'BEAM1011', 'METADATA']>
File: GEDI_sample_files/GEDI02_A_2021086153349_O12964_03_T08275_02_003_02_V002.h5Keys: <KeysViewHDF5 ['BEAM0000', 'BEAM0001', 'BEAM0010', 'BEAM0011', 'BEAM0101', 'BEAM0110', 'BEAM1000', 'BEAM1011', 'METADATA']>


In [29]:
def h5_to_dict(group):
    """
    Recursively converts an h5 group into a dictionary of Numpy arrays.

    Parameters
    ----------
    group : mapping of member names to h5py groups/datasets (e.g. an h5py.Group)

    Returns
    -------
    dict
        One entry per member of `group`, in iteration order. Sub-groups become
        nested dictionaries; datasets are read fully into memory.

    Notes
    -----
    Datasets are read with Ellipsis indexing (item[...]) rather than item[:].
    Both read the whole dataset when it has at least one dimension, but a slice
    is rejected by h5py on scalar (shape ()) datasets, whereas Ellipsis returns
    a 0-d array for them. Every leaf therefore still exposes .shape and .dtype.
    """
    data_dict = {}
    for key in group:
        item = group[key]
        if isinstance(item, h5py.Group):
            data_dict[key] = h5_to_dict(item)  # Recursively handle nested groups
        else:
            data_dict[key] = item[...]  # Read the full dataset, any rank, as an array
    return data_dict

def print_dict_summary(data_dict, indent=0, file=None):
    """
    Recursively prints the summary of a dictionary of Numpy arrays to a file.

    Nested dictionaries are reported as "Group: <key>" and descended into with
    one more level of indentation (two spaces per level). Every other value is
    reported as "Dataset: <key>, Shape: ..., Type: ..." using its .shape and
    .dtype attributes.

    Parameters
    ----------
    data_dict : dict whose values are nested dicts or objects with .shape/.dtype
    indent : nesting level used for the leading indentation
    file : writable text stream; when None the summary goes to sys.stdout
        (this is the behavior of print(..., file=None)), so the default
        argument no longer fails with AttributeError
    """
    pad = "  " * indent
    for key, value in data_dict.items():
        if isinstance(value, dict):
            print(f"{pad}Group: {key}", file=file)
            print_dict_summary(value, indent + 1, file=file)
        else:
            print(f"{pad}Dataset: {key}, Shape: {value.shape}, Type: {value.dtype}", file=file)

def verify_specific_datasets(data_dict, path=('group1', 'dataset1')):
    """
    Access and print a specific dataset in the dictionary for verification.

    Parameters
    ----------
    data_dict : nested dictionary, e.g. as produced by h5_to_dict
    path : keys leading from `data_dict` to the dataset, given either as a
        sequence of keys or as a single '/'-separated string such as
        'BEAM0000/rh'. The default is the original placeholder path
        'group1/dataset1', kept so existing calls behave exactly as before.

    Prints the shape, dtype and first 10 values of the dataset found at `path`.
    If any key along the path is missing, a KeyError message is printed instead
    of raising. Always returns None.
    """
    if isinstance(path, str):
        keys = [part for part in path.split('/') if part]
    else:
        keys = list(path)

    try:
        data_array = data_dict
        for key in keys:
            data_array = data_array[key]
    except KeyError as e:
        print(f"KeyError: {e} - Ensure the specified path exists in the dictionary.")
        return

    label = '/'.join(str(key) for key in keys)
    print(f"\nVerifying '{label}':")
    print(f"Shape: {data_array.shape}")
    print(f"Type: {data_array.dtype}")
    print(f"First 10 values: {data_array[:10]}")

In [31]:
summary_file_path = 'GEDI_outputs/sample_dict_summaries.txt'
# Re-created on every run of this cell, so re-executing it does not accumulate duplicates.
data_dicts = []

with open(summary_file_path, 'w') as summary_file:
    for filename in filenames:
        print(f"Converting {filename} to a dictionary...")

        # Read every top-level member into memory while the .h5 file is open (read-only).
        data_dict = {}
        with h5py.File(filename, 'r') as h5_file:
            for group_name in h5_file:
                print(f"  Working on group: {group_name}...")
                data_dict[group_name] = h5_to_dict(h5_file[group_name])

        # From here on only the in-memory dictionary is used, so the HDF5 file
        # can already be closed: append its summary, keep it, and spot-check it.
        summary_file.write(f"Summary of the converted dictionary {filename} :\n")
        print_dict_summary(data_dict, file=summary_file)
        data_dicts.append(data_dict)
        verify_specific_datasets(data_dict)

Converting GEDI_sample_files/GEDI01_B_2022004042652_O17343_04_T10772_02_005_02_V002.h5 to a dictionary...
  Working on group: BEAM0000...
  Working on group: BEAM0001...
  Working on group: BEAM0010...
  Working on group: BEAM0011...
  Working on group: BEAM0101...
  Working on group: BEAM0110...
  Working on group: BEAM1000...
  Working on group: BEAM1011...
  Working on group: METADATA...
KeyError: 'group1' - Ensure the specified path exists in the dictionary.
Converting GEDI_sample_files/GEDI01_B_2022207041426_O20491_04_T09293_02_005_03_V002.h5 to a dictionary...
  Working on group: BEAM0000...
  Working on group: BEAM0001...
  Working on group: BEAM0010...
  Working on group: BEAM0011...
  Working on group: BEAM0101...
  Working on group: BEAM0110...
  Working on group: BEAM1000...
  Working on group: BEAM1011...
  Working on group: METADATA...
KeyError: 'group1' - Ensure the specified path exists in the dictionary.
Converting GEDI_sample_files/GEDI02_A_2021050140102_O12405_02_T109

In [32]:
# Sanity check: the conversion cell appends one dictionary per entry in `filenames`,
# so this count should equal len(filenames) after a fresh run of that cell.
print(len(data_dicts))

6
