# Read and combine all the individual hdf5 data files



In [35]:
import pandas as pd
import numpy as np
import os
import h5py
import matplotlib.pyplot as plt

from io import StringIO


In [36]:
# Directory where your .h5 files are stored
base_path = "../result_tables"

list_of_tables = []
for table_name in os.listdir(base_path):
    if table_name.endswith(".h5"):
        list_of_tables.append(table_name)

for table_name in os.listdir(os.path.join(base_path, "WDMS")):
    if table_name.endswith(".h5"):
        list_of_tables.append('WDMS/'+table_name)


print("List of tables:", list_of_tables)


List of tables: ['Be_sdOB_table.h5', 'ns_table.h5', 'WRs_SMC.h5', 'example_obs_df_full.h5', 'algols.h5', 'bh_table.h5', 'stripped_star_table.h5', 'example_obs_df.h5', 'BSS_data.h5', 'contact.h5', 'WDMS/WD_Binary_Pathways_X.h5', 'WDMS/RebassaMansergas2012.h5', 'WDMS/WD_Binary_Pathways_VI.h5', 'WDMS/Zorotovic2010.h5', 'WDMS/Escorza2019.h5', 'WDMS/Jorissen2019.h5', 'WDMS/Shahaf2024.h5']


In [37]:
triplet_cols = ["RA", "Dec", "Period", "Eccentricity", "M1", "M1_sin3i", "M2", "M2_sin3i", "q", "Mass Function"]

dfs = []

for n, table_dir in enumerate([base_path + '/' + x for x in list_of_tables]):
    print(f"Processing table {n+1}/{len(list_of_tables)}: {table_dir}")

    new_df = pd.DataFrame()

    try: 
        # Open all the hdf5 files
        with h5py.File(table_dir, "r") as f:

            # Load and parse metadata
            metadata_json = f["metadata_json"][()].decode("utf-8")
            metadata_df = pd.read_json(StringIO(metadata_json), orient="records")

            for col in triplet_cols:
                # Load triplet arrays
                loerr = f[col][:, 0]
                vals = f[col][:, 1]
                uperr = f[col][:, 2]
                new_df[col] = vals
                new_df[col + '_uperr'] = loerr
                new_df[col + '_loerr'] = uperr

        new_df = pd.concat([metadata_df, new_df], axis=1)
        new_df['table'] = [list_of_tables[n] for x in range(len(new_df))]
        dfs.append(new_df)
        
    except Exception as e:
        print(f"Error processing {table_dir}: {e}")
        continue
    
    df_final = pd.concat(dfs)

Processing table 1/17: ../result_tables/Be_sdOB_table.h5
Processing table 2/17: ../result_tables/ns_table.h5
Processing table 3/17: ../result_tables/WRs_SMC.h5
Processing table 4/17: ../result_tables/example_obs_df_full.h5
Processing table 5/17: ../result_tables/algols.h5
Processing table 6/17: ../result_tables/bh_table.h5
Processing table 7/17: ../result_tables/stripped_star_table.h5
Processing table 8/17: ../result_tables/example_obs_df.h5
Error processing ../result_tables/example_obs_df.h5: "Unable to synchronously open object (object 'metadata_json' doesn't exist)"
Processing table 9/17: ../result_tables/BSS_data.h5
Error processing ../result_tables/BSS_data.h5: Index (1) out of range for (0-0)
Processing table 10/17: ../result_tables/contact.h5
Error processing ../result_tables/contact.h5: "Unable to synchronously open object (object 'metadata_json' doesn't exist)"
Processing table 11/17: ../result_tables/WDMS/WD_Binary_Pathways_X.h5
Processing table 12/17: ../result_tables/WDMS/R

In [39]:
# Debugging 

# Open all the hdf5 files
with h5py.File('../result_tables/contact.h5', "r") as f:
    print(f.keys() )


with h5py.File( "../result_tables/contact.h5", "r") as f:
    # Load triplet arrays
    eccentricity = f["Eccentricity"][:]
    mass1 = f["M1"][:]

    # Load and parse metadata
    metadata_json = f["metadata_json"][()].decode("utf-8")
    metadata_df = pd.read_json(StringIO(metadata_json), orient="records")

print(metadata_df.keys() )

print(metadata_df['System Name'])

print(eccentricity)

<KeysViewHDF5 ['Dec', 'Eccentricity', 'M1', 'M1_sin3i', 'M2', 'M2_sin3i', 'Period', 'RA', 'q']>


KeyError: "Unable to synchronously open object (object 'metadata_json' doesn't exist)"

# Now plot the data 