In [2]:
import numpy as np
import h5py
import json
import pandas as pd
import os

In [41]:
# Accumulators for the successfully generated waveforms and their matching
# config records, collected across all input files
all_waveforms = []
all_configs = []

for filename in ['waveforms_3s_0100_0300.h5', 'waveforms_3s_0250_0500.h5',
                 'waveforms_3s_0400_0800.h5', 'waveforms_3s_0700_1200.h5']:

    waveforms_path = os.path.join('../data', 'waveforms', filename)

    # Read in the actual waveforms, the config string (and parse from JSON),
    # and the indices of the failed waveforms
    with h5py.File(waveforms_path, 'r') as file:
        waveforms = np.array(file['waveforms'])

        # `Dataset.value` was deprecated in h5py 2.x and removed in h5py 3.0;
        # indexing with an empty tuple is the supported way to read a scalar
        # dataset. Depending on how the string was stored (and on the h5py
        # version), this yields either bytes or str, so decode if necessary.
        raw_config = file['config'][()]
        if isinstance(raw_config, bytes):
            raw_config = raw_config.decode('utf-8')
        config = json.loads(raw_config)['injections']

        # Cast to an integer dtype so that the values can be used as row
        # labels even if the dataset was stored as an (empty) float array
        failed_idx = np.array(file['failed']).astype(int)
        print(len(failed_idx))

    # Create a Pandas DataFrame with one row per entry of the 'injections'
    # list and one column per config key
    dataframe = pd.DataFrame(config)

    # Add a column holding the actual waveforms (one waveform per row)
    dataframe['waveform'] = list(waveforms)

    # Drop the rows with the failed waveforms, and reset the index
    dataframe = dataframe.drop(list(failed_idx)).reset_index(drop=True)

    # Resort columns to order them alphabetically
    dataframe = dataframe[sorted(dataframe.columns)]

    # Split the cleaned table back into the waveforms and the config records
    waveforms = list(dataframe['waveform'])
    config = dataframe.loc[:, dataframe.columns != 'waveform']

    config_json = config.to_dict(orient='records')

    all_waveforms.extend(waveforms)
    all_configs.extend(config_json)

2
5
4
6


In [42]:
new_waveforms_path = os.path.join('../data', 'waveforms', 'waveforms_3s_0100_1200.h5')

# Write the merged waveforms, the merged config (serialized as a JSON string
# under the 'injections' key), and an empty list of failed indices to a new
# HDF file. Mode 'w' overwrites any existing file at this path.
with h5py.File(new_waveforms_path, 'w') as file:
    file['waveforms'] = np.array(all_waveforms)

    # `np.string_` was removed in NumPy 2.0; `np.bytes_` is the same type
    # and is available in older NumPy versions as well. Encode explicitly
    # so the stored value does not depend on an implicit conversion.
    config_string = json.dumps({'injections': all_configs})
    file['config'] = np.bytes_(config_string.encode('utf-8'))

    # No failed waveforms are recorded for the merged file. Use an explicit
    # integer dtype, since a bare `np.array([])` would be stored as float64
    # even though the values are meant to be indices.
    file['failed'] = np.array([], dtype=int)