# Loading GWOSC data

In [67]:
import numpy as np 
import h5py

The datasets used in this notebook can be downloaded from Zenodo: https://zenodo.org/records/6513631

In [68]:
# Path to the HDF5 file to read -- edit this to point at your own copy:
filename = "/Users/Woute029/Documents/gw-datasets/GW150914/IGWN-GWTC2p1-v2-GW150914_095045_PEDataRelease_mixed_nocosmo.h5"

# Names of every parameter that was sampled over, in the order in which they
# are fetched from the posterior further down (56 entries in total).
posterior_keys_to_fetch = [
    # Mass and spin parameters
    'chirp_mass', 'mass_ratio',
    'a_1', 'a_2',
    'tilt_1', 'tilt_2',
    'phi_12', 'phi_jl',
    # Distance, time, orientation and sky-position parameters
    'luminosity_distance', 'geocent_time',
    'theta_jn', 'psi', 'phase',
    'azimuth', 'zenith',
    # The recalib_* entries: for H1 and then L1, the ten amplitude entries
    # (indices 0-9) followed by the ten phase entries (indices 0-9).
    *(
        f"recalib_{detector}_{quantity}_{index}"
        for detector in ("H1", "L1")
        for quantity in ("amplitude", "phase")
        for index in range(10)
    ),
    'time_jitter',
]

Now load the HDF5 file and fetch the posterior samples for the keys listed above.

In [69]:
# Name of the top-level group (one per waveform model) to read from the file.
# Change this single value to load the results of a different analysis.
waveform_key = "C01:IMRPhenomXPHM"

# Open the file explicitly read-only: this cell only reads, and older h5py
# releases used a writable default mode when none was given.
with h5py.File(filename, "r") as f:
    # There are different posteriors available, since we use different waveform models
    all_waveforms = list(f.keys())
    print(all_waveforms)

    # See what is in this file for the chosen waveform model
    data = f[waveform_key]
    print(data.keys())

    # Look at the log evidence and its error; `[()]` reads the stored value into memory
    sampler_meta = data["meta_data"]["sampler"]
    ln_evidence = sampler_meta["ln_evidence"][()]
    ln_evidence_error = sampler_meta["ln_evidence_error"][()]

    # If you want to look at the specific priors used.
    # NOTE: these are h5py handles into the open file, not in-memory copies;
    # read whatever you need from them inside this `with` block, because the
    # file is closed as soon as the block exits.
    priors_samples = data["priors"]["samples"]
    priors_calibration = data["priors"]["calibration"]

    # Get the full posterior like this: this is an HDF5 dataset (same caveat as above)
    posterior = data["posterior_samples"]
    # Get the dtype of the dataset
    dtype = posterior.dtype
    # List all field names (keys) of the compound data type
    field_names = dtype.names

    ### To show all the keys available
    # for field_name in field_names:
    #     print(field_name)

    log_likelihood = posterior["log_likelihood"][()]
    log_prior = posterior["log_prior"][()]

    # Now all the parameters that were sampled over, stacked into one array
    # with one row per key in `posterior_keys_to_fetch`:
    posterior_samples = np.array([posterior[key][()] for key in posterior_keys_to_fetch])
    print(np.shape(posterior_samples))

['C01:IMRPhenomXPHM', 'C01:Mixed', 'C01:SEOBNRv4PHM', 'history', 'version']
<KeysViewHDF5 ['approximant', 'calibration_envelope', 'config_file', 'description', 'injection_data', 'meta_data', 'posterior_samples', 'priors', 'psds', 'skymap', 'version']>
(56, 199766)
