In [1]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

In [2]:
# Selection cuts applied to each injection's "injections-pe" samples in the
# filtering cell. All comparisons there are strict (> / <).
m1_min = 20  # lower bound on the Source_Frame_m1 samples
m2_min = 3  # lower bound on the Source_Frame_m2 samples
m2_max = 50  # upper bound on the Source_Frame_m2 samples
p_cut = 0.5  # keep an injection only if more than this fraction of its samples pass every cut

In [3]:
# Source HDF5 file; the cells below read its "injections", "injections-pe" and
# "info" groups.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
input_filename = "/mnt/ceph/users/sroy1/GWTC4/Selection_Samples_With_Mock_PE.h5"
# Destination HDF5 file; it is opened in "w" mode below, so an existing file
# with this name is replaced.
output_filename = "Trimmed_Selection_File.h5"

In [4]:
# Load the injection table, then keep only the injections whose per-injection
# PE samples fall inside the (m1, m2) cuts often enough (fraction > p_cut).
with h5py.File(input_filename, 'r') as h5f:

    # One DataFrame column per dataset stored under "injections".
    injections_group = h5f["injections"]
    column_data = {}
    for col in injections_group.keys():
        column_data[col] = injections_group[col][...]
    injections = pd.DataFrame(column_data)

    pe_group = h5f["injections-pe"]
    selected_rows = []

    # NOTE(review): the scan starts at a hard-coded row index, so only the
    # trailing rows of the table are ever tested -- confirm this is intended
    # and not a leftover from a quick trial run.
    first_row = 1064750
    n_rows = len(injections)

    for i in tqdm(range(first_row, n_rows)):
        # Per-injection sample arrays, keyed by the injection's row index.
        m1 = np.array(pe_group[f"Source_Frame_m1{i}"])
        m2 = np.array(pe_group[f"Source_Frame_m2{i}"])

        m1_ok = m1 > m1_min
        m2_ok = (m2 > m2_min) & (m2 < m2_max)
        mask = m1_ok & m2_ok

        # Fraction of this injection's samples that satisfy every cut.
        frac = mask.sum() / len(m1)
        if frac > p_cut:
            selected_rows.append(i)

    # selected_rows holds positional indices, hence iloc; renumber from zero.
    injections = injections.iloc[selected_rows].reset_index(drop=True)

100%|██████████| 5/5 [00:00<00:00, 18.18it/s]


In [5]:
# Display the filtered injections table produced by the previous cell.
injections

Unnamed: 0,chi_eff,dluminosity_distance_dredshift,estimated_optimal_snr_H,estimated_optimal_snr_L,estimated_optimal_snr_V,estimated_optimal_snr_net,inclination,lnpdraw_inclination,lnpdraw_mass1_source_mass2_source_redshift_spin1x_spin1y_spin1z_spin2x_spin2y_spin2z,luminosity_distance,...,sampling_pdf_qchieff,semianalytic_observed_phase_maximized_snr_net,spin1x,spin1y,spin1z,spin2x,spin2y,spin2z,time_geocenter,weights
0,-0.106903,5180.19191,15.997637,11.243846,0.0,19.553733,1.453865,-0.699999,-19.639716,0.578994,...,4.356458e-07,0.0,0.122519,0.122423,-0.049459,0.283308,0.245032,-0.492017,1389453000.0,1.050774
1,0.898256,6530.136016,11.756732,12.756933,0.0,17.348201,2.529103,-1.246696,-17.857323,2.151536,...,5.600762e-05,0.0,0.08338,0.33248,0.93498,-0.441352,-0.014055,0.847995,1389453000.0,1.050774
2,0.130711,7729.407729,15.275697,16.357701,0.0,22.381271,0.406815,-1.62028,-14.86225,4.634355,...,0.0001163476,0.0,-0.15516,0.377392,0.378242,-0.207637,-0.034061,-0.242113,1389454000.0,1.050774


In [6]:
# Write the trimmed selection file: the "info" datasets are copied over from
# the input file and "injections" is rebuilt from the filtered DataFrame.
with h5py.File(input_filename, "r") as infile:
    # Resolved before the output file is opened, so a missing "info" group
    # fails without touching any existing output.
    info_group = infile["info"]

    with h5py.File(output_filename, "w") as outfile:
        # NOTE(review): only dataset values are copied here; HDF5 attributes on
        # "info" or its members are not carried over -- confirm none are needed.
        new_info_group = outfile.create_group("info")
        for name in info_group.keys():
            dataset = info_group[name]
            new_info_group.create_dataset(name, data=dataset[()])

        # One dataset per DataFrame column, named after the column.
        df_group = outfile.create_group("injections")
        for col, series in injections.items():
            df_group.create_dataset(col, data=series.values)