In [1]:
import pandas as pd
import h5py
import numpy as np

merged_data = pd.read_csv('/home/arutkeerthi/Downloads/Glitchveto/GVS_files/merged_data.csv')
hdf5_files = pd.read_csv('/home/arutkeerthi/Downloads/Glitchveto/GVS_files/GVS_hdf5files_updated.csv')

# Initialize a counter for naming the output files
counter = 1

# Loop through every row of merged_data
for index, row in merged_data.iterrows():
    file_name = row['file']
    start = row['start']
    end = row['end']

    # Check if file_name contains []
    if '[' in file_name and ']' in file_name:
        contents = file_name[file_name.find('[') + 1:file_name.find(']')]  # Extract content between []
        numbers = [int(num.strip()) for num in contents.split(',')]  # Split by comma and convert to integers

        # Determine the number of elements inside []
        if len(numbers) == 1:
            number = numbers[0]
            # Find the row in hdf5_files where number matches
            matching_row = hdf5_files.loc[hdf5_files['number'] == number]
            path = matching_row['path'].iloc[0]
            with h5py.File(path, 'r') as f:
                strain_data = f['strain/Strain']
                Xstart = f['strain/Strain'].attrs['Xstart']

                # Calculate the time indices in terms of dataset indices
                start_index = int((start - Xstart) / f['strain/Strain'].attrs['Xspacing'])
                end_index = int((end - Xstart) / f['strain/Strain'].attrs['Xspacing'])

                # Slice the data to extract only the relevant portion
                relevant_data = strain_data[start_index:end_index]

                # Define the output HDF5 file name
                output_file = f'/home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch{counter}.hdf5'

                # Create a new HDF5 file to save the relevant data and attributes
                with h5py.File(output_file, 'w') as new_f:
                    dset = new_f.create_dataset('strain/Strain', data=relevant_data)

                    # Copy original attributes and add new ones
                    for attr_name, attr_value in f['strain/Strain'].attrs.items():
                        dset.attrs[attr_name] = attr_value
                    dset.attrs['Xstart'] = start
                    dset.attrs['end'] = end
                    dset.attrs['Npoints']=int((end-start)/f['strain/Strain'].attrs['Xspacing']) 
                    dset.attrs['t_0'] = row['t_0']
                    dset.attrs['t_1'] = row['t_1']
                    dset.attrs['snr'] = row['snr']
                    dset.attrs['toa'] = row['toa']

                    # Optionally, copy other groups/attributes if needed
                    # Example: new_f.copy(f['meta'], 'meta')

                print(f"Saved relevant data to {output_file}")

                # Increment the counter for the next file
                counter += 1
                
            
        
    elif len(numbers) == 2:
        number1=numbers[0]
        number2=numbers[1]
        matching_row1 = hdf5_files.loc[hdf5_files['number'] == number1]
        matching_row2=hdf5_files.loc[hdf5_files['number']==number2]
        path = matching_row1['path'].iloc[0]
        with h5py.File(path, 'r') as f:
            strain_data = f['strain/Strain']
            Xstart = f['strain/Strain'].attrs['Xstart']
            start_index=int((start - Xstart) / f['strain/Strain'].attrs['Xspacing'])
            end_index=int((Xstart))
            relevantdata_1=strain_data[start_index:end_index]
        
        path=matching_row2['path'].iloc[0]
        with h5py.File(path, 'r') as f:
            strain_data = f['strain/Strain']
            Xstart = f['strain/Strain'].attrs['Xstart']
            start_index=int((Xstart))
            end_index = int((end - Xstart) / f['strain/Strain'].attrs['Xspacing'])
            relevantdata_2=strain_data[start_index:end_index]
        
        relevant_data=np.concatenate((relevantdata_1,relevant_data2))
        # Define the output HDF5 file name
        output_file = f'/home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch{counter}.hdf5'

        # Create a new HDF5 file to save the relevant data and attributes
        with h5py.File(output_file, 'w') as new_f:
            dset = new_f.create_dataset('strain/Strain', data=relevant_data)

            # Copy original attributes and add new ones
            for attr_name, attr_value in f['strain/Strain'].attrs.items():
                dset.attrs[attr_name] = attr_value
                dset.attrs['Xstart'] = start
                dset.attrs['end'] = end
                dset.attrs['Npoints']=int((end-start)/f['strain/Strain'].attrs['Xspacing']) 
                dset.attrs['t_0'] = row['t_0']
                dset.attrs['t_1'] = row['t_1']
                dset.attrs['snr'] = row['snr']
                dset.attrs['toa'] = row['toa']

                # Optionally, copy other groups/attributes if needed
                # Example: new_f.copy(f['meta'], 'meta')

                print(f"Saved relevant data to {output_file}")
        
        # Increment the counter for the next file
        counter += 1

        
        # Do something else if it's two numbers
        print(f"Processing for two numbers: {numbers[0]} and {numbers[1]}")
        # Replace with your specific processing logic

Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch1.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch2.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch3.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch4.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch5.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch6.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch7.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch8.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch9.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch10.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch11.hdf5
Saved relevant data

In [2]:
import pandas as pd
import h5py
import numpy as np

merged_data = pd.read_csv('/home/arutkeerthi/Downloads/Glitchveto/GVS_files/merged_data.csv')
hdf5_files = pd.read_csv('/home/arutkeerthi/Downloads/Glitchveto/GVS_files/GVS_hdf5files_updated.csv')

# Initialize a counter for naming the output files
counter = 1

# Loop through every row of merged_data
for index, row in merged_data.iterrows():
    file_name = row['file']
    start = row['start']
    end = row['end']

    # Check if file_name contains []
    if '[' in file_name and ']' in file_name:
        contents = file_name[file_name.find('[') + 1:file_name.find(']')]  # Extract content between []
        numbers = [int(num.strip()) for num in contents.split(',')]  # Split by comma and convert to integers

        # Determine the number of elements inside []
        if len(numbers) == 1:
            number = numbers[0]
            # Find the row in hdf5_files where number matches
            matching_row = hdf5_files.loc[hdf5_files['number'] == number]
            if not matching_row.empty:
                path = matching_row['path'].iloc[0]
                with h5py.File(path, 'r') as f:
                    strain_data = f['strain/Strain']
                    Xstart = f['strain/Strain'].attrs['Xstart']

                    # Calculate the time indices in terms of dataset indices
                    start_index = int((start - Xstart) / f['strain/Strain'].attrs['Xspacing'])
                    end_index = int((end - Xstart) / f['strain/Strain'].attrs['Xspacing'])

                    # Slice the data to extract only the relevant portion
                    relevant_data = strain_data[start_index:end_index]

                    # Define the output HDF5 file name
                    output_file = f'/home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch{counter}.hdf5'

                    # Create a new HDF5 file to save the relevant data and attributes
                    with h5py.File(output_file, 'w') as new_f:
                        dset = new_f.create_dataset('strain/Strain', data=relevant_data)

                        # Copy original attributes and add new ones
                        for attr_name, attr_value in f['strain/Strain'].attrs.items():
                            dset.attrs[attr_name] = attr_value
                        dset.attrs['Xstart'] = start
                        dset.attrs['end'] = end
                        dset.attrs['Npoints'] = int((end - start) / f['strain/Strain'].attrs['Xspacing']) 
                        dset.attrs['t_0'] = row['t_0']
                        dset.attrs['t_1'] = row['t_1']
                        dset.attrs['snr'] = row['snr']
                        dset.attrs['toa'] = row['toa']

                    print(f"Saved relevant data to {output_file}")

                    # Increment the counter for the next file
                    counter += 1

        elif len(numbers) == 2:
            print(f"Processing for two numbers: {numbers[0]} and {numbers[1]}")
            number1 = numbers[0]
            number2 = numbers[1]
            matching_row1 = hdf5_files.loc[hdf5_files['number'] == number1]
            matching_row2 = hdf5_files.loc[hdf5_files['number'] == number2]

            if not matching_row1.empty and not matching_row2.empty:
                path1 = matching_row1['path'].iloc[0]
                path2 = matching_row2['path'].iloc[0]

                # Store the attributes and data from the first file
                with h5py.File(path1, 'r') as f1:
                    strain_data1 = f1['strain/Strain']
                    Xstart1 = f1['strain/Strain'].attrs['Xstart']
                    Npoints1 = f1['strain/Strain'].attrs['Npoints']
                    Xspacing = f1['strain/Strain'].attrs['Xspacing']

                    # Calculate the indices
                    start_index1 = int((start - Xstart1) / Xspacing)
                    end_index1 = Npoints1

                    # Extract the relevant data
                    data1 = strain_data1[start_index1:end_index1]

                    # Store attributes from the first file
                    attrs1 = {attr_name: attr_value for attr_name, attr_value in f1['strain/Strain'].attrs.items()}

                # Store the attributes and data from the second file
                with h5py.File(path2, 'r') as f2:
                    strain_data2 = f2['strain/Strain']
                    Xstart2 = f2['strain/Strain'].attrs['Xstart']

                    # Calculate the indices
                    start_index2 = 0
                    end_index2 = int((end - Xstart2) / Xspacing)

                    # Extract the relevant data
                    data2 = strain_data2[start_index2:end_index2]

                    # Store attributes from the second file
                    attrs2 = {attr_name: attr_value for attr_name, attr_value in f2['strain/Strain'].attrs.items()}

                # Combine the data from both files
                relevant_data = np.concatenate((data1, data2))

                # Define the output HDF5 file name
                output_file = f'/home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch{counter}.hdf5'

                # Create a new HDF5 file to save the combined data and attributes
                with h5py.File(output_file, 'w') as new_f:
                    dset = new_f.create_dataset('strain/Strain', data=relevant_data)

                    # Copy original attributes and add new ones
                    for attr_name, attr_value in attrs1.items():
                        dset.attrs[attr_name] = attr_value
                    dset.attrs['Xstart'] = start
                    dset.attrs['end'] = end
                    dset.attrs['Npoints'] = int((end - start) / Xspacing) 
                    dset.attrs['t_0'] = row['t_0']
                    dset.attrs['t_1'] = row['t_1']
                    dset.attrs['snr'] = row['snr']
                    dset.attrs['toa'] = row['toa']

                print(f"Saved combined data to {output_file}")

                # Increment the counter for the next file
                counter += 1


Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch1.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch2.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch3.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch4.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch5.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch6.hdf5
Processing for two numbers: 1 and 2
Saved combined data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch7.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch8.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch9.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_data/Glitch10.hdf5
Saved relevant data to /home/arutkeerthi/Downloads/Glitchveto/Glitches_da