In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask the inline backend for the 'retina' figure format.
%config InlineBackend.figure_format='retina'

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
import os

from matplotlib import mlab
from pprint import pprint
from tabulate import tabulate
from IPython.display import HTML, display
from scipy.interpolate import interp1d

## Define functions we will need later on

In [3]:
def apply_psd(signal_t, psd, sampling_rate=4096, apply_butter=False):
    """
    Take a signal in the time domain and a precalculated Power Spectral
    Density (PSD), and rescale the signal's spectrum by that PSD.

    The real FFT of the signal is divided, frequency bin by frequency bin,
    by sqrt(psd(f) / (2 * delta_t)), where delta_t = 1 / sampling_rate, and
    the result is transformed back into the time domain. Dividing by the
    square root of the PSD is the operation usually called 'whitening'.

    Args:
        signal_t: A signal in time domain (i.e. a 1D numpy array)
        psd: A Power Spectral Density, e.g. calculated from the detector noise.
            Should be a function: psd(frequency). It is evaluated on every
            frequency returned by np.fft.rfftfreq, i.e. from 0 up to
            sampling_rate / 2, so it must be defined on that whole range.
        sampling_rate: Sampling rate of signal_t
        apply_butter: Whether or not to additionally apply a 4th-order
            Butterworth band-pass (42 to 800 in the units of sampling_rate)
            to the result. The upper band edge must lie below
            sampling_rate / 2, otherwise scipy.signal.butter rejects it.

    Returns: color_signal_t, the rescaled signal in the time domain. It has
        the same length as signal_t.
    """

    # First set some parameters for computing power spectra
    signal_size = len(signal_t)
    delta_t = 1.0 / sampling_rate

    # Go into Fourier (frequency) space: signal_t -> signal_f
    frequencies = np.fft.rfftfreq(signal_size, delta_t)
    signal_f = np.fft.rfft(signal_t)

    # Divide by the square root of the given PSD (with the normalization
    # used throughout this notebook). This is the 'whitening' step.
    color_signal_f = signal_f / (np.sqrt(psd(frequencies) / delta_t / 2))

    # Go back into time space: color_signal_f -> color_signal_t
    # (passing n keeps the original length for odd-sized inputs as well)
    color_signal_t = np.fft.irfft(color_signal_f, n=signal_size)

    # In case we want to use a Butterworth-filter, here's how to do it:
    if apply_butter:

        # Imported here because the notebook's import cell does not provide
        # these two names; only this optional branch needs them.
        from scipy.signal import butter, filtfilt

        # Define cut-off frequencies for the filter
        f_low = 42
        f_high = 800

        # butter() expects the band edges as fractions of the Nyquist
        # frequency, which depends on the actual sampling rate.
        nyquist = sampling_rate / 2.0

        # Calculate Butterworth-filter and normalization
        numerator, denominator = butter(4, [f_low / nyquist, f_high / nyquist],
                                        btype="bandpass")
        normalization = np.sqrt((f_high - f_low) / nyquist)

        # Apply filter (forwards and backwards via filtfilt) and normalize
        color_signal_t = filtfilt(numerator, denominator, color_signal_t)
        color_signal_t = color_signal_t / normalization

    return color_signal_t


# -----------------------------------------------------------------------------


def get_psd(real_strain, sampling_rate=4096):
    """
    Estimate the Power Spectral Density (PSD) of a detector recording and
    return it as a callable.

    Args:
        real_strain: The detector recording to be used.
        sampling_rate: The sampling rate (in Hz) of the recording

    Returns:
        psd: A linear interpolator over the estimated spectrum, so that
            psd(frequency) can be evaluated at arbitrary frequencies within
            the range of frequencies returned by mlab.psd.
    """

    # Length of each FFT segment: twice the sampling rate, i.e. two seconds
    # worth of samples when sampling_rate is given in Hz.
    samples_per_segment = sampling_rate * 2

    # Let matplotlib.mlab estimate the spectrum of the recording
    spectrum_values, spectrum_frequencies = mlab.psd(
        real_strain, Fs=sampling_rate, NFFT=samples_per_segment
    )

    # Wrap the sampled spectrum in a linear interpolator and hand it back
    return interp1d(spectrum_frequencies, spectrum_values)

## Read in the strain files for all events

In [4]:
# Path to the directory where all data is stored
data_path = '../data'

# File name template per detector; '{}' is filled with the event name
strain_file_templates = (('H1', '{}_H1_STRAIN_4096.h5'),
                         ('L1', '{}_L1_STRAIN_4096.h5'))

# strains[event][detector] holds the 'strain/Strain' dataset as a numpy array
strains = dict()

for event in ['GW150914', 'GW151226', 'GW170104']:

    strains[event] = dict()

    for detector, template in strain_file_templates:
        strain_path = os.path.join(data_path, 'strain', template.format(event))
        # Copy the dataset into memory before the HDF5 file is closed again
        with h5py.File(strain_path, 'r') as file:
            strains[event][detector] = np.array(file['strain/Strain'])

In [5]:
# psds[event][detector] is the interpolated PSD returned by get_psd for the
# strain recorded by that detector around that event
psds = dict()

for event in ['GW150914', 'GW151226', 'GW170104']:
    psds[event] = {detector: get_psd(strains[event][detector])
                   for detector in ('H1', 'L1')}

## Read in and whiten the waveform files

In [7]:
# waveforms[dist] is the list of waveforms read for that distance range,
# without the entries whose index is listed in the file's 'failed' dataset
waveforms = dict()
for dist in ['0100_0300', '0250_0500', '0400_0800', '0700_1200', '1000_1700']:
    waveform_path = os.path.join(data_path, 'waveforms',
                                 'waveforms_3s_{}_testing.h5'.format(dist))

    # Copy both datasets into memory before the HDF5 file is closed again
    with h5py.File(waveform_path, 'r') as file:
        waveforms_ = np.array(file['waveforms'])
        failed_ = np.array(file['failed'])

    # A set gives constant-time membership tests; testing `i in failed_`
    # against the numpy array would rescan the whole array for every waveform.
    failed_indices = set(np.ravel(failed_).tolist())

    waveforms[dist] = [waveform for index, waveform in enumerate(waveforms_)
                       if index not in failed_indices]

In [8]:
# white_waveforms[event][dist][detector] is the list of waveforms for that
# distance range after apply_psd with the PSD of that event and detector
white_waveforms = dict()
for event in ['GW150914', 'GW151226', 'GW170104']:
    white_waveforms[event] = dict()
    for dist in ['0100_0300', '0250_0500', '0400_0800', '0700_1200', '1000_1700']:
        white_waveforms[event][dist] = {
            'H1': [apply_psd(waveform, psds[event]['H1'])
                   for waveform in waveforms[dist]],
            'L1': [apply_psd(waveform, psds[event]['L1'])
                   for waveform in waveforms[dist]],
        }

## Calculate the SNRs

In [9]:
# Per-event, per-detector values by which the peak amplitudes are divided
# when the SNRs are calculated.
# NOTE(review): presumably the standard deviations of the whitened detector
# strain -- this notebook does not show how they were obtained; confirm and
# record their provenance.
stds = dict(
    GW150914=dict(H1=0.98780110486117378, L1=0.98664230166344458),
    GW151226=dict(H1=0.99035140654295228, L1=0.98776116540338088),
    GW170104=dict(H1=0.99212202082054124, L1=0.98727435617370363),
)

In [14]:
# snrs[event][dist][detector] is a list with one value per waveform: the
# largest absolute sample of the processed waveform divided by the matching
# entry of stds
snrs = dict()
for event in ['GW150914', 'GW151226', 'GW170104']:
    snrs[event] = dict()
    for dist in ['0100_0300', '0250_0500', '0400_0800', '0700_1200', '1000_1700']:

        snrs[event][dist] = {'H1': [], 'L1': []}

        for detector in ('H1', 'L1'):
            reference_std = stds[event][detector]
            detector_waveforms = white_waveforms[event][dist][detector]

            for i, white_waveform in enumerate(detector_waveforms):
                peak = np.max(np.abs(white_waveform))
                snrs[event][dist][detector].append(peak / reference_std)

                # Report waveforms that are identically zero
                if peak == 0:
                    print(event, dist, i)

In [17]:
# Column titles shared by the tables of all events
table_headers = ['Distances', 'Median H1', 'Minimum H1', 'Maximum H1',
                 'Median L1', 'Minimum L1', 'Maximum L1']

# One HTML table per event: a row per distance range holding the NaN-ignoring
# median, minimum and maximum SNR, first for H1 and then for L1.
# (For a LaTeX table, print the tabulate(...) result with tablefmt='latex'.)
for event in ['GW150914', 'GW151226', 'GW170104']:
    display(HTML('<h3>SNRs for {}</h3>'.format(event)))
    rows = []
    for dist in ['0100_0300', '0250_0500', '0400_0800', '0700_1200', '1000_1700']:
        row = [dist]
        for detector in ('H1', 'L1'):
            detector_snrs = snrs[event][dist][detector]
            for statistic in (np.nanmedian, np.nanmin, np.nanmax):
                row.append('{:.2f}'.format(statistic(detector_snrs)))
        rows.append(row)
    display(HTML(tabulate(rows, tablefmt='html', headers=table_headers)))

Distances,Median H1,Minimum H1,Maximum H1,Median L1,Minimum L1,Maximum L1
0100_0300,6.63,0.61,27.59,6.16,0.6,25.42
0250_0500,3.56,0.32,11.3,3.3,0.32,10.39
0400_0800,2.29,0.19,7.22,2.13,0.2,6.67
0700_1200,1.43,0.13,4.1,1.31,0.14,3.71
1000_1700,1.0,0.09,2.81,0.91,0.1,2.58


Distances,Median H1,Minimum H1,Maximum H1,Median L1,Minimum L1,Maximum L1
0100_0300,6.53,0.59,28.47,6.38,0.59,26.64
0250_0500,3.48,0.31,11.58,3.43,0.32,10.78
0400_0800,2.28,0.21,7.42,2.23,0.2,7.01
0700_1200,1.4,0.13,4.15,1.37,0.14,3.77
1000_1700,0.97,0.1,2.93,0.95,0.09,2.76


Distances,Median H1,Minimum H1,Maximum H1,Median L1,Minimum L1,Maximum L1
0100_0300,6.74,0.62,28.63,7.28,0.66,33.25
0250_0500,3.64,0.34,11.6,3.9,0.35,13.38
0400_0800,2.35,0.22,7.49,2.56,0.22,8.76
0700_1200,1.46,0.14,4.12,1.57,0.15,4.76
1000_1700,1.01,0.1,2.94,1.09,0.11,3.51
