In [None]:
# Source notebook: https://github.com/PraveenThakkannavar/G2Net-Gravitational-Wave-Detection/blob/main/SIMPLE_CNN.ipynb

In [None]:
train_label_dataset = pd.read_csv("../input/g2net-gravitational-wave-detection/training_labels.csv")
train_label_dataset.head()

In [None]:

train_label_dataset.shape

In [None]:
sns.countplot(data=train_label_dataset, x="target")

In [None]:
train_label_dataset['target'].value_counts()

In [None]:
train_label_dataset.isnull().sum() # no null

In [None]:
train_path = glob('../input/g2net-gravitational-wave-detection/train/*/*/*/*')

In [None]:
len(train_path)

In [None]:
explore_sample_3 = np.load(train_path[3])
explore_sample_3

In [None]:
explore_sample_3.shape

In [None]:
print(len(explore_sample_3[0]), len(explore_sample_3[1]), len(explore_sample_3[2]))

Plot the raw signals

In [None]:
# Line colour and display name for each row of a sample array; both tuples are
# indexed with the same row index `j` in the loop below.
colors = ("red", "green", "blue")
signal_names = ("LIGO Hanford", "LIGO Livingston", "Virgo")

# NOTE(review): `samples` and `targets` are not assigned anywhere in the visible
# part of this notebook, so this cell fails on a fresh kernel. Presumably they
# are a handful of loaded sample arrays and their labels -- confirm and define
# them before this cell.
for x, i in tqdm(zip(samples, targets)):
    # one figure per sample, titled with its label
    figure = plt.figure(figsize=(16, 7))
    figure.suptitle(f'Raw wave (target={i})', fontsize=20)
    # each sample is indexed by row 0..2, one row per interferometer
    for j in range(3):
        axes = figure.add_subplot(3, 1, j+1)
        librosa.display.waveshow(x[j], sr=2048, ax=axes, color=colors[j])
        axes.set_title(signal_names[j], fontsize=12)
        axes.set_xlabel('Time[sec]')
    plt.tight_layout()
    plt.show()

In [None]:
pd.set_option('display.max_colwidth',None)

In [None]:
# Build an (id, path) table from the globbed file paths and attach the labels.
ids = []
for files in train_path:
    # the sample id is the file name without its directory and ".npy" suffix
    ids.append(files[files.rindex('/')+1:].replace('.npy',''))
df = pd.DataFrame({"id":ids,"path":train_path})
df = pd.merge(df, train_label_dataset, on='id')

# `load_random_file` and the fixed-id Q-transform cell read this table under
# the name `train_data`, which was never assigned; expose it under that name.
train_data = df

In [None]:
df.head()

In [None]:
def load_random_file(signal = None):
    """Pick one training sample at random and load its array from disk.

    Reads the module-level `train_data` table (columns `id`, `target`, `path`).

    Args:
        signal: bool, optional
            True restricts the draw to rows whose target equals 1 (noise plus
            simulated signal), False to rows whose target equals 0 (pure
            noise). When left as None, one of the two is chosen at random.
    Returns:
        file_id: str
            id of the drawn file
        target: int
            target value of the drawn file (0 or 1)
        data: numpy.ndarray
            array loaded from the file's path; per the dataset description it
            has shape (3, 4096): 3 detectors, 2 seconds sampled at 2048 Hz
    """
    wanted = random.choice([True, False]) if signal is None else signal

    # rows of the requested class (True/False compare equal to 1/0)
    matching_rows = train_data[train_data["target"] == wanted]

    # draw one index label among the matching rows
    chosen = random.choice(matching_rows.index)

    sample_path = train_data.at[chosen, 'path']
    return train_data.at[chosen, 'id'], train_data.at[chosen, 'target'], np.load(sample_path)

In [None]:
file_id, target, data = load_random_file()
# The y-limits below are symmetric (-ylim, ylim), so they must be derived from
# the largest magnitude; using the largest signed value would clip any
# negative excursion bigger than the positive peak.
ylim = 1.1*np.max(np.abs(data))

plt.style.use('ggplot')

fig, axs = plt.subplots(ncols=1, nrows=3, figsize=(10, 5))

# one panel per detector row, all sharing the same symmetric y-range
for i in range(3):
    ax = axs[i]
    ax.plot(data[i])
    ax.margins(0)
    ax.set_title(f"Detector {i+1}", loc='center')
    ax.set_ylabel("Amplitude")
    ax.set_ylim([-ylim, ylim])

# only the bottom panel keeps its x axis
axs[0].xaxis.set_visible(False)
axs[1].xaxis.set_visible(False)

axs[2].set_xlabel("Time stamp")
fig.suptitle(f"Raw data visualization. ID: {file_id}. Target: {target}.")
plt.show()

Plotting the data in the frequency domain

In [None]:
fs = 2048      # sampling rate
NFFT = 4*fs    # FFT segment length (in samples) passed to mlab.psd
f_min = 20.    # lower x-limit of the ASD plot
f_max = fs/2   # the Nyquist frequency; upper x-limit of the ASD plot

In [None]:
_, target, data = load_random_file(True)

strain1, strain2, strain3 = data[0], data[1], data[2]

# power spectral density of each detector's strain
Pxx_1, freqs = mlab.psd(strain1, Fs = fs, NFFT = NFFT)
Pxx_2, freqs = mlab.psd(strain2, Fs = fs, NFFT = NFFT)
Pxx_3, freqs = mlab.psd(strain3, Fs = fs, NFFT = NFFT)

# interpolants of the PSDs over frequency (not used further in this cell)
psd_1 = interp1d(freqs, Pxx_1)
psd_2 = interp1d(freqs, Pxx_2)
psd_3 = interp1d(freqs, Pxx_3)

# the amplitude spectral density is the square root of the PSD
fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(10, 5))
ax.loglog(freqs, np.sqrt(Pxx_1),"g",label="Detector 1")
ax.loglog(freqs, np.sqrt(Pxx_2),"r",label="Detector 2")
ax.loglog(freqs, np.sqrt(Pxx_3),"b",label="Detector 3")

ax.set_xlim([f_min, f_max])
# raw string: "\s" in a normal string literal is an invalid escape sequence
ax.set_ylabel(r"ASD (strain/$\sqrt{Hz}$)")
ax.set_xlabel("Frequency (Hz)")
ax.legend()

plt.show()

Constant Q-transform

In [None]:
!pip -q install pycbc
import pycbc

In [None]:
def generate_qtransform(data, fs):
    """Compute a constant Q-transform for every detector row of a sample.

    Args:
        data: numpy.ndarray
            numpy array in the shape (3, 4096), where 3 is the number
            of detectors, 4096 is number of data points (each time series
            instance spans over 2 seconds and is sampled at 2048 Hz)
        fs: int
            sampling frequency
    Returns:
        times: numpy.ndarray
            array of time bins (as returned for the last row processed)
        freqs: numpy.ndarray
            array of frequency bins (as returned for the last row processed)
        qplanes: list
            one element per row of `data`; each element is a 2-d array of
            the power in each time-frequency bin
    Raises:
        ValueError: if `data` contains no rows, since there would be no
            time/frequency bins to return.
    """
    if len(data) == 0:
        raise ValueError("data must contain at least one detector time series")

    qplanes = []
    for strain in data:

        # converting data into PyCBC Time Series format
        ts = pycbc.types.TimeSeries(strain, epoch=0, delta_t=1.0/fs)

        # whitening the data; the two arguments are the PSD segment duration
        # and the maximum whitening-filter duration, both in seconds
        ts = ts.whiten(0.125, 0.125)

        # calculating CQT values: 0.002 s time step, 100 log-spaced frequency
        # steps between 20 and 512 Hz, Q fixed at 10
        times, freqs, qplane = ts.qtransform(.002, logfsteps=100, qrange=(10, 10), frange=(20, 512))

        qplanes.append(qplane)

    return times, freqs, qplanes

In [None]:
def plot_qtransform(file_id, target, data):
    """Draw the constant Q-transform of each detector in a 3-row figure.

    The transforms come from `generate_qtransform`, called with the
    module-level sampling rate `fs`.

    Args:
        file_id: str
            id of the file, shown in the figure title
        target: int
            target value of the file, shown in the figure title
        data: numpy.ndarray
            sample array handed to `generate_qtransform`; documented there
            as shape (3, 4096)
    """
    times, freqs, qplanes = generate_qtransform(data, fs=fs)

    fig, axs = plt.subplots(ncols=1, nrows=3, figsize=(12, 8))

    # one panel per detector, numbered from 1 in the titles
    for detector_no, panel in enumerate(axs, start=1):
        panel.pcolormesh(times, freqs, qplanes[detector_no - 1], shading = 'auto')
        panel.set_yscale('log')
        panel.set_ylabel('Frequency (Hz)')
        panel.set_xlabel('Time (s)')
        panel.set_title(f"Detector {detector_no}", loc='left')
        panel.grid(False)

    # only the bottom panel keeps its x axis
    for upper_panel in (axs[0], axs[1]):
        upper_panel.xaxis.set_visible(False)

    fig.suptitle(f"Q transform visualization. ID: {file_id}. Target: {target}.", fontsize=16)
    plt.show()

In [None]:
file_id, target, data = load_random_file()
plot_qtransform(file_id, target, data)

In [None]:
# Plot the Q-transform of one specific training file, looked up by id.
file_id = '7945e449f3'
# NOTE(review): the label is hard-coded rather than read from the table --
# confirm it matches this id.
target = 1
# path of the first row of `train_data` whose id matches
data  = np.load(train_data[train_data['id']==file_id]['path'].values[0])

plot_qtransform(file_id, target, data)

https://github.com/JonasHeinzmann-AI/G2Net-Gravitational-Wave-Detection/blob/main/g2net-eda-and-modeling.ipynb

In [None]:
def convert_image_id_2_path(image_id: str, is_train: bool = True) -> str:
    """Return the path of the .npy file belonging to a sample id.

    The file sits under the "train" or "test" folder (chosen by `is_train`),
    inside three nested directories named after the first, second and third
    character of the id.
    """
    split = "train" if is_train else "test"
    return (
        f"../input/g2net-gravitational-wave-detection/{split}"
        f"/{image_id[0]}/{image_id[1]}/{image_id[2]}/{image_id}.npy"
    )

In [None]:
train_df = pd.read_csv("../input/g2net-gravitational-wave-detection/training_labels.csv")
train_df

In [None]:
sns.countplot(data=train_df, x="target")

In [None]:
def visualize_sample(
    _id, 
    target, 
    colors=("black", "red", "green"), 
    signal_names=("LIGO Hanford", "LIGO Livingston", "Virgo")
):
    """Plot the three rows of a training sample, separately and overlaid.

    The sample is loaded from the path given by `convert_image_id_2_path(_id)`.
    The figure has four stacked panels: panels 1-3 each show one row with its
    own legend entry, and panel 4 shows all three rows on top of each other.

    Args:
        _id: sample id, used to locate the file and shown in the title
        target: label shown in the title
        colors: line colour for each of the three rows
        signal_names: legend label for each of the three rows
    """
    x = np.load(convert_image_id_2_path(_id))

    fig = plt.figure(figsize=(16, 7))
    overlay = fig.add_subplot(4, 1, 4)

    for i in range(3):
        # individual panel for this row
        panel = fig.add_subplot(4, 1, i + 1)
        panel.plot(x[i], color=colors[i])
        panel.legend([signal_names[i]], fontsize=12, loc="lower right")

        # same row again on the shared bottom panel
        overlay.plot(x[i], color=colors[i])

    overlay.legend(signal_names, fontsize=12, loc="lower right")

    fig.suptitle(f"id: {_id} target: {target}", fontsize=16)
    plt.show()

In [None]:
# Visualise three randomly chosen training samples.
# The values drawn are index *labels*, so rows are looked up by label (.loc);
# a positional lookup would pick the wrong rows whenever the index is not 0..n-1.
for i in random.sample(train_df.index.tolist(), 3):
    _id = train_df.loc[i, "id"]
    target = train_df.loc[i, "target"]

    visualize_sample(_id, target)

https://github.com/SiddharthPatel45/gravitational-wave-detection/blob/main/code/gw-detection-analysis.ipynb

In [None]:
# Import libraries
import numpy as np
import pandas as pd

# Plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Import the kaggle API to download data files as required
try:
    import kaggle
except:
    !pip install -q --user kaggle
    import kaggle

In [None]:
# Gravitational wave analysis python library
try:
    import gwpy
except:
    !pip install -q --user gwpy
    import gwpy
from gwpy.timeseries import TimeSeries

In [None]:
# Training IDs
train = pd.read_csv('../data_source/training_labels.csv')

In [None]:
# data source url -- get the npy files from kaggle API
def get_data(id_, is_train=True):
    """Load the array of one sample, given its id.

    The relative file name is "<train|test>/<c0>/<c1>/<c2>/<id>.npy", where
    c0..c2 are the first three characters of the id.

    Args:
        id_: sample id
        is_train: look under "train" when True, under "test" otherwise
    Returns:
        whatever `np.load` yields for the file handed back by `download_unzip`
    """
    split = 'train' if is_train else 'test'
    file = f'{split}/{id_[0]}/{id_[1]}/{id_[2]}/{id_}.npy'

    # NOTE(review): `download_unzip` is not defined in the visible part of this
    # notebook; presumably it fetches `file` and returns something np.load
    # accepts -- confirm it is defined before this function is called.
    return np.load(download_unzip(id_, file))

In [None]:
# set of observatories
obs_list = ('LIGO Hanford', 'LIGO Livingston', 'Virgo')

In [None]:
# check the shape of the training data
train.shape

In [None]:
# Target distribution in train data
train['target'].value_counts(normalize=True)

In [None]:

# draw one random sample of each class from the train data (seeded, so the
# same ids are picked on every run)
sample_gw_id = train[train['target'] == 1].sample(random_state=42)['id'].values[0]
# counterpart without a signal; the later ASD and side-by-side Q-transform
# cells use it but it was never assigned
sample_no_gw_id = train[train['target'] == 0].sample(random_state=42)['id'].values[0]

In [None]:
# Plot the sample data
def plot_sample_data(sample_id):
    """Plot the three rows of a sample as stacked line plots sharing a y axis.

    The sample is fetched with `get_data`, its label is read from the `train`
    table, and each panel gets the matching entry of `obs_list` as legend.
    The x axis spans 0..4096 with ticks at 0, 2048 and 4096 labelled 0, 1, 2.
    """
    sample = get_data(sample_id)

    fig, ax = plt.subplots(3, 1, figsize=(12, 10), sharey=True)

    # label(s) of the row(s) in `train` whose id matches
    target = train.loc[train['id'] == sample_id, 'target'].values
    fig.suptitle(f"Strain data for three observatories from sample: {sample_id} | Target: {target[0]}")

    for i, panel in enumerate(ax):
        line_colour = sns.color_palette()[i]
        sns.lineplot(data=sample[i], ax=panel, color=line_colour)
        panel.legend([obs_list[i]])
        panel.set_xlim(0, 4096)
        panel.set_xticks(ticks=[0, 2048, 4096])
        panel.set_xticklabels(labels=[0, 1, 2])

In [None]:

# plot the sample with gravitational wave signal
plot_sample_data(sample_gw_id)

Spectral density plots

In [None]:
# let's define some signal parameters
sample_rate = 2048 # data is provided at 2048 Hz
signal_length = 2 # each signal lasts 2 s

In [None]:
# function to plot the amplitude spectral density (ASD) plot
def plot_asd(sample_id):
    """Plot the amplitude spectral density of every row of a sample.

    Each row is wrapped in a gwpy TimeSeries at the module-level
    `sample_rate`, its ASD is computed with `signal_length` as the argument,
    and the result is drawn in its own 12x5 figure limited to 10-1024 on the
    x axis and titled with the matching `obs_list` entry.
    """
    sample = get_data(sample_id)

    for i, strain in enumerate(sample):
        # gwpy TimeSeries gives access to the spectral helpers
        series = TimeSeries(strain, sample_rate=sample_rate)
        spectrum = series.asd(signal_length)

        figure = spectrum.plot(figsize=(12, 5))
        ax = figure.gca()
        ax.set_xlim(10, 1024)
        ax.set_title(f"ASD plots for sample: {sample_id} from {obs_list[i]}")

In [None]:
# plot ASD for sample w/ GW
plot_asd(sample_gw_id)

In [None]:
# plot ASD for sample w/o GW
plot_asd(sample_no_gw_id)

Typical signal processing workflow
Next, we implement the signal-processing steps from the paper referenced above:

Plot the raw signal
Window the signal
Whiten the signal
Bandpass the signal

In [None]:
# Plot the raw signal
sample_gw_ts = TimeSeries(get_data(sample_gw_id)[0], sample_rate=sample_rate)
plot = sample_gw_ts.plot()
ax = plot.gca()
ax.set_xlim(0, 2);

In [None]:
# get a window with the same length as the signal
# (window functions live in scipy.signal.windows; the top-level
# scipy.signal.hann alias has been removed from recent SciPy releases)
from scipy.signal.windows import hann
# sym=False requests the periodic form of the window
hann_win = hann(sample_rate*signal_length, sym=False)

In [None]:
plt.plot(hann_win);

In [None]:
# plot the windowed signal
sample_gw_ts_win = sample_gw_ts * hann_win
plot = sample_gw_ts_win.plot()
ax = plot.gca()
ax.set_xlim(0, 2);

In [None]:
# plot the whitened signal
plot = sample_gw_ts.whiten().plot()
ax = plot.gca()
ax.set_xlim(0, 2);

In [None]:
# Bandpass the above whitened data and plot
plot = sample_gw_ts.whiten().bandpass(35, 350).plot()
ax = plot.gca()
ax.set_xlim(0, 2);

Q-transform

In [None]:
# function to plot the Q-transform spectrogram
def plot_q_transform(sample_id):
    """Plot the Q-transform spectrogram of every row of a sample.

    Each row is wrapped in a gwpy TimeSeries at the module-level
    `sample_rate`, Q-transformed with whitening enabled, and drawn in its own
    figure with a log-scaled y axis, no grid, an empty x label and a title
    naming the matching `obs_list` entry.
    """
    sample = get_data(sample_id)

    for i, strain in enumerate(sample):
        # gwpy TimeSeries provides q_transform
        series = TimeSeries(strain, sample_rate=sample_rate)
        spectrogram = series.q_transform(whiten=True)

        ax = spectrogram.plot().gca()
        ax.set_xlabel('')
        ax.set_title(f"Spectrogram plots for sample: {sample_id} from {obs_list[i]}")
        ax.grid(False)
        ax.set_yscale('log')

In [None]:
# plot the Q-transform for sample w/ GW
plot_q_transform(sample_gw_id)

In [None]:
# function to plot the Q-transform spectrogram side-by-side
def plot_q_transform_sbs(sample_gw_id, sample_no_gw_id):
    """Show Q-transforms of two samples side by side, one figure per observatory.

    For every entry of `obs_list` a 1x2 figure is drawn: the left panel shows
    the Q-transform (whitening enabled) of the corresponding row of the
    sample identified by `sample_gw_id`, the right panel that of
    `sample_no_gw_id`. The observatory name is used as the figure title so the
    figures can be told apart.

    Args:
        sample_gw_id: id of the sample titled "Target=1"
        sample_no_gw_id: id of the sample titled "Target=0"
    """
    # Get the data
    sample_gw = get_data(sample_gw_id)
    sample_no_gw = get_data(sample_no_gw_id)

    # (sample array, panel title) for the left and right panel respectively
    panels = (
        (sample_gw, f"id: {sample_gw_id} | Target=1"),
        (sample_no_gw, f"id: {sample_no_gw_id} | Target=0"),
    )

    for i, obs_name in enumerate(obs_list):
        # exactly two panels, so use a 1x2 grid rather than leaving an empty
        # third column
        fig, axes = plt.subplots(1, 2, figsize=(20, 10))
        fig.suptitle(obs_name)

        for ax, (sample, title) in zip(axes, panels):
            # get the timeseries and its Q-transform
            ts = TimeSeries(sample[i], sample_rate=sample_rate)
            image = ts.q_transform(whiten=True)

            ax.imshow(image)
            ax.set_title(title)
            ax.grid(False)

        plt.show()

In [None]:

# let's plot two spectrograms for sample w/ and w/o GW signal side-by-side
plot_q_transform_sbs(sample_gw_id, sample_no_gw_id)

In [None]:
# let's look at the sample with obvious "chirp"
# id from: https://www.kaggle.com/mistag/data-preprocessing-with-gwpy
plot_q_transform('0021f9dd71')