# Gravitational Wave Dataset Generator
This notebook generates a dataset of spectrograms from synthetic LIGO-like noise with optional signal injection. It uses the IMRPhenomXPHM waveform model and simulates aLIGO Zero Det High Power noise.
- Each sample lasts 2 seconds
- Sampling rate: 2048 Hz
- Half the samples contain signals; the other half are pure noise
- Output: normalized log-spectrograms (`x.npy`), labels (`y.npy`) and per-sample metadata (`meta.csv`), all written to the `results/` directory

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
from pycbc.waveform import get_td_waveform
from pycbc.noise import noise_from_psd
from pycbc.psd import aLIGOZeroDetHighPower, interpolate
from pycbc.types import TimeSeries
from pycbc.filter import sigma
from pycbc.detector import Detector
from scipy.signal import spectrogram


In [None]:
# --- Dataset configuration -------------------------------------------------
num_samples = 1000                       # total examples (signal + noise-only)
sample_rate = 2048                       # Hz
duration = 2.0                           # seconds per example
samples = int(duration * sample_rate)    # samples per example
f_lower = 20                             # low-frequency cutoff for PSD and SNR
snr_range = (8, 20)                      # (min, max) of the uniformly drawn target SNR

# --- Spectrogram configuration (forwarded to scipy.signal.spectrogram) -----
nperseg = 512
noverlap = 256
target_time_bins = 31                    # spectrograms are padded/cropped to this width

# --- Output location -------------------------------------------------------
output_dir = "results"

# Seed NumPy's global RNG so labels and source parameters are reproducible,
# and make sure the output directory exists before anything is written.
np.random.seed(42)
os.makedirs(output_dir, exist_ok=True)

# Analytic PSD used both to colour the noise and for SNR calculations. Its
# frequency grid matches the FFT of one `duration`-long example:
# samples // 2 + 1 bins spaced 1 / duration apart.
psd = aLIGOZeroDetHighPower(
    length=samples // 2 + 1,
    delta_f=1.0 / duration,
    low_freq_cutoff=f_lower,
)


In [None]:
def pad_or_crop(ts: TimeSeries, target_length: int) -> TimeSeries:
    """Return a TimeSeries holding exactly ``target_length`` samples of ``ts``.

    A series shorter than ``target_length`` is zero-padded at its end; any
    other series is truncated to its first ``target_length`` samples. The
    sample spacing and start time of ``ts`` are carried over to the result.
    """
    data = ts.numpy()
    shortfall = target_length - len(data)
    if shortfall > 0:
        # Too short: append zeros after the last sample.
        resized = np.pad(data, (0, shortfall))
    else:
        # Long enough: keep only the leading samples.
        resized = data[:target_length]
    return TimeSeries(resized, delta_t=ts.delta_t, epoch=ts.start_time)


In [None]:
# Accumulators for the dataset: spectrograms, labels and per-sample metadata.
x = []
y = []
meta = []

# Balanced, shuffled labels: 1 = noise + injected signal, 0 = noise only.
labels = np.random.permutation([1] * (num_samples // 2) + [0] * (num_samples - num_samples // 2))

# The detector response is the same for every sample, so compute it once.
# antenna_pattern arguments: right ascension, declination, polarization, GPS time.
detector = Detector("H1")
fp, fc = detector.antenna_pattern(0, 0, 0, 1000)

# Frequency-domain whitening filter built from the analytic PSD that colours
# the noise. TimeSeries.whiten(4, 2) cannot be used on these samples: it needs
# 4 s Welch segments and trims the filter-corrupted edges, which a 2 s series
# cannot supply. Bins where the PSD is zero (below the low-frequency cutoff)
# or non-finite are zeroed rather than divided by. The sqrt(2 / sample_rate)
# factor normalises in-band whitened noise to roughly unit variance.
asd = np.sqrt(psd.numpy())
in_band = np.isfinite(asd) & (asd > 0)
whitening_filter = np.zeros_like(asd)
whitening_filter[in_band] = np.sqrt(2.0 / sample_rate) / asd[in_band]

for i in tqdm(range(num_samples)):
    is_signal = labels[i]
    # Seeding with the sample index makes each noise realisation reproducible.
    noise = noise_from_psd(length=samples, delta_t=1/sample_rate, psd=psd, seed=i)

    if is_signal:
        # Source parameters; the draw order is kept so the RNG stream is stable.
        mass1 = np.random.uniform(5, 80)
        mass2 = np.random.uniform(5, 80)
        spin1z = np.random.uniform(-0.9, 0.9)
        spin2z = np.random.uniform(-0.9, 0.9)
        inclination = np.random.uniform(0, np.pi)
        # The distance only sets the raw amplitude, which is rescaled to
        # snr_target below.
        distance = np.random.uniform(300, 800)
        snr_target = np.random.uniform(*snr_range)

        # NOTE(review): the waveform starts at 30 Hz while the SNR integral
        # starts at f_lower -- confirm this mismatch is intentional.
        hp, hc = get_td_waveform(
            approximant="IMRPhenomXPHM",
            mass1=mass1, mass2=mass2,
            spin1z=spin1z, spin2z=spin2z,
            distance=distance,
            inclination=inclination,
            delta_t=1.0 / sample_rate,
            f_lower=30
        )

        # Project onto the detector while both polarizations still share the
        # same length and epoch, then resize the result once.
        projected = fp * hp + fc * hc
        if len(projected) > samples:
            # The merger sits at the end of the waveform; keep the tail so
            # cropping does not throw away the loudest part of the signal.
            projected = projected[len(projected) - samples:]
        signal = pad_or_crop(projected, samples)

        # Optimal SNR against the analytic PSD, which already lives on the
        # frequency grid of a `samples`-long series.
        sig_snr = sigma(signal, psd=psd, low_frequency_cutoff=f_lower)
        if not sig_snr > 0:
            raise ValueError(f"Sample {i}: injected waveform has no in-band power")
        scale = snr_target / sig_snr

        # Add the raw arrays: the waveform keeps its own (negative) start time,
        # so it cannot be summed with the noise as PyCBC TimeSeries objects.
        strain = noise.numpy() + scale * signal.numpy()

    else:
        strain = noise.numpy()
        mass1 = mass2 = snr_target = None

    # Whitening with the known noise spectrum.
    strain = np.fft.irfft(np.fft.rfft(strain) * whitening_filter, n=samples)

    # Spectrogram: log power, standardised per sample.
    f, t, Sxx = spectrogram(strain, fs=sample_rate, nperseg=nperseg, noverlap=noverlap)
    Sxx_log = 10 * np.log10(Sxx + 1e-10)
    Sxx_log = (Sxx_log - Sxx_log.mean()) / (Sxx_log.std() + 1e-6)

    # Pad (with zeros, i.e. the post-normalisation mean) or crop the time axis
    # to a fixed width.
    if Sxx_log.shape[1] < target_time_bins:
        pad = target_time_bins - Sxx_log.shape[1]
        Sxx_log = np.pad(Sxx_log, ((0, 0), (0, pad)), mode='constant')
    else:
        Sxx_log = Sxx_log[:, :target_time_bins]

    # A trailing channel axis gives each sample the shape (freq, time, 1).
    x.append(Sxx_log[..., np.newaxis])
    y.append(is_signal)
    meta.append({
        "index": i, "label": is_signal,
        "mass1": mass1, "mass2": mass2,
        "snr": snr_target
    })


In [None]:
# Stack the per-sample results into arrays and write the dataset to disk.
x = np.array(x, dtype=np.float32)
y = np.array(y, dtype=np.int32)

np.save(os.path.join(output_dir, "x.npy"), x)
np.save(os.path.join(output_dir, "y.npy"), y)

# Per-sample metadata (index, label, masses, target SNR) goes to a CSV file.
import pandas as pd
pd.DataFrame(meta).to_csv(os.path.join(output_dir, "meta.csv"), index=False)

# The original literal was mis-encoded (UTF-8 read as cp1252); restored.
print("✅ Saved:")
print("  X shape:", x.shape)
print("  y shape:", y.shape)
