In [44]:
import math
from typing import Callable, Iterable, Optional, Tuple

import numpy as np
import torch
from gwpy.timeseries import TimeSeries, TimeSeriesDict
from ml4gw.distributions import Cosine, LogUniform, Uniform
from ml4gw.gw import compute_observed_strain, get_ifo_geometry
from ml4gw.waveforms import SineGaussian

One of the easiest ways to improve neural network performance is to provide more high-quality data. However, memory limitations often reduce the amount of data that can be utilized at training time. Consider the problem of training a neural network to detect sine-Gaussian signals in gravitational wave data. In this problem, memory limitations may reduce the number of waveforms/signals available to inject into data at training time. How can we remove this constraint? One solution is to generate these signals on the GPU during training, so that memory is only utilized as it is needed.

In [45]:
class ConvNet(torch.nn.Module):
    """Minimal 1D convolutional binary classifier for multi-channel time series.

    The network applies a single convolution + ReLU, averages over the time
    axis, and maps the pooled features to one probability per example.

    Args:
        in_channels: Number of input channels (defaults to 2, one per
            interferometer listed in ``ifos``).
        hidden_channels: Number of convolutional filters.
        kernel_size: Width of the convolutional kernel in samples.
    """

    def __init__(
        self,
        in_channels: int = 2,
        hidden_channels: int = 8,
        kernel_size: int = 7,
    ):
        # Module.__init__ must run before any sub-module is assigned,
        # otherwise the layers are not registered as parameters.
        super().__init__()
        self.features = torch.nn.Sequential(
            torch.nn.Conv1d(
                in_channels,
                hidden_channels,
                kernel_size,
                padding=kernel_size // 2,
            ),
            torch.nn.ReLU(),
            # Collapse the time axis so any window length is accepted.
            torch.nn.AdaptiveAvgPool1d(1),
        )
        self.classifier = torch.nn.Linear(hidden_channels, 1)

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Return one probability per example.

        Args:
            X: Tensor of shape ``(batch, in_channels, time)``.

        Returns:
            Tensor of shape ``(batch,)`` with values in ``[0, 1]``; a sigmoid
            is applied because ``binary_cross_entropy`` expects probabilities.
        """
        pooled = self.features(X).squeeze(-1)
        return torch.sigmoid(self.classifier(pooled)).squeeze(-1)

In [46]:
# Interferometers to analyse and the channel suffix; the two are joined as
# "<ifo>:<channel>" when the data is requested.
ifos = ["H1", "L1"]
channel = "DCS-CALIB_STRAIN_CLEAN_C01"

# Segment boundaries forwarded to TimeSeries.fetch
# (presumably GPS seconds — confirm).
start = 1_262_653_854
stop = start + 1_000

# Sample rate passed to the waveform generator and the detector projection;
# multiplied by kernel_length it gives the number of samples per window.
sample_rate = 2_048
kernel_length = 1

In [47]:
# Download the strain for every interferometer between `start` and `stop`.
# NOTE: TimeSeries.fetch needs the `nds2` client package; without it this
# cell fails with "ModuleNotFoundError: No module named 'nds2'".
background = TimeSeriesDict()
for ifo in ifos:
    # Build the full channel name in its own variable. Reassigning `channel`
    # here would make the second request "L1:H1:..." and would keep adding
    # prefixes every time the cell is re-run.
    ifo_channel = f"{ifo}:{channel}"
    background[ifo] = TimeSeries.fetch(ifo_channel, start, stop)

ModuleNotFoundError: No module named 'nds2'

In [None]:
# Detector geometry for the configured interferometers; both values are
# passed on to compute_observed_strain.
tensors, vertices = get_ifo_geometry(*ifos)

# Generate waveforms whose duration equals the training window length, so
# the two cannot drift apart if `kernel_length` is changed.
sine_gaussian = SineGaussian(sample_rate, duration=kernel_length)

In [31]:
# Priors for the sine-Gaussian waveform parameters. The keys are forwarded
# as keyword arguments to the waveform generator, so they must match its
# argument names.
prior = dict(
    frequency=Uniform(32, 1024),
    quality=Uniform(2, 108),
    hrss=LogUniform(1e-23, 1e-20),
    eccentricity=Uniform(0, 1),
    phase=Uniform(0, math.pi),
)

# Priors for the sky location and polarization angle. They get their own
# names so the distribution objects are not overwritten by their samples.
dec_prior = Cosine()
# Give the polarization angle explicit bounds of [0, pi] instead of relying
# on the default bounds of `Uniform()` — TODO confirm the intended range.
psi_prior = Uniform(0, math.pi)
phi_prior = Uniform(-math.pi, math.pi)

# One sky sample per simulated waveform.
dec, psi, phi = dec_prior(1000), psi_prior(1000), phi_prior(1000)

In [11]:
class ParameterSampler(torch.nn.Module):
    """Draw a batch of samples from a set of named distributions.

    Args:
        **parameters: Mapping of parameter name to a callable that takes the
            number of samples ``N`` and returns the samples for that
            parameter.
    """

    def __init__(self, **parameters: Callable):
        # Initialise the Module machinery first; without it, methods such as
        # `.to()` fail because the internal registries do not exist.
        super().__init__()
        # Stored as a plain dict, so the callables are not registered as
        # sub-modules even if they happen to be Modules.
        self.params = parameters

    def forward(self, N: int):
        """Return ``{name: distribution(N)}`` for every configured parameter.

        Implemented as ``forward`` so that calling the instance goes through
        ``torch.nn.Module.__call__`` (and therefore through any hooks).
        """
        return {name: dist(N) for name, dist in self.params.items()}

# Build the sampler from the `prior` dict: each key becomes a named
# distribution that is sampled on every call.
parameter_sampler = ParameterSampler(**prior)

In [33]:
# Draw 1000 parameter sets and generate the two polarizations, cast to
# single precision.
parameters = parameter_sampler(1000)
cross, plus = (
    polarization.float() for polarization in sine_gaussian(**parameters)
)

# Project the polarizations onto the detectors using the sampled sky
# parameters and the interferometer geometry.
projected = compute_observed_strain(
    dec,
    psi,
    phi,
    tensors,
    vertices,
    sample_rate,
    cross=cross,
    plus=plus,
)

In [None]:
# Number of samples in one training window.
window_size = int(kernel_length * sample_rate)

# `background` is a dict of per-interferometer series, so it cannot be sliced
# directly. Resample each series to `sample_rate` (so that `window_size`
# samples span `kernel_length` seconds) and stack them into a tensor of shape
# (num_ifos, num_samples).
strain = torch.stack(
    [
        torch.as_tensor(
            np.asarray(
                background[ifo].resample(sample_rate).value,
                dtype=np.float32,
            )
        )
        for ifo in ifos
    ]
)

# Cut the strain into non-overlapping windows, dropping the incomplete tail:
# (num_ifos, num_samples) -> (num_windows, num_ifos, window_size).
num_windows = strain.shape[-1] // window_size
windows = (
    strain[:, : num_windows * window_size]
    .reshape(len(ifos), num_windows, window_size)
    .transpose(0, 1)
)

# Inject one simulated signal into each window. Assumes `projected` has shape
# (num_signals, num_ifos, window_size) — TODO confirm. The counts are aligned
# so the addition is element-wise rather than an accidental broadcast.
num_examples = min(len(windows), len(projected))
windows = windows[:num_examples]
signals = windows + projected[:num_examples]

# Label 0 = background only, label 1 = background plus injected signal.
# torch.concat takes a sequence of tensors, not tensors as separate arguments.
y = torch.concat([torch.zeros(len(windows)), torch.ones(len(signals))])
training = torch.concat([windows, signals])

In [48]:
# Pair every training example with its label, then serve shuffled
# mini-batches of 32 examples.
dataset = torch.utils.data.TensorDataset(training, y)
loader = torch.utils.data.DataLoader(
    dataset=dataset,
    shuffle=True,
    batch_size=32,
)

In [None]:
def train_for_one_epoch(
    dataset: Iterable[Tuple[torch.Tensor, torch.Tensor]],
    model: torch.nn.Module,
    optimizer: Optional[torch.optim.Optimizer] = None,
):
    """Run one pass over ``dataset`` and return the summed batch loss.

    Args:
        dataset: Iterable yielding ``(X, y)`` batches. ``y`` must have the
            same shape as the model output and contain targets in ``[0, 1]``.
        model: Module mapping ``X`` to probabilities in ``[0, 1]``, as
            required by ``binary_cross_entropy``.
        optimizer: Optional optimizer. When given, gradients are reset before
            each batch and a step is taken after each backward pass. When
            omitted, gradients are only accumulated on the parameters and
            the caller is responsible for applying them.

    Returns:
        Sum of the binary cross-entropy loss over all batches; ``0`` if
        ``dataset`` yields no batches.
    """
    total_loss = 0
    for X, y in dataset:
        if optimizer is not None:
            # Drop gradients left over from the previous batch.
            optimizer.zero_grad()

        preds = model(X)
        loss = torch.nn.functional.binary_cross_entropy(preds, y)
        loss.backward()

        if optimizer is not None:
            optimizer.step()

        # Accumulate inside the loop so every batch contributes, not just
        # the last one.
        total_loss += loss.item()

    return total_loss


