In [1]:
from Dataset.generate_datasets import make_gravitational_waves
from pathlib import Path
from gtda.time_series import SlidingWindow, TakensEmbedding

import numpy as np

# [Library] PyTorch
import torch
import torch.nn as nn
import torch.optim as optim

# [Library] Scikit-learn
from sklearn.model_selection import train_test_split

from gtda.diagrams import PersistenceEntropy, Scaler
from gtda.homology import VietorisRipsPersistence
from gtda.metaestimators import CollectionTransformer
from gtda.pipeline import Pipeline
from gtda.time_series import TakensEmbedding
from sklearn.decomposition import PCA

# Silence the warnings, don't let the voices ring in my head
import warnings
warnings.filterwarnings("ignore")

In [2]:
def getPipe(embedding_dimension = 100, embedding_time_delay = 10, stride = 10, n_components = 3):
    """Build the topological feature-extraction pipeline.

    The pipeline chains four steps, in this order:

    1. ``TakensEmbedding`` of each time series,
    2. ``PCA`` wrapped in ``CollectionTransformer`` (one PCA per embedded series),
    3. ``VietorisRipsPersistence`` restricted to homology dimension 1,
    4. ``Scaler`` applied to the resulting persistence diagrams.

    Parameters
    ----------
    embedding_dimension : int
        ``dimension`` passed to ``TakensEmbedding``.
    embedding_time_delay : int
        ``time_delay`` passed to ``TakensEmbedding``.
    stride : int
        ``stride`` passed to ``TakensEmbedding``.
    n_components : int
        Number of principal components kept by the PCA step.

    Returns
    -------
    gtda.pipeline.Pipeline
        The unfitted pipeline. Its output is a collection of (scaled)
        persistence diagrams, not an entropy vector: no entropy step is
        part of the pipeline.
    """
    embedder = TakensEmbedding(time_delay=embedding_time_delay,
                               dimension=embedding_dimension,
                               stride=stride)

    # PCA operates on a single point cloud; CollectionTransformer wraps it so
    # it can be used on the collection produced by the embedder.
    batch_pca = CollectionTransformer(PCA(n_components=n_components))

    # n_jobs=-1: let the persistence computation use every available core.
    persistence = VietorisRipsPersistence(homology_dimensions=[1], n_jobs=-1)

    scaling = Scaler()

    steps = [("embedder", embedder),
             ("pca", batch_pca),
             ("persistence", persistence),
             ("scaling", scaling)]

    return Pipeline(steps)

# Single module-level pipeline instance; the dataset-generation loops pass it to getPipe2.
pipe = getPipe()

def getPipe2(pipe, signal, fixed = 200):
    """Turn a batch of signals into fixed-size (birth, death) arrays.

    Parameters
    ----------
    pipe : object
        Anything exposing ``fit_transform(signal)`` that returns an array of
        shape ``(n_samples, n_pairs, 3)`` whose last axis is
        ``(birth, death, homology dimension)``.
    signal : array-like
        Batch of time series handed to ``pipe.fit_transform`` unchanged.
    fixed : int
        Number of persistence pairs kept per sample.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, 2, fixed)``: row 0 holds births, row 1
        holds deaths. Pairs are ordered by lifetime (death - birth), most
        persistent first, so truncating to ``fixed`` keeps the most
        significant features. Samples with fewer than ``fixed`` pairs are
        right-padded with zero-lifetime ``(0, 0)`` pairs so that every batch
        has the same shape and batches can be concatenated.
    """
    diagrams = np.asarray(pipe.fit_transform(signal))
    # The third column only carries the homology dimension, which is redundant
    # when a single dimension is computed; keep (birth, death) only.
    pairs = diagrams[:, :, 0:2]

    # Sort the pairs themselves (axis 1) by lifetime. This must happen before
    # the transpose: afterwards axis 1 holds the two birth/death rows, and
    # sorting it would merely reorder (or swap) those rows.
    lifetimes = pairs[:, :, 1] - pairs[:, :, 0]
    order = np.argsort(-lifetimes, axis=1, kind="stable")
    ordered = np.take_along_axis(pairs, order[:, :, None], axis=1)

    # Truncate / pad to exactly `fixed` pairs per sample.
    ordered = ordered[:, :fixed, :]
    missing = fixed - ordered.shape[1]
    if missing > 0:
        ordered = np.pad(ordered, ((0, 0), (0, missing), (0, 0)))

    # Final layout: (n_samples, 2, fixed).
    return np.ascontiguousarray(ordered.transpose(0, 2, 1))

In [3]:
# Generation settings: Rmin / Rmax are forwarded as r_min / r_max and
# n_signals as the number of signals to generate.
Rmin = 0.075
Rmax = 0.65
n_signals = 10000
# Directory forwarded as path_to_data.
DATA = Path("./Dataset")

noisy_signals, gw_signals, labels = make_gravitational_waves(
    path_to_data=DATA, n_signals=n_signals, r_min=Rmin, r_max=Rmax, n_snr_values=100
)
# Convert the three returned collections to NumPy arrays
noisy_signals, gw_signals, labels = np.array(noisy_signals), np.array(gw_signals), np.array(labels)
# Standardize the data. mean()/std() are called without an axis, so a single
# global mean and std (taken from the noisy signals) is used; the clean signals
# are shifted and scaled with those same noisy statistics, not with their own.
noisy_mean,  noisy_std = noisy_signals.mean(), noisy_signals.std()
noisy_signals = (noisy_signals - noisy_mean) / noisy_std
gw_signals = (gw_signals - noisy_mean) / noisy_std
# Sanity check: print the shapes of the three arrays

print (noisy_signals.shape, gw_signals.shape, labels.shape)


(10000, 8692) (10000, 8192) (10000,)


In [4]:
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Index of the first pure-noise time series (np.argmin returns the first
# occurrence of the smallest label).
# NOTE(review): assumes labels are 0 for noise-only and 1 for noise + signal — confirm.
background_idx = np.argmin(labels)
# Index of the first noise + gravitational-wave time series (first occurrence
# of the largest label). Deterministic, so the example is the same on every run.
signal_idx = np.argmax(labels)

# Noise-only example.
ts_noise = noisy_signals[background_idx]
# Noisy series that contains a gravitational wave ...
ts_background = noisy_signals[signal_idx]
# ... and the clean gravitational wave for that same sample.
ts_signal = gw_signals[signal_idx]

In [5]:
# Split the clean signals into train and test sets (test_size=0.2 holds out
# 20% for testing); random_state is fixed so the split is reproducible.
X_train, X_test = train_test_split(gw_signals, test_size=0.2, random_state=42)


In [6]:
# Noiser
class Noiser:
    """Draw random batches of clean signals together with noisy versions.

    Each call samples signals (with replacement) from the stored pool,
    right-pads them with zeros to ``fixed_length``, randomly blanks some of
    them out, and adds Gaussian noise.

    Parameters
    ----------
    signals : array-like, shape (n_signals, length)
        Pool of clean signals; ``length`` must not exceed ``fixed_length``.
    fixed_length : int
        Length of every returned series.
    noise_level : float
        Standard deviation of the zero-mean Gaussian noise.
    decoy_rate : float
        Probability that a sample keeps its signal (mask value 1). With
        probability ``1 - decoy_rate`` the sample is noise only (mask 0).
    normal_values : list, optional
        Stored on the instance as-is; defaults to a fresh ``[0, 1]``.
        It is not read by ``__call__``.

    Raises
    ------
    ValueError
        If ``signals`` is not 2-D or is longer than ``fixed_length``.
    """

    def __init__(self,
            signals,
            fixed_length : int = 8192,
            noise_level : float = 0.1,
            decoy_rate : float = 0.5,
            normal_values : list = None
            ):
        signals = np.asarray(signals)
        if signals.ndim != 2:
            raise ValueError(
                f"signals must be 2-D (n_signals, length), got {signals.ndim} dimension(s)"
            )
        if signals.shape[1] > fixed_length:
            # np.pad cannot take a negative width; fail early with a clear message.
            raise ValueError(
                f"signals of length {signals.shape[1]} do not fit in fixed_length={fixed_length}"
            )
        self.signals = signals
        self.fixed_length = fixed_length
        self.noise_level = noise_level
        self.decoy_rate = decoy_rate
        # A new list per instance: a mutable default would be shared by all instances.
        self.normal_values = [0, 1] if normal_values is None else normal_values

    def __call__(self, batch_size):
        """Return ``(clean, noisy, decoy)`` for a random batch.

        ``clean`` and ``noisy`` have shape ``(batch_size, fixed_length)``;
        ``decoy`` has shape ``(batch_size,)`` and is 1 where the signal is
        present and 0 where the sample is noise only.
        """
        # The order of the three random draws (mask, indices, noise) is kept
        # fixed so that runs seeded through np.random.seed stay reproducible.
        decoy = np.random.choice([0, 1], size=batch_size, p=[1 - self.decoy_rate, self.decoy_rate])
        # Pick which clean signals go into the batch (sampling with replacement).
        signal_idx = np.random.choice(len(self.signals), size=batch_size)
        signals = self.signals[signal_idx]
        # Right-pad with zeros up to the common length.
        signals = np.pad(signals, ((0, 0), (0, self.fixed_length - signals.shape[1])))
        noise = np.random.normal(0, self.noise_level, (batch_size, self.fixed_length))
        # Zero out the signal wherever the mask is 0, then add the noise.
        clean = signals * decoy[:, None]
        return (clean, clean + noise, decoy)

In [7]:
# Noise generator over the training signals; outputs are padded to the length
# of the noisy series (noisy_signals.shape[1]).
noiser = Noiser(
    X_train,
    fixed_length=noisy_signals.shape[1],
    noise_level=0.8,
    decoy_rate=0.40,
)
# Draw a small batch and check the shapes of the three returned arrays
signals, noisy, decoy = noiser(10)
print (signals.shape, noisy.shape, decoy.shape)

(10, 8692) (10, 8692) (10,)


In [14]:
# Importar Tqdm
from tqdm import tqdm

In [15]:
# Generate the training data once so it can be saved and does not have to be
# regenerated later.
final_signals = []
final_noisy = []
final_betti = []

batch_size = 100

# The loop index is unused: range(...) only fixes the number of batches
# (len(X_train) / batch_size, rounded up). Every batch is a fresh random draw
# from the noiser rather than a sequential pass over X_train.
# NOTE(review): the `decoy` mask returned by the noiser is not stored.
# NOTE(review): getPipe2 calls pipe.fit_transform, so the pipeline is refitted
# on every batch — confirm that per-batch fitting is intended.
for i in tqdm(range(0, len(X_train), batch_size)):
    signals, noisy, decoy = noiser(batch_size)
    betti = getPipe2(pipe, noisy)
    final_signals.append(signals)
    final_noisy.append(noisy)
    final_betti.append(betti)

# Stack the per-batch arrays along the first (sample) axis.
final_signals = np.concatenate(final_signals)
final_noisy = np.concatenate(final_noisy)
final_betti = np.concatenate(final_betti)

  1%|▏         | 1/80 [00:11<14:30, 11.02s/it]


KeyboardInterrupt: 

In [12]:
# Print the array shapes
print (final_signals.shape, final_noisy.shape, final_betti.shape)
# Save the arrays as .npy files (relative paths, i.e. in the current working directory)
np.save("final_signals.npy", final_signals)
np.save("final_noisy.npy", final_noisy)
np.save("final_betti.npy", final_betti)

(8000, 8692) (8000, 8692) (8000, 2, 200)


In [16]:
# Repeat the process with the test signals.
# NOTE(review): this rebinds the global `noiser` to the test pool, so any cell
# executed afterwards that uses `noiser` will sample from X_test.
noiser = Noiser(
    X_test,
    fixed_length=noisy_signals.shape[1],
    noise_level=0.8,
    decoy_rate=0.40,
)
# Accumulators for the per-batch test arrays
final_test_signals = []
final_test_noisy = []
final_test_betti = []

batch_size = 100

# The loop index is unused: range(...) only fixes the number of batches; every
# batch is a fresh random draw from the noiser.
for i in tqdm(range(0, len(X_test), batch_size)):
    signals, noisy, decoy = noiser(batch_size)
    betti = getPipe2(pipe, noisy)
    final_test_signals.append(signals)
    final_test_noisy.append(noisy)
    final_test_betti.append(betti)

# Stack the per-batch arrays along the first (sample) axis.
final_test_signals = np.concatenate(final_test_signals)
final_test_noisy = np.concatenate(final_test_noisy)
final_test_betti = np.concatenate(final_test_betti)

# Print the array shapes
print (final_test_signals.shape, final_test_noisy.shape, final_test_betti.shape)

# Save the arrays as .npy files (relative paths, i.e. in the current working directory)
np.save("final_test_signals.npy", final_test_signals)
np.save("final_test_noisy.npy", final_test_noisy)
np.save("final_test_betti.npy", final_test_betti)

100%|██████████| 20/20 [02:44<00:00,  8.21s/it]


(2000, 8692) (2000, 8692) (2000, 2, 200)
