In [5]:
from scipy.stats import beta, bernoulli

def beta_drop(inputs, a, b):
    """Rescale `inputs` and multiply them by a random Beta(a, b) mask.

    The input is divided by ``1 - a / (a + b)`` and then multiplied
    element-wise by a mask sampled from a Beta(a, b) distribution. The mask
    has the shape of ``inputs`` without its leading axis and is broadcast
    along that axis.

    The mask is sampled with TensorFlow ops so that a fresh mask is drawn
    every time the op executes; a NumPy/SciPy sample would be baked into a
    traced graph as a constant and could not handle tensor parameters or
    dimensions that are unknown at trace time.

    NOTE(review): the mean of Beta(a, b) is ``a / (a + b)``, so after the
    rescaling above the expected output equals ``inputs * a / b``; it only
    matches ``inputs`` when ``a == b``. Confirm this scaling is intended.

    Args:
        inputs: Tensor or tensor-like value to perturb.
        a: First Beta shape parameter; a real number or a rank-0 tensor.
        b: Second Beta shape parameter; a real number or a rank-0 tensor.

    Returns:
        A tensor with the same shape and dtype as `inputs`.

    Raises:
        ValueError: If `a` and `b` are not both real numbers or both tensors,
            or if their tensor dtypes are not compatible with `inputs`.
    """
    x = tf.convert_to_tensor(inputs, name="x")
    x_dtype = x.dtype

    if not tf.is_tensor(a) or not tf.is_tensor(b):
        # Plain-Python path: both parameters must be real numbers.
        if not (isinstance(a, numbers.Real) and isinstance(b, numbers.Real)):
            raise ValueError(
                f"`a` and 'b' must be a scalar or scalar tensor. Received: a={a}, b={b}")
        keep_prob = 1 - a/(a+b)
        scale = tf.convert_to_tensor(1 / keep_prob, dtype=x_dtype)
        ret = tf.math.multiply(x, scale)
        # The sampler below needs the parameters as tensors of the input dtype.
        a = tf.cast(a, x_dtype, name="a")
        b = tf.cast(b, x_dtype, name="b")
    else:
        # Tensor path: both parameters must be scalars of a compatible dtype.
        a.get_shape().assert_has_rank(0)
        b.get_shape().assert_has_rank(0)

        a_dtype = a.dtype
        b_dtype = b.dtype

        if a_dtype != x_dtype or b_dtype != x_dtype:
            if not a_dtype.is_compatible_with(x_dtype) or not b_dtype.is_compatible_with(x_dtype):
                raise ValueError(
                  "`x.dtype` must be compatible with `a.dtype` and `b.dtype`. "
                  f"Received: x.dtype={x_dtype} and a.dtype={a_dtype}, b.dtype={b_dtype}")
            a = tf.cast(a, x_dtype, name="a")
            b = tf.cast(b, x_dtype, name="b")
        one_tensor = tf.constant(1, dtype=x_dtype)
        ret = tf.realdiv(x, tf.math.subtract(one_tensor, a/(a+b)))

    # One mask per sample position; the leading axis is covered by broadcasting.
    mask_shape = tf.shape(x)[1:]

    # If G_a ~ Gamma(a, 1) and G_b ~ Gamma(b, 1) are independent, then
    # G_a / (G_a + G_b) ~ Beta(a, b).
    gamma_a = tf.random.gamma(mask_shape, alpha=a, dtype=x_dtype)
    gamma_b = tf.random.gamma(mask_shape, alpha=b, dtype=x_dtype)
    # divide_no_nan yields 0 instead of NaN if both variates underflow to 0.
    random_tensor = tf.math.divide_no_nan(gamma_a, gamma_a + gamma_b)

    return tf.math.multiply(ret, random_tensor)
    

In [2]:
#capa soft-dropout
import numbers

import tensorflow.compat.v2 as tf
import torch

from keras import backend
from keras.engine import base_layer
from keras.utils import control_flow_util

class Soft_Dropout(base_layer.BaseRandomLayer):
    """Soft dropout layer driven by a Beta(a, b) distribution.

    In training mode the layer returns `beta_drop(inputs, a, b)`; in
    inference mode it returns the input unchanged.

    Args:
      a: Float higher than 0.
      b: Float higher than 0.
      noise_shape: 1D integer tensor describing a mask shape, e.g.
        `(batch_size, 1, features)` for inputs of shape
        `(batch_size, timesteps, features)`. It is stored, exposed through
        `_get_noise_shape` and serialised by `get_config`, but `call` does
        not currently use it.
      seed: A Python integer to use as random seed. It is forwarded to the
        base layer and stored; `call` does not pass it to `beta_drop`.
    Call arguments:
      inputs: Input tensor (of any rank).
      training: Python boolean indicating whether the layer should behave in
        training mode (applying the noise) or in inference mode (identity).
    """

    def __init__(self, a, b, noise_shape=None, seed=None, **kwargs):
        super().__init__(seed=seed, **kwargs)
        # Only plain Python numbers are range-checked here; other types
        # (e.g. tensors) are accepted as-is.
        if isinstance(a, (int, float)) and not 0 < a:
            raise ValueError(
                f"Invalid value {a} received for "
                "`a`, expected a value higher than 0."
            )
        if isinstance(b, (int, float)) and not 0 < b:
            raise ValueError(
                f"Invalid value {b} received for "
                "`b`, expected a value higher than 0."
            )
        self.a = a
        self.b = b
        # Mean of Beta(a, b); derived from the two parameters above.
        self.scale = a/(a+b)
        self.noise_shape = noise_shape
        self.seed = seed
        self.supports_masking = True

    def _get_noise_shape(self, inputs):
        """Resolve `self.noise_shape` against the runtime shape of `inputs`.

        Returns None when no noise shape was configured; otherwise a tensor
        in which every `None` entry is replaced by the matching dimension of
        `inputs`. Subclasses may override this for custom noise shapes.
        """
        if self.noise_shape is None:
            return None

        concrete_inputs_shape = tf.shape(inputs)
        noise_shape = []
        for i, value in enumerate(self.noise_shape):
            noise_shape.append(
                concrete_inputs_shape[i] if value is None else value
            )
        return tf.convert_to_tensor(noise_shape)

    def call(self, inputs, training=None):
        """Apply the Beta noise when training, otherwise return `inputs`."""
        if training is None:
            training = backend.learning_phase()

        def dropped_inputs():
            return beta_drop(inputs, self.a, self.b)

        # Inference branch is the identity; `self.scale` is not applied.
        output = control_flow_util.smart_cond(
            training, dropped_inputs, lambda: tf.identity(inputs)
        )
        return output

    def compute_output_shape(self, input_shape):
        """The layer is element-wise, so the shape is unchanged."""
        return input_shape

    def get_config(self):
        """Return the constructor arguments needed to rebuild the layer.

        `scale` is intentionally left out: it is recomputed from `a` and `b`
        in `__init__` and is not a constructor argument, so including it
        would make rebuilding the layer from this config fail with an
        unexpected keyword argument.
        """
        config = {
            "a": self.a,
            "b": self.b,
            "noise_shape": self.noise_shape,
            "seed": self.seed,
        }
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [3]:
from keras.callbacks import Callback
from keras.callbacks import EarlyStopping

class EarlyStoppingTresh(tf.keras.callbacks.Callback):
    """Callback that stops training once a metric reaches a threshold.

    At the end of every epoch the monitored value is read from `logs`; when
    it is less than or equal to `threshold`, `model.stop_training` is set.

    :param monitor: name of the metric to monitor (key in the epoch logs)
    :param threshold: training stops when the metric is <= this value
    """

    def __init__(self, monitor='loss', threshold=0.1):
        super().__init__()
        self.threshold = threshold
        self.monitor = monitor

    def on_epoch_end(self, epoch, logs=None):
        """Check the monitored metric and request a stop if it is low enough."""
        import warnings

        # `logs` may be None and the metric may be absent (e.g. a validation
        # metric when no validation data is used); warn instead of crashing.
        metric = (logs or {}).get(self.monitor)
        if metric is None:
            warnings.warn(
                "EarlyStoppingTresh: metric `%s` is not available in logs; "
                "skipping threshold check." % self.monitor,
                RuntimeWarning,
            )
            return

        if metric <= self.threshold:
            print('\nEpoch %d: Reached threshold, terminating training' % (epoch))
            self.model.stop_training = True

In [2]:
import glob
import pandas as pd
import numpy as np 
from keras.utils import to_categorical

from sklearn.model_selection import train_test_split

def _read_transposed(paths, sep):
    """Read every delimited text file in `paths` and return the transposed value arrays."""
    return [pd.read_csv(path, sep=sep, header=None).values.T for path in paths]


def load_data(directory, dist, sep=' ', multclass=False):
    """
    Load all signal and noise files for one distance from `directory`.

    Signal files match ``s11.2--LS220_<dist>kpc_sim*.txt`` and noise files
    match ``aLIGO_noise_2sec_sim*.txt``. If the two classes have different
    sizes, the larger one is randomly subsampled to the size of the smaller.
    Samples are returned signal-first, then noise, with labels in the same
    order: 1 for signal, 0 for noise.

    :param directory: directory that contains the files
    :param dist: simulation distance as a string, e.g. '0.1', '3.23', '6.87', '10'
    :param sep: column separator used by the files, e.g. ' ' or ','
    :param multclass: if True, labels are one-hot encoded with `to_categorical`;
        otherwise they are a flat array of 0/1
    :return: tuple (x, y) with the stacked data and the label of each sample
    """
    signal = _read_transposed(
        glob.glob(directory + '/s11.2--LS220_' + dist + 'kpc_sim*.txt'), sep)
    noise = _read_transposed(
        glob.glob(directory + '/aLIGO_noise_2sec_sim*.txt'), sep)

    # Balance the classes by randomly subsampling the larger one.
    if len(noise) > len(signal):
        noise, _ = train_test_split(noise, train_size=len(signal))
    elif len(signal) > len(noise):
        signal, _ = train_test_split(signal, train_size=len(noise))

    x = np.vstack((signal, noise))

    # Labels must follow the same order as the rows of `x`:
    # signal samples first (1), noise samples second (0).
    y = np.hstack((np.ones(len(signal)), np.zeros(len(noise))))

    if multclass:
        y = to_categorical(y, dtype="uint8")

    # Object arrays (e.g. complex values read as text) are converted to
    # complex numbers and reduced to their magnitude.
    if x.dtype == 'O':
        x = x.astype(complex)
        x = abs(x)

    return x, y

In [None]:
#normalizacion
import numpy as np 

def norm_data(x):
    """
    Min-max normalise every element of `x` to the range [0, 1], in place.

    Each `x[i]` is rescaled independently with its own minimum and maximum.
    A constant element (maximum equal to minimum) is mapped to all zeros
    instead of dividing by zero.

    Note: results are written back into `x[i]`, so if `x` is an integer
    NumPy array the values are cast back to integers on assignment.

    :param x: indexable collection of numeric arrays (modified in place)
    :return: the same object `x`, normalised
    """
    for i in range(len(x)):
        lo, hi = np.min(x[i]), np.max(x[i])
        span = hi - lo
        # Zero range would give 0/0 = NaN; dividing by 1 leaves the zeros.
        x[i] = (x[i] - lo) / (span if span != 0 else 1)
    return x

In [1]:
#graficar las metricas

import matplotlib.pyplot as plt

def graph_metrics(history, name, text = ''):
    """
    Plot loss, val_loss, accuracy and val_accuracy against the epoch number.

    :param history: object whose `history` attribute maps the metric names
        'loss', 'val_loss', 'accuracy' and 'val_accuracy' to per-epoch values
    :param name: title of the figure
    :param text: extra text appended below the x-axis label
    """
    # (history key, line colour, legend label), in drawing order.
    curve_specs = (
        ('loss', 'y', 'Training loss'),
        ('val_loss', 'r', 'Validation loss'),
        ('accuracy', 'b', 'Acc'),
        ('val_accuracy', 'purple', 'val_acc'),
    )

    # Fetch every series up front so a missing metric fails before drawing.
    series = [history.history[key] for key, _, _ in curve_specs]
    epochs = range(1, len(series[0]) + 1)

    for values, (_, colour, legend_label) in zip(series, curve_specs):
        plt.plot(epochs, values, colour, label=legend_label)

    plt.title(name)
    plt.xlabel('Epochs \n ' + text)
    plt.ylabel('Loss')
    plt.legend()

    plt.show()
    


In [10]:
import os, sys
from sklearn.utils import shuffle
import glob
import pandas as pd
import numpy as np 
from keras.utils import to_categorical

from sklearn.model_selection import train_test_split

def name_store(directory= 'data/timeFrecuency', dists=('0.1', '5.05', '10')):
    """
    Build and save train/validation file lists and labels for each distance.

    For every distance in `dists`, the matching signal files and the noise
    files in `directory` are listed, the larger class is randomly subsampled
    to the size of the smaller, one-hot labels are created (1 for signal,
    0 for noise, aligned with the file order), everything is shuffled, and a
    70/30 train/validation split is stored as `.npy` files under
    ``<directory>_names_<dist>``.

    :param directory: directory that contains the signal and noise files
    :param dists: distances (as strings) to process
    """
    # The noise file list does not depend on the distance, so list it once.
    all_noise_names = glob.glob(directory + '/aLIGO_noise_2sec_sim*.txt')

    for dist in dists:
        signal_names = glob.glob(directory + '/s11.2--LS220_' + dist + 'kpc_sim*.txt')
        noise_names = list(all_noise_names)

        # Balance the classes by randomly subsampling the larger one.
        if len(noise_names) > len(signal_names):
            noise_names, _ = train_test_split(noise_names, train_size=len(signal_names))
        elif len(signal_names) > len(noise_names):
            signal_names, _ = train_test_split(signal_names, train_size=len(noise_names))

        x_names = signal_names + noise_names

        # Labels must follow the same order as `x_names`:
        # signal files first (1), noise files second (0).
        y = np.hstack((np.ones(len(signal_names)), np.zeros(len(noise_names))))
        y = to_categorical(y, dtype ="uint8")

        x_shuffled, y_shuffled = shuffle(x_names, y)

        outdir = directory + '_names_' + dist
        os.makedirs(outdir, exist_ok=True)

        X_train_filenames, X_val_filenames, y_train, y_val = train_test_split(
            x_shuffled, y_shuffled, test_size=0.3, random_state=1)

        np.save(outdir + '/train_list_dist_' + dist + '.npy', X_train_filenames)
        np.save(outdir + '/val_list_dist_' + dist + '.npy', X_val_filenames)
        np.save(outdir + '/train_labels_dist_' + dist + '.npy', y_train)
        np.save(outdir + '/val_labels_dist_' + dist + '.npy', y_val)

        # Split into train and validation file lists; approach taken from
        # https://medium.com/@mrgarg.rajat/training-on-large-datasets-that-dont-fit-in-memory-in-keras-60a974785d71
        

In [12]:
# Generate the train/validation file lists for the dataset stored at this
# machine-specific absolute path; change the path when running elsewhere.
name_store('D:/benja/Ondas/Code/data/timeFrecuency')


In [81]:
import keras

class My_Custom_Generator(keras.utils.Sequence):
    """Keras `Sequence` that loads batches of samples from text files on demand.

    Each item is a tuple ``(batch_x, batch_y)`` where `batch_x` stacks the
    transposed contents of up to `batch_size` comma-separated files and
    `batch_y` holds the matching labels.

    Args:
        image_filenames: sequence of file paths, one per sample.
        labels: sequence of labels aligned with `image_filenames`.
        batch_size: maximum number of samples per batch.
    """

    def __init__(self, image_filenames, labels, batch_size):
        super().__init__()
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size

    def __len__(self):
        """Number of batches, counting a final partial batch."""
        # Use the built-in int: the `np.int` alias no longer exists in
        # current NumPy releases.
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Load and return batch number `idx` as ``(data, labels)`` arrays."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.image_filenames[start:stop]
        batch_y = self.labels[start:stop]

        # Files are read as comma-separated values and transposed.
        samples = [np.loadtxt(str(file_name), delimiter=',').T for file_name in batch_x]
        return np.array(samples), np.array(batch_y)