In [2]:
import os
from gpuutils import GpuUtils
# Request a single GPU for this notebook.
# NOTE(review): presumably this has to run before TensorFlow is imported so
# that TensorFlow only sees the allocated device - confirm against gpuutils.
GpuUtils.allocate(gpu_count=1, framework='keras')

import tensorflow as tf
# Turn on memory growth for every GPU TensorFlow can see, so memory is
# claimed as it is needed instead of all at once.
physical_devices = tf.config.list_physical_devices('GPU')
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True) 

GPU 0  will be allocated


In [3]:
from matplotlib import pyplot as plt
import numpy as np
#import seaborn as sns
import pandas as pd
from tensorflow import keras
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Reshape, GlobalAveragePooling1D, Activation, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D, UpSampling1D
#from keras_flops import get_flops

In [4]:
def normalizing_data(data):
  """
    Standardize data to zero mean and unit standard deviation.

    The mean and standard deviation are computed over every element
    of the array that is passed in.
    Args:
      data: numpy array to normalize
    Returns:
      normalized_data, std, mean
  """
  mean = np.mean(data)
  std = np.std(data)
  return (data - mean)/std, std, mean

In [5]:
def unnormalizing_data(normalized_data, std, mean):
  """
    Map normalized values back to the original scale.
    Args:
      normalized_data: values that were scaled as (data - mean)/std
      std: standard deviation used for the scaling
      mean: mean used for the scaling
    Returns:
      normalized_data*std + mean
  """
  return normalized_data*std + mean

In [13]:
def load_data(all_signals=True, data_dir='/home/halin/Autoencoder/Data', number_of_test_samples=20000):
  """
    This function loads the locally downloaded data from the ARIANNA group
    and builds a shuffled, labelled test set from it.
    Args:
      all_signals: True means that all the signals are used in the test
        data and the returned signal/noise arrays are the complete data
        sets. If False only number_of_test_samples signals (and twice as
        many noise traces) are held out as test data and removed from the
        returned signal/noise arrays. Can be useful if training on signals
        as well or if only a small test set is wanted.
      data_dir: directory that contains the trimmed100_data_*.npy files
      number_of_test_samples: number of signals in the test set when
        all_signals is False
    Returns:
      x_test, y_test, smask_test, signal, noise, std, mean
      x_test has a trailing axis of length 1 added, y_test is 0 for noise
      and 1 for signal, smask_test is True where y_test is 1.
      std and mean are the statistics of the noise data; the statistics
      used to normalize the signal data are not returned.
  """
  noise_files = [f'{data_dir}/trimmed100_data_noise_3.6SNR_1ch_{i:04d}.npy' for i in range(11)]
  signal_files = [f'{data_dir}/trimmed100_data_signal_3.6SNR_1ch_{i:04d}.npy' for i in range(2)]

  # Stack once instead of growing the array file by file
  noise = np.vstack([np.load(file_name) for file_name in noise_files])
  signal = np.vstack([np.load(file_name) for file_name in signal_files])

  # Signal and noise are each normalized with their own statistics
  signal, _, _ = normalizing_data(signal)
  noise, std, mean = normalizing_data(noise)

  # np.int was removed in NumPy 1.24; permutation needs no dtype at all
  noise = noise[np.random.permutation(noise.shape[0])]
  signal = signal[np.random.permutation(signal.shape[0])]

  if all_signals:
    signal_test = signal
    noise_test = noise[:len(signal)*2]
  else:
    # Hold out the test samples first so they do not also end up in the
    # returned signal/noise arrays
    signal_test = signal[:number_of_test_samples]
    noise_test = noise[:number_of_test_samples*2]
    signal = signal[number_of_test_samples:]
    noise = noise[number_of_test_samples*2:]

  x_test = np.vstack((noise_test, signal_test))
  x_test = np.expand_dims(x_test, axis=-1)
  # The noise rows come first in x_test, so they get label 0
  y_test = np.ones(len(x_test))
  y_test[:len(noise_test)] = 0

  shuffle = np.random.permutation(x_test.shape[0])
  x_test = x_test[shuffle]
  y_test = y_test[shuffle]
  smask_test = y_test == 1

  return x_test, y_test, smask_test, signal, noise, std, mean

In [14]:
# Build the test set from a limited number of signals (all_signals=False)
# instead of using every signal trace.
x_test, y_test, smask_test, signal, noise, std, mean = load_data(all_signals=False)

In [8]:
# Load the saved Keras model from its .h5 file on disk.
path = '/home/halin/Autoencoder/Models/complement_models/best_model.h5'
model = load_model(path)

In [9]:
def plot_signal(x):
  """
    Draw every entry of x in a figure of its own and show it.
    Args:
      x: iterable of traces; each entry is handed to Axes.plot
  """
  for trace in x:
    figure, axis = plt.subplots(1, 1)
    axis.plot(trace)
    figure.tight_layout()
    plt.show()

In [12]:
def find_signal(model, treshold, x, smask, under_treshold=True):
  """
    This function steps through the samples in x and collects those whose
    prediction loss is below or above a certain threshold.
    Args:
      model: keras model
      treshold: (float) value the loss of every sample is compared with
      x: data to test, one sample per entry along the first axis
      smask: where the true signals are (not used by this function, kept
        so existing callers keep working)
      under_treshold: (bool) True collects samples with loss < treshold,
        False collects samples with loss > treshold
    Returns:
      outliers: the samples of x beyond the threshold, in a list

  """
  if len(x) == 0:
    return []

  # One batched predict call; calling model.predict once per sample is
  # very slow for large test sets
  predictions = model.predict(np.asarray(x))

  outliers = []
  for i in range(len(x)):
    # Keep a leading batch axis of length 1 so the loss gets the same
    # shape as it would for a predict call on this single sample
    x_pred = predictions[i:i + 1]
    pred_loss = keras.losses.mean_squared_error(x[i], x_pred)
    # NOTE(review): len(pred_loss) is the length of the first axis of the
    # loss; if that is the batch axis of length 1 this is a sum over the
    # remaining entries rather than a mean - confirm this is intended
    pred_loss = np.sum(pred_loss)/len(pred_loss)
    if under_treshold:
      is_outlier = pred_loss < treshold
    else:
      is_outlier = pred_loss > treshold
    if is_outlier:
      outliers.append(x[i])
  return outliers

   

In [None]:
# Collect the test samples whose loss is below 0.001 (under_treshold=True).
outliers = find_signal(model, 0.001, x_test, smask_test, True)

In [16]:
# Show how many samples find_signal returned.
print(len(outliers))

23
