In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model

from dataload_merlion import Dataset

In [None]:
import os

# Set CUDA_VISIBLE_DEVICES to "1" for this process (presumably to pin the run
# to the GPU with index 1 on a multi-GPU machine).
# NOTE(review): tensorflow is already imported in the cell above; confirm the
# variable is still honoured at this point, or move this ahead of that import.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

# Load Dataset

In [None]:
# Load the "realTraffic" NAB collection from ../Datasets and keep the first
# entry: element 0 is the frame holding the 'value' column, element 1 the
# frame holding the 'anomaly' column (both are read in the next cell).
dataset = Dataset()
nab_dataset = dataset.load_NAB("realTraffic", "../Datasets")
nab_dataset_x, nab_dataset_y = (nab_dataset[0][part] for part in (0, 1))

In [None]:
# Raw series and its per-timestamp anomaly flags.
data = nab_dataset_x['value'].to_numpy()
labels = nab_dataset_y['anomaly'].to_numpy()

# Standardise to zero mean / unit variance in float32, then go back to NumPy.
# NOTE(review): the statistics are computed over the whole series, i.e. the
# held-out part also contributes to them; confirm this is acceptable.
data = tf.cast(data, dtype=tf.float32)
mean_data = tf.math.reduce_mean(data)
std_data = tf.math.reduce_std(data)
data = ((data - mean_data) / std_data).numpy()

# Chronological split (first 70 % train, last 30 % test). The sliding windows
# built further down are only meaningful over consecutive timestamps, so the
# samples must NOT be shuffled here (train_test_split shuffles by default).
train_data, test_data, train_labels, test_labels = train_test_split(
    data, labels, test_size=0.3, shuffle=False
)

# Preprocessing

In [None]:
class Preprocessing:
    """Static helpers that cut a 1-D series (and its labels) into fixed-length windows."""

    @staticmethod
    def create_dataset(data, window_size):
        """Split `data` into consecutive, non-overlapping windows.

        Trailing values that do not fill a complete window are dropped.

        Args:
            data: 1-D NumPy array.
            window_size: number of values per window.

        Returns:
            float64 array of shape (data.size // window_size, window_size).
        """
        source_shape = data.shape
        truncated = data.size - (data.size // window_size) * window_size
        trimmed = data[:data.size - truncated]

        n_windows = trimmed.size // window_size
        dataset = np.empty((n_windows, window_size))
        for row in range(n_windows):
            start = row * window_size
            dataset[row] = trimmed[start:start + window_size]

        print(f"Dataset shape changed {source_shape} -> {dataset.shape}.")
        print(f"Truncated {truncated} values ")
        return dataset

    @staticmethod
    def create_overlap_dataset(data, window_size):
        """Build every stride-1 sliding window of `data`.

        Args:
            data: 1-D NumPy array.
            window_size: number of values per window.

        Returns:
            float64 array of shape (data.size - window_size + 1, window_size);
            row i holds data[i:i + window_size].
        """
        source_shape = data.shape
        n_windows = data.size - window_size + 1

        dataset = np.empty((n_windows, window_size))
        for start in range(n_windows):
            dataset[start] = data[start:start + window_size]

        print(f"Dataset shape changed {source_shape} -> {dataset.shape}")
        return dataset

    @staticmethod
    def create_labels(labels, window_size):
        """Collapse per-sample labels into one flag per stride-1 window.

        Entry i is True when `True in labels[i:i + window_size]`, i.e. when at
        least one sample of that window is flagged.

        Returns:
            NumPy array with len(labels) - window_size + 1 entries.
        """
        n_windows = len(labels) - window_size + 1
        return np.array(
            [True in labels[start:start + window_size] for start in range(n_windows)]
        )

    @staticmethod
    def create_overlap_labels(labels, window_size):
        """Return the per-sample labels of every stride-1 window.

        Returns:
            NumPy array whose row i is labels[i:i + window_size].
        """
        n_windows = len(labels) - window_size + 1
        return np.array(
            [labels[start:start + window_size] for start in range(n_windows)]
        )
        

In [None]:
WINDOW_SIZE = 30

# Turn both 1-D series into stride-1 sliding windows of WINDOW_SIZE values.
# NOTE: this overwrites train_data / test_data, so the cell is not idempotent;
# re-run the split cell before running this one again.
train_data = Preprocessing.create_overlap_dataset(train_data, WINDOW_SIZE)
test_data = Preprocessing.create_overlap_dataset(test_data, WINDOW_SIZE)

train_labels = train_labels.astype(bool)
test_labels = test_labels.astype(bool)

# One flag per window: True means the window contains at least one anomalous
# sample (see Preprocessing.create_labels).
cutted_train_labels = Preprocessing.create_labels(train_labels, WINDOW_SIZE)
cutted_test_labels = Preprocessing.create_labels(test_labels, WINDOW_SIZE)

# Normal windows are the ones WITHOUT any anomalous sample, hence the negated
# mask; the flagged windows from both splits form the anomalous evaluation set.
normal_train_data = train_data[~cutted_train_labels]
normal_test_data = test_data[~cutted_test_labels]

anomalous_test_data = np.concatenate((test_data[cutted_test_labels], train_data[cutted_train_labels]))

print(f"normal train data size : {len(normal_train_data)}")
print(f"normal test data size : {len(normal_test_data)}")

print(f"anomalous test data size : {len(anomalous_test_data)}")

In [None]:
# Plot the first window of normal_train_data. The x axis follows WINDOW_SIZE
# instead of a literal 30 so the cell keeps working if the window length changes.
plt.grid()
plt.plot(np.arange(WINDOW_SIZE), normal_train_data[0])
plt.title("Normal")
plt.show()

In [None]:
# Plot the first window of anomalous_test_data. The x axis follows WINDOW_SIZE
# instead of a literal 30 so the cell keeps working if the window length changes.
plt.grid()
plt.plot(np.arange(WINDOW_SIZE), anomalous_test_data[0])
plt.title("Anomalous")
plt.show()

# Anomaly Detector

In [None]:
class AnomalyDetector(Model):
  """Fully connected autoencoder that reconstructs fixed-length windows.

  The encoder compresses a window to 8 features (30 -> 20 -> 10 -> 8 units);
  the decoder expands it back (10 -> 20 -> window_size units).

  Args:
    window_size: number of values per input window, i.e. the width of the
      reconstructed output. Defaults to 30.
  """

  def __init__(self, window_size=30):
    super().__init__()
    self.encoder = tf.keras.Sequential([
      layers.Dense(30, activation="relu"),
      layers.Dense(20, activation="relu"),
      layers.Dense(10, activation="relu"),
      layers.Dense(8, activation="relu")])

    # The inputs are z-score standardised, so they contain negative values and
    # values above 1. A sigmoid output is confined to (0, 1) and could never
    # reproduce them; the reconstruction layer is therefore linear.
    self.decoder = tf.keras.Sequential([
      layers.Dense(10, activation="relu"),
      layers.Dense(20, activation="relu"),
      layers.Dense(window_size, activation="linear")])

  def call(self, x):
    """Encode `x` and return its reconstruction (same last dimension as the window)."""
    return self.decoder(self.encoder(x))

autoencoder = AnomalyDetector(WINDOW_SIZE)

In [None]:
# Train with Adam, using mean absolute error as the reconstruction loss.
autoencoder.compile(loss='mae', optimizer='adam')

In [None]:
# Fit the autoencoder to reproduce its own input on normal_train_data; the
# reconstruction loss on all of test_data is tracked as validation loss.
history = autoencoder.fit(
    x=normal_train_data,
    y=normal_train_data,
    batch_size=16,
    epochs=30,
    shuffle=False,
    validation_data=(test_data, test_data),
)

In [None]:
# Training vs. validation loss per epoch, as recorded by fit().
loss_curves = history.history
plt.plot(loss_curves["loss"], label="Training Loss")
plt.plot(loss_curves["val_loss"], label="Validation Loss")
plt.legend()

In [None]:
INDEX = 0

# Run the windows of normal_test_data through the encoder and the decoder.
encoded_imgs = autoencoder.encoder(normal_test_data).numpy()
decoded_imgs = autoencoder.decoder(encoded_imgs).numpy()

# Input vs. reconstruction for one window; the shaded band is the error.
# The x axis is derived from the data instead of a hard-coded 30.
plt.plot(normal_test_data[INDEX], 'b')
plt.plot(decoded_imgs[INDEX], 'r')
plt.fill_between(np.arange(normal_test_data.shape[1]), decoded_imgs[INDEX], normal_test_data[INDEX], color='lightcoral')
plt.legend(labels=["Input", "Reconstruction", "Error"])
plt.show()

# Overall reconstruction error (arguments in sklearn's (y_true, y_pred) order).
mse = mean_squared_error(normal_test_data, decoded_imgs)
print("Mean Squared Error : ", mse)

# Per-window MSE computed in one vectorised pass, kept as a plain list.
mse_list = np.mean(np.square(normal_test_data - decoded_imgs), axis=1).tolist()
worst_mse = max(mse_list)
print(worst_mse)
print(mse_list.index(worst_mse))

In [None]:
INDEX = 0

# Run the windows of anomalous_test_data through the encoder and the decoder.
encoded_imgs = autoencoder.encoder(anomalous_test_data).numpy()
decoded_imgs = autoencoder.decoder(encoded_imgs).numpy()

# Input vs. reconstruction for one window; the shaded band is the error.
# The x axis is derived from the data instead of a hard-coded 30.
plt.plot(anomalous_test_data[INDEX], 'b')
plt.plot(decoded_imgs[INDEX], 'r')
plt.fill_between(np.arange(anomalous_test_data.shape[1]), decoded_imgs[INDEX], anomalous_test_data[INDEX], color='lightcoral')
plt.legend(labels=["Input", "Reconstruction", "Error"])
plt.show()

# Overall reconstruction error (arguments in sklearn's (y_true, y_pred) order).
mse = mean_squared_error(anomalous_test_data, decoded_imgs)
print("Mean Squared Error : ", mse)

# Per-window MSE computed in one vectorised pass, kept as a plain list.
mse_list = np.mean(np.square(anomalous_test_data - decoded_imgs), axis=1).tolist()
worst_mse = max(mse_list)
print(worst_mse)
print(mse_list.index(worst_mse))