In [1]:
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from matplotlib import pyplot as plt
import tensorflow as tf
import os
import csv

2023-06-30 11:57:10.564885: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-30 11:57:10.657845: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-06-30 11:57:10.686284: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-06-30 11:57:11.041862: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: li

In [None]:
# Experiment configuration: every value a reader might want to tune lives here.

# Task ids used to build the "normal_<id>" / "<fault>_<id>" folder names.
train_task = 0
test_task = 0
# Sampling-frequency folder of the faulty recordings.
sample_freq = "12k"
# Folder naming the location where the fault is found.
fault_location = "DE"
dataset = "CWRU"
# Sensor whose CSV file is loaded (file name without the ".csv" suffix).
sensor = "DE"
# Experiment name.
experiment = "CWRU_test"
# Root folder that contains the "datasets" directory. The ORION_BASE_PATH
# environment variable overrides the original machine-specific default, so the
# notebook can run on another machine without editing this cell.
base_path = os.environ.get(
    "ORION_BASE_PATH", "/home/lunet/cosoc/Desktop/orion_anomaly_pipeline"
)

In [None]:
#gets the data
def get_data(mode, fault_type=""):
    """Load one sensor recording as a DataFrame indexed by its ``timestamp`` column.

    The CSV path is assembled from the module-level settings ``base_path``,
    ``dataset``, ``sensor``, ``train_task``, ``test_task``, ``sample_freq`` and
    ``fault_location``.

    Parameters
    ----------
    mode : str
        ``"train"`` or ``"test"``; also used as a folder name in the path.
    fault_type : str, optional
        Only used when ``mode == "test"``. ``"normal"`` selects the normal
        recording; any other value is used as the fault folder prefix.

    Returns
    -------
    pandas.DataFrame
        Contents of the selected CSV file.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``"train"`` nor ``"test"``. Previously this fell
        through to ``return data`` and failed with an UnboundLocalError.
    """
    if mode not in ("train", "test"):
        raise ValueError(f"mode must be 'train' or 'test', got {mode!r}")

    if mode == "train" or fault_type == "normal":
        # NOTE(review): the normal *test* recording is looked up with
        # train_task, not test_task -- kept as in the original; confirm intent.
        sub_dirs = ["normal_" + str(train_task)]
    else:
        sub_dirs = [sample_freq, fault_location, fault_type + "_" + str(test_task)]

    path = os.path.join(base_path, "datasets", dataset, mode, *sub_dirs, sensor) + ".csv"
    return pd.read_csv(path, index_col="timestamp")
            

In [None]:
# Load the training recording and the IR028 test recording.
train_data = get_data("train")
test_data = get_data("test", "IR028")

# Quick look at the first training rows.
print(train_data.head())

In [None]:
# Visualise the first 2000 samples of the normal data used for training
# (original note: "1 rotation" -- TODO confirm the window really is one rotation).
fig, ax = plt.subplots()
ax.plot(train_data["value"][0:2000])
plt.show()


In [None]:
# Visualise the first 401 samples of the data used for testing
# (original note: "1 rotation" -- TODO confirm the window really is one rotation).
fig, ax = plt.subplots()
ax.plot(test_data["value"][0:401])
plt.show()

In [None]:
# Standardise the training data. The mean and standard deviation are kept
# so the test data can later be scaled with the same statistics.
training_mean, training_std = train_data.mean(), train_data.std()
df_training_value = train_data.sub(training_mean).div(training_std)
print("Number of training samples:", len(df_training_value))


In [None]:
# Print the raw training head followed by the normalised training frame.
print(train_data.head(), df_training_value, sep="\n")

In [None]:
def create_sequences(values, time_steps=400):
    """Cut ``values`` into every overlapping window of ``time_steps`` rows.

    Windows start at each index from 0 up to ``len(values) - time_steps``
    (stride 1) and are stacked along a new leading axis. ``np.stack`` raises
    ValueError when ``values`` is too short to yield a single window.
    """
    n_windows = len(values) - time_steps + 1
    windows = [values[start:start + time_steps] for start in range(n_windows)]
    return np.stack(windows)


# Window the normalised training values using the default window length.
x_train = create_sequences(df_training_value.to_numpy())
print("Training input shape: ", x_train.shape)


In [None]:
# Lay each window out as a single row of 400 values: (windows, 1, 400).
x_train = np.reshape(x_train, (-1, 1, 400))
x_train[0].shape

In [None]:
# model = keras.Sequential(
#     [
#         layers.Input(shape=(x_train.shape[1], x_train.shape[2])),
#         layers.Conv1D(
#             filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
#         ),
#         layers.Dropout(rate=0.2),
#         layers.Conv1D(
#             filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
#         ),
#         layers.Conv1DTranspose(
#             filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
#         ),
#         layers.Dropout(rate=0.2),
#         layers.Conv1DTranspose(
#             filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
#         ),
#         layers.Conv1DTranspose(filters=1, kernel_size=7, padding="same"),
#     ]
# )
# model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
# model.summary()


In [None]:
# Dense autoencoder: four 128-unit layers, an 8-unit bottleneck, four more
# 128-unit layers, then a linear Dense layer back to the input width.
hidden_units = [128] * 4 + [8] + [128] * 4

# Layers are created in the same order as a hand-written list would create them.
layer_stack = [layers.Input(shape=(x_train.shape[1], x_train.shape[2]))]
for units in hidden_units:
    # Each hidden stage is Dense -> BatchNormalization -> ReLU.
    layer_stack += [
        layers.Dense(units),
        layers.BatchNormalization(),
        layers.Activation('relu'),
    ]
layer_stack.append(layers.Dense(x_train.shape[2]))

model = keras.Sequential(layer_stack)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse")
model.summary()

In [None]:
# Train the autoencoder to reproduce its own input. Training stops early when
# the validation loss has not improved for 5 epochs.
early_stopping = keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min")
fit_options = dict(
    epochs=10,
    batch_size=128,
    validation_split=0.1,
    callbacks=[early_stopping],
)
history = model.fit(x_train, x_train, **fit_options)


In [None]:
# Training and validation loss per epoch.
fig, ax = plt.subplots()
for metric, label in (("loss", "Training Loss"), ("val_loss", "Validation Loss")):
    ax.plot(history.history[metric], label=label)
ax.legend()
plt.show()

In [None]:
# Tabulate the metrics recorded by model.fit (one column per metric key).
hist = pd.DataFrame(history.history)
# Also print the raw history dict.
print(history.history)

In [None]:
# Reconstruct the training windows. The prediction was previously commented
# out, which left x_train_pred undefined on a fresh kernel. Reshaping to the
# model's (windows, 1, 400) input layout first keeps this cell re-runnable even
# after x_train has been switched to (windows, 400, 1) below.
x_train_pred = model.predict(x_train.reshape(-1, 1, 400))

# Switch to (windows, 400, 1) so that axis=1 averages over the time steps and
# each window yields a single MAE value.
x_train = x_train.reshape(-1, 400, 1)
x_train_pred = x_train_pred.reshape(-1, 400, 1)
train_mae_loss = np.mean(np.abs(x_train_pred - x_train), axis=1)

plt.hist(train_mae_loss, bins=50)
plt.xlabel("Train MAE loss")
plt.ylabel("No of samples")
plt.show()

# Get reconstruction loss threshold: the worst error seen on normal data.
threshold = np.max(train_mae_loss)
print("Reconstruction error threshold: ", threshold)


In [None]:
# Inspect the shape of the reconstructed training windows.
x_train_pred.shape

In [None]:
# First training window (red) against its reconstruction (black).
fig, ax = plt.subplots()
ax.plot(x_train[0], color='red')
ax.plot(x_train_pred[0], color='black')
plt.show()


In [None]:
# Scale the test data with the statistics learned from the training data.
df_test_value = (test_data - training_mean) / training_std
fig, ax = plt.subplots()
df_test_value[0:400].plot(legend=False, ax=ax)
plt.show()

# Create sequences from test values (first 3000 samples only).
x_test = create_sequences(df_test_value.values[0:3000])
print("Test input shape: ", x_test.shape)

# Reconstruct the test windows using the model's (windows, 1, 400) input layout.
x_test = x_test.reshape(-1, 1, 400)
x_test_pred = model.predict(x_test)
print(x_test_pred.shape)

# Get test MAE loss. Switch to (windows, 400, 1) *before* averaging so that
# axis=1 runs over the time steps and yields one MAE per window, the same
# quantity the training threshold is computed from. Averaging axis=1 of the
# (windows, 1, 400) layout only collapsed the singleton axis and produced
# per-point errors instead.
x_test = x_test.reshape(-1, 400, 1)
x_test_pred = x_test_pred.reshape(-1, 400, 1)
test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
print(np.mean(np.abs(x_test_pred[0] - x_test[0])))
test_mae_loss = test_mae_loss.reshape((-1))
plt.hist(test_mae_loss, bins=50)
plt.xlabel("test MAE loss")
plt.ylabel("No of samples")
plt.show()

# First test window (red) against its own reconstruction (black); this
# previously plotted the *training* reconstruction by mistake.
plt.plot(x_test[0], color='red')
plt.plot(x_test_pred[0], color='black')
plt.show()

# Detect all the samples which are anomalies.
anomalies = test_mae_loss > threshold
print("Number of anomaly samples: ", np.sum(anomalies))
print("Indices of anomaly samples: ", np.where(anomalies))
anomalies

In [None]:
# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies

# TIME_STEPS was never defined in this notebook. Take the window length from
# the test windows, which are laid out as (windows, time_steps, 1) at this point.
TIME_STEPS = x_test.shape[1]

# Never index past the windows that were actually scored: a slice beyond
# len(anomalies) is empty, and np.all([]) is True, which would flag every
# remaining sample as anomalous.
last_idx = min(len(anomalies), len(df_test_value) - TIME_STEPS + 1)

anomalous_data_indices = []
for data_idx in range(TIME_STEPS - 1, last_idx):
    if np.all(anomalies[data_idx - TIME_STEPS + 1 : data_idx + 1]):
        anomalous_data_indices.append(data_idx)


In [None]:
# Overlay the samples flagged as anomalous (red) on the full test signal.
df_subset = test_data.iloc[anomalous_data_indices]
fig, ax = plt.subplots()
test_data.plot(legend=False, ax=ax)
if not df_subset.empty:
    df_subset.plot(legend=False, ax=ax, color="red")
else:
    # DataFrame.plot raises on an empty frame, so skip the overlay when
    # nothing was flagged instead of crashing the cell.
    print("No anomalous samples to highlight.")
plt.show()


In [None]:
# First test window overlaid with its reconstruction.
fig, ax = plt.subplots()
ax.plot(x_test[0])
ax.plot(x_test_pred[0])
plt.show()