#### Anomaly Detection - Imports and Setup

In [None]:
%reload_ext autoreload
%autoreload 2
import tensorflow
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential, Model
from keras.layers import Lambda, Dropout, SimpleRNN, Dense, LSTM, RepeatVector, Input, TimeDistributed, concatenate
from keras import regularizers
from keras.utils import plot_model

import IPython, IPython.display, os, datetime
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Notebook-wide plotting defaults: wide, short figures with a grid on every axes.
mpl.rcParams.update({
    'figure.figsize': (14, 4),
    'axes.grid': True,
})

# Report the library versions this run is using.
print(f"Tensorflow Version {tf.__version__}, Keras Vesion: {keras.__version__}")

In [None]:
import ts_utils
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pickle

# STEP 1: load the data -- only the first 59 columns are read for testing.
df = pd.read_csv(
    "../data/processminer-rare-event-mts.csv.zip",
    sep=';',
    usecols=range(59),
)

# Ordered 80/20 split by row position; the first two columns are excluded
# from the feature matrix.
split = int(.8 * len(df))
features_trn = df.iloc[:split, 2:]
features_tst = df.iloc[split:, 2:]

# Fit the scaler on the training rows only and apply the same scaling to the
# test rows.
scaler = MinMaxScaler()
df_scaled_trn = pd.DataFrame(
    scaler.fit_transform(features_trn),
    columns=features_trn.columns,
)
df_scaled_tst = pd.DataFrame(
    scaler.transform(features_tst),
    columns=features_tst.columns,
)


# STEP 2: build the windowed datasets.
# Inputs and labels use the same columns (every scaled feature).
input_slice = slice(0, len(df_scaled_trn.columns))
label_slice = input_slice
window_len = 5
ouput_len = 1
batch_size = 128

inp_feat_len = input_slice.stop - (input_slice.start or 0)
ouput_feat_len = label_slice.stop - (label_slice.start or 0)

feature_cols = df_scaled_trn.columns[input_slice]
ds_trn = tf.data.Dataset.from_tensor_slices(df_scaled_trn[feature_cols])
ds_tst = tf.data.Dataset.from_tensor_slices(df_scaled_tst[feature_cols])

# Mini-batched windows used for fitting.
# NOTE(review): ts_utils.windowae is not shown here; presumably each element
# is an (input, target) pair of windows -- confirm against ts_utils.
window_trn = ts_utils.windowae(ds_trn, window_len, batch_size=batch_size)
window_tst = ts_utils.windowae(ds_tst, window_len, batch_size=batch_size)

# Same windows with a very large batch size, used by the scoring cells.
window_trn100 = ts_utils.windowae(ds_trn, window_len, batch_size=100000)
window_tst100 = ts_utils.windowae(ds_tst, window_len, batch_size=100000)

# Raw frame next to the scaled training features.
display(pd.concat([df, df_scaled_trn], axis=1))
# Debug aid: for w in window_trn.take(1): print(f'{w[0]} \n\n {w[1]}' )

In [None]:
from keras.utils import plot_model

# Stacked LSTM autoencoder: the encoder reduces a (window_len, inp_feat_len)
# window to a single 16-unit vector, which is repeated window_len times and
# decoded back to inp_feat_len values per time step.
lstm_ae1 = Sequential(
    [
        # Encoder
        LSTM(32, activation='relu', input_shape=(window_len, inp_feat_len), return_sequences=True),
        LSTM(16, activation='relu', return_sequences=False),
        RepeatVector(window_len),
        # Decoder
        LSTM(16, activation='relu', return_sequences=True),
        LSTM(32, activation='relu', return_sequences=True),
        TimeDistributed(Dense(inp_feat_len)),
    ],
    name="Simple_LSTM_AE",
)

lstm_ae1.summary()

plot_model(lstm_ae1, show_shapes=True)


In [None]:
dim = 128

# --- Autoencoder branch: encode the window, then decode it back -------------
input_layer = Input(shape=(window_len, inp_feat_len), dtype='float32', name='input')

encoder_seq = LSTM(dim, return_sequences=True)(input_layer)
encoder_vec = LSTM(int(dim // 2), return_sequences=False)(encoder_seq)

# Repeat the encoded vector once per time step so the decoder sees a sequence.
repeated_lyr = RepeatVector(window_len)(encoder_vec)
decoder_seq = LSTM(int(dim // 2), return_sequences=True)(repeated_lyr)
decoder_seq = LSTM(dim, return_sequences=True)(decoder_seq)
decoded_inputs = TimeDistributed(
    Dense(units=inp_feat_len, activation='linear')
)(decoder_seq)

# --- Prediction head: (dropped-out) raw input joined with the reconstruction -
dropout_input = Dropout(0.2)(input_layer)
concat_layer = concatenate([dropout_input, decoded_inputs])

# Both the kernel and the recurrent weights get an L1+L2 penalty.
reg_strength = dict(l1=.1, l2=.1)
memory_layer = LSTM(
    units=dim,
    kernel_regularizer=regularizers.l1_l2(**reg_strength),
    recurrent_regularizer=regularizers.l1_l2(**reg_strength),
    return_sequences=False,
)(concat_layer)

# One linear output per feature; this head returns a single vector per window
# (return_sequences=False above), not a full sequence.
preds = Dense(units=inp_feat_len, activation='linear')(memory_layer)

umodel = Model(input_layer, preds)
#umodel.summary()

In [None]:
# Choose the model to train (swap the comment to train the functional model).
model = lstm_ae1
#model = umodel

# NOTE(review): ts_utils.compile_fit is not shown here. The `window_tst`
# argument receives the *training* windows, so any validation metric is
# measured on training data -- confirm this is intentional.
history = ts_utils.compile_fit(
    model,
    window_trn,
    window_tst=window_trn,
    patience=30,
    epochs=50,
)

# Drop the per-epoch training log so only the figure remains in the output.
IPython.display.clear_output()

# One curve per entry recorded in the training history.
for metric_name, metric_values in history.history.items():
    plt.plot(metric_values, label=f"{metric_name}")
plt.title("History of Losses")
plt.legend()

### Anomaly Detection - Precision/Recall etc.

In [None]:
model = lstm_ae1

# Score the training windows. `p` (predictions) and `w` (the batch itself)
# are reused by the following cells, and only the values from the LAST loop
# iteration survive. window_trn100 is built with batch_size=100000 so that the
# whole split is expected to arrive as one batch; fail loudly if that ever
# stops being true instead of silently scoring only part of the data.
n_batches = 0
for w in window_trn100:
    p = model.predict(w[0])
    n_batches += 1

assert n_batches == 1, (
    f"expected window_trn100 to yield exactly one batch, got {n_batches}; "
    "later cells only use the last batch"
)

In [None]:
# `es` (the standardized errors) is only created by the next cell, so
# evaluating it here raises NameError on a fresh kernel. Sanity-check the
# predictions produced by the previous cell instead.
p.shape

In [None]:
# Squared reconstruction error, averaged over axis 1 of the prediction
# (the time-step axis for lstm_ae1's sequence output).
# RMSE alternative: e = np.sqrt(np.mean((p - w[1])**2, axis=1))
squared_error = (p - w[1])**2
e = np.mean(squared_error, axis=1)

# Standardize each error column, then add the columns up to get one score per
# window. NOTE: this rebinds `scaler`, which the data-loading cell used for
# the MinMaxScaler.
scaler = StandardScaler()
es = scaler.fit_transform(e)

m = np.sum(es, axis=1)
# Unscaled alternative: m = np.sum(e, axis=1)

# Labels for the scored rows.
# NOTE(review): assumes score i lines up with row i of df -- confirm against
# ts_utils.windowae.
y = df.y[0:len(e)]
# Keep the score only where the label is not below 1; NaN elsewhere so that
# those points are not drawn.
yy = [np.nan if label < 1 else m[idx] for idx, label in enumerate(y)]

plt.plot(
    range(len(e)), m,
    alpha=0.2, c='orange', marker='o', linestyle='', markersize=.5,
    label="error-scrore",
);
plt.plot(yy, marker='x', c="red", linestyle="", markersize=3, label="Anomalies")

plt.title(f"Reconstruction Error: #Anomalies: {sum(y)}")
#plt.ylim(-10,50)
plt.legend();


In [None]:
# Summarize the training anomaly scores (count, mean, spread, quartiles)
# instead of dumping the raw array; the quartiles help when picking a threshold.
pd.Series(m, name="score").describe()

In [None]:
# Split the scores by label: label < 1 -> negative, label > 0.9 -> positive.
yyneg = [score for score, label in zip(m, y) if label < 1]
yypos = [score for score, label in zip(m, y) if label > 0.9]

# Density-normalized so the two classes can be compared despite their
# different sizes.
plt.hist(yypos, density=1, alpha=0.5, label="positive")
plt.hist(yyneg, density=1, alpha=0.3, label="Negative")
plt.legend()

In [None]:
# Score the test windows. `p` and `w` are overwritten here and reused by the
# following cells; only the LAST loop iteration survives. window_tst100 is
# built with batch_size=100000 so that the whole split is expected to arrive
# as one batch; fail loudly if that ever stops being true.
n_batches = 0
for w in window_tst100:
    p = model.predict(w[0])
    n_batches += 1

assert n_batches == 1, (
    f"expected window_tst100 to yield exactly one batch, got {n_batches}; "
    "later cells only use the last batch"
)

In [None]:
# `mts` (the standardized test scores) is only created by the next cell, so
# evaluating it here raises NameError on a fresh kernel. Sanity-check the
# test predictions produced by the previous cell instead.
p.shape

In [None]:
# Squared reconstruction error on the test windows, averaged over axis 1 of
# the prediction (the time-step axis for lstm_ae1's sequence output).
et = np.mean((p - w[1])**2, axis=1)
# `scaler` is expected to be the StandardScaler fitted on the training errors
# (it is rebound in the training-error cell), so the test scores are on the
# same scale as the training scores.
ets= scaler.transform(et)
mts= np.sum(ets, axis=1)   # standardized score per window
mt = np.sum(et, axis=1)    # raw score per window

# The test windows were built from rows `split:` of df, so the labels must be
# taken from those same rows -- df.y[0:len(et)] would read training labels.
# NOTE(review): assumes score i lines up with row split + i -- confirm against
# ts_utils.windowae.
yt = df.y[split:split + len(et)]
# Keep the score only where the label is not below 1; NaN elsewhere so that
# those points are not drawn.
yy = [np.nan if j<1 else mt[i] for i,j in enumerate(yt)]

#plt.plot(range(len(et)), mts, alpha=0.2, c='blue', marker='+', label="score", linestyle='', markersize=.5);
plt.plot(range(len(et)), mt, alpha=0.2, c='orange', marker='o', label="score", linestyle='', markersize=.5);
plt.plot(yy, marker='x', c="red" , linestyle="", markersize=3, label="Anomalies")

plt.title(f"Reconstruction Error Test: #Anomalies: {sum(yt)}")
plt.ylim(-1,7)
plt.legend();


### Precision-Recall Curves

In [None]:
from sklearn.metrics import confusion_matrix, precision_recall_curve, ConfusionMatrixDisplay
from sklearn.metrics import recall_score, classification_report, auc, roc_curve
from sklearn.metrics import precision_recall_fscore_support, f1_score
from sklearn.metrics import PrecisionRecallDisplay



# Precision/recall of the training anomaly score `m` against the labels `y`.
prec, recall, thr = precision_recall_curve(y, m)
prd = PrecisionRecallDisplay(prec, recall)
prd.plot()
plt.show()

# precision_recall_curve returns one more precision/recall value than
# thresholds: element i belongs to thr[i], and the extra LAST element
# (precision=1, recall=0) has no threshold. Drop the last element, not the
# first, so every point is plotted against its own threshold.
plt.plot(thr, prec[:-1],   label="Precision", marker='o', linewidth=1, markersize=1)
plt.plot(thr, recall[:-1], label="Recall",    marker='x', linewidth=1, markersize=1)
plt.title('PR Curve')
plt.xlabel('Threshold')
plt.ylabel('Precision/Recall')
plt.legend()
#plt.xlim(2,2.5)
#plt.ylim(0,.2)


In [None]:
# Flag a window as an anomaly when its score exceeds THRESHOLD.
THRESHOLD = 0.7
yhat = [1 if score > THRESHOLD else 0 for score in m]

# Pin the class order to [0, 1] so the matrix is always 2x2 (even when one
# class is absent) and row/column 0 is the normal class, row/column 1 the
# anomaly class.
cm = confusion_matrix(y, yhat, labels=[0, 1])
# Display labels follow that same order: class 0 first, class 1 second.
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Normal", "Anomaly"])

cost_fp= 1      # Cost of False Positive
cost_fn= 100    # Cost of False Negative

# cm[0,1]: actual 0 predicted 1 (false positives);
# cm[1,0]: actual 1 predicted 0 (false negatives).
tcost = cm[0,1] * cost_fp +  cm[1,0] * cost_fn

disp.plot()
plt.title(f"Total cost ${tcost}")
plt.grid(0)


In [None]:
# Show the class balance of the labels rather than dumping the whole Series.
y.value_counts()