In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import sys
from sklearn import metrics # for the evaluation
from settings import CORR_GROUP, AD_THRESHOLD
from keras.callbacks import EarlyStopping
import tensorflow as tf
import logging

In [8]:
def unique_cols(df):
    a = df.to_numpy() # df.values (pandas<0.24)
    return (a[0] == a).all(0)


def create_supervised_dataset(df, target, feats, n_in=1, n_out=1):
    cols, names = list(), list()
    n_vars = len(feats)
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df[feats].shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df[target].shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(1)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(1)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    agg.dropna(inplace=True)
    return agg.values

In [9]:
from settings import INPUT_FILE

df = pd.read_csv(INPUT_FILE, index_col='ts')
#df = df.drop('Unnamed: 0', 1)
df.index = pd.to_datetime(df.index)
df_2 = df.loc[:,np.invert(unique_cols(df))]
# Average window
# df_2 = df.groupby(np.arange(len(df))//60).mean()
scaler = MinMaxScaler()
d = scaler.fit_transform(df_2)
scaled_df = pd.DataFrame(d, columns=df_2.columns, index=df_2.index)


In [10]:
scaled_df.head()

Unnamed: 0_level_0,AE_L1,AE_L2,AE_L3,AE_SUM,C_phi_L1,C_phi_L2,C_phi_L3,F,H_TDH_I_L1_N,H_TDH_I_L2_N,...,S_L1,S_L2,S_L3,S_SUM,U_L1_L2,U_L1_N,U_L2_L3,U_L2_N,U_L3_L1,U_L3_N
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-14 09:45:00,0.0,0.0,0.0,0.0,0.0,0.0,0.985689,0.591371,0.878465,0.769139,...,0.0,0.0,0.004865,0.001578,0.600407,0.705059,0.658637,0.614509,0.728167,0.672975
2022-04-14 09:46:00,0.0,0.0,0.0,0.0,0.0,0.0,0.836782,0.632546,0.878465,0.769139,...,0.0,0.0,0.004061,0.001318,0.601648,0.685304,0.636376,0.610935,0.707506,0.654408
2022-04-14 09:47:00,0.0,0.0,4.502219e-07,0.0,0.0,0.0,0.820093,0.603675,0.878465,0.769139,...,0.0,0.0,0.003887,0.001261,0.606283,0.693359,0.635031,0.610293,0.716453,0.659068
2022-04-14 09:48:00,0.0,0.0,4.911512e-07,0.0,0.0,0.0,0.739987,0.597113,0.878465,0.769139,...,0.0,0.0,0.003586,0.001163,0.628999,0.706141,0.650733,0.629983,0.731549,0.67987
2022-04-14 09:49:00,0.0,0.0,7.530985e-07,0.0,0.0,0.0,0.756175,0.536745,0.878465,0.769139,...,0.0,0.0,0.003666,0.001189,0.634475,0.702842,0.665284,0.641441,0.727198,0.687478


In [11]:
for k in CORR_GROUP:
    scaled_df[k + ' AD'] = " "
    scaled_df[k + ' AD Detected'] = " "
scaled_df.head()

Unnamed: 0_level_0,AE_L1,AE_L2,AE_L3,AE_SUM,C_phi_L1,C_phi_L2,C_phi_L3,F,H_TDH_I_L1_N,H_TDH_I_L2_N,...,ReacEc_L1 AD,ReacEc_L1 AD Detected,C_phi_L3 AD,C_phi_L3 AD Detected,ReacEc_L3 AD,ReacEc_L3 AD Detected,RealE_SUM AD,RealE_SUM AD Detected,H_TDH_U_L2_N AD,H_TDH_U_L2_N AD Detected
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-14 09:45:00,0.0,0.0,0.0,0.0,0.0,0.0,0.985689,0.591371,0.878465,0.769139,...,,,,,,,,,,
2022-04-14 09:46:00,0.0,0.0,0.0,0.0,0.0,0.0,0.836782,0.632546,0.878465,0.769139,...,,,,,,,,,,
2022-04-14 09:47:00,0.0,0.0,4.502219e-07,0.0,0.0,0.0,0.820093,0.603675,0.878465,0.769139,...,,,,,,,,,,
2022-04-14 09:48:00,0.0,0.0,4.911512e-07,0.0,0.0,0.0,0.739987,0.597113,0.878465,0.769139,...,,,,,,,,,,
2022-04-14 09:49:00,0.0,0.0,7.530985e-07,0.0,0.0,0.0,0.756175,0.536745,0.878465,0.769139,...,,,,,,,,,,


In [12]:
from random import random

anomaly_df = scaled_df.tail(int(0.1*len(scaled_df)))
for index, row in anomaly_df.iterrows():
    for k in CORR_GROUP:
        is_anomaly = random() < 0.05
        if is_anomaly:
            anomaly_df.at[index, k] -= 0.5
            anomaly_df.at[index, k + ' AD'] = True
        else:
            anomaly_df.at[index, k + ' AD'] = False

anomaly_df.tail()

Unnamed: 0_level_0,AE_L1,AE_L2,AE_L3,AE_SUM,C_phi_L1,C_phi_L2,C_phi_L3,F,H_TDH_I_L1_N,H_TDH_I_L2_N,...,ReacEc_L1 AD,ReacEc_L1 AD Detected,C_phi_L3 AD,C_phi_L3 AD Detected,ReacEc_L3 AD,ReacEc_L3 AD Detected,RealE_SUM AD,RealE_SUM AD Detected,H_TDH_U_L2_N AD,H_TDH_U_L2_N AD Detected
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-05-11 20:54:00,0.99994,0.999937,0.999939,0.999939,0.9644,0.97447,0.961782,0.520997,0.534157,0.473966,...,False,,False,,False,,False,,False,
2022-05-11 20:55:00,0.999971,0.999969,0.99997,0.99997,0.964736,0.978975,0.960113,0.506562,0.522807,0.462906,...,False,,False,,False,,False,,False,
2022-05-11 20:56:00,0.999995,0.999993,0.999994,0.999994,0.764903,0.866177,0.971629,0.525591,0.737308,0.592497,...,False,,False,,False,,False,,False,
2022-05-11 20:57:00,1.0,1.0,1.0,1.0,0.0,0.43167,0.992824,0.498031,0.878465,0.409323,...,False,,False,,False,,False,,False,
2022-05-11 20:58:00,1.0,1.0,1.0,1.0,0.0,0.41477,0.993229,0.517435,0.878465,0.384148,...,False,,False,,False,,False,,False,


In [13]:
import pandas as pd
import tensorflow as tf

import autokeras as ak

from settings import INPUT_FILE, CORR_GROUP, OUTPUT_FILE
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import logging

var = 'P_SUM'
model = tf.keras.models.load_model(f'models/{var}_autokeras.h5')
features = []
counter = 0
history_window = 15
for index, row in scaled_df.iterrows():

    if counter >= history_window:
        if row[var + ' AD'] == " ":
            tensor = np.array(features).reshape(-1, history_window, len(CORR_GROUP[var]))
            res = model.predict(tensor)
            ad_detected = abs(res - row[var]) > AD_THRESHOLD[var]
            scaled_df.at[index, var + ' AD Detected'] = ad_detected
        features = features[len(CORR_GROUP[var]):]
        
    counter += 1
    predictors = row[CORR_GROUP[var]]
    features += predictors.to_list()
    

In [30]:
from tensorflow import keras
from AttentionBiLSTM import attention

for var in ['U_L1_N', 'I_SUM', 'F', 'C_phi_L3']:
    model = keras.models.load_model(f'models/{var}_autokeras.h5')
    features = []
    counter = 0
    history_window = 15
    for index, row in scaled_df.iterrows():
        if counter >= history_window:
            tensor = np.array(features).reshape((1, 1, len(features)))
            res = model.predict(tensor)
            ad_detected = abs(res - row[var]) > AD_THRESHOLD[var]
            scaled_df.at[index, var + ' AD'] = ad_detected
            features = features[len(CORR_GROUP[var]):]

        counter += 1
        predictors = row[CORR_GROUP[var]]
        features += predictors.to_list()
    

In [31]:
for i in ['P_SUM', 'U_L1_N', 'I_SUM', 'F', 'C_phi_L3']:
    df[i + ' AD'] = scaled_df[i + ' AD']

df.to_csv('data/anomaly_detection_data.csv')

In [22]:
scaled_df.tail(10)

Unnamed: 0_level_0,S_SUM,I_SUM,H_TDH_I_L3_N,U_L3_L1,H_TDH_I_L1_N,C_phi_L1,P_L2,P_L1,P_L3,ReacEc_L1,...,RealE_SUM,AE_SUM,S_L3,H_TDH_U_L3_N,S_L2,S_L1,H_TDH_U_L1_N,U_L1_N,ReacE_SUM,P_SUM AD
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-14 08:36:00,0.001195,0.601491,0.648505,0.276176,0.855618,0.0,0.0,0.0,0.002648,1.0,...,0.999999,0.999999,0.003689,0.298488,0.0,0.0,0.326404,0.245372,0.999999,[[[False]]]
2022-04-14 08:38:00,0.001397,0.701224,0.711686,0.306044,0.855618,0.0,0.0,0.0,0.00295,1.0,...,0.999999,0.999999,0.004314,0.160908,0.0,0.0,0.170171,0.272803,0.999999,[[[False]]]
2022-04-14 08:39:00,0.001161,0.582527,0.66776,0.331721,0.855618,0.0,0.0,0.0,0.002507,1.0,...,0.999999,0.999999,0.003583,0.153925,0.0,0.0,0.165787,0.287604,1.0,[[[False]]]
2022-04-14 08:40:00,0.001071,0.537274,0.656861,0.320183,0.855618,0.0,0.0,0.0,0.002329,1.0,...,1.0,0.999999,0.003305,0.163582,0.0,0.0,0.174875,0.28328,1.0,[[[False]]]
2022-04-14 08:41:00,0.001181,0.592597,0.66085,0.325677,0.855618,0.0,0.0,0.0,0.002586,1.0,...,1.0,0.999999,0.003646,0.175149,0.0,0.0,0.185078,0.289237,1.0,[[[False]]]
2022-04-14 08:42:00,0.001163,0.584881,0.659865,0.303754,0.855618,0.0,0.0,0.0,0.002543,1.0,...,1.0,1.0,0.00359,0.167025,0.0,0.0,0.17765,0.266856,1.0,[[[False]]]
2022-04-14 08:43:00,0.001227,0.617651,0.670128,0.294729,0.855618,0.0,0.0,0.0,0.002674,1.0,...,1.0,1.0,0.003789,0.155505,0.0,0.0,0.164282,0.261521,1.0,[[[False]]]
2022-04-14 08:44:00,0.001289,0.641381,0.689899,0.488641,0.855618,0.0,0.0,0.0,0.002776,1.0,...,1.0,1.0,0.00398,0.146689,0.0,0.0,0.153842,0.463451,1.0,[[[False]]]
2022-04-14 08:45:00,0.001337,0.665402,0.677359,0.473352,0.855618,0.0,0.0,0.0,0.002888,1.0,...,1.0,1.0,0.004126,0.146698,0.0,0.0,0.155565,0.441677,1.0,[[[False]]]
2022-04-14 08:46:00,0.00122,0.60803,0.677079,0.424712,0.855618,0.0,0.0,0.0,0.002625,1.0,...,1.0,1.0,0.003765,0.136737,0.0,0.0,0.142661,0.397244,1.0,[[[False]]]
