In [24]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import sys
from sklearn import metrics # for the evaluation
from settings import CORR_GROUP, AD_THRESHOLD
from keras.callbacks import EarlyStopping
import tensorflow as tf
import logging

In [25]:
def unique_cols(df):
    a = df.to_numpy() # df.values (pandas<0.24)
    return (a[0] == a).all(0)


def create_supervised_dataset(df, target, feats, n_in=1, n_out=1):
    cols, names = list(), list()
    n_vars = len(feats)
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df[feats].shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df[target].shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(1)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(1)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    agg.dropna(inplace=True)
    return agg.values

In [26]:
df = pd.read_csv("data/mongo_filtered_av101_mins.csv", index_col='ts')
#df = df.drop('Unnamed: 0', 1)
df.index = pd.to_datetime(df.index)
df_2 = df.loc[:,np.invert(unique_cols(df))]
# Average window
# df_2 = df.groupby(np.arange(len(df))//60).mean()
scaler = MinMaxScaler()
d = scaler.fit_transform(df_2)
scaled_df = pd.DataFrame(d, columns=df_2.columns, index=df_2.index)


In [27]:
scaled_df.head()

Unnamed: 0_level_0,S_SUM,I_SUM,H_TDH_I_L3_N,U_L3_L1,H_TDH_I_L1_N,C_phi_L1,P_L2,P_L1,P_L3,ReacEc_L1,...,ReacE_L1,RealE_SUM,AE_SUM,S_L3,H_TDH_U_L3_N,S_L2,S_L1,H_TDH_U_L1_N,U_L1_N,ReacE_SUM
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-01 21:58:00,0.990984,0.781978,0.254418,0.615027,0.342328,0.985237,0.966901,0.994739,0.970895,0.0,...,0.0,0.0,0.0,0.987464,0.760618,0.966283,0.987454,0.72161,0.609543,0.0
2022-04-01 21:59:00,0.992591,0.773476,0.254683,0.608446,0.343604,0.982323,0.973957,0.99375,0.966047,5.684342e-14,...,7.3e-05,7.2e-05,7.1e-05,0.983499,0.781622,0.973044,0.989167,0.739361,0.606128,7.6e-05
2022-04-01 22:00:00,0.84806,0.648967,0.312273,0.608049,0.403454,0.978451,0.799049,0.835862,0.790451,5.684342e-14,...,0.000173,0.000167,0.000166,0.840799,0.73928,0.822394,0.853432,0.700016,0.603478,0.000179
2022-04-01 22:01:00,0.73982,0.560511,0.357909,0.586065,0.446856,0.977273,0.657226,0.723811,0.650708,5.684342e-14,...,0.000245,0.000224,0.000227,0.73426,0.702624,0.702688,0.758202,0.653778,0.580593,0.000251
2022-04-01 22:03:00,0.854597,0.650536,0.30771,0.547224,0.387736,0.98569,0.773103,0.863535,0.797161,5.684342e-14,...,0.00047,0.000402,0.000416,0.85986,0.789692,0.803707,0.872752,0.736178,0.545678,0.000476


In [28]:
for k in CORR_GROUP:
    scaled_df[k + ' AD'] = " "
scaled_df.head()

Unnamed: 0_level_0,S_SUM,I_SUM,H_TDH_I_L3_N,U_L3_L1,H_TDH_I_L1_N,C_phi_L1,P_L2,P_L1,P_L3,ReacEc_L1,...,P_SUM AD,U_L1_N AD,I_SUM AD,H_TDH_I_L3_N AD,F AD,ReacEc_L1 AD,C_phi_L3 AD,ReacEc_L3 AD,RealE_SUM AD,H_TDH_U_L2_N AD
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-01 21:58:00,0.990984,0.781978,0.254418,0.615027,0.342328,0.985237,0.966901,0.994739,0.970895,0.0,...,,,,,,,,,,
2022-04-01 21:59:00,0.992591,0.773476,0.254683,0.608446,0.343604,0.982323,0.973957,0.99375,0.966047,5.684342e-14,...,,,,,,,,,,
2022-04-01 22:00:00,0.84806,0.648967,0.312273,0.608049,0.403454,0.978451,0.799049,0.835862,0.790451,5.684342e-14,...,,,,,,,,,,
2022-04-01 22:01:00,0.73982,0.560511,0.357909,0.586065,0.446856,0.977273,0.657226,0.723811,0.650708,5.684342e-14,...,,,,,,,,,,
2022-04-01 22:03:00,0.854597,0.650536,0.30771,0.547224,0.387736,0.98569,0.773103,0.863535,0.797161,5.684342e-14,...,,,,,,,,,,


In [37]:
from tensorflow import keras
from AttentionBiLSTM import attention

var = 'P_SUM'
model = keras.models.load_model(f'models/{var}_model.h5', custom_objects={'attention': attention})
features = []
counter = 0
history_window = 15
for index, row in scaled_df.iterrows():
    if counter >= history_window:
        tensor = np.array(features).reshape((1, 1, len(features)))
        res = model.predict(tensor)
        ad_detected = abs(res - row[var]) > AD_THRESHOLD[var]
        scaled_df.at[index, 'P_SUM AD'] = ad_detected
        features = features[len(CORR_GROUP[var]):]
        print(res)
        print(abs(res - row[var]) > AD_THRESHOLD[var])
        
    counter += 1
    predictors = row[CORR_GROUP[var]]
    features += predictors.to_list()
    
    if counter >= 30: break
    

[[[0.89742553]]]
[[[ True]]]
[[[0.9171946]]]
[[[ True]]]
[[[0.90943074]]]
[[[ True]]]
[[[0.80837893]]]
[[[ True]]]
[[[0.72545695]]]
[[[ True]]]
[[[0.6654034]]]
[[[ True]]]
[[[0.69631857]]]
[[[ True]]]
[[[0.744438]]]
[[[ True]]]
[[[0.79851604]]]
[[[False]]]
[[[0.8172443]]]
[[[ True]]]
[[[0.8451294]]]
[[[ True]]]
[[[0.87781334]]]
[[[ True]]]
[[[0.87682146]]]
[[[ True]]]
[[[0.85201645]]]
[[[False]]]
[[[0.85953736]]]
[[[ True]]]


In [30]:
from tensorflow import keras
from AttentionBiLSTM import attention

for var in ['U_L1_N', 'I_SUM', 'F', 'C_phi_L3']:
    model = keras.models.load_model(f'models/{var}_model.h5')
    features = []
    counter = 0
    history_window = 15
    for index, row in scaled_df.iterrows():
        if counter >= history_window:
            tensor = np.array(features).reshape((1, 1, len(features)))
            res = model.predict(tensor)
            ad_detected = abs(res - row[var]) > AD_THRESHOLD[var]
            scaled_df.at[index, var + ' AD'] = ad_detected
            features = features[len(CORR_GROUP[var]):]

        counter += 1
        predictors = row[CORR_GROUP[var]]
        features += predictors.to_list()
    

In [31]:
for i in ['P_SUM', 'U_L1_N', 'I_SUM', 'F', 'C_phi_L3']:
    df[i + ' AD'] = scaled_df[i + ' AD']

df.to_csv('data/anomaly_detection_data.csv')

In [22]:
scaled_df.tail(10)

Unnamed: 0_level_0,S_SUM,I_SUM,H_TDH_I_L3_N,U_L3_L1,H_TDH_I_L1_N,C_phi_L1,P_L2,P_L1,P_L3,ReacEc_L1,...,RealE_SUM,AE_SUM,S_L3,H_TDH_U_L3_N,S_L2,S_L1,H_TDH_U_L1_N,U_L1_N,ReacE_SUM,P_SUM AD
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-04-14 08:36:00,0.001195,0.601491,0.648505,0.276176,0.855618,0.0,0.0,0.0,0.002648,1.0,...,0.999999,0.999999,0.003689,0.298488,0.0,0.0,0.326404,0.245372,0.999999,[[[False]]]
2022-04-14 08:38:00,0.001397,0.701224,0.711686,0.306044,0.855618,0.0,0.0,0.0,0.00295,1.0,...,0.999999,0.999999,0.004314,0.160908,0.0,0.0,0.170171,0.272803,0.999999,[[[False]]]
2022-04-14 08:39:00,0.001161,0.582527,0.66776,0.331721,0.855618,0.0,0.0,0.0,0.002507,1.0,...,0.999999,0.999999,0.003583,0.153925,0.0,0.0,0.165787,0.287604,1.0,[[[False]]]
2022-04-14 08:40:00,0.001071,0.537274,0.656861,0.320183,0.855618,0.0,0.0,0.0,0.002329,1.0,...,1.0,0.999999,0.003305,0.163582,0.0,0.0,0.174875,0.28328,1.0,[[[False]]]
2022-04-14 08:41:00,0.001181,0.592597,0.66085,0.325677,0.855618,0.0,0.0,0.0,0.002586,1.0,...,1.0,0.999999,0.003646,0.175149,0.0,0.0,0.185078,0.289237,1.0,[[[False]]]
2022-04-14 08:42:00,0.001163,0.584881,0.659865,0.303754,0.855618,0.0,0.0,0.0,0.002543,1.0,...,1.0,1.0,0.00359,0.167025,0.0,0.0,0.17765,0.266856,1.0,[[[False]]]
2022-04-14 08:43:00,0.001227,0.617651,0.670128,0.294729,0.855618,0.0,0.0,0.0,0.002674,1.0,...,1.0,1.0,0.003789,0.155505,0.0,0.0,0.164282,0.261521,1.0,[[[False]]]
2022-04-14 08:44:00,0.001289,0.641381,0.689899,0.488641,0.855618,0.0,0.0,0.0,0.002776,1.0,...,1.0,1.0,0.00398,0.146689,0.0,0.0,0.153842,0.463451,1.0,[[[False]]]
2022-04-14 08:45:00,0.001337,0.665402,0.677359,0.473352,0.855618,0.0,0.0,0.0,0.002888,1.0,...,1.0,1.0,0.004126,0.146698,0.0,0.0,0.155565,0.441677,1.0,[[[False]]]
2022-04-14 08:46:00,0.00122,0.60803,0.677079,0.424712,0.855618,0.0,0.0,0.0,0.002625,1.0,...,1.0,1.0,0.003765,0.136737,0.0,0.0,0.142661,0.397244,1.0,[[[False]]]
