In [202]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import joblib

from argparse import ArgumentParser
from scipy import stats
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras import Model, layers

In [203]:
# Load the saved Keras model from its on-disk directory.
model_path = 'model/autoencoder-kravchik-v2'
model = tf.keras.models.load_model(model_path)

In [217]:
# The CSV uses ';' as the field separator and ',' as the decimal mark.
df = pd.read_csv("dataset/swat_attack.csv", sep=";", decimal=",")
# Header cells may carry surrounding whitespace; normalise them so that
# columns can be addressed by their bare names.
df.columns = list(map(str.strip, df.columns))
df.head()

Unnamed: 0,Timestamp,FIT101,LIT101,MV101,P101,P102,AIT201,AIT202,AIT203,FIT201,...,P501,P502,PIT501,PIT502,PIT503,FIT601,P601,P602,P603,Normal/Attack
0,28/12/2015 10:00:00 AM,2.427057,522.8467,2,2,1,262.0161,8.396437,328.6337,2.445391,...,2,1,250.8652,1.649953,189.5988,0.000128,1,1,1,Normal
1,28/12/2015 10:00:01 AM,2.446274,522.886,2,2,1,262.0161,8.396437,328.6337,2.445391,...,2,1,250.8652,1.649953,189.6789,0.000128,1,1,1,Normal
2,28/12/2015 10:00:02 AM,2.489191,522.8467,2,2,1,262.0161,8.394514,328.6337,2.442316,...,2,1,250.8812,1.649953,189.6789,0.000128,1,1,1,Normal
3,28/12/2015 10:00:03 AM,2.53435,522.9645,2,2,1,262.0161,8.394514,328.6337,2.442316,...,2,1,250.8812,1.649953,189.6148,0.000128,1,1,1,Normal
4,28/12/2015 10:00:04 AM,2.56926,523.4748,2,2,1,262.0161,8.394514,328.6337,2.443085,...,2,1,250.8812,1.649953,189.5027,0.000128,1,1,1,Normal


In [218]:
# Rows whose label column is exactly 'Attack', and their index labels.
# NOTE(review): only the exact string 'Attack' is matched; confirm the label
# column contains no variant spellings or padded values.
is_attack = df['Normal/Attack'] == 'Attack'
attack_df = df.loc[is_attack]
attack_indexes = attack_df.index

In [219]:
# Columns kept for scaling and sequence building, in this exact order
# (grouped here by tag-number family purely for readability).
features_considered = [
    'FIT101', 'MV101', 'P101', 'P102',
    'MV201', 'P201', 'P202', 'P203', 'P204', 'P206',
    'MV301', 'MV302', 'MV303', 'MV304', 'P301', 'P302',
    'P401', 'P402', 'P403', 'P404', 'UV401',
    'P501', 'P502',
    'P601', 'P602', 'P603',
]

In [221]:
# Keep only the selected feature columns, in the order listed.
# Note: this rebinds `df`, so the label column is no longer available on it
# after this cell runs.
df = df.loc[:, features_considered]
df.head()

Unnamed: 0,FIT101,MV101,P101,P102,MV201,P201,P202,P203,P204,P206,...,P401,P402,P403,P404,UV401,P501,P502,P601,P602,P603
0,2.427057,2,2,1,2,1,1,2,1,1,...,1,2,1,1,2,2,1,1,1,1
1,2.446274,2,2,1,2,1,1,2,1,1,...,1,2,1,1,2,2,1,1,1,1
2,2.489191,2,2,1,2,1,1,2,1,1,...,1,2,1,1,2,2,1,1,1,1
3,2.53435,2,2,1,2,1,1,2,1,1,...,1,2,1,1,2,2,1,1,1,1
4,2.56926,2,2,1,2,1,1,2,1,1,...,1,2,1,1,2,2,1,1,1,1


In [222]:
# Load the persisted scaler object and apply it to the selected columns.
# joblib.load unpickles the file, so only load scaler files you trust.
scaler_path = "scaler/uae.gz"
scaler = joblib.load(scaler_path)
data = scaler.transform(df)
print(data)

[[0.88414414 1.         1.         ... 0.         0.         0.        ]
 [0.89114463 1.         1.         ... 0.         0.         0.        ]
 [0.90677872 1.         1.         ... 0.         0.         0.        ]
 ...
 [0.92217929 1.         1.         ... 0.         0.         0.        ]
 [0.91844572 1.         1.         ... 0.         0.         0.        ]
 [0.91132865 1.         1.         ... 0.         0.         0.        ]]


In [250]:
TIME_STEPS = 24


def create_sequences(values, time_steps=TIME_STEPS):
    """Split ``values`` into consecutive, non-overlapping windows for the model.

    The first axis of ``values`` is cut into ``len(values) // time_steps``
    windows of exactly ``time_steps`` rows each; trailing rows that do not
    fill a complete window are dropped.

    Parameters
    ----------
    values : array-like
        Input whose first axis is the one being windowed.
    time_steps : int, optional
        Number of rows per window. Defaults to ``TIME_STEPS``.

    Returns
    -------
    numpy.ndarray
        New array of shape ``(n_windows, time_steps, *values.shape[1:])``.
        When ``values`` is shorter than one window the result has
        ``n_windows == 0`` instead of raising.

    Raises
    ------
    ValueError
        If ``time_steps`` is not positive.
    """
    if time_steps <= 0:
        raise ValueError("time_steps must be a positive integer")

    values = np.asarray(values)
    # The window count must follow `time_steps`; a hard-coded 24 here would
    # produce the wrong number of windows for any other window size.
    window_count = len(values) // time_steps
    usable = values[: window_count * time_steps]
    # Copy so the result never aliases the caller's array.
    return usable.reshape((window_count, time_steps) + values.shape[1:]).copy()

In [251]:
# Number of windows the scaled data spans (fractional part = rows that will
# be dropped). Uses TIME_STEPS so the figure stays in sync with the window
# size that create_sequences uses by default.
print(len(data)/TIME_STEPS)
data_sequence = create_sequences(data)

18746.625
449919
0 0 24 False
1 24 48 False
2 48 72 False
3 72 96 False
4 96 120 False
5 120 144 False
6 144 168 False
7 168 192 False
8 192 216 False
9 216 240 False
10 240 264 False
11 264 288 False
12 288 312 False
13 312 336 False
14 336 360 False
15 360 384 False
16 384 408 False
17 408 432 False
18 432 456 False
19 456 480 False
20 480 504 False
21 504 528 False
22 528 552 False
23 552 576 False
24 576 600 False
25 600 624 False
26 624 648 False
27 648 672 False
28 672 696 False
29 696 720 False
30 720 744 False
31 744 768 False
32 768 792 False
33 792 816 False
34 816 840 False
35 840 864 False
36 864 888 False
37 888 912 False
38 912 936 False
39 936 960 False
40 960 984 False
41 984 1008 False
42 1008 1032 False
43 1032 1056 False
44 1056 1080 False
45 1080 1104 False
46 1104 1128 False
47 1128 1152 False
48 1152 1176 False
49 1176 1200 False
50 1200 1224 False
51 1224 1248 False
52 1248 1272 False
53 1272 1296 False
54 1296 1320 False
55 1320 1344 False
56 1344 1368 False
57 

In [223]:
def z_score(data, prediction):
    """Row-wise absolute z-score of the absolute reconstruction error.

    For every row, the absolute error ``|data - prediction|`` is standardised
    against that row's own mean and standard deviation (computed along
    axis 1), and the absolute value of the result is returned.

    Parameters
    ----------
    data : array-like
        Observed values; must have at least two dimensions.
    prediction : array-like
        Values to compare against; must broadcast with ``data``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as the error. Rows whose error has zero
        standard deviation (every element deviates by the same amount) yield
        0 rather than NaN/inf.
    """
    difference = np.absolute(np.asarray(data) - np.asarray(prediction))
    mean = np.mean(difference, axis=1, keepdims=True)
    std = np.std(difference, axis=1, keepdims=True)

    deviation = np.absolute(difference - mean)
    # Guarded divide: where std is 0 the deviation is 0 as well, so 0 is the
    # right score and no divide-by-zero warning is raised.
    z = np.divide(deviation, std, out=np.zeros_like(deviation), where=std > 0)

    return z

In [195]:
# Run each window through the model and score the result with z_score.
# A window is reported as an "anomaly" when more than 1 of its rows has a
# maximum z-score above 5, and additionally as an "attack" when more than 3 do.
window_shape = (data_sequence.shape[1], data_sequence.shape[2])
for i, window in enumerate(data_sequence):
    prediction = model.predict(window, verbose=0).reshape(window_shape)

    z = z_score(window, prediction)
    max_z = np.amax(z, axis=1)
    # Count the rows whose largest z-score exceeds the threshold.
    anomaly_len = int(np.count_nonzero(max_z > 5))

    if anomaly_len > 1:
        print(f"anomaly in {i}")

    if anomaly_len > 3:
        print(f"attack in {i}")


KeyboardInterrupt: 