In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import joblib

from argparse import ArgumentParser
from scipy import stats

In [2]:
model = tf.keras.models.load_model('model/autoencoder-kravchik-v2')

In [3]:
df = pd.read_csv("dataset/swat_attack.csv", delimiter=";", decimal=",")
df.columns = [column.strip() for column in df.columns]
df.head()

Unnamed: 0,Timestamp,FIT101,LIT101,MV101,P101,P102,AIT201,AIT202,AIT203,FIT201,...,P501,P502,PIT501,PIT502,PIT503,FIT601,P601,P602,P603,Normal/Attack
0,28/12/2015 10:00:00 AM,2.427057,522.8467,2,2,1,262.0161,8.396437,328.6337,2.445391,...,2,1,250.8652,1.649953,189.5988,0.000128,1,1,1,Normal
1,28/12/2015 10:00:01 AM,2.446274,522.886,2,2,1,262.0161,8.396437,328.6337,2.445391,...,2,1,250.8652,1.649953,189.6789,0.000128,1,1,1,Normal
2,28/12/2015 10:00:02 AM,2.489191,522.8467,2,2,1,262.0161,8.394514,328.6337,2.442316,...,2,1,250.8812,1.649953,189.6789,0.000128,1,1,1,Normal
3,28/12/2015 10:00:03 AM,2.53435,522.9645,2,2,1,262.0161,8.394514,328.6337,2.442316,...,2,1,250.8812,1.649953,189.6148,0.000128,1,1,1,Normal
4,28/12/2015 10:00:04 AM,2.56926,523.4748,2,2,1,262.0161,8.394514,328.6337,2.443085,...,2,1,250.8812,1.649953,189.5027,0.000128,1,1,1,Normal


In [4]:
attack_df = df.loc[df['Normal/Attack'] == 'Attack']
attack_indexes = attack_df.index

In [5]:
features_considered = ['FIT101', 'MV101', 'P101', 'P102', 'MV201', 'P201', 'P202', 'P203', 'P204', 'P206', 'MV301', 'MV302', 'MV303', 'MV304', 'P301', 'P302', 'P401', 'P402', 'P403', 'P404', 'UV401', 'P501', 'P502', 'P601', 'P602', 'P603']

In [6]:
df = df[features_considered]
df.head()

Unnamed: 0,FIT101,MV101,P101,P102,MV201,P201,P202,P203,P204,P206,...,P401,P402,P403,P404,UV401,P501,P502,P601,P602,P603
0,2.427057,2,2,1,2,1,1,2,1,1,...,1,2,1,1,2,2,1,1,1,1
1,2.446274,2,2,1,2,1,1,2,1,1,...,1,2,1,1,2,2,1,1,1,1
2,2.489191,2,2,1,2,1,1,2,1,1,...,1,2,1,1,2,2,1,1,1,1
3,2.53435,2,2,1,2,1,1,2,1,1,...,1,2,1,1,2,2,1,1,1,1
4,2.56926,2,2,1,2,1,1,2,1,1,...,1,2,1,1,2,2,1,1,1,1


In [7]:
scaler = joblib.load("scaler/uae.gz")
data = scaler.transform(df)
print(data)

[[0.88414414 1.         1.         ... 0.         0.         0.        ]
 [0.89114463 1.         1.         ... 0.         0.         0.        ]
 [0.90677872 1.         1.         ... 0.         0.         0.        ]
 ...
 [0.92217929 1.         1.         ... 0.         0.         0.        ]
 [0.91844572 1.         1.         ... 0.         0.         0.        ]
 [0.91132865 1.         1.         ... 0.         0.         0.        ]]


In [8]:
TIME_STEPS = 24
# Generated training sequences for use in the model.
def create_sequences(values, time_steps=TIME_STEPS):
    output = []
    for i in range(len(values)//24):
        start_index = time_steps * i
        output.append(values[start_index : start_index + time_steps])

    return np.stack(output)

In [9]:
print(len(data)/24)
data_sequence = create_sequences(data)

18746.625


In [10]:
def z_score(data, prediction):
    difference = np.absolute(data - prediction)
    mean = np.mean(difference, axis=1, keepdims=True)
    std = np.std(difference, axis=1, keepdims=True)

    z = (np.absolute(difference-mean))/std

    return z

In [18]:
for i in range(len(data_sequence)):
    prediction = model.predict(data_sequence[i].reshape((1, 24, 26)), verbose=0)

    z = z_score(data_sequence[i], prediction)
    max_z = np.amax(z, axis=1)
    above_threshold = max_z[max_z > 5]
    anomaly_len = len(above_threshold)
    if (anomaly_len > 1):
        print(f"anomaly in sequence {i}")

    if (anomaly_len > 3):
        print(f"attack in sequence {i}")


(1, 24, 26)
(1, 24, 26)
[]


  z = (np.absolute(difference-mean))/std
