In [1]:
import pandas as pd
from sklearn import preprocessing

from gnnad.graphanomaly import GNNAD
from gnnad.plot import plot_test_anomalies, plot_predictions, plot_sensor_error_scores

def normalise(X, scaler_fn):
    """Fit ``scaler_fn`` on ``X`` and return ``X`` rescaled as a DataFrame.

    Parameters
    ----------
    X : pandas.DataFrame
        Data to scale. Its ``index`` and ``columns`` are reused for the result.
    scaler_fn : object
        Scaler exposing ``fit(X)`` (returning the fitted scaler) and
        ``transform(X)``, e.g. ``preprocessing.StandardScaler()``.

    Returns
    -------
    pandas.DataFrame
        The transformed values, labelled with the same index and columns as ``X``.

    Notes
    -----
    The scaler is fitted on the very data it then transforms, so every call
    uses statistics derived from its own input.
    """
    fitted_scaler = scaler_fn.fit(X)
    scaled_values = fitted_scaler.transform(X)
    return pd.DataFrame(scaled_values, index=X.index, columns=X.columns)

In [3]:
# Directory holding the train/test CSVs. Defined once so the notebook can be
# pointed at another copy of the data by editing a single line.
DATA_DIR = '/nfs/home/canzen/gnnad/swat_data'

# read in training data (first CSV column becomes the index)
train = pd.read_csv(f'{DATA_DIR}/train.csv', index_col=0)
# model inputs are every column except the last one
# NOTE(review): assumes the last column of train.csv is the label — TODO confirm
X_train = train.iloc[:, :-1]
#X_train.index = pd.to_datetime(X_train.index)

# read in test data (first CSV column becomes the index)
test = pd.read_csv(f'{DATA_DIR}/test.csv', index_col=0)
# NOTE(review): X_tmp is not defined in the visible cells; this line cannot be
# re-enabled as written.
#X_tmp.index = pd.to_datetime(X_tmp.index)
# model inputs are every column except the last one; labels come from 'attack'
# NOTE(review): assumes 'attack' is the last column of test.csv, otherwise the
# label would remain among the inputs — TODO confirm
X_test = test.iloc[:, :-1]
y_test = test['attack']

# normalise (currently disabled)
#X_test = normalise(X_test, preprocessing.StandardScaler())
#X_train = normalise(X_train, preprocessing.StandardScaler())

# create ANOMS dict for plotting (currently disabled; depends on X_tmp)
#ANOMS = {'type1': {}}
#X_test_anoms = X_tmp.iloc[:,8:]

#for i in range(len(X_test_anoms.columns)):
#    anom_col_name = X_test_anoms.columns[i]
#    sensor_col_name = X_test.columns[i]
#    anom_idxs = X_test_anoms[anom_col_name][X_test_anoms[anom_col_name]].index
#
#    if len(anom_idxs) > 0:
#        ANOMS['type1'][sensor_col_name] = anom_idxs

In [4]:
# plot input data (disabled: needs the ANOMS dict, whose construction is commented out in the data-loading cell)
#plot_test_anomalies(X_test, ANOMS)

In [6]:
# run model: configure the detector, then fit it.
# The test features and labels are handed to fit() together with the training data.
model = GNNAD(
    threshold_type="max_validation",
    topk=15,
    slide_win=5,
    epoch=50,  # matches the "/ 50" shown in the training log
    early_stop_win=10,
    slide_stride=1,
    embed_dim=64,
    out_layer_inter_dim=64,
    device='cpu',
)
fitted_model = model.fit(X_train, X_test, y_test)

epoch (0 / 50) (Loss:0.06132565, ACU_loss:20.54409178)
epoch (1 / 50) (Loss:0.01400033, ACU_loss:4.69011188)
epoch (2 / 50) (Loss:0.00946194, ACU_loss:3.16974902)
epoch (3 / 50) (Loss:0.00763743, ACU_loss:2.55853975)
epoch (4 / 50) (Loss:0.00688096, ACU_loss:2.30512147)
epoch (5 / 50) (Loss:0.00686072, ACU_loss:2.29834178)
epoch (6 / 50) (Loss:0.00590032, ACU_loss:1.97660821)
epoch (7 / 50) (Loss:0.00499798, ACU_loss:1.67432420)
epoch (8 / 50) (Loss:0.00485472, ACU_loss:1.62633169)
epoch (9 / 50) (Loss:0.00453757, ACU_loss:1.52008560)
epoch (10 / 50) (Loss:0.00422941, ACU_loss:1.41685266)
epoch (11 / 50) (Loss:0.00414152, ACU_loss:1.38740886)
epoch (12 / 50) (Loss:0.00401334, ACU_loss:1.34446899)
epoch (13 / 50) (Loss:0.00402905, ACU_loss:1.34973291)
epoch (14 / 50) (Loss:0.00395257, ACU_loss:1.32411194)
epoch (15 / 50) (Loss:0.00400093, ACU_loss:1.34031146)
epoch (16 / 50) (Loss:0.00384867, ACU_loss:1.28930570)
epoch (17 / 50) (Loss:0.00395279, ACU_loss:1.32418439)
epoch (18 / 50) (Lo

In [7]:
# model summary: print the per-layer table (layer type, output shape, parameter count)
fitted_model.summary()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         Embedding-1                   [-1, 64]           3,200
         Embedding-2                   [-1, 64]           3,200
            Linear-3                   [-1, 64]             320
    SumAggregation-4                [-1, 1, 64]               0
        GraphLayer-5                   [-1, 64]              64
       BatchNorm1d-6                   [-1, 64]             128
              ReLU-7                   [-1, 64]               0
          GNNLayer-8                   [-1, 64]               0
         Embedding-9                   [-1, 64]           3,200
      BatchNorm1d-10               [-1, 64, 50]             128
          Dropout-11               [-1, 50, 64]               0
           Linear-12                [-1, 50, 1]              65
         OutLayer-13                [-1, 50, 1]               0
Total params: 10,305
Trainable params: 

In [8]:
# GDN+, sensor thresholds: get predictions with tau=99, then print
# recall / precision / accuracy / specificity / f1 for them
preds = fitted_model.sensor_threshold_preds(tau=99)
fitted_model.print_eval_metrics(preds)

recall: 82.9
precision: 31.0
accuracy: 75.4
specificity: 74.4
f1: 45.2


In [None]:
# plot predictions (disabled: needs the ANOMS dict, whose construction is commented out in the data-loading cell)
#plot_predictions(fitted_model, X_test, ANOMS, preds = preds, figsize=(20, 20))

In [None]:
#plot_sensor_error_scores(fitted_model, X_test)