In [None]:
import pandas as pd

from refactor import GNNAD

In [None]:
X_train = pd.read_csv(f"./data/msl/train.csv", sep=",", index_col=0)
X_train = X_train.drop(columns=["attack"]) if "attack" in X_train.columns else X_train
X_test = pd.read_csv(f"./data/msl/test.csv", sep=",", index_col=0)
y_test = X_test.attack.tolist()

In [None]:
model = GNNAD(shuffle_train=False)
fitted_model = model.fit(X_train, X_test, y_test)

In [1]:
import numpy as np
import pandas as pd
from sklearn.utils.validation import check_random_state

from refactor import GNNAD

random_state = 245
rng = check_random_state(random_state)

# generate multivariate data
cov = [[0.5, 0.3, 0], [0.3, 1.0, 0], [0, 0, 0.8]]
mean = [1, 3, 10]
X_train = (
    pd.DataFrame(rng.multivariate_normal(mean=mean, cov=cov, size=2000))
    .ewm(span=2)
    .mean()
)
X_test = (
    pd.DataFrame(rng.multivariate_normal(mean=mean, cov=cov, size=1000))
    .ewm(span=2)
    .mean()
)

# add anomalies to the test set
X_test.iloc[342:356, :] *= 2
X_test.iloc[752:772, 0:2] *= 0.01

# anomaly labels
y_test = np.zeros(len(X_test))
y_test[342:356] = 1
y_test[752:772] = 1

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
model = GNNAD(shuffle_train=False, topk=2, epoch=10)
fitted_model = model.fit(X_train, X_test, y_test)

epoch (0 / 10) (Loss:40.17884064, ACU_loss:120.53652191)
epoch (1 / 10) (Loss:39.03404490, ACU_loss:117.10213470)
epoch (2 / 10) (Loss:37.60780589, ACU_loss:112.82341766)
epoch (3 / 10) (Loss:36.32013067, ACU_loss:108.96039200)
epoch (4 / 10) (Loss:35.13179779, ACU_loss:105.39539337)
epoch (5 / 10) (Loss:34.05962372, ACU_loss:102.17887115)
epoch (6 / 10) (Loss:34.34579849, ACU_loss:103.03739548)
epoch (7 / 10) (Loss:33.31120809, ACU_loss:99.93362427)
epoch (8 / 10) (Loss:32.25991440, ACU_loss:96.77974319)
epoch (9 / 10) (Loss:31.17664210, ACU_loss:93.52992630)

F1 score: 0.5384615384615384
precision: 0.8235294117647058
recall: 0.4117647058823529



In [5]:
fitted_model.f1
fitted_model.precision
fitted_model.recall

0.4117647058823529

In [None]:
model2 = GNNAD(shuffle_train=False, threshold_type="max_validation")
fitted_model2 = model2.fit()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

test_labels = fitted_model.test_result[2, :, 0].tolist()
plt.plot(np.array(test_labels));

In [None]:
from refactor import eval_scores, get_full_err_scores

test_scores = get_full_err_scores(fitted_model.test_result)
plt.plot(test_scores.T);

In [None]:
total_err_scores = test_scores
gt_labels = test_labels
topk = 1


total_features = total_err_scores.shape[0]

# finds topk feature idx of max scores for each time point
topk_indices = np.argpartition(
    total_err_scores, range(total_features - topk - 1, total_features), axis=0
)[-topk:]

# for each time, sum the topk error scores
total_topk_err_scores = np.sum(
    np.take_along_axis(total_err_scores, topk_indices, axis=0), axis=0
)

final_topk_fmeas, thresolds = eval_scores(
    total_topk_err_scores, gt_labels, return_thresold=True
)  # scores, true_scores

th_i = final_topk_fmeas.index(max(final_topk_fmeas))
thresold = thresolds[th_i]

pred_labels = np.zeros(len(total_topk_err_scores))
pred_labels[total_topk_err_scores > thresold] = 1

for i in range(len(pred_labels)):
    pred_labels[i] = int(pred_labels[i])
    gt_labels[i] = int(gt_labels[i])

In [None]:
test_scores.shape

In [None]:
from scipy.stats import iqr, rankdata

th_steps = 400
true_scores = gt_labels
scores = total_topk_err_scores

padding_list = [0] * (len(true_scores) - len(scores))

if len(padding_list) > 0:
    scores = padding_list + scores

scores_sorted = rankdata(scores, method="ordinal")  # rank of score
th_vals = np.array(range(th_steps)) * 1.0 / th_steps
fmeas = [None] * th_steps
thresholds = [None] * th_steps

i = 0
cur_pred = scores_sorted > th_vals[i] * len(scores)

In [None]:
len(scores)

In [None]:
scores_sorted

In [None]:
plt.plot(scores, "o")

In [None]:
plt.plot(scores_sorted, "o")

In [None]:
cur_pred.size

In [None]:
plt.plot(np.array(thresolds))

In [None]:
plt.plot(topk_indices[0], "o");

In [None]:
plt.plot(np.array(test_labels))

In [None]:
!python main.py -dataset msl -device cpu