In [2]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

In [3]:
from data import get_datasets
from rnns import skipGRU, skipLSTM, GRU, LSTM, eval_rnn

from importlib import reload

from sklearn.model_selection import train_test_split
import pandas as pd
import pandas as pd
import numpy as np
import re
from tqdm import tqdm, trange
import math
from sklearn.model_selection import train_test_split
from tensorflow.python.keras.preprocessing.sequence import pad_sequences
import plotly.figure_factory as ff
from sklearn.metrics import confusion_matrix, roc_curve, auc
import csv
import plotly.express as px
import pickle
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
import random
import timeit

In [4]:
datasets = get_datasets('data')
dataset_yow_yul = datasets['yow-yul']
print(dataset_yow_yul[0]) # (session, time, word, parity_error, attack)

['Flight Data YOW YUL Manual Flight_6', 327606137, 37641, 0, 0]


In [5]:
# subsample benign:
dataset_yow_yul_sampled = [r for r in dataset_yow_yul if r[-1] > 0 or random.uniform(0, 1) > .95]

In [6]:
# converted to dataframe for visualization/statistics
df_yow_yul = pd.DataFrame(dataset_yow_yul_sampled, columns =['session', 'time', 'word', 'parity_error', 'attack'])
df_yow_yul.attack.value_counts()

0     751716
1       5879
2       4214
9       3271
8       1243
10      1142
7        916
4        629
3        482
6        333
5          1
Name: attack, dtype: int64

In [7]:
# stratefy split based on session
train, test = train_test_split(
    dataset_yow_yul_sampled, 
    test_size=.4, 
    shuffle=True, 
    random_state=1,
    stratify=[row[0] for row in dataset_yow_yul_sampled]
)

In [16]:
results = {}

In [None]:


for model in [skipGRU, skipLSTM, GRU, LSTM]:
    _, prediction = eval_rnn(train, [test], rnn_layer=model)
    results[model.__name__] = prediction

In [None]:
results['skipGRU'][0][1]


In [30]:
from sklearn.metrics import (accuracy_score, auc, precision_score,
                             recall_score, roc_auc_score, roc_curve)

In [31]:
def eval(prediction):
    (y_anomly_truth, y_attack_truth), anomly_predictions, time_taken = prediction
    anomly_predictions = np.array(anomly_predictions).flatten()
    _all = {}
    for i in range(0, 11):
        # get metric for each attack
        truth = []
        predictions = []
        for t, a, p in zip(y_anomly_truth, y_attack_truth, anomly_predictions):
            if i == 0 or a == i or a == 0:
                # all attacks or a specific attack
                truth.append(t)
                predictions.append(p)
        
        fpr, tpr, thresholds = roc_curve(truth, predictions)
        roc_auc = auc(fpr, tpr)

        # find optimal threshold
        optimal_idx = np.argmax(tpr - fpr)
        optimal_threshold = thresholds[optimal_idx]
        pred_labels = predictions >= optimal_threshold

        res = {
            'auc': roc_auc,
            'acc': accuracy_score(truth, pred_labels),
            'prc': precision_score(truth, pred_labels),
            'rcl': recall_score(truth, pred_labels),
            'pav': np.mean([p for t, p in zip(truth, predictions) if t == 1]),
            'pvr': np.var([p for t, p in zip(truth, predictions) if t == 1]),
            'nav': np.mean([p for t, p in zip(truth, predictions) if t == 0]),
            'nvr': np.var([p for t, p in zip(truth, predictions) if t == 0]),
            'opt': optimal_threshold,
            'time': time_taken
        }
        _all[i] = {k: f'{v:.4f}' for k, v in res.items()}
    return _all



In [None]:
eval(results['skipGRU'][0])

In [None]:
import stan
reload(stan)
from stan import eval_stan

results['stan'] = eval_stan(train, [test])

In [None]:
eval(results['stan'][0])

In [None]:
import onodueze
reload(onodueze)
from onodueze import eval_onodueze

for model in ['BLSTM', 'IsolationForest', 'LOF', 'MCD', 'XGBoost']:
    if model not in results:
        _, prediction = eval_onodueze(train, [test], model=model)
        results[model] = prediction

In [None]:
results.keys()

In [23]:
from ignn_g import ids
reload(ids)
results['impl'] = ids.eval_imp(train, [test])[1]

preparing data
all data [Data(x=[20, 1], edge_index=[2, 4], y=[1], ya=[1]), Data(x=[20, 1], edge_index=[2, 4], y=[1], ya=[1])] 461847


  0%|          | 0/3609 [00:00<?, ?it/s]


Epoch: 001, Train Loss: 0.0007113, 


  0%|          | 0/3609 [00:00<?, ?it/s]


Epoch: 002, Train Loss: 0.0006972, 


  0%|          | 0/3609 [00:00<?, ?it/s]


Epoch: 003, Train Loss: 0.0006852, 


  0%|          | 0/3609 [00:00<?, ?it/s]


Epoch: 004, Train Loss: 0.0006696, 


  0%|          | 0/3609 [00:00<?, ?it/s]


Epoch: 005, Train Loss: 0.0006578, 


  0%|          | 0/3609 [00:00<?, ?it/s]


Epoch: 006, Train Loss: 0.0006423, 


  0%|          | 0/3609 [00:00<?, ?it/s]


Epoch: 007, Train Loss: 0.0006278, 


  0%|          | 0/3609 [00:00<?, ?it/s]


Epoch: 008, Train Loss: 0.0006112, 


  0%|          | 0/3609 [00:00<?, ?it/s]


Epoch: 009, Train Loss: 0.0005825, 


  0%|          | 0/3609 [00:00<?, ?it/s]


Epoch: 010, Train Loss: 0.0005495, 
start testing


  0%|          | 0/2406 [00:00<?, ?it/s]


In [32]:
eval(results['impl'][0])

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(
  ret = ret.dtype.type(ret / rcount)
 

{0: {'auc': 'nan',
  'acc': '1.0000',
  'prc': '0.0000',
  'rcl': '0.0000',
  'pav': 'nan',
  'pvr': 'nan',
  'nav': '0.0908',
  'nvr': '0.0006',
  'opt': '1.1858',
  'time': '52.3462'},
 1: {'auc': 'nan',
  'acc': '1.0000',
  'prc': '0.0000',
  'rcl': '0.0000',
  'pav': 'nan',
  'pvr': 'nan',
  'nav': '0.0908',
  'nvr': '0.0006',
  'opt': '1.1858',
  'time': '52.3462'},
 2: {'auc': 'nan',
  'acc': '1.0000',
  'prc': '0.0000',
  'rcl': '0.0000',
  'pav': 'nan',
  'pvr': 'nan',
  'nav': '0.0908',
  'nvr': '0.0006',
  'opt': '1.1858',
  'time': '52.3462'},
 3: {'auc': 'nan',
  'acc': '1.0000',
  'prc': '0.0000',
  'rcl': '0.0000',
  'pav': 'nan',
  'pvr': 'nan',
  'nav': '0.0908',
  'nvr': '0.0006',
  'opt': '1.1858',
  'time': '52.3462'},
 4: {'auc': 'nan',
  'acc': '1.0000',
  'prc': '0.0000',
  'rcl': '0.0000',
  'pav': 'nan',
  'pvr': 'nan',
  'nav': '0.0908',
  'nvr': '0.0006',
  'opt': '1.1858',
  'time': '52.3462'},
 5: {'auc': 'nan',
  'acc': '1.0000',
  'prc': '0.0000',
  'rcl':