In [1]:
import sys
from datetime import datetime

import os
import argparse
from pathlib import Path

import matplotlib.pyplot as plt

import json
import random

import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, random_split, Subset

from sklearn.preprocessing import MinMaxScaler

from util.env import get_device, set_device
from util.preprocess import build_loc_net, construct_data
from util.net_struct import get_feature_map, get_fc_graph_struc
from util.iostream import printsep

from datasets.TimeDataset import TimeDataset


# from models.GDN import GDN

from _train import train
from _test import test
from evaluate import get_err_scores, get_best_performance_data, get_val_performance_data, get_full_err_scores

from get_data import fetchData

In [2]:
# Pull data from our own PostgreSQL database (which includes the attack columns)
# data = fetchData()
# train = data.sample(frac=0.6)
# test = data.drop(train.index)

In [3]:
# Load the train/test CSVs for the chosen dataset; the first CSV column is
# used as the index of each frame.
# NOTE(review): assigning `train` / `test` below rebinds the names brought in
# by `from _train import train` and `from _test import test` in the import
# cell, so those imports are no longer reachable under these names afterwards.
dataset = 'msl'
train_orig = pd.read_csv(
    f'./data/{dataset}/train.csv',
    sep=',',
    index_col=0,
)
test_orig = pd.read_csv(
    f'./data/{dataset}/test.csv',
    sep=',',
    index_col=0,
)
# Working names used by the following cells; the *_orig names keep a handle
# on the frames exactly as loaded.
train = train_orig
test = test_orig


In [4]:
# train_orig = pd.read_csv(f'./data/swat/train.csv', sep=',', index_col=0)
# test_orig = pd.read_csv(f'./data/swat/test.csv', sep=',', index_col=0)

# train, test = train_orig, test_orig
# Remove the 'Normal/Attack' column from the training frame when it exists;
# otherwise `train` is left bound to the same object.
train = train.drop(columns=['Normal/Attack']) if 'Normal/Attack' in train.columns else train


In [5]:
# Look up the per-dataset inputs used to build the graph and the datasets below.
# NOTE(review): 'msl' is also assigned to `dataset` in the CSV-loading cell;
# the two assignments must be kept in sync by hand.
dataset = 'msl'
# Passed to build_loc_net and construct_data in the following cells.
feature_map = get_feature_map(dataset)
# presumably the fully-connected graph structure for this dataset (guess from
# the helper's name) — TODO confirm in util.net_struct
fc_struc = get_fc_graph_struc(dataset)


In [6]:
# Build the edge index from the graph structure and the training frame's
# column names, and store it directly as an int64 (torch.long) tensor.
fc_edge_index = torch.tensor(
    build_loc_net(fc_struc, list(train.columns), feature_map=feature_map),
    dtype=torch.long,
)

In [7]:
# Convert both frames with construct_data, using the shared feature map.
# Training data gets the constant label 0; test data gets the per-row values
# of its `attack` column.
train_dataset_indata = construct_data(train, feature_map, labels=0)
# Select the column with explicit bracket indexing rather than attribute
# access, which can collide with DataFrame attributes and methods.
test_dataset_indata = construct_data(test, feature_map, labels=test['attack'].tolist())

In [8]:
# Sliding-window settings handed to TimeDataset through its `config=` argument.
# presumably window length and step size in rows — TODO confirm in TimeDataset
cfg = {'slide_win': 5, 'slide_stride': 1}

In [9]:
# Wrap the constructed data in TimeDataset objects; both share the same edge
# index and window config and differ only in the data and the `mode` string.
train_dataset = TimeDataset(
    train_dataset_indata,
    fc_edge_index,
    mode='train',
    config=cfg,
)
test_dataset = TimeDataset(
    test_dataset_indata,
    fc_edge_index,
    mode='test',
    config=cfg,
)

In [27]:
# Number of entries in the feature map (use the len() builtin rather than
# calling the dunder method directly).
len(feature_map)

27

In [30]:
# Display the training frame as loaded from CSV; the rendered output elides
# the middle rows and columns.
train_orig

Unnamed: 0_level_0,M-6,M-1,M-2,S-2,P-10,T-4,T-5,F-7,M-3,M-4,...,F-5,D-14,T-9,P-14,T-8,P-11,D-15,D-16,M-7,F-8
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-1.0,0.999976,-0.748738,-1.0,0.994353,0.0,-1.0,-0.642857,1.000046,1.000000,...,-0.059837,-1.0,-0.333329,0.999426,-1.0,0.941907,-1.000000,-1.000000,-1.001157,-0.826087
1,-1.0,0.999976,-0.748738,-1.0,0.993788,0.0,-1.0,-0.964286,1.000046,1.000000,...,-0.059238,-1.0,-0.333329,0.999296,-1.0,0.944196,-1.000000,-1.000000,-1.001157,-0.869565
2,-1.0,0.999976,-0.748738,-1.0,0.994353,0.0,-1.0,-0.785714,1.000046,1.000000,...,-0.059163,-1.0,-0.333329,0.999611,-1.0,0.943751,0.952800,0.983735,-1.001157,-0.869565
3,-1.0,0.999976,-0.748738,-1.0,0.993506,0.0,-1.0,-0.892857,1.000046,1.000000,...,-0.058563,-1.0,-0.333329,0.999500,-1.0,0.941081,0.951874,-1.000000,-1.001157,-0.782609
4,-1.0,0.999976,-0.748738,-1.0,0.994353,0.0,-1.0,-0.964286,1.000046,1.000000,...,-0.056016,-1.0,-1.000000,0.999519,-1.0,0.941653,-1.000000,-1.000000,-1.001157,-0.869565
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1560,-1.0,-0.621155,-1.036327,0.0,0.994070,0.0,-1.0,-0.928571,0.591862,-1.216307,...,0.013526,-1.0,0.000000,0.999500,0.0,0.598373,-1.000000,0.000000,-0.999096,-1.000000
1561,-1.0,-0.622580,-1.031021,0.0,0.992094,0.0,-1.0,-0.928571,0.824871,-1.283452,...,0.015624,-1.0,0.000000,0.999352,0.0,0.659135,1.024063,0.000000,-0.999096,-1.000000
1562,-1.0,-0.624111,-1.025695,0.0,0.991529,0.0,-1.0,0.964286,1.000022,-1.237844,...,-0.835887,-1.0,0.000000,0.999463,0.0,0.762036,1.023137,0.000000,-0.999096,-1.000000
1563,-1.0,-0.625548,-1.020368,0.0,0.991529,0.0,-1.0,-0.964286,0.855654,-1.223329,...,-0.826820,-1.0,0.000000,0.999444,0.0,0.763498,-1.000000,0.000000,-0.999096,-1.000000


In [32]:
# Echo the constructed training data.
# NOTE(review): this renders the entire nested list into the cell output;
# showing only its outer/inner lengths or a short slice would keep the
# notebook readable.
train_dataset_indata

[[-1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
