In [None]:
import sys
sys.path.append("../")

In [None]:
%load_ext autoreload
%autoreload 2
import os
import hashlib
import numpy as np
import traceback
from common.dataloader import load_dataset
from common import data_preprocess
from matplotlib import pyplot as plt
from collections import defaultdict

In [None]:
def moving_average(data_dict, w):
    """Smooth every column of the train and test arrays with a moving average.

    Each column is convolved with a length-``w`` box kernel in ``'valid'``
    mode and divided by ``w``, so an input with ``N`` rows yields
    ``N - w + 1`` rows.

    Args:
        data_dict: Mapping with 2-D arrays under ``"train"`` and ``"test"``
            (rows are indexed first, columns second) and an entry under
            ``"test_labels"``. The number of columns processed is taken
            from ``"train"``.
        w: Window length; a positive integer no larger than the number of
            rows of either array.

    Returns:
        A ``defaultdict(list)`` holding the smoothed ``"train"`` and
        ``"test"`` arrays plus the original ``"test_labels"`` object.

    Raises:
        ValueError: If ``w`` is smaller than 1 or longer than the shorter of
            the two series. Without this check ``np.convolve`` swaps its
            operands and silently returns ``w - N + 1`` meaningless values.

    NOTE(review): ``"test_labels"`` is forwarded untouched. If it carries one
    label per test row it is now ``w - 1`` entries longer than the smoothed
    ``"test"`` array -- confirm how callers align the two.
    """
    train = data_dict["train"]
    test = data_dict["test"]

    if w < 1:
        raise ValueError(f"w must be a positive integer, got {w}")
    shortest = min(train.shape[0], test.shape[0])
    if w > shortest:
        raise ValueError(
            f"w={w} is longer than the shortest series ({shortest} rows)"
        )

    print(f"Moving average with w={w}")
    kernel = np.ones(w)  # loop-invariant box kernel
    new_dict = defaultdict(list)
    for idx in range(train.shape[1]):
        new_dict["train"].append(np.convolve(train[:, idx], kernel, 'valid') / w)
        new_dict["test"].append(np.convolve(test[:, idx], kernel, 'valid') / w)

    # Each smoothed column was collected as a row; transpose back to rows x columns.
    new_dict["train"] = np.vstack(new_dict["train"]).T
    new_dict["test"] = np.vstack(new_dict["test"]).T
    new_dict["test_labels"] = data_dict["test_labels"]
    print(new_dict["train"].shape)
    return new_dict

In [None]:

# Column indices to keep for each subdataset id.
# The plotting cell looks up selectedDims[subdataset] and uses the list to
# slice the second axis of the loaded "train" and "test" arrays, so every id
# listed in `subdatasets` needs an entry here.
selectedDims = {
"b2a04b7f":[0,1,4,5,6,7,8,9,13,15,16,18,21,27,30,31,73,83,112,113],
"c2970798":[53,54,79,102,168,173,174,176,202,250,477,487,556],
"e29ca1cd":[40,45,66],
"5dafb960":[0,1,4,5,8,21,73,74,83,101,109,112],
"c23b2b2d":[15,16,24,38],
"aeb5a1de":[28,79,80,81,82,84],
"2fe95315":[22,25,60,70,85],
"0a82a873":[0,3,6,22,23,44,69],
"af732cc4":[17,24,25,32,33,39,60],
"c91f4a07":[0,6,7,13,15,20,25,30,39,45,62,76,109,147,150,166,167,170,188,207,209,228,288,289],
"ca2ae31d":[0,6,16,17,18,25,28,29,34,36,39,40,47,48,50,52,53,59,68,69,71,75,77,79,80,84,98,103,124,125,127,129,148,150,159,170,190,202,249,318,319,344,346],
"f7958fb7":[0,2,4,20,31,33,37,41,43,45,56,57,58,69,81,88,101,109,157,161,169,254,265,282,290,334,346,363,392,410,423,438,445,454,465,490,492]
}

In [None]:

# Plot every test window of each subdataset, one PDF per window, split into
# "normal" and "anomaly" output folders under empirical_study/.
dataset = "HUAWEI"  # dataset name handed to load_dataset
# Subdataset ids to plot; each one needs an entry in selectedDims.
subdatasets = ["b2a04b7f","c2970798","e29ca1cd","5dafb960","c23b2b2d","aeb5a1de","2fe95315","0a82a873","af732cc4","c91f4a07","ca2ae31d","f7958fb7"]
window_size = 100  # forwarded to generate_windows as window_size
stride = 50  # forwarded to generate_windows as both stride and test_stride

for subdataset in subdatasets:
    # Load and window each subdataset once; both plot types reuse the result.
    data_dict = load_dataset(dataset, subdataset, "all", root_dir="../")
    dims = selectedDims[subdataset]
    data_dict["train"] = data_dict["train"][:, dims]
    data_dict["test"] = data_dict["test"][:, dims]

    window_dict = data_preprocess.generate_windows(
        data_dict,
        window_size=window_size,
        stride=stride,
        test_stride=stride,
    )
    windows = window_dict["test_windows"]

    # A window counts as anomalous when any label along axis 1 is set.
    anomaly_indice = np.any(window_dict["test_labels"], axis=1).reshape(-1)
    normal_indice = ~anomaly_indice

    print(windows.shape, anomaly_indice.shape, normal_indice.shape)

    anomaly = windows[anomaly_indice]
    normal = windows[normal_indice]

    for concerned_type in ["normal", "anomaly"]:
        outdir = os.path.join("empirical_study", f"{concerned_type}_{dataset}_empirical")
        current_out = os.path.join(outdir, subdataset)
        print(f"Plotting to {current_out}")
        os.makedirs(current_out, exist_ok=True)

        concerned_window = anomaly if concerned_type == "anomaly" else normal
        for win_idx, window in enumerate(concerned_window):
            n_dims = window.shape[1]
            # squeeze=False always returns a 2-D axes array, so a window with
            # a single dimension no longer yields a bare, unindexable Axes.
            fig, axes = plt.subplots(n_dims, figsize=(20, 2 * n_dims), squeeze=False)
            ax = axes[:, 0]
            for idx in range(n_dims):
                data = window[:, idx]
                # NOTE(review): idx is the position inside the selected
                # columns, not the original column index from selectedDims.
                ax[idx].plot(data.reshape(-1), label=f"{subdataset} | {concerned_type} | win_{win_idx} | dim_{idx}")
                # NOTE(review): fixed 0..1 axis presumably relies on the data being scaled to [0, 1] -- confirm.
                ax[idx].set_ylim(0, 1)
                ax[idx].legend()
            fig.savefig(os.path.join(current_out, f"win_{win_idx}.pdf"))
            # Close this exact figure so figures do not pile up in memory.
            plt.close(fig)
        print(f"===============Finish {subdataset} {concerned_type} part===============")
