In [2]:
import sys
# Add the parent directory to the module search path.
# NOTE(review): presumably so the project-local `common` package imported
# below resolves when the notebook is run from its own folder — confirm.
sys.path.append("../")


In [10]:
import warnings
warnings.filterwarnings('ignore')
import os
import pandas as pd
import numpy as np
from collections import defaultdict
from glob import glob
from common.evaluation import iter_thresholds
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score

In [4]:
# Evaluation groups: group name -> {subdataset name: integer}.
# NOTE(review): the meaning of the integer (38 here) is not visible in this
# notebook; the evaluation cell only reads the inner keys — TODO confirm.
summarize_dict = {
    group_name: {"machine-1-1": 38}
    for group_name in ("Group A", "Group B", "Group C")
}

# Alternative grouping kept for reference (disabled):
# summarize_dict = {
#     "Group A": {"e29ca1cd": 3, "c23b2b2d": 4, "aeb5a1de": 6,
#                 "2fe95315": 5, "0a82a873": 7, "af732cc4": 7},
#     "Group B": {"b2a04b7f": 20, "c2970798": 13, "5dafb960": 12},
#     "Group C": {"c91f4a07": 24, "ca2ae31d": 43, "f7958fb7": 37},
# }

# Root directory under which the saved benchmark results are searched.
results_dir = "../benchmark/benchmarking_results/"

# Candidate model names; the evaluation cell uses one such name as the first
# directory level below results_dir.
models = [
    "3sigma",
    "iforest",
    "LODA",
    "PCA",
    "AutoEncoder",
    "lstm",
    "lstm_vae",
    "dagmm",
    "omnianomaly",
    "CMAnomaly_old",
]

In [5]:
def load_tuple(path, nrows=None):
    """Load the saved test anomaly scores and labels from one result folder.

    Reads ``anomaly_score.npz`` and ``anomaly_label.npz`` inside ``path``.
    The score archive holds a pickled dict under ``arr_0`` whose ``"test"``
    entry is returned; the label archive holds the labels under ``arr_0``
    and they are cast to ``int``.

    Args:
        path: Directory containing the two ``.npz`` files.
        nrows: If given, keep only the first ``nrows`` entries of each
            series; ``None`` (default) keeps everything.

    Returns:
        Tuple ``(anomaly_score, anomaly_label)``.
    """
    # NOTE(security): allow_pickle=True unpickles arbitrary objects; only
    # point this at result files you produced yourself.
    # The archives are opened as context managers so the underlying file
    # handles are closed as soon as the arrays have been read.
    with np.load(
        os.path.join(path, "anomaly_score.npz"), allow_pickle=True
    ) as score_archive:
        anomaly_score = score_archive["arr_0"].item()["test"]
    with np.load(os.path.join(path, "anomaly_label.npz")) as label_archive:
        anomaly_label = label_archive["arr_0"].astype(int)
    return (anomaly_score[:nrows], anomaly_label[:nrows])

In [17]:
def compute_metrics(anomaly_score, anomaly_label):
    """Score the predictions returned by ``iter_thresholds`` with and without adjustment.

    ``iter_thresholds`` is called twice with ``metric="f1"``: first with
    ``adjustment=True``, then with ``adjustment=False``. The third element of
    each result is used as the prediction vector.
    NOTE(review): presumably ``adjustment`` toggles point-adjustment of the
    predictions — confirm against ``common.evaluation.iter_thresholds``.

    Args:
        anomaly_score: Anomaly scores passed through to ``iter_thresholds``.
        anomaly_label: Ground-truth labels used for thresholding and scoring.

    Returns:
        List ``[aF1, aPC, aRC, rF1, rPC, rRC]``: F1, precision and recall for
        the adjusted predictions followed by the same three for the raw ones.
    """
    # Obtain both prediction vectors first, then score them.
    predictions = []
    for use_adjustment in (True, False):
        _, _, best_pred, _ = iter_thresholds(
            anomaly_score, anomaly_label, metric="f1", adjustment=use_adjustment
        )
        predictions.append(best_pred)

    scorers = (f1_score, precision_score, recall_score)
    return [
        scorer(anomaly_label, pred)
        for pred in predictions
        for scorer in scorers
    ]

def concat_keys(value_dict, keys):
    """Concatenate the scores and labels of the selected subdatasets.

    Args:
        value_dict: Mapping from subdataset name to a dict holding the
            ``"anomaly_score"`` and ``"anomaly_label"`` arrays.
        keys: Names to pull from ``value_dict``; arrays are joined in this
            order. Must contain at least one name.

    Returns:
        Tuple ``(anomaly_score_con, anomaly_label_con)`` with the
        concatenated scores and labels.

    Raises:
        KeyError: If a name in ``keys`` is missing from ``value_dict``.
        ValueError: If ``keys`` is empty (raised by ``np.concatenate``).
    """
    # Materialise once: keys is traversed twice, which would silently
    # exhaust a generator on the first pass.
    keys = list(keys)
    anomaly_score_con = np.concatenate([value_dict[k]["anomaly_score"] for k in keys])
    anomaly_label_con = np.concatenate([value_dict[k]["anomaly_label"] for k in keys])
    # The original built both arrays but never returned them.
    return anomaly_score_con, anomaly_label_con

In [21]:
df_dict = {}
# Only the first `nrows` points of every subdataset are loaded.
# NOTE(review): 100 looks like a quick-run cap; set to None to load the
# full series — confirm which is intended for reported numbers.
nrows = 100
model = "3sigma"
# Names for the six values returned by compute_metrics, in its return order.
metric_names = ["aF1", "aPC", "aRC", "rF1", "rPC", "rRC"]

df_dict["model"] = model
# glob order depends on the filesystem; sort so that the insertion order of
# subdataset_values (and anything concatenated from it) is reproducible.
folders = sorted(glob(os.path.join(results_dir, f"{model}/*/*/*")))
subdataset_values = {}
metrics_save = defaultdict(list)
for folder in folders:
    # The last two path components are taken as <dataset>/<subdataset>.
    folder_components = folder.split(os.sep)
    dataset, subdataset = folder_components[-2], folder_components[-1]
    df_dict["dataset"] = dataset
    anomaly_score, anomaly_label = load_tuple(folder, nrows)
    subdataset_values[subdataset] = {
        "anomaly_score": anomaly_score,
        "anomaly_label": anomaly_label,
    }
print("Finish collecting.")

# NOTE(review): the original loop only printed each group and left
# metrics_save empty, so the averaging cell had nothing to average after a
# fresh Restart & Run All. The per-group evaluation below is reconstructed
# from the metric keys in the saved output — confirm it matches the intended
# protocol.
for group_name, subdatasets in summarize_dict.items():
    subdatasets = list(subdatasets.keys())
    print(group_name, subdatasets)
    missing = [name for name in subdatasets if name not in subdataset_values]
    if missing or not subdatasets:
        # Skip loudly instead of failing with a KeyError / empty concatenate.
        print(f"Skip {group_name}: no results found for {missing}")
        continue
    group_score = np.concatenate(
        [subdataset_values[name]["anomaly_score"] for name in subdatasets]
    )
    group_label = np.concatenate(
        [subdataset_values[name]["anomaly_label"] for name in subdatasets]
    )
    group_metrics = compute_metrics(group_score, group_label)
    for metric_name, metric_value in zip(metric_names, group_metrics):
        metrics_save[metric_name].append(metric_value)

print("Finish individuall evaluation")

Finish collecting.
Group A ['machine-1-1']
Group B ['machine-1-1']
Group C ['machine-1-1']
Finish individuall evaluation


In [13]:
# Collapse every list of collected metric values into its arithmetic mean
# and store the means in df_dict under the same metric names.
metric_means = {}
for metric_name, metric_values in metrics_save.items():
    metric_means[metric_name] = sum(metric_values) / len(metric_values)
df_dict.update(metric_means)


{'model': '3sigma',
 'dataset': 'SMD',
 'aF1': 0.0,
 'aPC': 0.0,
 'aRC': 0.0,
 'rF1': 0.0,
 'rPC': 0.0,
 'rRC': 0.0}

In [15]:
# Pool every collected subdataset into one series, following the insertion
# order of subdataset_values.
anomaly_score_con = np.concatenate(
    [entry["anomaly_score"] for entry in subdataset_values.values()]
)
anomaly_label_con = np.concatenate(
    [entry["anomaly_label"] for entry in subdataset_values.values()]
)

# iter_thresholds is run with metric="f1", once with and once without
# adjustment; the third element of each result is the prediction vector.
_, _, best_adjust_pred, _ = iter_thresholds(
    anomaly_score_con, anomaly_label_con, metric="f1", adjustment=True
)
_, _, best_raw_pred, _ = iter_thresholds(
    anomaly_score_con, anomaly_label_con, metric="f1", adjustment=False
)

# "_con" marks metrics computed on the concatenated series
# (a* = with adjustment, r* = without).
df_dict.update(
    {
        "aF1_con": f1_score(anomaly_label_con, best_adjust_pred),
        "aPC_con": precision_score(anomaly_label_con, best_adjust_pred),
        "aRC_con": recall_score(anomaly_label_con, best_adjust_pred),
        "rF1_con": f1_score(anomaly_label_con, best_raw_pred),
        "rPC_con": precision_score(anomaly_label_con, best_raw_pred),
        "rRC_con": recall_score(anomaly_label_con, best_raw_pred),
    }
)

In [16]:
# Display the metrics dict; as the cell's last expression it is rendered as the cell output.
df_dict

{'model': '3sigma',
 'dataset': 'SMD',
 'aF1': 0.0,
 'aPC': 0.0,
 'aRC': 0.0,
 'rF1': 0.0,
 'rPC': 0.0,
 'rRC': 0.0,
 'aF1_con': 0.0,
 'aPC_con': 0.0,
 'aRC_con': 0.0,
 'rF1_con': 0.0,
 'rPC_con': 0.0,
 'rRC_con': 0.0}