In [68]:
import os
import pandas as pd
import numpy as np
import json
from concurrent.futures.thread import ThreadPoolExecutor

from vmft_lad.BaseDetector import BaseDetector
from vmft_lad.ILogInferenceProvider import ILogInferenceProvider
from heap_providers.SubsequenceMaxHeap import SubsequenceMaxHeap
from log_inference_providers.BartLarge.FastBartLargeZeroShotLogInference import (
    FastBartLargeZeroShotLogInference,
)
from log_inference_providers.Falcon7B.Falcon7BFewShotLogInference import (
    FastFalcon7BFewShotLogInference,
)
from log_inference_providers.FakeLogInferenceProvider import FakeLogInferenceProvider
from log_inference_providers.GPT3.FastGPT3FewShotLogInference import (
    FastGPT3FewShotLogInference,
)
from log_inference_providers.EmertonMonarch7B.FastEmertonMonarch7BFewShotLogInference import (
    FastEmertonMonarch7BFewShotLogInference,
)
from log_inference_providers.Cyrax7B.FastCyrax7BFewShotLogInference import (
    FastCyrax7BFewShotLogInference,
)
from eval_criteria import get_criteria1, get_criteria2

In [69]:
# Evaluation parameters
# Size of the index buckets used when scoring the benign region in `execute`:
# each bucket ends at the next index that is a multiple of this value (or at
# the label region) and yields a single FP/TN verdict.
benign_eval_window = 200

# Model parameters
# Forwarded to BaseDetector as `windowSize`.
window_size = 4
# Forwarded to BaseDetector as `probationaryPeriod`; evaluation in `execute`
# starts at index `probationary_period + 1`.
probationary_period = 150

# Display name -> inference provider class. Each enabled class is instantiated
# (with no arguments) by the evaluation cell, and the display name becomes the
# results sub-directory. Commented-out entries are skipped.
inference_providers = {
    # "Fake": FakeLogInferenceProvider,
    "BartLarge": FastBartLargeZeroShotLogInference,
    "Falcon7B": FastFalcon7BFewShotLogInference,
    # "GPT3": FastGPT3FewShotLogInference,
    "EmertonMonarch7B": FastEmertonMonarch7BFewShotLogInference,
    "Cyrax7B": FastCyrax7BFewShotLogInference,
}

# Dataset folder name; used to build both the input data path and the
# results output path.
dataset_name = "cpu"

In [70]:
# Directory holding the per-series CSV files and their metadata for the
# selected dataset.
dataset_dir = f"../../../data/{dataset_name}"

# Load the metadata through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
# JSON is read as UTF-8 explicitly rather than via the platform default.
with open(f"{dataset_dir}/dataset_metadata.json", encoding="utf-8") as metadata_file:
    dataset_metadata = json.load(metadata_file)

In [71]:
def execute(threshold: float, inference_provider: ILogInferenceProvider, criteria_1: dict, criteria_2: dict):
    """Score one anomaly threshold against every CSV file in ``dataset_dir``.

    For each ``*.csv`` file a fresh ``BaseDetector`` (with its own
    ``SubsequenceMaxHeap``) is fed the ``value`` column record by record,
    starting at index ``probationary_period + 1``:

    * Benign region (indices before ``label_region - 1``): records are grouped
      into buckets that end at the next multiple of ``benign_eval_window``.
      A bucket counts as FP if any score reaches ``threshold``, otherwise TN.
    * Anomalous region (up to ``failure_point``): the first score reaching
      ``threshold`` counts as TP for both criteria.
    * After the failure point: the first score reaching ``threshold`` counts
      as TP for criteria 1 and as TP_LATE for criteria 2.
    * No detection at all counts as FN for both criteria.

    Args:
        threshold: Anomaly score cut-off; also passed to the detector as
            ``anomalyThreshold``. The subsequence match threshold passed to
            the detector is capped at 0.05.
        inference_provider: Provider instance handed to every detector.
        criteria_1: Output mapping; receives the criteria-1 counts under key
            ``threshold``.
        criteria_2: Output mapping; receives the criteria-2 counts (including
            ``TP_LATE``) under key ``threshold``.

    Returns:
        None. Results are written into ``criteria_1`` and ``criteria_2``.
    """
    print("Start evaluating threshold: ", threshold)

    # Cap the subsequence match threshold at 0.05.
    if threshold > 0.05:
        subsequence_match_threshold = 0.05
    else:
        subsequence_match_threshold = threshold

    # Key order is kept (threshold, TP, TN, FP, FN[, TP_LATE]) because it
    # determines the column order of the exported tables.
    outcome_keys = ("TP", "TN", "FP", "FN")
    tally_1 = {"threshold": threshold, **dict.fromkeys(outcome_keys, 0)}
    tally_2 = {"threshold": threshold, **dict.fromkeys(outcome_keys, 0), "TP_LATE": 0}

    for entry in os.scandir(dataset_dir):
        if not entry.name.endswith(".csv"):
            continue

        data = pd.read_csv(entry.path, header=0)["value"].tolist()

        # Metadata is keyed by the file name up to its first dot.
        series_name = entry.name.split(".")[0]
        series_meta = dataset_metadata[series_name]
        label_index = series_meta["label_region"] - 1
        failure_point_index = series_meta["failure_point"]

        # A brand-new heap and detector per file: no state carries over
        # between series, which keeps the comparison fair.
        model = BaseDetector(
            inferenceProvider=inference_provider,
            maxHeapProvider=SubsequenceMaxHeap(),
            data=data,
            windowSize=window_size,
            probationaryPeriod=probationary_period,
            subsequenceMatchThreshold=subsequence_match_threshold,
            anomalyThreshold=threshold,
        )

        cursor = probationary_period + 1  # index of the next record to feed

        # --- Benign region: one FP/TN verdict per bucket -------------------
        while cursor < label_index:
            bucket_scores = [model.handleRecord(cursor)]
            cursor += 1
            while cursor % benign_eval_window != 0 and cursor < label_index:
                bucket_scores.append(model.handleRecord(cursor))
                cursor += 1

            if any(score >= threshold for score in bucket_scores):
                verdict = "FP"
            else:
                verdict = "TN"
            tally_1[verdict] += 1
            tally_2[verdict] += 1

        # --- Anomalous region: detection before the failure point ----------
        detected = False
        while cursor < failure_point_index:
            detected = model.handleRecord(cursor) >= threshold
            if detected:  # early detection
                tally_1["TP"] += 1
                tally_2["TP"] += 1
                break
            cursor += 1

        # --- Past the failure point: only reached without early detection --
        if not detected:
            while cursor < len(data):
                detected = model.handleRecord(cursor) >= threshold
                if detected:  # late detection
                    tally_1["TP"] += 1
                    # Can make this FN as well
                    tally_2["TP_LATE"] += 1
                    break
                cursor += 1

        if not detected:
            tally_1["FN"] += 1
            tally_2["FN"] += 1

    criteria_1[threshold] = tally_1
    criteria_2[threshold] = tally_2

    print("Done evaluating threshold: ", threshold)

In [72]:
# criteria_1 = {}
# criteria_2 = {}
# inference_provider = inference_providers["GPT3"]()
# threshold = 0.2
# execute(threshold, inference_provider, criteria_1, criteria_2)

In [73]:
# Candidate anomaly thresholds: from 0 up to (but excluding) 0.36 in steps of
# 0.01, rounded to strip floating-point noise introduced by `np.arange`.
thresholds = np.arange(0, 0.36, 0.01).round(3).tolist()

# The loop variable holds the provider *class*; the instance gets its own name
# so one identifier is not rebound from class to instance mid-iteration.
for inference_provider_name, inference_provider_cls in inference_providers.items():
    print("\nStart evaluating inference provider: ", inference_provider_name)
    results_dir = f"./results/{dataset_name}/{inference_provider_name}"
    os.makedirs(results_dir, exist_ok=True)

    criteria_1 = {}
    criteria_2 = {}
    # A single provider instance is shared by every worker thread.
    inference_provider = inference_provider_cls()

    # Keep the futures: a discarded future silently swallows any exception
    # raised inside `execute`, leaving that threshold missing from the results.
    with ThreadPoolExecutor(max_workers=8) as executor:
        futures = [
            executor.submit(execute, threshold,
                            inference_provider, criteria_1, criteria_2)
            for threshold in thresholds
        ]

    # All workers have finished once the executor context exits. Re-raise the
    # first worker failure (if any) before writing, so incomplete CSVs are
    # never produced.
    for future in futures:
        future.result()

    df_criteria_1 = pd.DataFrame(criteria_1).T.sort_values("threshold")
    df_criteria_1.to_csv(f"{results_dir}/criteria1.csv", index=False)

    df_criteria_2 = pd.DataFrame(criteria_2).T.sort_values("threshold")
    df_criteria_2.to_csv(f"{results_dir}/criteria2.csv", index=False)

    print("Done evaluating inference provider: ", inference_provider_name)

print("Finish execution of all.")


Start evaluating inference provider:  BartLarge
Start evaluating threshold:  0.0
Start evaluating threshold:  0.01
Start evaluating threshold:  0.02
Start evaluating threshold:  0.03
Start evaluating threshold:  0.04
Start evaluating threshold:  0.05
Start evaluating threshold:  0.06
Start evaluating threshold:  0.07
Done evaluating threshold:  0.02
Start evaluating threshold:  0.08
Done evaluating threshold:  0.07
Start evaluating threshold:  0.09
Done evaluating threshold:  0.04
Start evaluating threshold:  0.1
Done evaluating threshold:  0.03
Start evaluating threshold:  0.11
Done evaluating threshold:  0.01
Start evaluating threshold:  0.12
Done evaluating threshold:  0.06
Start evaluating threshold:  0.13
Done evaluating threshold:  0.05
Start evaluating threshold:  0.14
Done evaluating threshold:  0.0
Start evaluating threshold:  0.15
Done evaluating threshold:  0.09
Start evaluating threshold:  0.16
Done evaluating threshold:  0.08
Start evaluating threshold:  0.17
Done evaluat