# Benchmark

We benchmark all the algorithms in StreamAD on the public datasets listed below. Many thanks to the teams who published them!

1. AIOPS_KPI, [AIOps Challenge public dataset for KPI anomaly detection](https://github.com/NetManAIOps/KPI-Anomaly-Detection)
2. AWSCloudwatch, [AWS server metrics](https://github.com/numenta/NAB/tree/master/data/realAWSCloudwatch)
3. GAIA, [GAIA](https://github.com/CloudWise-OpenSource/GAIA-DataSet/tree/main/Companion_Data)

In [1]:

from pathlib import Path
from time import perf_counter

import numpy as np
import pandas as pd
from tqdm import tqdm
from streamad.util import StreamGenerator, CustomDS
from streamad.util import plot
from streamad.evaluate import NumentaAwareMetircs, PointAwareMetircs, SeriesAwareMetircs
from streamad.model import KNNDetector, xStreamDetector, SpotDetector, RandomDetector, RShashDetector, SRDetector, RrcfDetector, HSTreeDetector, ZScoreDetector, LodaDetector, OCSVMDetector, MadDetector, SArimaDetector,ZSpotDetector

from dataset import prepare_ds, read_ds


We download the dataset, unzip it, and reconstruct its structure with **prepare_ds()**, and then load it with **read_ds()**.

Currently, the available **ds_name** values and their **ds_file** options are:

```python

DS = {
    "AIOPS_KPI": ["preliminary_train", "finals_train", "finals_ground_truth"],
    "AWSCloudwatch": [],
    "GAIA": [
        "changepoint_data",
        "concept_drift_data",
        "linear_data",
        "low_signal-to-noise_ratio_data",
        "partially_stationary_data",
        "periodic_data",
        "staircase_data",
    ],
}
```



In [46]:
# Download, unzip and restructure the selected dataset under `path`.
# NOTE(review): 'MICRO' is not one of the dataset names listed in the
# markdown above (AIOPS_KPI, AWSCloudwatch, GAIA) — confirm prepare_ds()
# accepts it.
path = "./streamad-benchmark-dataset"
ds_name = "MICRO"
prepare_ds(path=path, ds_name=ds_name)

Dataset AWSCloudwatch already exists


In [47]:
# Load the series of the chosen file group; the benchmark loop iterates the
# result as `key -> (df, label)` pairs.
dfs = read_ds(
    ds_name=ds_name,
    ds_file="periodic_data",
)

In [48]:
# Column layout of the results table: five run descriptors followed by
# precision / recall / F-beta for each of the three metric families.
benchmark_items = ["Detector", "Dataset", "Key", "Size(#)", "Time(s)"] + [
    f"{family}_{stat}"
    for family in ("Point", "Series", "Numenta")
    for stat in ("Precision", "Recall", "Fbeta")
]
# Start with an empty frame that only carries the column names.
benchmark_df = pd.DataFrame(columns=benchmark_items)

In [35]:
# Detector classes to benchmark, each mapped to the keyword arguments its
# constructor is called with. Uncomment an entry to include that detector.
models = {
    SpotDetector: {"back_mean_len": 5, "global_memory": True, "window_len": 20},
    # KNNDetector: {},
    # xStreamDetector: {},
    # ZSpotDetector: {"back_mean_len": 5, "z": 3, "window_len": 20},
    # RandomDetector: {},
    # ZScoreDetector: {"window_len": 400},
}

In [52]:
# Run every configured detector over every loaded series, record timing and
# metric values in `benchmark_df`, and save one score plot per run.

# Scores at or above this value are clamped to 1 before evaluation.
threshold = 0.7

# Only this window of each series is drawn in the saved figure.
plot_start = 0
plot_gap = 50000
plot_window = slice(plot_start, plot_start + plot_gap)

# write_image() does not create missing directories, so make sure the
# output folder exists before the first figure is saved.
image_dir = Path("./image")
image_dir.mkdir(parents=True, exist_ok=True)

for key, (df, label) in dfs.items():
    ds = CustomDS(df, label)
    stream = StreamGenerator(ds.data)

    for model_class, model_param in models.items():
        model = model_class(**model_param)
        detector_name = model.__class__.__name__

        # Feed the series one item at a time and time the whole pass.
        scores = []
        start_time = perf_counter()
        for x in tqdm(stream.iter_item(), total=len(ds.data)):
            score = model.fit_score(x)
            # fit_score() may return None; such entries are stored unchanged.
            if score is not None:
                score = 1 if score >= threshold else score
            scores.append(score)
        elapsed = perf_counter() - start_time

        # One result row: run descriptors first, then the values returned by
        # each metric, in the column order of `benchmark_items`.
        benchmark_values = [detector_name, ds_name, key, len(ds.data), elapsed]
        for metric in (
            PointAwareMetircs(),
            SeriesAwareMetircs(),
            NumentaAwareMetircs(),
        ):
            benchmark_values.extend(metric.evaluate(ds.label, scores))

        benchmark_df.loc[len(benchmark_df)] = benchmark_values

        # Plot from sliced copies so `scores` and the dataset attributes stay
        # intact for the next detector and for later cells.
        fig = plot(
            data=ds.data[plot_window],
            scores=scores[plot_window],
            date=ds.date[plot_window],
            features=ds.features,
            label=ds.label[plot_window],
        )
        fig.show()
        fig.write_image(str(image_dir / f"{detector_name}_{key}.png"))
    


100%|██████████| 4032/4032 [00:06<00:00, 670.31it/s] 


100%|██████████| 4032/4032 [00:00<00:00, 4088.22it/s]


100%|██████████| 4032/4032 [00:03<00:00, 1176.86it/s]


100%|██████████| 4032/4032 [00:05<00:00, 784.41it/s] 


100%|██████████| 4032/4032 [00:03<00:00, 1021.17it/s]


100%|██████████| 4032/4032 [00:01<00:00, 3442.72it/s]


100%|██████████| 4032/4032 [00:05<00:00, 778.75it/s] 


100%|██████████| 4730/4730 [00:04<00:00, 1114.01it/s]


100%|██████████| 4032/4032 [00:19<00:00, 208.29it/s] 


100%|██████████| 4032/4032 [00:06<00:00, 608.82it/s] 


100%|██████████| 4032/4032 [00:03<00:00, 1160.81it/s]


100%|██████████| 4730/4730 [00:01<00:00, 4556.36it/s]


100%|██████████| 1243/1243 [00:00<00:00, 2699.35it/s]


100%|██████████| 4032/4032 [00:03<00:00, 1311.18it/s]


100%|██████████| 4032/4032 [00:03<00:00, 1258.14it/s]


100%|██████████| 4621/4621 [00:04<00:00, 1060.32it/s]


100%|██████████| 4032/4032 [00:02<00:00, 1451.62it/s]


In [13]:
# Display the results collected so far.
benchmark_df

Unnamed: 0,Detector,Dataset,Key,Size(#),Time(s),Point_Precision,Point_Recall,Point_Fbeta,Series_Precision,Series_Recall,Series_Fbeta,Numenta_Precision,Numenta_Recall,Numenta_Fbeta
0,ZSpotDetector,GAIA,low_signal-to-noise_ratio_data_16_from2019-11-...,8640,0.364568,0.111111,0.002062,0.004049,0.111111,0.166667,0.133333,0.111111,0.166667,0.133333
1,ZSpotDetector,GAIA,low_signal-to-noise_ratio_data_21_from2019-11-...,8640,0.286785,0.125,0.002075,0.004082,0.125,0.2,0.153846,0.125,0.2,0.153846
2,ZSpotDetector,GAIA,low_signal-to-noise_ratio_data_7_from2019-11-1...,8640,0.264866,0.125,0.002101,0.004132,0.166667,0.333333,0.222222,0.125,0.333333,0.181818
3,ZSpotDetector,GAIA,low_signal-to-noise_ratio_data_25_from2019-01-...,8806,0.24265,0.333333,0.008032,0.015686,0.375,0.030303,0.056075,0.333333,0.060606,0.102564
4,ZSpotDetector,GAIA,low_signal-to-noise_ratio_data_47_from2019-11-...,6482,0.200386,0.111111,0.002532,0.00495,0.0625,0.5,0.111111,0.111111,0.5,0.181818
5,ZSpotDetector,GAIA,low_signal-to-noise_ratio_data_11_from2019-11-...,8640,0.25447,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,ZSpotDetector,GAIA,low_signal-to-noise_ratio_data_19_from2019-01-...,8807,0.274442,0.052632,0.111111,0.071429,0.055556,0.111111,0.074074,0.052632,0.111111,0.071429
7,ZSpotDetector,GAIA,low_signal-to-noise_ratio_data_1_from2019-08-1...,8353,0.270332,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,ZSpotDetector,GAIA,low_signal-to-noise_ratio_data_5_from2019-11-1...,8640,0.249382,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,ZSpotDetector,GAIA,low_signal-to-noise_ratio_data_37_from2019-11-...,8640,0.295813,0.166667,0.002494,0.004914,0.2,0.125,0.153846,0.166667,0.125,0.142857


In [10]:
# Persist the results without the index column. The file name (including its
# "benchamark" spelling) must stay in sync with the cell that reads it back.
benchmark_df.to_csv(
    "./benchamark_results.csv",
    index=False,
)


In [51]:
# Optional: inspect the most recent run by plotting its data and scores.
# This step is not required for the benchmark itself.
from streamad.util import plot

idx = 0
gap = 50000
window = slice(idx, idx + gap)

features = ds.features
data = ds.data[window]
label = ds.label[window]
date = ds.date[window]
scores = scores[window]
plot(data=data, scores=scores, date=date, features=features, label=label)

## Plot the benchmark results into a table

In [15]:
# Reload the results from the CSV file that the export cell above writes
# (same file name, including its "benchamark" spelling).
import plotly.graph_objects as go
import pandas as pd
import numpy as np
benchmark_df = pd.read_csv("./benchamark_results.csv")
# Disabled alternative: render the results as a plotly table and save it as
# benchmark_results.svg. Kept for reference only; nothing below is executed.
# fig = go.Figure(
#     data=[
#         go.Table(
#             header=dict(values=list(benchmark_df.columns)),
#             cells=dict(
#                 values=[
#                     benchmark_df[i].round(decimals=5)
#                     if benchmark_df[i].dtype is np.dtype("float") or benchmark_df[i].dtype is np.dtype("int")
#                     else benchmark_df[i]
#                     for i in benchmark_df.columns.tolist()
#                 ],
#                 format=[""]*3 + [".3f"] * (len(benchmark_df.columns)-3),
#                 fill_color='white',
#                 line_color='lightgrey'
#             ),
#         ),
        
#     ],
# )
# fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
# fig.write_image("benchmark_results.svg")
# fig


Write benchmark results into a table file.

In [16]:
# Round the numeric columns to three decimals for the published table.
# infer_objects() first gives object-typed columns a numeric dtype where
# possible, so the rounding also works on a frame that was filled row by row.
# DataFrame.round() leaves the text columns untouched and replaces the
# element-wise applymap() call, which recent pandas versions deprecate.
benchmark_df = benchmark_df.infer_objects().round(3)
content = benchmark_df.to_markdown(index=False)

# Write the docs page with an explicit encoding so the result does not depend
# on the platform default.
with open('../docs/source/benchmark.md', 'w', encoding='utf-8') as f:
    f.write('# Benchmark \n' + content)

In [20]:
# Allow up to 1000 rows to be displayed, then copy the table (without its
# index) to the system clipboard.
pd.set_option("display.max_rows", 1000)
benchmark_df.to_clipboard(
    index=False,
)

In [21]:
# Display the results table.
benchmark_df

Unnamed: 0,Detector,Dataset,Key,Size(#),Time(s),Point_Precision,Point_Recall,Point_Fbeta,Series_Precision,Series_Recall,Series_Fbeta,Numenta_Precision,Numenta_Recall,Numenta_Fbeta
0,SpotDetector,GAIA,linear_data_4_from2018-12-19to2019-01-31_8313,12672,0.452,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,SpotDetector,GAIA,linear_data_42_from2018-12-19to2019-01-31_8153,12672,0.42,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,SpotDetector,GAIA,linear_data_7_from2018-12-19to2019-01-31_8300,12672,0.448,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,SpotDetector,GAIA,linear_data_11_from2018-12-19to2019-01-31_8164,12672,0.441,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,SpotDetector,GAIA,linear_data_32_from2018-12-19to2019-01-31_8151,12672,0.502,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,SpotDetector,GAIA,linear_data_25_from2018-12-19to2019-01-31_8412,12672,0.451,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,SpotDetector,GAIA,linear_data_4_from2018-12-19to2019-01-31_8313,12672,0.694,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7,SpotDetector,GAIA,linear_data_42_from2018-12-19to2019-01-31_8153,12672,0.958,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
8,SpotDetector,GAIA,linear_data_7_from2018-12-19to2019-01-31_8300,12672,0.705,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
9,SpotDetector,GAIA,linear_data_11_from2018-12-19to2019-01-31_8164,12672,0.499,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [None]:
# Download, unzip and restructure the AWSCloudwatch dataset under `path`.
path = "./streamad-benchmark-dataset"
ds_name = "AWSCloudwatch"
prepare_ds(path=path, ds_name=ds_name)


In [None]:
from sklearn.metrics import recall_score

# Scratch check of the `zero_division` fallback: the ground truth contains no
# positive sample, so recall would be 0 true positives / 0 actual positives.
y_true = [0] * 6
y_pred = [0, 1, 0, 0, 0, 0]

recall_score(y_true, y_pred, zero_division=1)