In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path
# Make the project package importable without installing it: take the parent of the
# current working directory and add it to sys.path (only if it is not there yet).
# module_path is also passed as root_path to the evaluation calls in later cells.
# NOTE(review): assumes the kernel's working directory is a direct subfolder of the
# repository root — confirm when the notebook is launched from somewhere else.
module_path = str(Path.cwd().parents[0])
if module_path not in sys.path:
    sys.path.append(module_path)

import numpy as np

from quovadis_tad.eval_simple_baselines import evaluate_simple_baselines_on_all_paper_datasets

# Evaluate the simple baselines
In this notebook we run all our simple baselines on the datasets appearing in the paper. The main purpose of the notebook is reproducibility. If you want to use your own datasets or methods, or even combine them with ours, please see the `simple_baselines_example_usage.ipynb` notebook for inspiration.

In [3]:
def highlight_max(s, props=''):
    """Return per-cell CSS that marks the largest value(s) of ``s``.

    Intended as a callback for ``Styler.apply``.

    Parameters
    ----------
    s : object exposing ``.values`` (a pandas Series when called by ``Styler.apply``)
        The values of one column/row to inspect.
    props : str, optional
        CSS declarations assigned to every entry equal to the maximum.

    Returns
    -------
    numpy.ndarray
        ``props`` where the entry equals the NaN-ignoring maximum of ``s``,
        and ``''`` everywhere else.
    """
    peak = np.nanmax(s.values)  # NaNs are skipped when locating the maximum
    is_peak = s == peak  # ties are all flagged; NaN entries never compare equal
    return np.where(is_peak, props, '')
# Show the directory used as import root / root_path so it can be checked by eye.
# NOTE(review): this prints an absolute local path; clear the output before sharing.
print(module_path)

C:\Users\jhaja\OneDrive\Desktop\quovadis\QuoVadisTAD


## Evaluate on a single dataset
Full evaluation on all datasets can take some time. One can first try running on a single dataset of interest to get some fast results. Below we have an example of evaluating the methods on a single dataset only (`ourBench` in the cells below; other dataset names such as `swat` can be substituted). In the first part, we apply the optimal score normalization; in the second part, all such scores can be returned (`score_normalization='all'`) to check the impact of the different normalization options.

#### Optimal normalization

In [4]:
# Run the simple baselines on a single dataset with point-wise evaluation.
# The result is stored in df_std and rendered by the following display cell.
df_std = evaluate_simple_baselines_on_all_paper_datasets(
    root_path=module_path,  # parent of the working directory, computed in the import cell
    dataset_names=['ourBench'],  # provide one or more dataset names e.g ['swat', 'wadi_127', 'wadi_112', 'smd', 'ucr_IB'], see dataset_reader enum.
    data_normalization="0-1",              
    eval_method='point_wise',
    score_normalization='optimal',  # Will only return the scores for the optimal score normalization method.
    verbose=False
)

Number of train files: 27
Number of test files: 27
Number of label files: 27


KeyboardInterrupt: 

In [5]:
# Render df_std with 3-decimal formatting. highlight_max is applied per column
# (axis=0), so the best value of each metric column gets the dark-blue highlight.
(
    df_std
    .style
    .format(precision=3)
    .apply(highlight_max, props='color:white;background-color:darkblue', axis=0)
)

Unnamed: 0_level_0,OURBENCH,OURBENCH,OURBENCH,OURBENCH
Unnamed: 0_level_1,F1,P,R,AUPRC
1-NN Distance,0.941,0.896,0.992,0.926
Simple L2_norm,0.758,0.61,1.0,0.494
Random,0.758,0.61,1.0,0.701


In [6]:
# Debugging aid: report which Python interpreter the kernel is running
# (presumably to confirm that the intended environment is active).
# NOTE(review): `sys` is already imported in the import cell at the top, and `os` is
# only needed by the disabled line below; this cell is a candidate for removal
# before the notebook is shared.
import sys
import os
print("Python executable:", sys.executable)
# Keep disabled when sharing: os.environ can contain credentials/tokens that would
# be stored in the saved notebook output.
#print("Environment variables:", os.environ)

Python executable: C:\Users\jhaja\Desktop\miniconda3\envs\quovadis\python.exe


#### See the impact of different score normalisations

In [5]:
# NOTE(review): this section is titled as a comparison of score normalisations, but
# the call below requests only the 'optimal' one and uses range-wise evaluation.
# It also reuses the name df_std from the point-wise cell above, so the table shown
# afterwards depends on which evaluation cell was executed last.
df_std = evaluate_simple_baselines_on_all_paper_datasets(
    root_path=module_path,
    dataset_names=['ourBench'],
    data_normalization="0-1",              
    eval_method='range_wise',
    score_normalization='optimal',  # Set to 'all' to return scores for all score normalization methods for baselines which return multiple outputs (per the original note, only PCA_Error).
    verbose=False
)

Number of train files: 27
Number of test files: 27
Number of label files: 27


In [6]:
# Render the most recently computed df_std (3 decimals) and highlight the maximum
# of every column; axis=0 makes Styler.apply call highlight_max column by column.
(
    df_std
    .style
    .format(precision=3)
    .apply(highlight_max, props='color:white;background-color:darkblue', axis=0)
)

Unnamed: 0_level_0,OURBENCH,OURBENCH,OURBENCH,OURBENCH
Unnamed: 0_level_1,F1,P,R,AUPRC
1-NN Distance,0.766,0.635,0.963,0.668
Simple L2_norm,0.674,0.508,1.0,0.478
Random,0.442,0.508,0.39,0.604


## Evaluate on all datasets - Point-Wise metrics
Here we reproduce the point-wise F1 score of all simple baselines on all datasets.

In [None]:
# Point-wise evaluation for the "all datasets" section; the result is kept in
# df_point_wise for the two display cells that follow.
df_point_wise = evaluate_simple_baselines_on_all_paper_datasets(
    root_path=module_path,
    dataset_names=None,  # presumably selects every paper dataset — confirm in the library
    data_normalization="0-1",              
    eval_method='point_wise',  # point-wise metrics (the range-wise section uses 'range_wise')
    score_normalization='optimal',
    verbose=True
)

In [None]:
# Full point-wise table: 3-decimal formatting, column-wise (axis=0) highlighting of
# the maximum value via highlight_max.
(
    df_point_wise
    .style
    .format(precision=3)
    .apply(highlight_max, props='color:white;background-color:darkblue', axis=0)
)

In [None]:
# Compact point-wise view: drop the 'P', 'R' and 'AUPRC' labels from the second
# column level before styling (presumably leaving only the F1 columns — confirm
# against the columns returned by the evaluation).
(
    df_point_wise
    .drop(['P', 'R','AUPRC'], axis=1, level=1)
    .style
    .format(precision=3)
    .apply(highlight_max, props='color:white;background-color:darkblue', axis=0)
)

## Evaluate on all datasets - Range-Wise metrics
Here we reproduce the range-wise scores of all simple baselines on all datasets.

In [None]:
# Range-wise evaluation for the "all datasets" section; the result is kept in
# df_range_wise for the two display cells that follow.
df_range_wise = evaluate_simple_baselines_on_all_paper_datasets(
    root_path=module_path,
    dataset_names=None,  # presumably selects every paper dataset — confirm in the library
    data_normalization="0-1",
    eval_method='range_wise',  # range-wise metrics (the point-wise section uses 'point_wise')
    score_normalization='optimal',
    verbose=True
)

In [None]:
# Full range-wise table: 3-decimal formatting, column-wise (axis=0) highlighting of
# the maximum value via highlight_max.
(
    df_range_wise
    .style
    .format(precision=3)
    .apply(highlight_max, props='color:white;background-color:darkblue', axis=0)
)

In [None]:
# Compact range-wise view: drop the 'P', 'R' and 'AUPRC' labels from the second
# column level before styling (presumably leaving only the F1 columns — confirm
# against the columns returned by the evaluation).
(
    df_range_wise
    .drop(['P', 'R','AUPRC'], axis=1, level=1)
    .style
    .format(precision=3)
    .apply(highlight_max, props='color:white;background-color:darkblue', axis=0)
)