# Imports

In [1]:
import sys, os, time, copy, random, math
import numpy as np
import pandas as pd
import pickle
import itertools
from glob import glob
from collections import Counter, OrderedDict

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.ensemble import IsolationForest
from sklearn.metrics import roc_auc_score, roc_curve, auc
from sklearn.decomposition import PCA

%reload_ext autoreload
%autoreload 2

# Dataset settings

In [26]:
# Inspect the stored settings of the univariate datasets (directories whose path contains '__' are skipped).
all_datases = [d for d in glob(os.path.join('/home/vincentv/dtaijupiter/projects/camera_ready_PBAD/data/univariate/', '*')) if '__' not in d]
for d in all_datases:
    # NOTE: pickle can execute arbitrary code on load; only use this on trusted, locally produced files.
    # The context manager closes the file handle again after reading.
    with open(os.path.join(d, 'data_settings.pickle'), 'rb') as handle:
        rset = pickle.load(handle)
    df = pd.read_csv(os.path.join(d, 'train_data.csv'), sep=',', header=0, names=['pc1', 'label'], usecols=[1, 2])
    nr_vals = len(np.unique(df['pc1'].values))  # number of distinct values in the series
    # Optional maintenance step (disabled): overwrite a setting and store the pickle again.
    #rset['bin_size'] = 1
    #with open(os.path.join(d, 'data_settings.pickle'), 'wb') as handle:
    #    pickle.dump(rset, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print(d)
    #print(rset)

    print(rset['discretize'], rset['alphabet_size'], nr_vals)
    print()

    # only the first dataset is inspected
    break

/home/vincentv/dtaijupiter/projects/camera_ready_PBAD/data/univariate/new_york_taxi
False 30 79



# Functions

In [2]:
def compute_average_rank(df_matrix, threshold=0.001):
    """ Compute the average rank of each method over all datasets.

    Parameters
    ----------
    df_matrix : pandas.DataFrame
        Scores with columns = methods and index = datasets (higher == better).
    threshold : float
        Passed to `rank_row` as the absolute tolerance within which scores are tied.

    Returns
    -------
    pandas.DataFrame
        Indexed by method, with float columns 'avg_rank' (mean rank over the rows),
        'stdv' (standard deviation of the rank) and 'avg_metric' (mean of the raw scores).
    """
    methods = df_matrix.columns

    # invert the values so that the best score becomes the smallest one (rank 1)
    inverted = 1.0 - df_matrix

    # one row of ranks per dataset
    ranking = np.zeros(inverted.shape)
    for i, (_, row) in enumerate(inverted.iterrows()):
        ranking[i, :] = rank_row(row.values, threshold)

    # build the float-valued summary in one go instead of filling an integer frame cell by cell
    scores = df_matrix.values.astype(float)
    a_ranks = pd.DataFrame(
        {
            'avg_rank': np.mean(ranking, axis=0),
            'stdv': np.std(ranking, axis=0),
            'avg_metric': np.mean(scores, axis=0),
        },
        index=methods,
        columns=['avg_rank', 'stdv', 'avg_metric'],
    )
    return a_ranks

In [3]:
def count_wins_equal_losses(matrix, threshold=0.001, return_best=False):
    """ Pairwise win/equal/lose counts between the methods (columns) over the datasets (rows).

    Two scores closer than `threshold` count as equal; otherwise the higher score wins.

    Returns a dict {method: DataFrame}, where the frame is indexed by the other methods
    and has the columns 'win', 'equal', 'lose' seen from the point of view of `method`.
    With `return_best=True` a dict {'param': method, 'nwins': total wins} for the first
    method with the highest total number of wins is returned instead (an empty dict when
    no method has a single win).
    """
    methods = matrix.columns
    results = {}
    for m in methods:
        # every method except m (np.setdiff1d returns them sorted)
        rivals = np.setdiff1d(methods, m)
        tally = pd.DataFrame(0, columns=['win', 'equal', 'lose'], index=rivals)
        for _, row in matrix.iterrows():
            own = row[m]
            for rival in rivals:
                other = row[rival]
                if abs(own - other) < threshold:
                    outcome = 'equal'
                elif own > other:
                    outcome = 'win'
                else:
                    outcome = 'lose'
                tally.loc[rival, outcome] += 1
        results[m] = tally

    if not return_best:
        return results

    # keep the first method that reaches the highest total number of wins
    best_results = {}
    best_nwins = 0
    for name, tally in results.items():
        nwins = np.sum(tally.win.values)
        if nwins > best_nwins:
            best_nwins = nwins
            best_results = {'param': name, 'nwins': nwins}
    return best_results

In [4]:
def rank_row(row, abs_tol=0.001):
    """ Rank the values of `row` in ascending order (smallest value gets rank 1), giving
        tied values the average of the ranks they span.

        Values are tied when the gap to their predecessor in sorted order is at most `abs_tol`.
        NOT PERFECT: ties are decided on consecutive sorted values only, so a chain of small
        increments can put the first and the last element of a run in the same tie group even
        though the distance between those two exceeds `abs_tol`.

        Returns a float array with the ranks in the original order of `row`. """
    values = np.ravel(np.asarray(row))
    size = values.size
    order = np.argsort(values, kind='quicksort')

    # position of every original element in the sorted sequence
    position = np.empty(size, dtype=np.intp)
    position[order] = np.arange(size, dtype=np.intp)

    ordered = values[order]
    ranks = np.arange(1, size + 1, 1, dtype=float)

    # a new tie group starts wherever the gap to the previous sorted value exceeds the tolerance
    breaks = [i for i in range(1, size) if abs(ordered[i] - ordered[i - 1]) > abs_tol]
    starts = [0] + breaks
    stops = breaks + [size]
    for lo, hi in zip(starts, stops):
        if hi > lo:
            # every member of the group gets the mean of the ranks the group occupies
            ranks[lo:hi] = np.sum(ranks[lo:hi]) / (hi - lo)

    return ranks[position]

In [5]:
def number_of_wins_overal(matrix):
    """ Count for every method (column) on how many datasets (rows) it has the highest score.

    The winner of a row is determined with argmax, so on an exact tie the first column
    gets the win. Returns an integer array with one count per column, in column order.
    """
    _, n_methods = matrix.shape
    scores = matrix.astype(float).values

    win_counts = np.zeros(n_methods, dtype=int)
    for winner in np.argmax(scores, axis=1):
        win_counts[winner] += 1

    return win_counts

In [6]:
def find_best_settings_algorithm(results, algorithm_settings, threshold=0.005):
    """ Rank the parameter settings of one algorithm by their average AUROC rank.

    Parameters
    ----------
    results : pandas.DataFrame
        One row per experiment; the columns 'AD_settings', 'dname' and 'auroc' are used,
        plus every column named in `algorithm_settings`.
    algorithm_settings : dict
        {column: value} equality filters applied to `results` first (e.g. {'method': 'FPOF'}).
    threshold : float
        AUROC values within this absolute tolerance are ranked as tied (see `rank_row`).

    Returns
    -------
    (final_results, nan_methods)
        `final_results` is indexed by setting, with the columns 'avg_rank', 'avg_AUROC'
        and 'settings', sorted by ascending average rank. `nan_methods` holds the settings
        that miss a result for at least one dataset; they are left out of the ranking.
        If exactly one setting has a result for every dataset, that setting is printed and
        the unranked AUROC matrix (datasets x all settings) is returned instead of the ranking.
    """
    # filter results on the provided settings
    tmp_results = results.copy()
    for key, value in algorithm_settings.items():
        tmp_results = tmp_results[tmp_results[key] == value]

    # construct the AUROC matrix: datasets x parameter settings.
    # Group on the scalar key: grouping on a one-element list makes recent pandas versions
    # yield 1-tuples as group names, which would end up as the column labels.
    dataset_names = np.unique(tmp_results['dname'].values)
    matrix = pd.DataFrame(index=dataset_names)
    for group_name, group in tmp_results.groupby('AD_settings'):
        aucs = group.loc[:, ['dname', 'auroc']].set_index('dname')
        aucs.columns = [group_name]
        matrix = matrix.join(aucs)
    matrix = matrix.astype(float)
    mean_aucs = matrix.mean(axis=0)

    # split off the NaN-columns (i.e., these settings did not work for all the data)
    has_nan = matrix.isnull().any()
    nan_methods = matrix.columns[has_nan]
    working_methods = matrix.columns[~has_nan]
    if len(working_methods) == 1:
        print('WORKING METHOD: ', working_methods[0])
        return matrix, nan_methods
    matrix = matrix.loc[:, working_methods]
    methods = matrix.columns

    # rank per dataset on the inverted score, so that the best AUROC gets rank 1
    inverted = 1.0 - matrix
    ranking = np.zeros(inverted.shape)
    for i, (_, row) in enumerate(inverted.iterrows()):
        ranking[i, :] = rank_row(row.values, threshold)
    avg_rank = np.mean(ranking, axis=0)

    # Summarise over the retained settings only, so that every label stays aligned with
    # its own column of `ranking`.
    final_results = pd.DataFrame(
        {
            'avg_rank': np.round(avg_rank, 3),
            'avg_AUROC': [mean_aucs[m] for m in methods],
            'settings': ['settings: {}'.format(m) for m in methods],
        },
        index=methods,
        columns=['avg_rank', 'avg_AUROC', 'settings'],
    )
    final_results = final_results.sort_values('avg_rank', ascending=True)
    return final_results, nan_methods

In [7]:
def find_feasible_settings_algorithm(results, algorithm_settings, threshold=0.005):
    """ Rank the parameter settings of one algorithm that produced a result on every dataset.

    Parameters
    ----------
    results : pandas.DataFrame
        One row per experiment; the columns 'AD_settings', 'dname' and 'auroc' are used,
        plus every column named in `algorithm_settings`.
    algorithm_settings : dict
        {column: value} equality filters applied to `results` first (e.g. {'method': 'PBAD'}).
    threshold : float
        AUROC values within this absolute tolerance are ranked as tied (see `rank_row`).

    Returns
    -------
    (final_results, nan_methods)
        `final_results` is indexed by setting, with the columns 'avg_rank', 'avg_AUROC'
        and 'settings', sorted by ascending average rank. `nan_methods` holds the settings
        that miss a result for at least one dataset; they are left out of the ranking.
        If exactly one setting has a result for every dataset, that setting is printed and
        the unranked AUROC matrix (datasets x all settings) is returned instead of the ranking.
    """
    # filter results on the provided settings
    tmp_results = results.copy()
    for key, value in algorithm_settings.items():
        tmp_results = tmp_results[tmp_results[key] == value]

    # construct the AUROC matrix: datasets x parameter settings.
    # Group on the scalar key: grouping on a one-element list makes recent pandas versions
    # yield 1-tuples as group names, which would end up as the column labels.
    dataset_names = np.unique(tmp_results['dname'].values)
    matrix = pd.DataFrame(index=dataset_names)
    for group_name, group in tmp_results.groupby('AD_settings'):
        aucs = group.loc[:, ['dname', 'auroc']].set_index('dname')
        aucs.columns = [group_name]
        matrix = matrix.join(aucs)
    matrix = matrix.astype(float)
    mean_aucs = matrix.mean(axis=0)

    # split off the NaN-columns (i.e., these settings did not work for all the data)
    has_nan = matrix.isnull().any()
    nan_methods = matrix.columns[has_nan]
    working_methods = matrix.columns[~has_nan]
    if len(working_methods) == 1:
        print('WORKING METHOD: ', working_methods[0])
        return matrix, nan_methods
    matrix = matrix.loc[:, working_methods]
    methods = matrix.columns

    # rank per dataset on the inverted score, so that the best AUROC gets rank 1
    inverted = 1.0 - matrix
    ranking = np.zeros(inverted.shape)
    for i, (_, row) in enumerate(inverted.iterrows()):
        ranking[i, :] = rank_row(row.values, threshold)
    avg_rank = np.mean(ranking, axis=0)

    # build the summary in one go (mixed float/string columns) instead of filling an
    # integer-initialised frame cell by cell
    final_results = pd.DataFrame(
        {
            'avg_rank': np.round(avg_rank, 3),
            'avg_AUROC': [mean_aucs[m] for m in methods],
            'settings': ['settings: {}'.format(m) for m in methods],
        },
        index=methods,
        columns=['avg_rank', 'avg_AUROC', 'settings'],
    )
    final_results = final_results.sort_values('avg_rank', ascending=True)
    return final_results, nan_methods

In [8]:
def construct_results_matrix(results, methods=None, metric='auroc'):
    """ Build the datasets x methods matrix of one metric for a selection of methods.

    Parameters
    ----------
    results : pandas.DataFrame
        One row per experiment; the columns 'dname', 'method', 'AD_settings' and `metric` are used.
    methods : dict
        {display name: filter}, where a filter is a dict {column: value} on `results`.
        A filter with 'AD_settings' == 'oracle' (for any display name other than 'PBAD')
        selects, per dataset, the best value over all settings of filter['method'].
        Every other filter is applied literally with `_apply_strict_data_filter`.
    metric : str
        Name of the column in `results` that holds the score.

    Returns
    -------
    (matrix, oracle_settings)
        `matrix` is a float frame indexed by dataset name with one column per display name.
        `oracle_settings` maps every oracle-selected display name to its full
        datasets x settings matrix.

    Exits through sys.exit (SystemExit) when no methods are given.
    """
    if not methods:
        sys.exit('Requires a selection of methods!')
    # construct the matrix
    dataset_names = np.unique(results['dname'].values)
    matrix = pd.DataFrame(index=dataset_names)
    # selecting the optimal parameters
    oracle_settings = {}
    for mname, mfilter in methods.items():
        if mname != 'PBAD' and mfilter['AD_settings'] == 'oracle':
            print('ORACLE selects best settings for:', mfilter['method'])
            # select the best setting for each dataset using the oracle
            tmp_results = results[results['method'] == mfilter['method']]
            tmp_matrix = pd.DataFrame(index=dataset_names)
            for gname, g in tmp_results.groupby('AD_settings'):
                scores = g.loc[:, ['dname', metric]].set_index('dname')
                scores.columns = [gname]
                tmp_matrix = tmp_matrix.join(scores)
            oracle_settings[mname] = tmp_matrix
            aucs = tmp_matrix.max(axis=1).to_frame()
        else:
            m_results = _apply_strict_data_filter(results.copy(), mfilter)
            # TODO: requires all tests to have been completed + no duplicate results or sth
            aucs = m_results.loc[:, ['dname', metric]].set_index('dname')
        aucs.columns = [mname]
        matrix = matrix.join(aucs)
    # the builtin float replaces the removed np.float alias; keep the converted frame
    matrix = matrix.astype(float)
    return matrix, oracle_settings

In [9]:
def adjust_results_matrix(matrix, drop=False):
    """ Post-process a results matrix (index = datasets, columns = methods) for reporting.

    Adjusts 4 things:
    1. combine the colruyt stores: the rows 'colruyt_<store>_day' and 'colruyt_<store>_night'
       are replaced by one row 'colruyt_<store>' holding their mean
    2. add the row 'average_metric' (mean over the datasets)
    3. add the row 'number_of_wins'
    4. add the row 'average_rank'

    With `drop=True` the combined row 'colruyt_brasschaat' is removed (if present) before
    the three summary rows are computed.

    The input frame is left untouched; the adjusted copy is returned.
    """
    # work on a copy so that the caller's frame is not modified as a side effect
    matrix = matrix.copy()

    # names of the datasets (snapshot taken before any row is added)
    dnames = matrix.index.values
    colruyts = np.unique([d.split('_')[1] for d in dnames if 'colruyt' in d])
    colruyt_names = np.unique([d for d in dnames if 'colruyt' in d])
    # positions of the (day, night) rows of every store
    colruyt_pairs = [
        [np.where(dnames == 'colruyt_' + c + '_day')[0][0],
         np.where(dnames == 'colruyt_' + c + '_night')[0][0]]
        for c in colruyts
    ]
    for store, pair in zip(colruyts, colruyt_pairs):
        matrix.loc['colruyt_' + store, :] = matrix.iloc[pair, :].mean(axis=0)
    matrix = matrix.drop(colruyt_names, axis=0)

    # drop brasschaat
    if drop:
        matrix = matrix.drop('colruyt_brasschaat', axis=0, errors='ignore')

    # the summary rows are all computed on the datasets only
    base_matrix = matrix.astype(float)
    # add average metric
    matrix.loc['average_metric', :] = np.mean(base_matrix.values, axis=0)
    # add the number of wins
    matrix.loc['number_of_wins', :] = number_of_wins_overal(base_matrix)
    # add the average ranks
    a_ranks = compute_average_rank(base_matrix, threshold=0.001)
    matrix.loc['average_rank', :] = a_ranks.iloc[:, 0].values
    # return result
    return matrix

In [10]:
def _apply_strict_data_filter(data, apply_filter):
    if len(apply_filter) > 0:
        keep_ix = data.index.values
        for k, v in apply_filter.items():
            ix = []
            for i, val in enumerate(data[k].values):
                if isinstance(v, tuple) or isinstance(v, list):
                    if val in v:
                        ix.append(i)
                else:
                    if val == v:
                        ix.append(i)
            keep_ix = np.intersect1d(keep_ix, np.array(ix))
        data = data.iloc[keep_ix, :]
    return data

# Univariate

Procedure:

1. Load the data: only night/day - no NY Taxi
2. Select the best parameters for each method across the board OR set minsup etc. by hand
3. AUROC and AP per dataset

## Dataset information

In [7]:
data_root = '/home/vincentv/dtaijupiter/projects/camera_ready_PBAD/data/univariate/'
# Benchmark selection: every entry under data_root whose path contains none of the tags below.
ds_benchmarks = np.sort([
    path for path in glob(os.path.join(data_root, '*'))
    if not any(tag in path for tag in ('day', 'night', 'ds_d731', '__new_york', 'brasschaat'))
])  # 'taxi' in d or '__' in d or '_full' in d
# Alternative selection (disabled): only the '_full' colruyt datasets, without brasschaat.
#ds_benchmarks = np.sort([d for d in glob(os.path.join(data_root, '*')) if '_full' in d and 'colruyt' in d and not('brasschaat' in d)])  # 'taxi' in d or '__' in d or '_full' in d
# dataset name = last component of the path
dnames = [path.rsplit('/', 1)[-1] for path in ds_benchmarks]
print(dnames)

['ambient_temperature', 'new_york_taxi', 'request_latency']


In [23]:
# Number of datapoints per hour for each data source (loop-invariant, so defined once).
# NOTE(review): the 'new_york_taxi' entry is assumed to have the same rate as the existing
# 'new_york_taxi_good' entry - confirm. Without it the lookup below raises a KeyError for
# the 'new_york_taxi' dataset that the benchmark selection above produces.
dpss = {'colruyt': 12, 'ambient_temperature': 1, 'request_latency': 12,
        'new_york_taxi_good': 2, 'new_york_taxi': 2}

detail_columns = ['nr_datapoints', 'nr_labeled', 'nr_norm', 'nr_anom', 'dps_per_hour', 'total_time', 'origin']
detail_rows = []
for i, dn in enumerate(dnames):
    df = pd.read_csv(os.path.join(ds_benchmarks[i], 'train_data.csv'), sep=',', header=0, names=['pc1', 'label'], usecols=[1, 2])
    labels = df.label.values
    nrl = int(np.sum(labels != 0.0))    # labeled points (label differs from 0)
    nral = int(np.sum(labels == 1.0))   # points labeled 1 (counted as anomalies)
    nrnl = int(np.sum(labels == -1.0))  # points labeled -1 (counted as normals)

    is_company = 'colruyt' in dn
    dps = dpss['colruyt'] if is_company else dpss[dn]
    if not is_company:
        # for the other datasets the label counts are divided by the datapoints per hour
        nrl, nral, nrnl = nrl / dps, nral / dps, nrnl / dps
    # datapoints -> hours -> days
    tt = (df.shape[0] / dps) / 24

    detail_rows.append({
        'nr_datapoints': int(df.shape[0]),
        'nr_labeled': nrl,
        'nr_norm': nrnl,
        'nr_anom': nral,
        'dps_per_hour': dps,
        'total_time': int(tt),
        # some additional info
        'origin': 'Company' if is_company else 'Numenta',
    })

# build the frame once from the collected rows, so every column gets a matching dtype
ds_details = pd.DataFrame(detail_rows, index=dnames, columns=detail_columns)

In [24]:
# order the datasets alphabetically and display the details rounded to whole numbers
ds_details = ds_details.sort_index(ascending=True)
ds_details.round(0)

Unnamed: 0,nr_datapoints,nr_labeled,nr_norm,nr_anom,dps_per_hour,total_time,origin
__colruyt_aalst_full,292632,1217,829,388,12,1016,Company
__colruyt_aarschot_full,281485,2391,2178,213,12,977,Company
__colruyt_hasselt_full,169253,1595,1488,107,12,587,Company
__colruyt_heverlee_full,292608,1821,615,1206,12,1016,Company
__colruyt_mol_full,364032,574,504,70,12,1264,Company
__colruyt_roodebeek_full,364032,1047,815,232,12,1264,Company


In [25]:
# column means over the datasets; the non-numeric 'origin' column is excluded explicitly
ds_details.mean(axis=0, numeric_only=True)

nr_datapoints    294007.000000
nr_labeled         1440.833333
nr_norm            1071.500000
nr_anom             369.333333
dps_per_hour         12.000000
total_time         1020.666667
dtype: float64

## Load the data

In [12]:
# load all the results: every csv part in the directory, except compiled 'full_results'
# files and paths containing '__'
path = '/home/vincentv/dtaijupiter/projects/camera_ready_PBAD/results/_compiled_results/univariate_no_discretization/'
#path = '/home/vincentv/dtaijupiter/projects/pattern-outlier-detection/results/_compiled_results/univariate_results_extended/'
#path = '/home/vincentv/dtaijupiter/projects/pattern-outlier-detection/results/_compiled_results/univariate/'
result_parts = [
    f for f in glob(os.path.join(path, '*'))
    if '.csv' in f and 'full_results' not in f and '__' not in f
]
all_results = [pd.read_csv(part, sep=',', header=0, index_col=0) for part in result_parts]
results = pd.concat(all_results)
# renumber the rows 0..n-1 over the concatenated parts
results.index = np.arange(len(results))

# add the method name as an extra column: the text between '__AD_method_name_' and the next '__'
results['method'] = results.apply(
    lambda row: row['AD_settings'].rsplit('__AD_method_name_', 1)[-1].split('__', 1)[0],
    axis=1,
)

# print
print(results.shape)
results.head()

(315, 16)


Unnamed: 0,AD_settings,alphabet_size,ap,auroc,bin_size,capvalue,data_type,discretize,dname,mph,nlabels,scaler,scaling,wincrement,wsize,method
0,__AD_distance_formula_1.0__AD_exact_match_Fals...,30,0.37464,0.551638,1,0.5,univariate,False,request_latency,12,0.0,1.0,False,6.0,12.0,PBAD
1,__AD_jaccard_threshold_0.9__AD_method_name_FPO...,30,0.296453,0.493286,1,0.5,univariate,False,request_latency,12,0.0,1.0,False,6.0,12.0,FPOF
2,__AD_distance_formula_1.0__AD_exact_match_Fals...,30,0.358666,0.528736,1,0.5,univariate,False,request_latency,12,0.0,1.0,False,6.0,12.0,PBAD
3,__AD_distance_formula_1.0__AD_exact_match_Fals...,30,0.382477,0.553028,1,0.5,univariate,False,request_latency,12,0.0,1.0,False,6.0,12.0,PBAD
4,__AD_distance_formula_1.0__AD_exact_match_Fals...,30,0.397902,0.597092,1,0.5,univariate,False,request_latency,12,0.0,1.0,False,6.0,12.0,PBAD


In [13]:
# all datasets present in the results (sorted, unique)
dnames = np.unique(results['dname'].values)
print(len(dnames), dnames)

# all methods present in the results (sorted, unique)
methods = np.unique(results['method'].values)
print(methods)

17 ['ambient_temperature' 'colruyt_aalst_day' 'colruyt_aalst_night'
 'colruyt_aarschot_day' 'colruyt_aarschot_night' 'colruyt_brasschaat_day'
 'colruyt_brasschaat_night' 'colruyt_hasselt_day' 'colruyt_hasselt_night'
 'colruyt_heverlee_day' 'colruyt_heverlee_night' 'colruyt_mol_day'
 'colruyt_mol_night' 'colruyt_roodebeek_day' 'colruyt_roodebeek_night'
 'new_york_taxi' 'request_latency']
['FPOF' 'MIFPOD' 'MPAD' 'PAV' 'PBAD']


## BASELINES: optimal parameter settings by AUROC rank

In [71]:
# Rank the FPOF parameter settings by their average AUROC rank over the datasets.
settings_results, nan_methods = find_best_settings_algorithm(results, {'method': 'FPOF'})
settings_results

Unnamed: 0,avg_rank,avg_AUROC,settings
__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_closed__AD_relative_minsup_0.05__tune_False__,2.294,0.776992,settings: __AD_jaccard_threshold_0.9__AD_metho...
__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_closed__AD_relative_minsup_0.1__tune_False__,2.324,0.781047,settings: __AD_jaccard_threshold_0.9__AD_metho...
__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_closed__AD_relative_minsup_0.01__tune_False__,2.559,0.775152,settings: __AD_jaccard_threshold_0.9__AD_metho...
__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_maximal__AD_relative_minsup_0.1__tune_False__,4.088,0.620551,settings: __AD_jaccard_threshold_0.9__AD_metho...
__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_maximal__AD_relative_minsup_0.05__tune_False__,4.412,0.564989,settings: __AD_jaccard_threshold_0.9__AD_metho...
__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_maximal__AD_relative_minsup_0.01__tune_False__,5.324,0.531634,settings: __AD_jaccard_threshold_0.9__AD_metho...


In [72]:
# Same for MIFPOD. When only one setting has a result on every dataset, the function prints
# that setting and returns the AUROC matrix (datasets x settings) instead of a ranking.
settings_results, nan_methods = find_best_settings_algorithm(results, {'method': 'MIFPOD'})
settings_results

WORKING METHOD:  __AD_method_name_MIFPOD__AD_relative_minsup_0.1__tune_False__


Unnamed: 0,__AD_method_name_MIFPOD__AD_relative_minsup_0.01__tune_False__,__AD_method_name_MIFPOD__AD_relative_minsup_0.05__tune_False__,__AD_method_name_MIFPOD__AD_relative_minsup_0.1__tune_False__
ambient_temperature,0.898504,0.996947,0.995574
colruyt_aalst_day,0.510741,0.473347,0.39693
colruyt_aalst_night,,0.5,0.517182
colruyt_aarschot_day,0.5,0.336319,0.342539
colruyt_aarschot_night,,0.525231,0.487273
colruyt_brasschaat_day,0.5,0.507863,0.560052
colruyt_brasschaat_night,,0.5,0.249328
colruyt_hasselt_day,,0.5,0.565211
colruyt_hasselt_night,,0.5,0.523733
colruyt_heverlee_day,0.514879,0.579236,0.596459


## PBAD: feasible parameter settings across the entire benchmark

In [24]:
# Rank the PBAD settings that produced a result on every dataset; nan_methods lists the
# settings that miss at least one dataset and are therefore left out of the ranking.
settings_results, nan_methods = find_feasible_settings_algorithm(results, {'method': 'PBAD'})
print(nan_methods)
settings_results

Index(['__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_all__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__',
       '__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_all__AD_relative_minsup_0.01__AD_sequential_minlength_2.0__tune_False__',
       '__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_sequential__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__',
       '__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_sequential__AD_relative_minsup_0.01__AD_sequential_

Unnamed: 0,avg_rank,avg_AUROC,settings
__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_itemset__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__,2.412,0.793056,settings: __AD_distance_formula_1.0__AD_exact_...
__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_itemset__AD_relative_minsup_0.01__AD_sequential_minlength_2.0__tune_False__,2.471,0.792397,settings: __AD_distance_formula_1.0__AD_exact_...
__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_raw__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__,2.5,0.803941,settings: __AD_distance_formula_1.0__AD_exact_...
__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_raw__AD_relative_minsup_0.01__AD_sequential_minlength_2.0__tune_False__,2.618,0.803144,settings: __AD_distance_formula_1.0__AD_exact_...


## Horse-race results for each dataset - ORACLE for the baselines + feasible, fixed settings for PBAD

In [14]:
# Method selection for the horse race: display name -> filter on the columns of `results`.
# 'AD_settings': 'oracle' makes construct_results_matrix take, per dataset, the best score
# over all settings of that method; any other value is matched literally as one fixed setting.
# construct_results_matrix never applies the oracle to the display name 'PBAD'.
# The commented-out entries are alternative fixed settings.
# Note: this rebinds `methods`, which held the array of method names until here.
methods = OrderedDict([
    ('MP', {'method': 'MPAD', 'AD_settings': 'oracle'}),
    ('PAV', {'method': 'PAV', 'AD_settings': 'oracle'}),
    #('MIFPOD', {'method': 'MIFPOD', 'AD_settings': '__AD_method_name_MIFPOD__AD_relative_minsup_0.1__tune_False__'}),
    ('MIFPOD', {'method': 'MIFPOD', 'AD_settings': 'oracle'}),
    #('FPOF', {'method': 'FPOF', 'AD_settings': '__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_closed__AD_relative_minsup_0.1__tune_False__'}),
    ('FPOF', {'method': 'FPOF', 'AD_settings': 'oracle'}),
    #('PBAD', {'method': 'PBAD', 'AD_settings': '__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_all__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__'}),
    ('PBAD', {'method': 'PBAD', 'AD_settings': '__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_all__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__'}),
])

### AUROC and AP

In [15]:
# get the datasets x methods matrix with the AUROC results
matrix, oracle_settings = construct_results_matrix(results, methods, metric='auroc')

# combine the night and day of Colruyt stores (brasschaat is dropped) + add the average metric + the number of wins + the average rank
matrix = adjust_results_matrix(matrix, drop=True)

# AUROC results: round to 3 decimals
matrix.astype(float).round(3)

ORACLE selects best settings for: MPAD
ORACLE selects best settings for: PAV
ORACLE selects best settings for: MIFPOD
ORACLE selects best settings for: FPOF


Unnamed: 0,MP,PAV,MIFPOD,FPOF,PBAD
ambient_temperature,0.24,0.59,0.997,0.999,0.998
new_york_taxi,0.861,0.281,0.846,0.877,0.879
request_latency,0.599,0.608,0.467,0.493,0.553
colruyt_aalst,0.656,0.482,0.514,0.825,0.884
colruyt_aarschot,0.6,0.52,0.513,0.857,0.945
colruyt_hasselt,0.536,0.457,0.544,0.671,0.605
colruyt_heverlee,0.675,0.579,0.548,0.613,0.721
colruyt_mol,0.444,0.581,0.455,0.79,0.96
colruyt_roodebeek,0.682,0.609,0.5,0.874,0.752
average_metric,0.588,0.523,0.598,0.778,0.811


In [16]:
# average relative increase (in %) in AUROC for PBAD over the nearest competitor
auc_values = matrix.values
# the last 3 rows are the summary rows added by adjust_results_matrix; the last column is PBAD
pbad_aucs = auc_values[:-3, -1]
closest_comp = np.max(auc_values[:-3, :-1], axis=1)
one_up = 100 * (pbad_aucs - closest_comp) / closest_comp
print('Mean one-up:', np.mean(one_up), '- STD one-up:', np.std(one_up))

Mean one-up: 1.42896441608 - STD one-up: 10.621566421


In [17]:
# get the datasets x methods matrix with the AP (average precision) results
matrix, oracle_settings = construct_results_matrix(results, methods, metric='ap')

# combine the night and day of Colruyt stores (brasschaat is dropped) + add the average metric + the number of wins + the average rank
matrix = adjust_results_matrix(matrix, drop=True)

# AP results: round to 3 decimals
matrix.astype(float).round(3)

ORACLE selects best settings for: MPAD
ORACLE selects best settings for: PAV
ORACLE selects best settings for: MIFPOD
ORACLE selects best settings for: FPOF


Unnamed: 0,MP,PAV,MIFPOD,FPOF,PBAD
ambient_temperature,0.014,0.04,0.917,0.957,0.917
new_york_taxi,0.214,0.057,0.3,0.403,0.453
request_latency,0.515,0.361,0.255,0.296,0.382
colruyt_aalst,0.499,0.301,0.328,0.812,0.821
colruyt_aarschot,0.353,0.127,0.094,0.688,0.862
colruyt_hasselt,0.126,0.121,0.079,0.35,0.233
colruyt_heverlee,0.774,0.687,0.7,0.817,0.808
colruyt_mol,0.199,0.243,0.111,0.671,0.906
colruyt_roodebeek,0.578,0.431,0.228,0.692,0.551
average_metric,0.364,0.263,0.335,0.632,0.659


In [18]:
# average relative increase (in %) in AP for PBAD over the nearest competitor
# (the variable names still say "auc", but `matrix` holds the AP values at this point)
auc_values = matrix.values
# the last 3 rows are the summary rows added by adjust_results_matrix; the last column is PBAD
pbad_aucs = auc_values[:-3, -1]
closest_comp = np.max(auc_values[:-3, :-1], axis=1)
one_up = 100 * (pbad_aucs - closest_comp) / closest_comp
print('Mean one-up:', np.mean(one_up), '- STD one-up:', np.std(one_up))

Mean one-up: -1.24387275817 - STD one-up: 21.6225668001


### ECML-PKDD first submission (not camera-ready)

In [165]:
# get the datasets x methods matrix with the AUROC results
# NOTE(review): this is the same code as the AUROC cell above; the stored output of this cell
# presumably comes from an earlier run with the first-submission results and method selection
# (it lists 'MatrixProfileAD' / 'MPAD') - confirm before re-running, as that output would be lost.
matrix, oracle_settings = construct_results_matrix(results, methods, metric='auroc')

# combine the night and day of Colruyt stores (brasschaat is dropped) + add the average metric + the number of wins + the average rank
matrix = adjust_results_matrix(matrix, drop=True)

# AUROC results: round to 3 decimals
matrix.astype(float).round(3)

ORACLE selects best settings for: MatrixProfileAD
ORACLE selects best settings for: PAV
ORACLE selects best settings for: MIFPOD
ORACLE selects best settings for: FPOF


Unnamed: 0,MPAD,PAV,MIFPOD,FPOF,PBAD
ambient_temperature,0.452,0.584,0.974,0.999,0.998
new_york_taxi,0.648,0.304,0.526,0.877,0.85
request_latency,0.53,0.621,0.406,0.493,0.553
colruyt_aalst,0.72,0.767,0.442,0.826,0.942
colruyt_aarschot,0.742,0.796,0.399,0.857,0.941
colruyt_hasselt,0.47,0.469,0.53,0.677,0.842
colruyt_heverlee,0.65,0.685,0.552,0.614,0.841
colruyt_mol,0.584,0.599,0.332,0.79,0.93
colruyt_roodebeek,0.558,0.676,0.448,0.876,0.836
average_metric,0.595,0.611,0.512,0.779,0.859


## Comparing variations of PBAD

In [19]:
# PBAD variants to compare: display name -> filter on the columns of `results`.
# NOTE(review): both entries carry the exact same AD_settings string (with
# AD_exact_match_False), so 'PBAD-exact' filters on the same setting as 'PBAD'. Presumably
# 'PBAD-exact' should point at the exact-match variant of the setting - confirm against the
# settings that are available in `results`.
methods = OrderedDict([
    ('PBAD-exact', {'method': 'PBAD', 'AD_settings': '__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_all__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__'}),
    ('PBAD', {'method': 'PBAD', 'AD_settings': '__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_all__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__'}),
])

In [20]:
# get the datasets x PBAD-variants matrix with the AUROC results
matrix, oracle_settings = construct_results_matrix(results, methods, metric='auroc')

# combine the night and day of Colruyt stores (brasschaat is dropped) + add the average metric + the number of wins + the average rank
matrix = adjust_results_matrix(matrix, drop=True)

# AUROC results: round to 3 decimals
matrix.astype(float).round(3)

Unnamed: 0,PBAD-exact,PBAD
ambient_temperature,,0.998
new_york_taxi,,0.879
request_latency,,0.553
colruyt_aalst,,0.884
colruyt_aarschot,,0.945
colruyt_hasselt,,0.605
colruyt_heverlee,,0.721
colruyt_mol,,0.96
colruyt_roodebeek,,0.752
average_metric,,0.811


# Multivariate

Procedure:

1. Determine the overall best window size settings (using average rank/auroc?)
2. Determine for each method the best settings

## Load the data

In [362]:
# load all the results: every csv part in the multivariate results directory
path = '/home/vincentv/dtaijupiter/projects/camera_ready_PBAD/results/_compiled_results/multivariate/'
result_parts = [f for f in glob(os.path.join(path, '*')) if '.csv' in f]
all_results = [pd.read_csv(part, sep=',', header=0, index_col=0) for part in result_parts]
results = pd.concat(all_results)
# renumber the rows 0..n-1 over the concatenated parts
results.index = np.arange(len(results))

# add the method name as an extra column: the text between '__AD_method_name_' and the next '__'
results['method'] = results.apply(
    lambda row: row['AD_settings'].rsplit('__AD_method_name_', 1)[-1].split('__', 1)[0],
    axis=1,
)

# print
print(results.shape)
results.head()

(76, 16)


Unnamed: 0,AD_settings,PCA,alphabet_size,anom_limit,ap,auroc,bin_size,data_type,discretize,dname,ndims,nlabels,scaling,wincrement,wsize,method
0,__AD_method_name_MIFPOD__AD_relative_minsup_0....,yes,30,8,0.130911,0.774831,10,multivariate,True,lunges_vs_squats,3,0.0,False,1,10,MIFPOD
1,__AD_distance_formula_1.0__AD_exact_match_Fals...,yes,30,8,0.735934,0.935739,10,multivariate,True,lunges_vs_squats,3,0.0,False,1,10,PBAD
2,__AD_method_name_PAV__tune_False__,yes,30,8,0.110101,0.670709,10,multivariate,True,lunges_vs_squats,3,0.0,False,1,10,PAV
3,__AD_method_name_MIFPOD__AD_relative_minsup_0....,yes,30,8,0.050453,0.41619,10,multivariate,True,lunges_vs_squats,3,0.0,False,1,10,MIFPOD
4,__AD_jaccard_threshold_0.9__AD_method_name_FPO...,yes,30,8,0.053705,0.453381,10,multivariate,True,lunges_vs_squats,3,0.0,False,1,10,FPOF


## BASELINES: optimal parameter settings using an ORACLE

In [349]:
# Rank the FPOF parameter settings by their average AUROC rank over the multivariate datasets.
settings_results, nan_methods = find_best_settings_algorithm(results, {'method': 'FPOF'})
settings_results

Unnamed: 0,avg_rank,avg_AUROC,settings
__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_closed__AD_relative_minsup_0.1__tune_False__,1.125,0.924806,settings: __AD_jaccard_threshold_0.9__AD_metho...
__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_closed__AD_relative_minsup_0.05__tune_False__,1.875,0.914274,settings: __AD_jaccard_threshold_0.9__AD_metho...
__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_closed__AD_relative_minsup_0.01__tune_False__,3.0,0.885609,settings: __AD_jaccard_threshold_0.9__AD_metho...
__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_maximal__AD_relative_minsup_0.1__tune_False__,4.0,0.814039,settings: __AD_jaccard_threshold_0.9__AD_metho...
__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_maximal__AD_relative_minsup_0.05__tune_False__,5.0,0.700598,settings: __AD_jaccard_threshold_0.9__AD_metho...
__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_maximal__AD_relative_minsup_0.01__tune_False__,6.0,0.543563,settings: __AD_jaccard_threshold_0.9__AD_metho...


In [350]:
# Rank the MIFPOD parameter settings by their average AUROC rank over the multivariate datasets.
settings_results, nan_methods = find_best_settings_algorithm(results, {'method': 'MIFPOD'})
settings_results

Unnamed: 0,avg_rank,avg_AUROC,settings
__AD_method_name_MIFPOD__AD_relative_minsup_0.1__tune_False__,1.25,0.712153,settings: __AD_method_name_MIFPOD__AD_relative...
__AD_method_name_MIFPOD__AD_relative_minsup_0.05__tune_False__,1.75,0.597443,settings: __AD_method_name_MIFPOD__AD_relative...
__AD_method_name_MIFPOD__AD_relative_minsup_0.01__tune_False__,3.0,0.487896,settings: __AD_method_name_MIFPOD__AD_relative...


## PBAD: feasible parameter settings across the entire benchmark

In [351]:
# Collect the PBAD parameter configurations that are feasible on the benchmark.
# `nan_methods` is printed first (an empty Index in the recorded run); the table
# displayed afterwards lists average rank and average AUROC per settings string.
settings_results, nan_methods = find_feasible_settings_algorithm(
    results,
    dict(method='PBAD'),
)
print(nan_methods)
settings_results

Index([], dtype='object')


Unnamed: 0,avg_rank,avg_AUROC,settings
__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_all__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__,3.0,0.928666,settings: __AD_distance_formula_1.0__AD_exact_...
__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_all__AD_relative_minsup_0.01__AD_sequential_minlength_2.0__tune_False__,3.0,0.930392,settings: __AD_distance_formula_1.0__AD_exact_...
__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_raw__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__,4.625,0.794491,settings: __AD_distance_formula_1.0__AD_exact_...
__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_raw__AD_relative_minsup_0.01__AD_sequential_minlength_2.0__tune_False__,4.875,0.7922,settings: __AD_distance_formula_1.0__AD_exact_...
__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_itemset__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__,5.0,0.867023,settings: __AD_distance_formula_1.0__AD_exact_...
__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_itemset__AD_relative_minsup_0.01__AD_sequential_minlength_2.0__tune_False__,5.0,0.870018,settings: __AD_distance_formula_1.0__AD_exact_...
__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_sequential__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__,5.25,0.836588,settings: __AD_distance_formula_1.0__AD_exact_...
__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_sequential__AD_relative_minsup_0.01__AD_sequential_minlength_2.0__tune_False__,5.25,0.838254,settings: __AD_distance_formula_1.0__AD_exact_...


## Horse-race results for each dataset - ORACLE for the baselines + feasible, fixed settings for PBAD

In [363]:
# Competitors of the horse race. The keys are the column labels of the results
# tables and the insertion order is the column order.
# All baselines use the settings selected by the oracle; PBAD uses one fixed
# settings string.
#
# Fixed settings that can replace 'oracle' for a baseline:
#   MIFPOD: '__AD_method_name_MIFPOD__AD_relative_minsup_0.1__tune_False__'
#   FPOF:   '__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_closed__AD_relative_minsup_0.1__tune_False__'
methods = OrderedDict(
    [
        (label, {'method': name, 'AD_settings': 'oracle'})
        for label, name in (
            ('MP', 'MPAD'),
            ('PAV', 'PAV'),
            ('MIFPOD', 'MIFPOD'),
            ('FPOF', 'FPOF'),
        )
    ]
    + [
        ('PBAD', {
            'method': 'PBAD',
            'AD_settings': '__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_all__AD_relative_minsup_0.01__AD_sequential_minlength_2.0__tune_False__',
        }),
    ]
)

### AUROC and AP results

In [371]:
# Build the dataset-by-method table of AUROC values for the configured `methods`;
# the second return value is stored as `oracle_settings`.
matrix, oracle_settings = construct_results_matrix(results, methods, metric='auroc')

# Post-process the table: merge the night/day entries of the Colruyt stores and
# append the summary rows (average metric, number of wins, average rank).
matrix = adjust_results_matrix(matrix)

# Display the AUROC table with three decimals.
matrix.astype(float).round(decimals=3)

ORACLE selects best settings for: MPAD
ORACLE selects best settings for: PAV
ORACLE selects best settings for: MIFPOD
ORACLE selects best settings for: FPOF


Unnamed: 0,MP,PAV,MIFPOD,FPOF,PBAD
lunges_and_sidelunges_vs_squats,0.472,0.571,0.819,0.966,0.983
lunges_vs_squats,0.604,0.671,0.775,0.966,0.94
sidelunges_vs_lunges,0.471,0.425,0.804,0.864,0.907
squats_vs_sidelunges,0.484,0.504,0.482,0.903,0.914
average_metric,0.508,0.542,0.72,0.925,0.936
number_of_wins,0.0,0.0,0.0,1.0,3.0
average_rank,4.5,4.0,3.5,1.75,1.25


In [372]:
# Average relative increase (in %) of PBAD's metric over its nearest competitor,
# i.e. the best-scoring other method on each dataset.
#
# PBAD and the dataset rows are selected by LABEL instead of by position
# (previously `[:, -1]` and `[:-3]`), so the numbers stay correct if the column
# order of `matrix` changes or a different number of summary rows is appended.
summary_rows = ['average_metric', 'number_of_wins', 'average_rank']
is_dataset = ~matrix.index.isin(summary_rows)
pbad_col = matrix.columns.get_loc('PBAD')  # raises KeyError when PBAD is not in the matrix

# cast to float so the numpy reductions below also work on an object-dtype frame
auc_values = matrix.values.astype(float)
pbad_aucs = auc_values[is_dataset, pbad_col]
closest_comp = np.max(np.delete(auc_values[is_dataset], pbad_col, axis=1), axis=1)
one_up = (pbad_aucs - closest_comp) / closest_comp * 100
print('Mean one-up:', np.mean(one_up), '- STD one-up:', np.std(one_up))

Mean one-up: 1.33420657262 - STD one-up: 2.74202905783


In [373]:
# Build the dataset-by-method table of average-precision (AP) values for the
# configured `methods`; the second return value is stored as `oracle_settings`.
matrix, oracle_settings = construct_results_matrix(results, methods, metric='ap')

# Post-process the table: merge the night/day entries of the Colruyt stores and
# append the summary rows (average metric, number of wins, average rank).
matrix = adjust_results_matrix(matrix)

# Display the AP table with three decimals.
matrix.astype(float).round(decimals=3)

ORACLE selects best settings for: MPAD
ORACLE selects best settings for: PAV
ORACLE selects best settings for: MIFPOD
ORACLE selects best settings for: FPOF


Unnamed: 0,MP,PAV,MIFPOD,FPOF,PBAD
lunges_and_sidelunges_vs_squats,0.283,0.255,0.43,0.862,0.888
lunges_vs_squats,0.082,0.11,0.131,0.662,0.737
sidelunges_vs_lunges,0.128,0.115,0.444,0.572,0.573
squats_vs_sidelunges,0.094,0.092,0.087,0.391,0.707
average_metric,0.147,0.143,0.273,0.622,0.726
number_of_wins,0.0,0.0,0.0,0.0,4.0
average_rank,4.0,4.5,3.5,2.0,1.0


In [374]:
# Average relative increase (in %) of PBAD's metric over its nearest competitor,
# i.e. the best-scoring other method on each dataset.
# NOTE: `matrix` holds AP values in this cell; the `auc`-style variable names are
# kept only so that code relying on them keeps working.
#
# PBAD and the dataset rows are selected by LABEL instead of by position
# (previously `[:, -1]` and `[:-3]`), so the numbers stay correct if the column
# order of `matrix` changes or a different number of summary rows is appended.
summary_rows = ['average_metric', 'number_of_wins', 'average_rank']
is_dataset = ~matrix.index.isin(summary_rows)
pbad_col = matrix.columns.get_loc('PBAD')  # raises KeyError when PBAD is not in the matrix

# cast to float so the numpy reductions below also work on an object-dtype frame
auc_values = matrix.values.astype(float)
pbad_aucs = auc_values[is_dataset, pbad_col]
closest_comp = np.max(np.delete(auc_values[is_dataset], pbad_col, axis=1), axis=1)
one_up = (pbad_aucs - closest_comp) / closest_comp * 100
print('Mean one-up:', np.mean(one_up), '- STD one-up:', np.std(one_up))

Mean one-up: 23.801538179 - STD one-up: 33.1618136125


# Test

In [21]:
# Load all compiled result files of one experiment folder into a single DataFrame.
# NOTE(review): machine-specific absolute path; point it at the local results folder.
path = '/home/vincentv/dtaijupiter/projects/camera_ready_PBAD/results/_compiled_results/univariate_no_discretization/'
#path = '/home/vincentv/dtaijupiter/projects/pattern-outlier-detection/results/_compiled_results/univariate_results_extended/'
#path = '/home/vincentv/dtaijupiter/projects/pattern-outlier-detection/results/_compiled_results/univariate/'

# Keep only real *.csv files and apply the exclusion rules to the file NAME:
# testing the full path would drop every file as soon as a parent directory
# contains '__' or 'full_results'. The list is sorted because glob returns
# matches in arbitrary order, which would make the row order irreproducible.
result_parts = sorted(
    f for f in glob(os.path.join(path, '*.csv'))
    if not ('full_results' in os.path.basename(f) or '__' in os.path.basename(f))
)
if not result_parts:
    # pd.concat would raise a ValueError as well, but without naming the folder
    raise ValueError('No result files (*.csv) found in: ' + path)

all_results = [pd.read_csv(f, sep=',', header=0, index_col=0) for f in result_parts]
# ignore_index renumbers the rows 0..n-1 across all parts
results = pd.concat(all_results, ignore_index=True)

# add the method name as an extra column: the token following
# '__AD_method_name_' in the settings string, up to the next '__'
results['method'] = results['AD_settings'].map(
    lambda settings: settings.split('__AD_method_name_')[-1].split('__')[0]
)

# print
print(results.shape)
results.head()

(315, 16)


Unnamed: 0,AD_settings,alphabet_size,ap,auroc,bin_size,capvalue,data_type,discretize,dname,mph,nlabels,scaler,scaling,wincrement,wsize,method
0,__AD_distance_formula_1.0__AD_exact_match_Fals...,30,0.37464,0.551638,1,0.5,univariate,False,request_latency,12,0.0,1.0,False,6.0,12.0,PBAD
1,__AD_jaccard_threshold_0.9__AD_method_name_FPO...,30,0.296453,0.493286,1,0.5,univariate,False,request_latency,12,0.0,1.0,False,6.0,12.0,FPOF
2,__AD_distance_formula_1.0__AD_exact_match_Fals...,30,0.358666,0.528736,1,0.5,univariate,False,request_latency,12,0.0,1.0,False,6.0,12.0,PBAD
3,__AD_distance_formula_1.0__AD_exact_match_Fals...,30,0.382477,0.553028,1,0.5,univariate,False,request_latency,12,0.0,1.0,False,6.0,12.0,PBAD
4,__AD_distance_formula_1.0__AD_exact_match_Fals...,30,0.397902,0.597092,1,0.5,univariate,False,request_latency,12,0.0,1.0,False,6.0,12.0,PBAD


In [32]:
# Competitors for the univariate test run. The keys are the column labels of the
# results table and the insertion order is the column order.
# All baselines use the settings selected by the oracle; PBAD uses one fixed
# settings string.
#
# Fixed settings that can replace 'oracle' for a baseline:
#   MIFPOD: '__AD_method_name_MIFPOD__AD_relative_minsup_0.1__tune_False__'
#   FPOF:   '__AD_jaccard_threshold_0.9__AD_method_name_FPOF__AD_pattern_pruning_closed__AD_relative_minsup_0.1__tune_False__'
# Alternative PBAD settings string (without the distance_formula part):
#   '__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_all__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__'
methods = OrderedDict(
    [
        (label, {'method': name, 'AD_settings': 'oracle'})
        for label, name in (
            ('MP', 'MPAD'),
            ('PAV', 'PAV'),
            ('MIFPOD', 'MIFPOD'),
            ('FPOF', 'FPOF'),
        )
    ]
    + [
        ('PBAD', {
            'method': 'PBAD',
            'AD_settings': '__AD_distance_formula_1.0__AD_exact_match_False__AD_jaccard_threshold_0.9__AD_method_name_PBAD__AD_pattern_match_discrete_False__AD_pattern_pruning_closed__AD_pattern_type_all__AD_relative_minsup_0.01__AD_sequential_minlength_1.0__tune_False__',
        }),
    ]
)

In [33]:
# Build the dataset-by-method table of AUROC values for the configured `methods`;
# the second return value is stored as `oracle_settings`.
matrix, oracle_settings = construct_results_matrix(results, methods, metric='auroc')

# Post-process the table: merge the night/day entries of the Colruyt stores and
# append the summary rows.
# NOTE(review): `drop=True` presumably removes the separate night/day rows after
# merging; confirm against adjust_results_matrix.
matrix = adjust_results_matrix(matrix, drop=True)

# Display the AUROC table with three decimals.
matrix.astype(float).round(decimals=3)

ORACLE selects best settings for: MPAD
ORACLE selects best settings for: PAV
ORACLE selects best settings for: MIFPOD
ORACLE selects best settings for: FPOF


Unnamed: 0,MP,PAV,MIFPOD,FPOF,PBAD
ambient_temperature,0.24,0.59,0.997,0.999,0.998
new_york_taxi,0.861,0.281,0.846,0.877,0.879
request_latency,0.599,0.608,0.467,0.493,0.553
colruyt_aalst,0.656,0.482,0.514,0.825,0.884
colruyt_aarschot,0.6,0.52,0.513,0.857,0.945
colruyt_hasselt,0.536,0.457,0.544,0.671,0.605
colruyt_heverlee,0.675,0.579,0.548,0.613,0.721
colruyt_mol,0.444,0.581,0.455,0.79,0.96
colruyt_roodebeek,0.682,0.609,0.5,0.874,0.752
average_metric,0.588,0.523,0.598,0.778,0.811
