# Result table

This notebook generates the result table shown in the paper, together with the related numbers quoted in the text.

## Imports and config

In [1]:
import pandas as pd 
import numpy as np
from pathlib import Path
from ODD.analysis.dataset_selection import get_datasets_to_use
from ODD.analysis.hyperparameters import select_peak_performance, select_best_average_performance, calculate_best_average_performance, calculate_validation_set_performances
from ODD.analysis.result_processing import average_performance_per_method, average_aligned_ranks_with_versions, average_ranks_with_versions, average_ranks_with_versions_and_nemenyi
from ODD.analysis.result_analysis_charts import *
from tqdm import tqdm
import altair as alt 
alt.data_transformers.disable_max_rows()
# alt.renderers.enable('png')
from collections import defaultdict


In [2]:
# Reload imported modules automatically before each cell is executed, so that
# edits to local source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# NOTE(review): VERSION and `algorithms` are not referenced by the cells
# visible in this part of the notebook; presumably used further down or kept
# for reference -- confirm before removing.
VERSION = 1
algorithms = ['CBLOF', 'HBOS', 'IForest', 'KNN', 'LOF', 'OCSVM']

# Grid version per algorithm; any algorithm not listed here falls back to 1.
grid_versions_to_use = defaultdict(
    lambda: 1,
    {'HBOS': 2, 'CBLOF': 2, 'IForest': 2, 'OCSVM': 3},
)

# All paths are relative to the current working directory.
result_path = Path('results')
processed_path = Path('processed_results_v5')
comparison_path = Path('comparisons')

## Helper functions

In [4]:
def compare_algorithms_under_evaluation_setting(result_df, alpha = 0.05):
    """Summarise average performance and average ranks per algorithm.

    Parameters
    ----------
    result_df : pandas.DataFrame
        Results for one evaluation setting. A column named ``algorithm_name``
        is renamed to ``algo_name`` before the helpers are called.
    alpha : float, default 0.05
        Passed on to ``average_ranks_with_versions_and_nemenyi``
        (presumably the significance level of the Nemenyi test -- confirm
        against that helper).

    Returns
    -------
    pandas.DataFrame
        Column-wise concatenation of the two helper results, restricted to
        and ordered as ``avg_auc``, ``auc_ranks``, ``avg_ap``, ``ap_ranks``.
    """
    renamed_df = result_df.rename(columns = {'algorithm_name': 'algo_name'})

    mean_scores = average_performance_per_method(renamed_df)
    # The helper also returns a critical distance; it is not used here.
    mean_ranks, _critical_distance = average_ranks_with_versions_and_nemenyi(renamed_df, alpha)

    column_order = ['avg_auc', 'auc_ranks', 'avg_ap', 'ap_ranks']
    combined = pd.concat([mean_scores, mean_ranks], axis = 1)
    return combined.reindex(columns = column_order)

In [5]:
def generate_result_table(
    comparison,
    size_to_use,
    alpha = 0.05,
):
    """Build the per-algorithm result table for one stored comparison.

    Reads ``comparison_path/<comparison>/comparison.csv``, summarises every
    ``reference`` group with ``compare_algorithms_under_evaluation_setting``
    and pivots the result so that rows are algorithms and columns are
    ``(reference, metric)`` pairs.

    Parameters
    ----------
    comparison : str
        Name of the sub-directory of ``comparison_path`` holding
        ``comparison.csv``.
    size_to_use : float
        Rows with ``reference == 'tuned'`` are kept only when their ``size``
        column equals this value; rows of every other reference are kept
        regardless of size.
    alpha : float, default 0.05
        Forwarded to ``compare_algorithms_under_evaluation_setting``.
        (Previously this argument was accepted but silently ignored.)

    Returns
    -------
    pandas.DataFrame
        Indexed by ``algo_name`` with two column levels
        (``reference``, ``metric``).
    """
    comparison_df = pd.read_csv(comparison_path/comparison/'comparison.csv')

    # Only the 'tuned' reference is filtered by size.
    keep_row = (comparison_df.reference != 'tuned') | (comparison_df['size'] == size_to_use)
    comparison_df = comparison_df[keep_row]

    all_comparison_df = (
        comparison_df
        .groupby('reference')
        # Extra keyword arguments of apply are handed to the applied function.
        .apply(compare_algorithms_under_evaluation_setting, alpha = alpha)
        .rename_axis(columns = 'metric')
    )
    temp_df = all_comparison_df.stack().to_frame('value').reset_index()
    return pd.pivot_table(temp_df, index = 'algo_name', columns = ['reference', 'metric'], values ='value')
   

In [7]:
def to_latex(result_table, critical_dist = None, index = True, opmaak = True):
    table = result_table.round(2).astype('str')
    def opmaak_f(column):
        if 'algo' in column.name[1]: 
            return column
        numbers = column.astype(float)
        if 'avg' in column.name[1]:
            max_value = numbers.max()
            best_algorithms = column.index[numbers == max_value]
            column[best_algorithms] = "\\tu{"+column[best_algorithms] + '}'
            return column
        else:
    #         critical_diff = differences.squeeze()[column.name[0]]

            max_value = numbers.min()
            best_algorithms = column.index[numbers == max_value]
            column[best_algorithms] = "\\tu{"+column[best_algorithms] + '}'
            if critical_dist is not None:
                close_algos = column.index[(numbers-max_value) <= critical_dist]
                column[close_algos] = "\\tb{"+column[close_algos]+'}'
            return column
    if opmaak:
        table = table.apply(opmaak_f, axis = 0)
    print(table.to_latex(escape = False, index = index))

    

## Intermediate result table 

In [9]:
# 0.05 is the `size_to_use` filter for the 'tuned' reference (not alpha).
result_table = generate_result_table(
    comparison = 'statistical_validation_set_size',
    size_to_use = 0.05,
)
result_table

reference,best-default,best-default,best-default,best-default,out-of-the-box,out-of-the-box,out-of-the-box,out-of-the-box,peak,peak,peak,peak,tuned,tuned,tuned,tuned
metric,ap_ranks,auc_ranks,avg_ap,avg_auc,ap_ranks,auc_ranks,avg_ap,avg_auc,ap_ranks,auc_ranks,avg_ap,avg_auc,ap_ranks,auc_ranks,avg_ap,avg_auc
algo_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
CBLOF,2.9375,3.25,0.425568,0.829088,3.40625,3.21875,0.405667,0.803797,2.59375,3.03125,0.475399,0.867125,3.0625,3.9375,0.385028,0.799546
HBOS,3.65625,3.90625,0.361452,0.800956,3.375,3.8125,0.345693,0.799944,4.34375,4.59375,0.394601,0.829418,3.25,3.5625,0.366921,0.814782
IForest,3.25,2.75,0.395585,0.839045,3.15625,2.65625,0.390331,0.821566,2.90625,2.71875,0.47583,0.880479,2.875,2.4375,0.410835,0.851182
KNN,3.46875,3.34375,0.390402,0.83291,2.65625,2.90625,0.415819,0.823568,3.25,3.875,0.459913,0.864912,2.5625,2.5,0.400127,0.843109
LOF,3.65625,3.59375,0.375329,0.811393,4.5,4.125,0.280679,0.767826,3.5,3.5,0.436899,0.867635,4.3125,3.8125,0.332606,0.816843
OCSVM,4.03125,4.15625,0.372914,0.797,3.90625,4.28125,0.365479,0.767115,4.40625,3.28125,0.423161,0.880086,4.9375,4.75,0.283948,0.750981


In [10]:
# Keep only the AUC columns, then fix the order of the settings (level 0)
# and of the metrics within each setting (level 1).
small_result_table = (
    result_table
    .drop(columns = ['avg_ap', 'ap_ranks'], level = 1)
    .reindex(columns = ['peak', 'best-default', 'out-of-the-box', 'tuned'], level = 0)
    .reindex(columns = ['avg_auc', 'auc_ranks'], level = 1)
)
small_result_table

reference,peak,peak,best-default,best-default,out-of-the-box,out-of-the-box,tuned,tuned
metric,avg_auc,auc_ranks,avg_auc,auc_ranks,avg_auc,auc_ranks,avg_auc,auc_ranks
algo_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
CBLOF,0.867125,3.03125,0.829088,3.25,0.803797,3.21875,0.799546,3.9375
HBOS,0.829418,4.59375,0.800956,3.90625,0.799944,3.8125,0.814782,3.5625
IForest,0.880479,2.71875,0.839045,2.75,0.821566,2.65625,0.851182,2.4375
KNN,0.864912,3.875,0.83291,3.34375,0.823568,2.90625,0.843109,2.5
LOF,0.867635,3.5,0.811393,3.59375,0.767826,4.125,0.816843,3.8125
OCSVM,0.880086,3.28125,0.797,4.15625,0.767115,4.28125,0.750981,4.75


### Relative improvement of peak performance over out-of-the-box performance 

In [11]:
# Ratio of peak to out-of-the-box average AUC per algorithm, largest first.
rel_improvement = (
    small_result_table.loc[:, ('peak', 'avg_auc')]
    / small_result_table.loc[:, ('out-of-the-box', 'avg_auc')]
).sort_values(ascending = False)
# Convert to a percentage gain. Subtract the 100% baseline *before* rounding
# so the stored values are cleanly rounded to one decimal (rounding first and
# subtracting afterwards reintroduces floating point noise).
rel_improvement_proc = (rel_improvement*100 - 100).round(1)
rel_improvement_proc

algo_name
OCSVM      14.7
LOF        13.0
CBLOF       7.9
IForest     7.2
KNN         5.0
HBOS        3.7
dtype: float64

#### Average relative improvement

In [12]:
# Mean over algorithms of the peak / out-of-the-box average-AUC ratio,
# reported as a percentage gain rounded to one decimal.
rel_improv = (
    small_result_table.loc[:, ('peak', 'avg_auc')]
    .div(small_result_table.loc[:, ('out-of-the-box', 'avg_auc')])
    .mean()
)
round(rel_improv*100 - 100.0, 1)

8.6

### Relative improvement of tuned over out-of-the-box performance

In [13]:
# Ratio of tuned to out-of-the-box average AUC per algorithm, largest first.
rel_improvement = (
    small_result_table.loc[:, ('tuned', 'avg_auc')]
    / small_result_table.loc[:, ('out-of-the-box', 'avg_auc')]
).sort_values(ascending = False)
# Convert to a percentage gain. Subtract the 100% baseline *before* rounding
# so the stored values are cleanly rounded to one decimal (rounding first and
# subtracting afterwards reintroduces floating point noise).
rel_improvement_proc = (rel_improvement*100 - 100).round(1)
rel_improvement_proc

algo_name
LOF        6.4
IForest    3.6
KNN        2.4
HBOS       1.9
CBLOF     -0.5
OCSVM     -2.1
dtype: float64

## Final table layout of the paper

In [15]:
# Settings in the column order of `small_result_table`.
names = small_result_table.columns.get_level_values(0).unique()

# One sub-table per setting, selected explicitly by name. Iterating over
# `names` for both the tables and the concat keys guarantees that every block
# of the final table is labelled with the setting its numbers come from;
# relying on the iteration order of a column-wise groupby does not (groupby
# may sort its groups), and groupby(axis=1) is deprecated in recent pandas.
seperate_tables = {
    name: small_result_table.xs(name, axis = 1, level = 0)
    for name in names
}

# Within each setting, list the algorithms from best to worst AUC rank.
tables = [
    seperate_tables[name]
    .reset_index()
    .sort_values('auc_ranks')
    .reset_index(drop = True)
    for name in names
]
sorted_small_result_table = pd.concat(tables, axis = 1, keys = names)
sorted_small_result_table

reference,peak,peak,peak,best-default,best-default,best-default,out-of-the-box,out-of-the-box,out-of-the-box,tuned,tuned,tuned
metric,algo_name,avg_auc,auc_ranks,algo_name,avg_auc,auc_ranks,algo_name,avg_auc,auc_ranks,algo_name,avg_auc,auc_ranks
0,IForest,0.880479,2.71875,IForest,0.839045,2.75,IForest,0.821566,2.65625,IForest,0.851182,2.4375
1,CBLOF,0.867125,3.03125,CBLOF,0.829088,3.25,KNN,0.823568,2.90625,KNN,0.843109,2.5
2,OCSVM,0.880086,3.28125,KNN,0.83291,3.34375,CBLOF,0.803797,3.21875,HBOS,0.814782,3.5625
3,LOF,0.867635,3.5,LOF,0.811393,3.59375,HBOS,0.799944,3.8125,LOF,0.816843,3.8125
4,KNN,0.864912,3.875,HBOS,0.800956,3.90625,LOF,0.767826,4.125,CBLOF,0.799546,3.9375
5,HBOS,0.829418,4.59375,OCSVM,0.797,4.15625,OCSVM,0.767115,4.28125,OCSVM,0.750981,4.75


In [18]:
# Print the LaTeX source of the sorted table, without the row index and
# without any highlighting (opmaak = False).
to_latex(sorted_small_result_table, index = False, opmaak = False)

\begin{tabular}{llllllllllll}
\toprule
     peak & \multicolumn{3}{l}{best-default} & \multicolumn{3}{l}{out-of-the-box} & \multicolumn{3}{l}{tuned} \\
algo_name & avg_auc & auc_ranks &    algo_name & avg_auc & auc_ranks &      algo_name & avg_auc & auc_ranks & algo_name & avg_auc & auc_ranks \\
\midrule
  IForest &    0.88 &      2.72 &      IForest &    0.84 &      2.75 &        IForest &    0.82 &      2.66 &   IForest &    0.85 &      2.44 \\
    CBLOF &    0.87 &      3.03 &        CBLOF &    0.83 &      3.25 &            KNN &    0.82 &      2.91 &       KNN &    0.84 &       2.5 \\
    OCSVM &    0.88 &      3.28 &          KNN &    0.83 &      3.34 &          CBLOF &     0.8 &      3.22 &      HBOS &    0.81 &      3.56 \\
      LOF &    0.87 &       3.5 &          LOF &    0.81 &      3.59 &           HBOS &     0.8 &      3.81 &       LOF &    0.82 &      3.81 \\
      KNN &    0.86 &      3.88 &         HBOS &     0.8 &      3.91 &            LOF &    0.77 &      4.12 &     