In [2]:
import pandas as pd 
import numpy as np
from pathlib import Path
from ODD.analysis.dataset_selection import get_datasets_to_use
from ODD.analysis.hyperparameters import select_peak_performance, select_best_average_performance, calculate_best_average_performance, calculate_validation_set_performances
from ODD.analysis.result_processing import average_performance_per_method, average_aligned_ranks_with_versions, average_ranks_with_versions, average_ranks_with_versions_and_nemenyi
from ODD.analysis.result_analysis_charts import *
from tqdm import tqdm
import altair as alt 
alt.data_transformers.disable_max_rows()
# alt.renderers.enable('png')
from collections import defaultdict


In [3]:
%load_ext autoreload
%autoreload 2

# Config

In [4]:
VERSION = 1 
algorithms = ['CBLOF', 'HBOS', 'IForest', 'KNN' , 'LOF', 'OCSVM']
grid_versions_to_use = defaultdict(lambda: 1)
grid_versions_to_use['HBOS'] = 2
grid_versions_to_use['CBLOF'] = 2
grid_versions_to_use['IForest'] = 2 

result_path = Path()/'results'
processed_path = Path()/'processed_results_v4'

# Single function to generate a result table

In [5]:
def compare_algorithms_under_evaluation_setting(evaluation_setting_path, datasets_to_use, alpha, validation_reference, validation_df = None):
    # read the correct results 
#     try: 
    print(evaluation_setting_path)
    relevant_dataframes = [pd.read_pickle(evaluation_setting_path / f"{algo}.pkl") for algo in algorithms]
#     except: 
#         relevant_dataframes = [pd.read_csv(evaluation_setting_path / f"{algo}.csv") for algo in algorithms]
        
    all_results = pd.concat(relevant_dataframes, axis = 0, keys = algorithms, names = ['algorithm_name']).droplevel(1).reset_index()
    
    # only keep the results for the correct datasets 
    filtered_results= all_results.groupby(['dataset_id', 'anomaly_fraction']).filter(lambda x: x.name in datasets_to_use.index)
    
    # drop all columns that might cause problems 
    filtered_results = filtered_results.drop(columns = ['auc', 'ap', 'validation_indices', 'test_auc', 'test_ap'], errors = 'ignore')
    
    # calculate performance on test set 
    filtered_results = calculate_validation_set_performances(filtered_results, validation_df)
        
    # rename test_auc to auc 
    filtered_results = filtered_results.rename(columns = {'test_auc': 'auc', 'test_ap':'ap'})
    
    filtered_results = filtered_results[['algorithm_name', 'dataset_id', 'auc', 'ap']]
    filtered_results = filtered_results.rename(columns = {'algorithm_name':'algo_name'})
    
    avg_performance = average_performance_per_method(filtered_results)
#     avg_aligned_ranks = average_aligned_ranks_with_versions(filtered_results)
    avg_ranks, critical_distance = average_ranks_with_versions_and_nemenyi(filtered_results, alpha)
    performance_df = pd.concat([avg_performance, avg_ranks], axis = 1).reindex(columns = ['avg_auc', 'auc_ranks', 'avg_ap', 'ap_ranks'])
    return performance_df, critical_distance

In [6]:
def generate_result_table(
    evaluation_settings,  
    reference_for_validation_set = None, 
    datasets_to_use = None,
    alpha = 0.05,
):  
    print(f'Doing comparison based on {len(datasets_to_use)} datasets')
    
    # calculate the best average performance if necessary (could be simplified now that datasets are fixed)
    if 'best_average_performance' in evaluation_settings: 
        print('calculating best average performance')
        calculate_best_average_performance(Path()/'results', algorithms, grid_versions_to_use, datasets_to_use, processed_path/'best_average_performance')
    
    # read the validation sets to use
    validation_df = None
    if reference_for_validation_set is not None: 
        try: 
            validation_df = pd.read_pickle(processed_path/reference_for_validation_set/'reference_validation.pkl')
        except: 
            validation_df = pd.read_pickle(processed_path/reference_for_validation_set/'validation_set.pkl')
    
    # do all the comparisons
    all_comparisons = [
        compare_algorithms_under_evaluation_setting(
            processed_path/evaluation_setting, 
            datasets_to_use, 
            alpha, 
            validation_reference = reference_for_validation_set,
            validation_df = validation_df) 
        for evaluation_setting in evaluation_settings]
    
    # make them into a single result table 
    all_result_table = pd.concat([tup[0] for tup in all_comparisons], axis = 1, keys = evaluation_settings)
    
    # critical distances should be the same so just pick one 
    critical_distances = all_comparisons[0][1]
    assert all(comparison[1] == critical_distances for comparison in all_comparisons)
    
    return all_result_table, critical_distances

In [7]:
def to_latex(result_table, critical_dist = None, index = True, opmaak = True):
    table = result_table.round(2).astype('str')
    def opmaak_f(column):
        if 'algo' in column.name[1]: 
            return column
        numbers = column.astype(float)
        if 'avg' in column.name[1]:
            max_value = numbers.max()
            best_algorithms = column.index[numbers == max_value]
            column[best_algorithms] = "\\tu{"+column[best_algorithms] + '}'
            return column
        else:
    #         critical_diff = differences.squeeze()[column.name[0]]

            max_value = numbers.min()
            best_algorithms = column.index[numbers == max_value]
            column[best_algorithms] = "\\tu{"+column[best_algorithms] + '}'
            if critical_dist is not None:
                close_algos = column.index[(numbers-max_value) <= critical_dist]
                column[close_algos] = "\\tb{"+column[close_algos]+'}'
            return column
    if opmaak:
        table = table.apply(opmaak_f, axis = 0)
    print(table.to_latex(escape = False, index = index))

    

# Contamination 5, validation set 10%

In [8]:
datasets_to_use = pd.read_pickle("used_datasets/used_stat_datasets.pkl")
datasets_to_use;

In [44]:
# evaluation_setting_paths = [processed_path/'peak_performance', processed_path]
result_table, critical_distance = \
    generate_result_table(
    ['peak_performance', 'best_average_performance',  'default_performance','statistical_validation_set/p=0.05'],
    reference_for_validation_set = 'statistical_validation_set/p=0.05',
    alpha =0.05, 
    datasets_to_use = datasets_to_use,
)
result_table

Doing comparison based on 16 datasets
calculating best average performance
processed_results_v4/peak_performance
processed_results_v4/best_average_performance
processed_results_v4/default_performance
processed_results_v4/statistical_validation_set/p=0.05


Unnamed: 0_level_0,peak_performance,peak_performance,peak_performance,peak_performance,best_average_performance,best_average_performance,best_average_performance,best_average_performance,default_performance,default_performance,default_performance,default_performance,statistical_validation_set/p=0.05,statistical_validation_set/p=0.05,statistical_validation_set/p=0.05,statistical_validation_set/p=0.05
Unnamed: 0_level_1,avg_auc,auc_ranks,avg_ap,ap_ranks,avg_auc,auc_ranks,avg_ap,ap_ranks,avg_auc,auc_ranks,avg_ap,ap_ranks,avg_auc,auc_ranks,avg_ap,ap_ranks
algo_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
CBLOF,0.867319,2.59375,0.477313,2.40625,0.828283,3.125,0.421778,2.8125,0.802025,3.46875,0.401122,3.28125,0.760219,3.5,0.405515,3.0
HBOS,0.829871,4.4375,0.385925,4.34375,0.803009,3.8125,0.351154,3.59375,0.803923,3.5,0.333555,3.3125,0.823403,3.4375,0.357882,3.75
IForest,0.879065,2.3125,0.469862,2.78125,0.837595,2.875,0.377955,3.5,0.819594,2.84375,0.381293,3.28125,0.846745,2.84375,0.402079,3.0
KNN,0.863933,3.5625,0.457628,3.0,0.831408,3.625,0.385413,3.40625,0.822471,2.75,0.406915,2.71875,0.84394,2.875,0.375934,3.0
LOF,0.868489,3.1875,0.434999,3.4375,0.812255,3.5625,0.368009,3.71875,0.764854,4.125,0.279457,4.3125,0.823282,3.03125,0.343242,3.3125
OCSVM,0.812963,4.90625,0.38642,5.03125,0.797266,4.0,0.368591,3.96875,0.767773,4.3125,0.357137,4.09375,0.702734,5.3125,0.27029,4.9375


In [45]:
small_result_table = result_table.drop(['avg_ap', 'ap_ranks'], axis = 1, level =1)
small_result_table

Unnamed: 0_level_0,peak_performance,peak_performance,best_average_performance,best_average_performance,default_performance,default_performance,statistical_validation_set/p=0.05,statistical_validation_set/p=0.05
Unnamed: 0_level_1,avg_auc,auc_ranks,avg_auc,auc_ranks,avg_auc,auc_ranks,avg_auc,auc_ranks
algo_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
CBLOF,0.867319,2.59375,0.828283,3.125,0.802025,3.46875,0.760219,3.5
HBOS,0.829871,4.4375,0.803009,3.8125,0.803923,3.5,0.823403,3.4375
IForest,0.879065,2.3125,0.837595,2.875,0.819594,2.84375,0.846745,2.84375
KNN,0.863933,3.5625,0.831408,3.625,0.822471,2.75,0.84394,2.875
LOF,0.868489,3.1875,0.812255,3.5625,0.764854,4.125,0.823282,3.03125
OCSVM,0.812963,4.90625,0.797266,4.0,0.767773,4.3125,0.702734,5.3125


In [46]:
to_latex(small_result_table)

\begin{tabular}{lllllllll}
\toprule
{} & \multicolumn{2}{l}{peak_performance} & \multicolumn{2}{l}{best_average_performance} & \multicolumn{2}{l}{default_performance} & \multicolumn{2}{l}{statistical_validation_set/p=0.05} \\
{} &          avg_auc &  auc_ranks &                  avg_auc &  auc_ranks &             avg_auc &  auc_ranks &                           avg_auc &  auc_ranks \\
algo_name &                  &            &                          &            &                     &            &                                   &            \\
\midrule
CBLOF     &             0.87 &       2.59 &                     0.83 &       3.12 &                 0.8 &       3.47 &                              0.76 &        3.5 \\
HBOS      &             0.83 &       4.44 &                      0.8 &       3.81 &                 0.8 &        3.5 &                              0.82 &       3.44 \\
IForest   &        \tu{0.88} &  \tu{2.31} &                \tu{0.84} &  \tu{2.88} &           \t

In [47]:
seperate_tables = (
    small_result_table
    .rename_axis(columns = ['setting', 'metric'])
    .groupby('setting', axis = 1)
)
names = small_result_table.columns.get_level_values(0).unique()
tables = [table.droplevel(0, axis =1).reset_index().sort_values('auc_ranks').reset_index(drop = True) for name, table in seperate_tables]
sorted_small_result_table = pd.concat(tables, axis = 1,keys = names)
sorted_small_result_table

Unnamed: 0_level_0,peak_performance,peak_performance,peak_performance,best_average_performance,best_average_performance,best_average_performance,default_performance,default_performance,default_performance,statistical_validation_set/p=0.05,statistical_validation_set/p=0.05,statistical_validation_set/p=0.05
metric,algo_name,avg_auc,auc_ranks,algo_name,avg_auc,auc_ranks,algo_name,avg_auc,auc_ranks,algo_name,avg_auc,auc_ranks
0,IForest,0.879065,2.3125,IForest,0.837595,2.875,KNN,0.822471,2.75,IForest,0.846745,2.84375
1,CBLOF,0.867319,2.59375,CBLOF,0.828283,3.125,IForest,0.819594,2.84375,KNN,0.84394,2.875
2,LOF,0.868489,3.1875,LOF,0.812255,3.5625,CBLOF,0.802025,3.46875,LOF,0.823282,3.03125
3,KNN,0.863933,3.5625,KNN,0.831408,3.625,HBOS,0.803923,3.5,HBOS,0.823403,3.4375
4,HBOS,0.829871,4.4375,HBOS,0.803009,3.8125,LOF,0.764854,4.125,CBLOF,0.760219,3.5
5,OCSVM,0.812963,4.90625,OCSVM,0.797266,4.0,OCSVM,0.767773,4.3125,OCSVM,0.702734,5.3125


In [48]:
(sorted_small_result_table.loc[:, ('peak_performance', 'avg_auc')] / sorted_small_result_table.loc[:, ('default_performance', 'avg_auc')]).mean()

1.0714034100911256

In [1]:
(sorted_small_result_table.loc[:, ('statistical_validation_set/p=0.05', 'avg_auc')] / sorted_small_result_table.loc[:, ('default_performance', 'avg_auc')]).mean()

NameError: name 'sorted_small_result_table' is not defined

In [49]:
to_latex(sorted_small_result_table, index = False, opmaak = False)

\begin{tabular}{llllllllllll}
\toprule
peak_performance & \multicolumn{3}{l}{best_average_performance} & \multicolumn{3}{l}{default_performance} & \multicolumn{3}{l}{statistical_validation_set/p=0.05} \\
       algo_name & avg_auc & auc_ranks &                algo_name & avg_auc & auc_ranks &           algo_name & avg_auc & auc_ranks &                         algo_name & avg_auc & auc_ranks \\
\midrule
         IForest &    0.88 &      2.31 &                  IForest &    0.84 &      2.88 &                 KNN &    0.82 &      2.75 &                           IForest &    0.85 &      2.84 \\
           CBLOF &    0.87 &      2.59 &                    CBLOF &    0.83 &      3.12 &             IForest &    0.82 &      2.84 &                               KNN &    0.84 &      2.88 \\
             LOF &    0.87 &      3.19 &                      LOF &    0.81 &      3.56 &               CBLOF &     0.8 &      3.47 &                               LOF &    0.82 &      3.03 \\
             KN

In [134]:
print(critical_distance)

1.8850978091335209


# Contamination 5, validation set 20% 

In [64]:
original_contamination, critical_distance = generate_result_table(['peak_performance_v1', 'best_average_performance_v1','default_performance_v1',  'tuned_performance_stat10', 'tuned_performance_stat5'], 'contamination5')
original_contamination

Unnamed: 0_level_0,peak_performance_v1,peak_performance_v1,peak_performance_v1,peak_performance_v1,best_average_performance_v1,best_average_performance_v1,best_average_performance_v1,best_average_performance_v1,default_performance_v1,default_performance_v1,default_performance_v1,default_performance_v1,tuned_performance_stat10,tuned_performance_stat10,tuned_performance_stat10,tuned_performance_stat10,tuned_performance_stat5,tuned_performance_stat5,tuned_performance_stat5,tuned_performance_stat5
Unnamed: 0_level_1,avg_auc,avg_ap,auc_ranks,ap_ranks,avg_auc,avg_ap,auc_ranks,ap_ranks,avg_auc,avg_ap,auc_ranks,ap_ranks,avg_auc,avg_ap,auc_ranks,ap_ranks,avg_auc,avg_ap,auc_ranks,ap_ranks
algo_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
CBLOF,0.878829,0.492853,1.934783,1.891304,0.803165,0.369213,3.043478,3.26087,0.731561,0.318759,4.043478,3.956522,0.768111,0.344093,3.952381,3.571429,0.784845,0.394834,3.842105,3.052632
HBOS,0.788629,0.348515,4.978261,4.413043,0.766357,0.326657,4.26087,3.521739,0.767071,0.320578,3.543478,3.152174,0.798466,0.360656,3.547619,3.428571,0.799559,0.349222,3.578947,3.473684
IForest,0.860801,0.458191,2.608696,2.782609,0.787455,0.351315,3.065217,3.217391,0.781176,0.337176,2.934783,2.891304,0.845703,0.390543,2.571429,2.761905,0.850044,0.401605,2.315789,2.894737
KNN,0.840169,0.396594,3.630435,3.565217,0.80034,0.338173,3.065217,2.891304,0.791153,0.351595,2.826087,2.717391,0.839414,0.355529,2.5,2.952381,0.836878,0.365819,2.789474,3.0
LOF,0.848294,0.383246,3.173913,3.478261,0.788304,0.279026,3.804348,3.891304,0.724026,0.239141,3.695652,4.130435,0.804513,0.296991,3.333333,3.380952,0.811998,0.285668,3.315789,3.789474
OCSVM,0.787531,0.369607,4.673913,4.869565,0.761495,0.335522,3.76087,4.217391,0.723169,0.32657,3.956522,4.152174,0.697418,0.2397,5.095238,4.904762,0.745124,0.284393,5.157895,4.789474


In [65]:
to_latex(original_contamination, critical_distance)

\begin{tabular}{lllllllllllllllllllll}
\toprule
{} & \multicolumn{4}{l}{peak_performance_v1} & \multicolumn{4}{l}{best_average_performance_v1} & \multicolumn{4}{l}{default_performance_v1} & \multicolumn{4}{l}{tuned_performance_stat10} & \multicolumn{4}{l}{tuned_performance_stat5} \\
{} &             avg_auc &     avg_ap &       auc_ranks &        ap_ranks &                     avg_auc &     avg_ap &       auc_ranks &        ap_ranks &                avg_auc &     avg_ap &       auc_ranks &        ap_ranks &                  avg_auc &     avg_ap &      auc_ranks &        ap_ranks &                 avg_auc &    avg_ap &       auc_ranks &        ap_ranks \\
algo_name &                     &            &                 &                 &                             &            &                 &                 &                        &            &                 &                 &                          &            &                &                 &                         & 

In [62]:
print(critical_distance)

1.572280260447006


### Contamination 5

In [None]:
contamination5 = compare_algorithms_different_performance_settings('contamination5')
contamination5

In [None]:
to_latex(contamination5)

### all experiments

In [None]:
contamination_all = compare_algorithms_different_performance_settings('all')
contamination_all

In [None]:
to_latex(contamination_all)

# Look at performance distributions

In [None]:
def algorithm_performance_plot(algorithm, contamination_filter, metric = 'auc'): 
    r_path = result_path / f"grid_{algorithm}_v{VERSION}.csv"
    result_df = pd.read_csv(r_path)
    result_df = apply_contamination_filter(contamination_filter, result_df)
    result_df = result_df.assign(dataset_id = lambda x: x.dataset_id +'_' +x.anomaly_fraction.astype('str'))[['algo_name', 'dataset_id', 'auc', 'ap']]
#     result_df = result_df[result_df.dataset_id.str.startswith('ALOI')]
    return alt.Chart(result_df).mark_boxplot().encode(
        x = 'dataset_id:N', 
        y = 'auc:Q'
    ).properties(title = f"{algorithm} data={contamination_filter}")

In [None]:
algorithm_performance_plot('CBLOF', 'contamination5')

In [None]:
algorithm_performance_plot('KNN', 'contamination5')

In [None]:
algorithm_performance_plot('LOF', 'contamination5')

In [None]:
algorithm_performance_plot('IForest', 'contamination5')

In [None]:
algorithm_performance_plot('HBOS', 'contamination5')