# In the end these plots were not used in the final version of the paper

In [1]:
import pandas as pd 
import numpy as np
from pathlib import Path
from ODD.analysis.hyperparameters import select_peak_performance, select_best_average_performance
from ODD.analysis.result_processing import average_performance_per_method, average_aligned_ranks_with_versions, average_ranks_with_versions, average_ranks_with_versions_and_nemenyi
from ODD.analysis.result_analysis_charts import *
from ODD.analysis.dataset_selection import get_datasets_to_use
from tqdm import tqdm
import altair as alt 
alt.data_transformers.disable_max_rows()
# alt.renderers.enable('png')


DataTransformerRegistry.enable('default')

In [2]:
%load_ext autoreload
%autoreload 2

# Config

In [23]:
# Version number embedded in result file names (the "_v{VERSION}" suffix).
VERSION = 1
# Algorithm names; each one is used to build the per-algorithm CSV file names.
algorithms = [
    'CBLOF',
    'HBOS',
    'IForest',
    'KNN',
    'LOF',
    'OCSVM',
]
# Input directories, relative to the current working directory.
result_path = Path('results')
processed_path = Path('processed_results')

## Old plot

In [4]:
def compare_evaluation_settings_chart_default_as_reference_absolute(evaluation_settings, datasets_to_use, algorithm, baseline,
                                          sort_reference, metric_to_use='full_auc', show_only_tuned = True, show_relative = True):
    """Point chart of one algorithm's per-dataset AUC under several evaluation settings.

    For each evaluation setting ``<algorithm>.csv`` is read from
    ``processed_path/<setting>/average`` when that directory exists, and from
    ``processed_path/<setting>`` otherwise.

    Parameters
    ----------
    evaluation_settings : list of str
        Names of the evaluation-setting directories to compare.
    datasets_to_use :
        Object with an ``index``; only ``(dataset_id, anomaly_fraction)``
        groups whose key is contained in that index are kept.
    algorithm : str
        Algorithm name; selects the CSV file and is used as chart title.
    baseline : str
        Evaluation setting whose AUC is subtracted to obtain the relative values.
    sort_reference : str
        Evaluation setting whose AUC determines the ordering of the x axis.
    metric_to_use : str or None
        Column copied into ``auc`` for rows where ``auc`` is missing;
        ``None`` disables this fill-in.
    show_only_tuned : bool
        Accepted but not read anywhere in the body.
    show_relative : bool
        If true, plot the AUC difference to ``baseline``; otherwise plot the
        raw AUC (with a fixed chart height of 200).

    Returns
    -------
    The Altair chart.
    """
    # locate and read the result file of every evaluation setting
    setting_dirs = []
    for setting in evaluation_settings:
        averaged_dir = processed_path / setting / 'average'
        setting_dirs.append(averaged_dir if averaged_dir.exists() else processed_path / setting)
    frames = [pd.read_csv(setting_dir / f"{algorithm}.csv") for setting_dir in setting_dirs]
    result_df = pd.concat(frames, keys=evaluation_settings, names=['evaluation_setting']).droplevel(1)

    # keep only the selected (dataset_id, anomaly_fraction) combinations
    def is_selected(group):
        return group.name in datasets_to_use.index

    result_df = result_df.groupby(['dataset_id', 'anomaly_fraction']).filter(is_selected).reset_index()
    result_df = result_df.set_index(['evaluation_setting', 'dataset_id', 'anomaly_fraction'])

    # fall back to `metric_to_use` wherever auc is missing
    if metric_to_use is not None:
        auc_missing = result_df.auc.isna()
        if auc_missing.any():
            result_df.loc[auc_missing, 'auc'] = result_df.loc[auc_missing, metric_to_use]

    # attach the baseline and the sort-reference AUC to every row
    baseline_auc = result_df.loc[baseline].auc.to_frame(baseline)
    sort_auc = result_df.loc[sort_reference].auc.to_frame('sort')
    plot_df = baseline_auc.join(sort_auc).join(result_df).reset_index()
    plot_df['relative_performance'] = plot_df.auc - plot_df[baseline]
    plot_df['relative_sort'] = plot_df.sort - plot_df[baseline]

    # the two variants differ only in sort field, y encoding and chart properties
    chart_data = plot_df.reset_index()
    if show_relative:
        sort_field = 'relative_sort'
        y_encoding = alt.Y('relative_performance:Q', title='AUC improvement over default performance')
        chart_properties = {'title': algorithm}
    else:
        sort_field = 'sort'
        y_encoding = alt.Y('auc:Q', title='AUC', scale=alt.Scale(zero=False))
        chart_properties = {'title': algorithm, 'height': 200}

    return alt.Chart(chart_data).mark_point().encode(
        x=alt.X('dataset_id:N', sort=alt.EncodingSortField(field=sort_field)),
        y=y_encoding,
        color='evaluation_setting:N',
    ).properties(**chart_properties)

def make_plot_per_algorithm(evaluation_settings, contamination_filter, baseline = 'default_performance_v1', sort_reference = 'peak_performance_v1', columns = 3): 
    """Arrange one evaluation-setting comparison chart per algorithm in a grid.

    Parameters
    ----------
    evaluation_settings : list of str
        Evaluation settings to compare in every chart.
    contamination_filter :
        Passed to ``get_datasets_to_use`` to select the datasets.
    baseline : str
        Evaluation setting used as reference for the relative AUC.
    sort_reference : str
        Evaluation setting used to order the datasets.
    columns : int
        Number of charts per row. The last row is shorter when the number of
        algorithms is not a multiple of ``columns``.

    Returns
    -------
    The vertically concatenated Altair chart with a shared y scale.

    Raises
    ------
    ValueError
        If ``columns`` is smaller than 1.
    """
    if columns < 1:
        raise ValueError(f"columns must be a positive integer, got {columns!r}")

    datasets_to_use = get_datasets_to_use(processed_path, evaluation_settings, contamination_filter)

    # Slice the algorithm list into rows instead of reshaping an array, so a
    # length that is not divisible by `columns` no longer raises.
    grid_rows = []
    for start in range(0, len(algorithms), columns):
        row_charts = [
            compare_evaluation_settings_chart_default_as_reference_absolute(
                evaluation_settings, datasets_to_use, algo, baseline, sort_reference)
            for algo in algorithms[start:start + columns]
        ]
        grid_rows.append(alt.hconcat(*row_charts).resolve_scale(y='shared'))
    return alt.vconcat(*grid_rows).resolve_scale(y='shared')

## New plot

In [61]:
def average_compare_plot(evaluation_settings, datasets_to_use, metric_to_use = 'full_auc', line_settings = ['default_performance_v1', 'peak_performance_v1'], sort = 'peak_performance_v1', reference = 'default_performance_v1'): 
    """Plot, per algorithm, the mean AUC difference to a reference evaluation setting.

    The results of every algorithm in the global ``algorithms`` list are read
    for every evaluation setting (from ``processed_path/<setting>/average`` if
    that directory exists, else ``processed_path/<setting>``), the AUC of the
    ``reference`` setting is subtracted, and the differences are averaged per
    (algorithm, evaluation setting).

    Parameters
    ----------
    evaluation_settings : list of str
        Evaluation settings to load.
    datasets_to_use :
        Object with an ``index``; only ``(dataset_id, anomaly_fraction)``
        groups whose key is contained in that index are kept.
    metric_to_use : str or None
        Column copied into ``auc`` where ``auc`` is missing; ``None`` disables it.
    line_settings : list of two str
        The two settings drawn as the end points of a gray rule per algorithm;
        they are excluded from the point layer.
    sort : str
        Not read anywhere in the body.
    reference : str
        Evaluation setting whose AUC is subtracted from all others.

    Returns
    -------
    The layered Altair chart (rules + points).

    NOTE(review): the reference AUC is merged on (algorithm, dataset_id) only,
    not on anomaly_fraction -- confirm that each dataset_id occurs with a
    single anomaly_fraction after filtering.
    """
    # locate the result directory of every evaluation setting
    setting_dirs = []
    for setting in evaluation_settings:
        averaged_dir = processed_path / setting / 'average'
        setting_dirs.append(averaged_dir if averaged_dir.exists() else processed_path / setting)

    # one frame per algorithm, each stacking all evaluation settings
    per_algorithm = [
        pd.concat(
            [pd.read_csv(setting_dir / f"{algo}.csv") for setting_dir in setting_dirs],
            keys=evaluation_settings,
            names=['evaluation_setting'],
        ).droplevel(1)
        for algo in algorithms
    ]
    result_df = pd.concat(per_algorithm, keys=algorithms, names=['algorithm'])

    # keep only the selected (dataset_id, anomaly_fraction) combinations
    def is_selected(group):
        return group.name in datasets_to_use.index

    result_df = result_df.groupby(['dataset_id', 'anomaly_fraction']).filter(is_selected).reset_index()

    # fall back to `metric_to_use` wherever auc is missing
    if metric_to_use is not None:
        auc_missing = result_df.auc.isna()
        if auc_missing.any():
            result_df.loc[auc_missing, 'auc'] = result_df.loc[auc_missing, metric_to_use]

    result_df = result_df.set_index(['evaluation_setting'])

    # express every AUC as the difference to the reference setting
    reference_rows = result_df.loc[reference].reset_index()[['algorithm', 'dataset_id', 'auc']]
    reference_df = reference_rows.reset_index(drop=True).rename(columns={'auc': 'reference_auc'})
    result_df = result_df.reset_index().merge(reference_df, on=['algorithm', 'dataset_id'])
    result_df['auc'] = result_df['auc'] - result_df['reference_auc']

    # average over datasets; the result is indexed by evaluation_setting
    mean_by_group = result_df.groupby(['algorithm', 'evaluation_setting'])[['auc']].mean()
    result_df = mean_by_group.reset_index(level=0)

    # wide frame for the rules, long frame for the remaining settings
    line_rows = result_df.loc[line_settings, :].reset_index()
    line_df = pd.pivot(line_rows, index='algorithm', columns='evaluation_setting', values='auc').reset_index()
    point_df = result_df.drop(line_settings).reset_index()[['algorithm', 'evaluation_setting', 'auc']]

    rule_start, rule_end = f"{line_settings[0]}:Q", f"{line_settings[1]}:Q"
    line_chart = alt.Chart(line_df).mark_rule(color='gray', opacity=0.8).encode(
        y=alt.Y('algorithm:N'),
        x=alt.X(rule_start, scale=alt.Scale(zero=False)),
        x2=rule_end,
    )

    point_chart = alt.Chart(point_df).mark_point().encode(
        y=alt.Y('algorithm:N'),
        x=alt.X('auc:Q', title='AUC improvement over out-of-the-box performance'),
        color='evaluation_setting',
    )
    return alt.layer(line_chart, point_chart)

In [68]:
def compare_plot(algorithm, evaluation_settings, datasets_to_use, metric_to_use = 'full_auc', line_settings = ['default_performance_v1', 'peak_performance_v2'], sort = 'peak_performance_v2', reference = 'default_performance_v1'): 
    """Plot, per dataset, one algorithm's AUC difference to a reference evaluation setting.

    ``<algorithm>.csv`` is read for every evaluation setting (from
    ``processed_path/<setting>/average`` if that directory exists, else
    ``processed_path/<setting>``) and the AUC of the ``reference`` setting is
    subtracted from every row.

    Parameters
    ----------
    algorithm : str
        Algorithm name; selects the CSV file and is used as chart title.
    evaluation_settings : list of str
        Evaluation settings to load.
    datasets_to_use :
        Object with an ``index``; only ``(dataset_id, anomaly_fraction)``
        groups whose key is contained in that index are kept.
    metric_to_use : str or None
        Column copied into ``auc`` where ``auc`` is missing; ``None`` disables it.
    line_settings : list of two str
        The two settings drawn as the end points of a gray rule per dataset;
        they are excluded from the point layer.
    sort : str
        Evaluation setting whose (relative) AUC orders the datasets on the y axis.
    reference : str
        Evaluation setting whose AUC is subtracted from all others.

    Returns
    -------
    The layered Altair chart (rules + points), titled with ``algorithm``.

    NOTE(review): the reference AUC and the sort values are merged on
    dataset_id only, not on anomaly_fraction -- confirm that each dataset_id
    occurs with a single anomaly_fraction after filtering.
    """
    # locate and read the result file of every evaluation setting
    setting_dirs = []
    for setting in evaluation_settings:
        averaged_dir = processed_path / setting / 'average'
        setting_dirs.append(averaged_dir if averaged_dir.exists() else processed_path / setting)
    frames = [pd.read_csv(setting_dir / f"{algorithm}.csv") for setting_dir in setting_dirs]
    result_df = pd.concat(frames, keys=evaluation_settings, names=['evaluation_setting']).droplevel(1)

    # keep only the selected (dataset_id, anomaly_fraction) combinations
    def is_selected(group):
        return group.name in datasets_to_use.index

    result_df = result_df.groupby(['dataset_id', 'anomaly_fraction']).filter(is_selected).reset_index()

    # fall back to `metric_to_use` wherever auc is missing
    result_df = result_df.set_index(['evaluation_setting', 'dataset_id', 'anomaly_fraction'])
    if metric_to_use is not None:
        auc_missing = result_df.auc.isna()
        if auc_missing.any():
            result_df.loc[auc_missing, 'auc'] = result_df.loc[auc_missing, metric_to_use]

    # express every AUC as the difference to the reference setting
    reference_rows = result_df.loc[reference].reset_index()[['dataset_id', 'auc']]
    reference_df = reference_rows.reset_index(drop=True).rename(columns={'auc': 'reference_auc'})
    result_df = result_df.reset_index().merge(reference_df, on='dataset_id').set_index('evaluation_setting')
    result_df['auc'] = result_df['auc'] - result_df['reference_auc']

    # wide frame for the rules, long frame for the remaining settings
    line_rows = result_df.loc[line_settings, :].reset_index()
    line_df = pd.pivot(line_rows, index='dataset_id', columns='evaluation_setting', values='auc').reset_index()
    point_df = result_df.drop(line_settings).reset_index()[['dataset_id', 'evaluation_setting', 'auc']]

    # attach the value that orders the datasets to both frames
    sort_df = result_df.loc[sort][['dataset_id', 'auc']].rename(columns={'auc': 'sort'})
    line_df = line_df.merge(sort_df, on='dataset_id')
    point_df = point_df.merge(sort_df, on='dataset_id')

    rule_start, rule_end = f"{line_settings[0]}:Q", f"{line_settings[1]}:Q"
    line_chart = alt.Chart(line_df).mark_rule(color='gray', opacity=0.8).encode(
        y=alt.Y('dataset_id:O', sort=alt.EncodingSortField(field="sort", op="min")),
        x=alt.X(rule_start, scale=alt.Scale(zero=False)),
        x2=rule_end,
    )

    point_chart = alt.Chart(point_df).mark_point().encode(
        y=alt.Y('dataset_id:O', sort=alt.EncodingSortField(field="sort", op="min"), title='Datasets'),
        x=alt.X('auc:Q', title='AUC improvement over out-of-the-box performance'),
        color='evaluation_setting',
    )
    return alt.layer(line_chart, point_chart).properties(title=algorithm)
    
        
    
    
    

def comparison_plot_per_algorithm(evaluation_settings, datasets_to_use, columns = 3): 
    """Arrange one ``compare_plot`` chart per algorithm in a grid.

    Parameters
    ----------
    evaluation_settings : list of str
        Evaluation settings passed on to ``compare_plot``.
    datasets_to_use :
        Dataset selection passed on to ``compare_plot``.
    columns : int
        Number of charts per row. The last row is shorter when the number of
        algorithms is not a multiple of ``columns``.

    Returns
    -------
    The vertically concatenated Altair chart with a shared x scale.

    Raises
    ------
    ValueError
        If ``columns`` is smaller than 1.
    """
    if columns < 1:
        raise ValueError(f"columns must be a positive integer, got {columns!r}")

    # Slice the algorithm list into rows instead of reshaping an array, so a
    # length that is not divisible by `columns` no longer raises.
    grid_rows = []
    for start in range(0, len(algorithms), columns):
        row_charts = [
            compare_plot(algo, evaluation_settings, datasets_to_use)
            for algo in algorithms[start:start + columns]
        ]
        grid_rows.append(alt.hconcat(*row_charts).resolve_scale(x='shared'))
    return alt.vconcat(*grid_rows).resolve_scale(x='shared')

In [71]:
# Per-dataset comparison for every algorithm (v2 peak / best-average settings).
evaluation_settings = [
    'default_performance_v1',
    'peak_performance_v2',
    'best_average_performance_v2',
    'tuned_performance_size_abs10',
]
datasets_to_use = get_datasets_to_use(
    processed_path, evaluation_settings, 'original_or_random'
)
comparison_plot_per_algorithm(evaluation_settings, datasets_to_use)
# single-algorithm variant:
# compare_plot('KNN', evaluation_settings, datasets_to_use)

In [66]:
# Averaged comparison over all algorithms (v1 settings plus fixed tuning sizes).
evaluation_settings = [
    'default_performance_v1',
    'peak_performance_v1',
    'best_average_performance_v1',
    'tuned_performance_fixed10',
    'tuned_performance_fixed15',
    'tuned_performance_fixed20',
]
datasets_to_use = get_datasets_to_use(
    processed_path, evaluation_settings, 'original_or_random'
)
average_compare_plot(evaluation_settings, datasets_to_use)

# Compare different parameter settings per algorithm 

In [35]:
# Grid of per-algorithm charts, datasets ordered by the peak performance.
evaluation_settings = [
    'default_performance_v1',
    'peak_performance_v1',
    'best_average_performance_v1',
    'tuned_performance_fixed10',
]
make_plot_per_algorithm(
    evaluation_settings,
    'original_or_random',
    sort_reference='peak_performance_v1',
)

In [14]:
# Grid of per-algorithm charts for the "exp" tuning settings.
evaluation_settings = [
    'default_performance_v1',
    'peak_performance_v1',
    'best_average_performance_v1',
    'tuned_performance_expa10n40',
    'tuned_performance_expa10n90',
    'tuned_performance_expa20n40',
    'tuned_performance_expa20n80',
]
make_plot_per_algorithm(evaluation_settings, 'original_or_random')

## Compare averages (stat settings)

In [24]:
# Builds three LOF charts (stat10%, stat5%, stat1% averages) and shows them side by side.
# NOTE(review): these calls pass `processed_path` as first argument, use the
# keywords `default_reference` / `tuned_reference`, and unpack a (chart, dataframe)
# pair. The definition of compare_evaluation_settings_chart_default_as_reference_absolute
# in this notebook takes (evaluation_settings, datasets_to_use, algorithm, baseline,
# sort_reference, ...) and returns a single chart, so this cell presumably predates
# that definition -- confirm before re-running.
evaluation_settings = ['default_performance_v1', 'peak_performance_v1', 'best_average_performance_v1', 'tuned_performance_stat10%_avg']
schart10, sdf10= compare_evaluation_settings_chart_default_as_reference_absolute(processed_path, evaluation_settings, 'contamination5', "LOF", default_reference = 'default_performance_v1', tuned_reference = 'peak_performance_v1')
evaluation_settings = ['default_performance_v1', 'peak_performance_v1', 'best_average_performance_v1', 'tuned_performance_stat5%_avg']
schart5, sdf5 = compare_evaluation_settings_chart_default_as_reference_absolute(processed_path, evaluation_settings, 'contamination5', "LOF", default_reference = 'default_performance_v1', tuned_reference = 'peak_performance_v1')
evaluation_settings = ['default_performance_v1', 'peak_performance_v1', 'best_average_performance_v1', 'tuned_performance_stat1%_avg']
schart1, sdf1 = compare_evaluation_settings_chart_default_as_reference_absolute(processed_path, evaluation_settings, 'contamination5', "LOF", default_reference = 'default_performance_v1', tuned_reference = 'peak_performance_v1')
(schart10.properties(title = 'p=0.10') | schart5.properties(title = 'p=0.05')|schart1.properties(title = 'p=0.01')).resolve_scale(color = 'independent')

## Compare different runs fixed

In [23]:
# Compares the five individual repeats of the fixed10% and fixed20% tuning for LOF.
# NOTE(review): the calls pass `processed_path` as first argument and use the
# keywords `default_reference` / `tuned_reference`, which the definition of
# compare_evaluation_settings_chart_default_as_reference_absolute in this notebook
# does not accept. The output recorded for this cell is a TypeError from applying
# `|` to two tuples, so the cell presumably ran against an older version of the
# function that returned a tuple -- confirm before re-running.
evaluation_settings = ['default_performance_v1', 'peak_performance_v1','best_average_performance_v1',]
evaluation_settings.extend(f'tuned_performance_fixed10%_repeat{idx}' for idx in range(0,5))
chart10 = compare_evaluation_settings_chart_default_as_reference_absolute(processed_path, evaluation_settings, 'original_or_random', "LOF", default_reference = 'default_performance_v1', tuned_reference = 'peak_performance_v1')
evaluation_settings = ['default_performance_v1', 'peak_performance_v1', 'best_average_performance_v1']
evaluation_settings.extend(f'tuned_performance_fixed20%_repeat{idx}' for idx in range(0,5))
chart20 = compare_evaluation_settings_chart_default_as_reference_absolute(processed_path, evaluation_settings, 'original_or_random', "LOF", default_reference = 'default_performance_v1', tuned_reference = 'peak_performance_v1')
(chart10 | chart20).resolve_scale(color = 'independent')

TypeError: unsupported operand type(s) for |: 'tuple' and 'tuple'

## Compare averages fixed

In [25]:
# Compares the averaged fixed10% and fixed20% tuning results for LOF side by side.
# NOTE(review): the calls pass `processed_path` as first argument, use the keywords
# `default_reference` / `tuned_reference`, and unpack a (chart, dataframe) pair,
# whereas the definition of compare_evaluation_settings_chart_default_as_reference_absolute
# in this notebook takes (evaluation_settings, datasets_to_use, algorithm, baseline,
# sort_reference, ...) and returns a single chart -- confirm before re-running.
evaluation_settings = ['default_performance_v1', 'peak_performance_v1','best_average_performance_v1', 'tuned_performance_fixed10%_avg']
fchart10, fdf10 = compare_evaluation_settings_chart_default_as_reference_absolute(processed_path, evaluation_settings, 'original_or_random', "LOF", default_reference = 'default_performance_v1', tuned_reference = 'peak_performance_v1')
evaluation_settings = ['default_performance_v1', 'peak_performance_v1', 'best_average_performance_v1','tuned_performance_fixed20%_avg']
fchart20, fdf20 = compare_evaluation_settings_chart_default_as_reference_absolute(processed_path, evaluation_settings, 'original_or_random', "LOF", default_reference = 'default_performance_v1', tuned_reference = 'peak_performance_v1')
(fchart10 | fchart20).resolve_scale(color = 'independent')

In [28]:
( fchart10.properties(title = 'fixed LOF')).resolve_scale(color = 'independent')

In [None]:
'Glass', 'HeartDisease', 'Hepatitis', 'Lymphography', 'Parkinson',
       'PenDigits', 'WPBC', 'Wilt'

In [None]:
# Displays one comparison chart per algorithm, using whatever `evaluation_settings`
# is currently bound in the notebook.
# NOTE(review): the call uses the keywords `default_reference` / `tuned_reference`
# and passes `processed_path` first, which does not match the definition of
# compare_evaluation_settings_chart_default_as_reference_absolute in this notebook
# -- confirm before re-running.
for algo in algorithms:
    display(compare_evaluation_settings_chart_default_as_reference_absolute(processed_path, evaluation_settings, 'contamination5', algo, default_reference = 'default_performance_v1', tuned_reference = 'peak_performance_v1'))

In [None]:
# Displays, per algorithm, a comparison that includes the v1 and v2 fixed10% tuning.
# NOTE(review): compare_settings_default_as_reference is not defined in this
# notebook; presumably it comes from the star import of
# ODD.analysis.result_analysis_charts -- confirm it still exists.
evaluation_settings = ['default_performance_v1', 'peak_performance_v1', 'best_average_performance_v1', 'tuned_performance_v1_fixed10%','tuned_performance_v2_fixed10%']
# algorithm = 'OCSVM'
for algo in algorithms:
    display(compare_settings_default_as_reference(evaluation_settings, 'contamination5', algo, 'default_performance_v1', 'peak_performance_v1', 'full_auc'))

# Compare different algorithms per parameter setting

In [None]:
def compare_algorithms(parameter_setting, contamination_filter):
    """Summarise all algorithms for one parameter setting.

    Reads ``processed_path/<algo>_<parameter_setting>_v<VERSION>.csv`` for every
    algorithm in the global ``algorithms`` list, applies the dataset filter
    registered under ``contamination_filter`` in ``dataset_filters``, and
    combines the output of ``average_performance_per_method`` with the ranks
    returned by ``average_ranks_with_versions_and_nemenyi``.

    Returns
    -------
    tuple
        ``(performance_df, critical_distance)``: the column-wise concatenation
        of both summaries, and the second value returned by
        ``average_ranks_with_versions_and_nemenyi``.
    """
    # gather the results of every algorithm
    per_algorithm_frames = []
    for algo in algorithms:
        csv_path = processed_path / f"{algo}_{parameter_setting}_v{VERSION}.csv"
        per_algorithm_frames.append(pd.read_csv(csv_path))
    all_results = pd.concat(per_algorithm_frames, axis=0)

    # restrict to the requested datasets and to the columns needed below
    select_datasets = dataset_filters[contamination_filter]
    kept_columns = ['algo_name', 'dataset_id', 'anomaly_fraction', 'auc', 'ap']
    filtered_results = select_datasets(all_results)[kept_columns]

    avg_performance = average_performance_per_method(filtered_results)
    avg_ranks, critical_distance = average_ranks_with_versions_and_nemenyi(filtered_results)
    return pd.concat([avg_performance, avg_ranks], axis=1), critical_distance

def compare_algorithms_different_performance_settings(contamination_filter): 
    """Run ``compare_algorithms`` for every key of ``evaluation_functions``.

    Returns
    -------
    tuple
        ``(all_result_table, critical_distance)``: the per-setting tables
        concatenated column-wise (keyed by setting name), and the critical
        distance reported for the first setting.
    """
    setting_names = evaluation_functions.keys()
    per_setting = []
    for name in setting_names:
        per_setting.append(compare_algorithms(name, contamination_filter))

    tables = [outcome[0] for outcome in per_setting]
    all_result_table = pd.concat(tables, axis=1, keys=setting_names)
    first_critical_distance = per_setting[0][1]
    return all_result_table, first_critical_distance

### Original contamination

In [None]:
original_contamination, critical_distance = compare_algorithms_different_performance_settings('select_original_max20')
original_contamination

In [None]:
to_latex(original_contamination, critical_distance)

In [None]:
print(critical_distance)

### Contamination 5

In [None]:
# NOTE(review): compare_algorithms_different_performance_settings returns a
# (table, critical_distance) pair, so `contamination5` is bound to that tuple
# here; the "Original contamination" cell unpacks the two values instead
# -- confirm which form is intended.
contamination5 = compare_algorithms_different_performance_settings('contamination5')
contamination5

In [None]:
to_latex(contamination5)

### All experiments

In [None]:
# NOTE(review): compare_algorithms_different_performance_settings returns a
# (table, critical_distance) pair, so `contamination_all` is bound to that tuple
# here; the "Original contamination" cell unpacks the two values instead
# -- confirm which form is intended.
contamination_all = compare_algorithms_different_performance_settings('all')
contamination_all

In [None]:
to_latex(contamination_all)

# Look at performance distributions

In [None]:
def algorithm_performance_plot(algorithm, contamination_filter, metric = 'auc'): 
    """Box plot of one algorithm's grid results per dataset.

    Reads ``result_path/grid_<algorithm>_v<VERSION>.csv``, applies
    ``apply_contamination_filter`` and draws one box per dataset, where the
    dataset label is ``dataset_id`` joined with ``anomaly_fraction`` by ``_``.

    Parameters
    ----------
    algorithm : str
        Algorithm name; selects the CSV file and appears in the chart title.
    contamination_filter :
        Passed to ``apply_contamination_filter``; also shown in the title.
    metric : str
        Column plotted on the y axis (default ``'auc'``).

    Returns
    -------
    The Altair box-plot chart.
    """
    r_path = result_path / f"grid_{algorithm}_v{VERSION}.csv"
    result_df = pd.read_csv(r_path)
    result_df = apply_contamination_filter(contamination_filter, result_df)

    # Keep the plotted metric even when it is not one of the default columns.
    kept_columns = ['algo_name', 'dataset_id', 'auc', 'ap']
    if metric not in kept_columns:
        kept_columns.append(metric)

    # Make the dataset label unique per (dataset_id, anomaly_fraction).
    result_df = result_df.assign(
        dataset_id=lambda x: x.dataset_id + '_' + x.anomaly_fraction.astype('str')
    )[kept_columns]

    return alt.Chart(result_df).mark_boxplot().encode(
        x='dataset_id:N',
        # Honour `metric` here rather than a fixed 'auc:Q'.
        y=f"{metric}:Q",
    ).properties(title=f"{algorithm} data={contamination_filter}")

In [None]:
algorithm_performance_plot('CBLOF', 'contamination5')

In [None]:
algorithm_performance_plot('KNN', 'contamination5')

In [None]:
algorithm_performance_plot('LOF', 'contamination5')

In [None]:
algorithm_performance_plot('IForest', 'contamination5')

In [None]:
algorithm_performance_plot('HBOS', 'contamination5')