# Calculate peak and best-average performance
Process the results of the grid search and calculate peak and best-average performance.  
*Note: here, best-average performance is calculated over all datasets. Later analyses use only specific versions of each dataset, so best-average performance must be recalculated at that point.*

## Imports and config

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd 
import numpy as np
from pathlib import Path
from ODD.analysis.hyperparameters import select_peak_performance, select_best_average_performance
from ODD.analysis.result_processing import average_performance_per_method, average_aligned_ranks_with_versions, average_ranks_with_versions, average_ranks_with_versions_and_nemenyi
from tqdm import tqdm
import altair as alt 
alt.data_transformers.disable_max_rows()
from collections import defaultdict

In [None]:
# Version number of the grid-search result file per algorithm (used in the
# "grid_{algo}_v{version}.pkl" file name). Any algorithm not listed falls back to 1.
version_dict = defaultdict(
    lambda: 1,
    {
        'HBOS': 2,
        'CBLOF': 2,
        'IForest': 2,
        'OCSVM': 3,
    },
)

# Algorithms whose grid-search results are processed in this notebook.
algorithms = [
    'CBLOF',
    'HBOS',
    'IForest',
    'KNN',
    'LOF',
    'OCSVM',
]

In [None]:
# NOTE(review): OUTPUT_VERSION is not referenced in the visible part of this
# notebook; the processed directory name below carries its own "v5" suffix — confirm.
OUTPUT_VERSION = 2

# Both locations are relative to the current working directory.
result_path = Path('results')                  # raw grid-search pickles are read from here
processed_path = Path('processed_results_v5')  # selected results are written below here

In [None]:
# Maps each evaluation name (also used as the output sub-directory name) to the
# function that selects results from the grid. Entries set to None have no
# selection function and are skipped by the processing loop in this notebook.
evaluation_functions = {
    'peak_performance': select_peak_performance,
    'best_average_performance': select_best_average_performance,
    'default_performance': None,
    'tuned_performance': None,
}

## Actual calculation

In [None]:
# For every algorithm, derive each configured evaluation from its grid-search
# results and store it as both a pickle and a CSV under
# processed_path/<evaluation_name>/.
for algo in algorithms:
    version = version_dict[algo]
    r_path = result_path / f"grid_{algo}_v{version}.pkl"

    # The grid is read lazily: it stays None until some evaluation actually
    # needs to be computed, and is then reused for the remaining evaluations.
    grid_results = None

    progress = tqdm(evaluation_functions.items(), desc = algo)
    for evaluation_name, select_function in progress:
        # Evaluations without a selection function are not computed here.
        if select_function is None:
            continue

        output_dir = processed_path / f"{evaluation_name}"
        proc_path = output_dir / f"{algo}.csv"

        # An existing CSV marks this (evaluation, algorithm) pair as done.
        if not proc_path.exists():
            output_dir.mkdir(parents = True, exist_ok = True)

            if grid_results is None:
                grid_results = pd.read_pickle(r_path)

            # Apply the peak / best-average selection to the full grid.
            result_df = select_function(grid_results)

            # The pickle is written first and the CSV last, so the CSV only
            # appears once both outputs have been attempted.
            result_df.to_pickle(output_dir / f'{algo}.pkl')
            result_df.to_csv(proc_path)
        