# Unsupervised gridsearch
This notebook executes the unsupervised gridsearch using Dask and our own helper framework 'affe' (another framework for experimentation).  
In this notebook you'll also find a few helper methods to generate config files for the gridsearch.  
However, you can also make these files manually.  
Just ensure that each of the following notebooks uses the correct version of the gridsearch.  
*Note: the experiments were run on many more datasets than are actually used in the final results.*

In [None]:
# Development aid: load IPython's autoreload extension and reload edited modules
# automatically, so code changes are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Imports and configuration

In [None]:
import numpy as np
from ODD.workflows.experiment_config import execute_experiment, ConfigBuilder
from pathlib import Path
from collections import defaultdict

In [None]:
# Relative directories used below: generated config files go in `config`,
# gridsearch result files go in `results`.
config_dir = Path('config')
result_dir = Path('results')

In [None]:
# Address (host:port) handed to `execute_experiment` as the Dask scheduler.
# NOTE(review): the host name looks like a placeholder - replace it with your own scheduler.
scheduler = 'domain_to_dask_scheduler.com:8786'

### Helper for generating config files

In [None]:
def make_gs_config(config_path, algo, parameter_dict, *, timeout_s=3600,
                   dataset_versions=(1,), anomaly_fraction=(-1, 2, 5, 10)):
    """Build the gridsearch config for one algorithm and write it to ``config_path``.

    The config is assembled with ``ConfigBuilder``: the 'campos' dataset
    collection (``datasets='all'``) is combined with a single algorithm entry
    named ``f'{algo}_grid'`` whose parameters are taken from ``parameter_dict``.

    Parameters
    ----------
    config_path : pathlib.Path
        Target file. Its stem is passed as the first argument of
        ``ConfigBuilder`` (presumably the experiment name - confirm against
        ``ConfigBuilder``). The parent directory is created when missing.
    algo : str
        Algorithm identifier forwarded to ``ConfigBuilder.add_algorithm``.
    parameter_dict : dict
        Maps parameter names to their candidate values; forwarded as keyword
        arguments to ``ConfigBuilder.add_algorithm``.
    timeout_s : int, keyword-only
        Forwarded to ``ConfigBuilder`` as ``timeout_s`` (default 3600).
    dataset_versions : iterable, keyword-only
        Forwarded (as a list) as the ``versions`` argument of ``add_dataset``
        (default ``[1]``).
    anomaly_fraction : iterable, keyword-only
        Forwarded (as a list) as the ``anomaly_fraction`` argument of
        ``add_dataset`` (default ``[-1, 2, 5, 10]``).

    Returns
    -------
    None
    """
    # A fresh checkout may not have the config directory yet; create it so the
    # write below does not depend on the directory having been made by hand.
    config_path.parent.mkdir(parents=True, exist_ok=True)
    (
        ConfigBuilder(config_path.stem, root_levels_up=2, timeout_s=timeout_s)
        .add_dataset(
            'campos',
            datasets='all',
            versions=list(dataset_versions),
            anomaly_fraction=list(anomaly_fraction),
        )
        .add_algorithm(algo, f'{algo}_grid', **parameter_dict)
        .to_file(config_path)
    )

In [None]:
# Hyperparameter grid: algorithm name -> {parameter name -> list of candidate values}.
setup_dict = {
    'LOF': {
        'n_neighbors': list(range(1, 300, 2)),
    },
    'KNN': {
        'n_neighbors': list(range(1, 300, 2)),
    },
    'HBOS': {
        'n_bins': list(range(5, 101, 5)),
    },
    'IForest': {
        'n_estimators': list(range(25, 301, 25)),
        'max_samples': list(np.arange(0.1, 1.01, 0.1)),
        'max_features': list(np.arange(0.1, 1.01, 0.1)),
    },
    'CBLOF': {
        'n_clusters': list(range(2, 50, 2)),
        'alpha': list(np.arange(0.5, 0.99, 0.1)),
        'beta': list(range(2, 21, 2)),
        'use_weights': [False, True],
    },
    'OCSVM': {
        'kernel': ['rbf'],
        'nu': list(np.linspace(0.02, 1, 50)),
        'gamma': [
            0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, 50.0,
            100.0, 500.0, 1000.0, 5000.0, 10000.0,
        ],
    },
}

# Version number used in the config/result file names of each algorithm;
# algorithms without an explicit entry fall back to version 1.
version_dict = defaultdict(
    lambda: 1,
    {'HBOS': 2, 'IForest': 2, 'CBLOF': 2, 'OCSVM': 3},
)

# Generate one config file per algorithm of the grid. A config file that is
# already present on disk is kept as-is and not regenerated.
for algo, parameter_dict in setup_dict.items():
    version = version_dict[algo]
    file_name = f"grid_{algo}_v{version}.toml"
    config_path = config_dir / file_name
    if config_path.exists():
        continue
    make_gs_config(config_path, algo, parameter_dict)
    

## Run the gridsearch using the generated config files

In [None]:
# Run the gridsearch for every algorithm of the grid: each algorithm has its
# own config file in `config_dir` and its own result file in `result_dir`.
for algo in setup_dict:
    version = version_dict[algo]
    config_name = f"grid_{algo}_v{version}.toml"
    result_name = f"grid_{algo}_v{version}.pkl"
    execute_experiment(
        config_dir / config_name,
        result_dir / result_name,
        dask_scheduler=scheduler,
        shuffle=True,
        progress=False,
        dask_batch_size=25,
    )
