# Experiment

In this notebook we explore a range of parameter values for both the Genetic Algorithm (GA) and the Evolution Strategy (ES) to find the best configuration for each algorithm.
We do this with a simple grid search: we specify a set of values for each parameter and then run every possible combination.
The results of each run are stored in a CSV file.

Imports

In [None]:
import os
from itertools import product

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from nasbench.api import NASBench

from main import init_ipynb
from utils import get_directories, ProgressBar

In [None]:
# Path this notebook is assumed to have: the kernel's current working
# directory joined with the notebook's file name.
notebook_path = os.path.join(os.path.abspath(''), 'experiment.ipynb')

# Directory lookup table used throughout the notebook (keys such as 'data'
# and 'csv' are read further down).
DIRS = get_directories(notebook_path)

In [None]:
# Load the NASBench record file from the data directory.
# os.path.join yields a valid path whether or not DIRS['data'] ends with a
# path separator (plain string concatenation silently produced a wrong file
# name when it did not).
NB = NASBench(os.path.join(DIRS['data'], 'nasbench_only108.tfrecord'))

---

## Genetic Algorithm

In [None]:
# Identifier of this experiment: it names the directory that receives the
# per-run CSV files and the summary CSV written at the end of this section.
exp_id = 'GA_exp'

# Directory for the per-run CSV files. The trailing separator is kept because
# this string is passed unchanged to init_ipynb(save_to=...).
DIRS['csv_exp'] = DIRS['csv'] + exp_id + os.sep

# exist_ok=True makes the cell safe to re-run; makedirs (unlike mkdir) also
# creates any missing parent directory instead of raising FileNotFoundError.
os.makedirs(DIRS['csv_exp'], exist_ok=True)

# Print every known directory as a quick sanity check of the paths.
for n, p in DIRS.items():
    print(f'{n}: {p}')

In [None]:
# Settings shared by every run of the grid searches below; each run starts
# from a copy of this dict and adds its algorithm-specific parameters.
default_args = {
    'budget': 5000,
    'verbose': 0,
    'repetitions': 20,
    'log': False,
    'seed': 42,
}

In [None]:
# --- Parameter grid for the GA -------------------------------------------
selections = ['rw', 'ts', 'rk', 'su']
# (mutation type, value): the value is passed as 'mut_r' for type 'u' and as
# 'mut_b' for type 'b'.
mutations = [('u', 0.01), ('u', 0.04), ('u', 0.1), ('b', 1), ('b', 2), ('b', 3)]
# (recombination type, value): the value is passed as 'xp' (None for type 'u').
recombinations = [('kp', 1), ('kp', 2), ('kp', 3), ('u', None)]
# (population_size, mu_, lambda_) triples.
pop_divisions = [
    (100, 40, 60), (100, 40, 100), (100, 20, 80), (100, 20, 100),
    (40, 20, 20), (40, 20, 40), (40, 10, 30), (40, 10, 40),
]

# Cartesian product of all parameter lists: one entry per run.
combinations = list(product(selections, mutations, recombinations, pop_divisions))
n_combs = len(combinations)
progress = ProgressBar(n_combs, exp_id)

for i, (sel, (mut_kind, mut_val), (rec_kind, rec_val), (ps, mu, lm)) in enumerate(combinations):
    # Mutation arguments plus the mutation part of the run id. Only one of
    # mut_r / mut_b carries the grid value; the other gets a fixed filler.
    if mut_kind == 'u':
        mut_args = {'mut_r': mut_val, 'mut_b': 1}
        mut_id = f'u-{mut_val:.3f}'
    else:
        mut_args = {'mut_b': mut_val, 'mut_r': None}
        mut_id = f'n-{mut_val}'

    rec_id = f'kp-{rec_val}' if rec_kind == 'kp' else 'u'

    # The run id encodes the full configuration; the collection cell below
    # parses it back out of the result file names.
    run_id = f'GA_P{ps}_M{mu}_L{lm}_SEL{sel}_REC{rec_id}_MUT{mut_id}'

    # Shared defaults first, then the run-specific settings.
    args = {
        **default_args,
        'optimizer': 'GA',
        'selection': sel,
        'mutation': mut_kind,
        **mut_args,
        'recombination': rec_kind,
        'xp': rec_val,
        'population_size': ps,
        'mu_': mu,
        'lambda_': lm,
        'run_id': run_id,
    }

    init_ipynb(NB, args, save_to=DIRS['csv_exp'])

    # Report the zero-based index of the run that just finished.
    progress(i)

In [None]:
# Summarise all GA runs: one row per result file, indexed by run id.
ga_columns = ['max_avg_value', 'pop_size', 'mu', 'lambda', 'selection', 'recombination', 'mutation']
ga_run_ids = []
ga_rows = []

# sorted() makes the processing order independent of the file system's
# (arbitrary) directory listing order.
for file in sorted(os.listdir(DIRS['csv_exp'])):
    if not file.endswith('.csv'):
        continue
    df_ = pd.read_csv(os.path.join(DIRS['csv_exp'], file), index_col=0)
    # Row-wise mean over all columns, taken at the last row of the file.
    # NOTE(review): presumably columns are repetitions and rows are budget
    # steps, so this is the final average value -- confirm against init_ipynb.
    max_val = df_.mean(axis=1).iloc[-1]
    run_id = file[3:-4]  # trim 'GA_' and '.csv'
    # run_id looks like 'P<ps>_M<mu>_L<lm>_SEL<sel>_REC<rec>_MUT<mut>'.
    ps, mu, lm, sel, rec, mut = run_id.split('_')
    ga_run_ids.append(run_id)
    # Strip the field tags ('P', 'M', 'L', 'SEL', 'REC', 'MUT'); values stay strings.
    ga_rows.append([max_val, ps[1:], mu[1:], lm[1:], sel[3:], rec[3:], mut[3:]])

# Build the frame once from the collected rows instead of enlarging it one
# row at a time, which re-allocates the frame on every insertion.
df_GA = pd.DataFrame(ga_rows, columns=ga_columns, index=pd.Index(ga_run_ids, name='run_id'))

# sort by highest value; the stable sort keeps tied runs in run-id order
df_GA = df_GA.sort_values(by='max_avg_value', ascending=False, kind='stable')

# save to csv with same name as dir where all individual csv files are stored
df_GA.to_csv(os.path.join(DIRS['csv'], f'{exp_id}.csv'))

---

## Evolution Strategy

In [None]:
# Identifier of this experiment: it names the directory that receives the
# per-run CSV files and the summary CSV written at the end of this section.
exp_id = 'ES_exp'

# Directory for the per-run CSV files. The trailing separator is kept because
# this string is passed unchanged to init_ipynb(save_to=...).
DIRS['csv_exp'] = DIRS['csv'] + exp_id + os.sep

# exist_ok=True makes the cell safe to re-run; makedirs (unlike mkdir) also
# creates any missing parent directory instead of raising FileNotFoundError.
os.makedirs(DIRS['csv_exp'], exist_ok=True)

# Print every known directory as a quick sanity check of the paths.
for n, p in DIRS.items():
    print(f'{n}: {p}')

In [None]:
# --- Parameter grid for the ES -------------------------------------------
recombinations = ['d', 'i', 'dg', 'ig']
taus = [0.1, 0.2, 0.5, 0.99]
sigmas = [0.01, 0.1, 0.5]
chunk_sizes = [3, 7]
individual_sigmas = [True, False]
# (population_size, mu_, lambda_) triples.
pop_divisions = [
    (100, 40, 60), (100, 40, 100), (100, 20, 80), (100, 20, 100),
    (40, 20, 20), (40, 20, 40), (40, 10, 30), (40, 10, 40),
]

# Cartesian product of all parameter lists: one entry per run.
combinations = list(product(recombinations, taus, sigmas, chunk_sizes, individual_sigmas, pop_divisions))
n_combs = len(combinations)
progress = ProgressBar(n_combs, exp_id)

for i, (rec, tau, sig, chsz, isig, (ps, mu, lm)) in enumerate(combinations):
    # The run id encodes the full configuration; runs with individual sigmas
    # get an extra '_IS' suffix. The collection cell below parses it back out
    # of the result file names.
    run_id = f'ES_P{ps}_M{mu}_L{lm}_S{sig}_T{tau}_C{chsz}_REC{rec}' + ('_IS' if isig else '')

    # Shared defaults first, then the run-specific settings.
    args = {
        **default_args,
        'optimizer': 'ES',
        'recombination': rec,
        'tau_': tau,
        'sigma_': sig,
        'chunk_size': chsz,
        'individual_sigmas': isig,
        'population_size': ps,
        'mu_': mu,
        'lambda_': lm,
        'run_id': run_id,
    }

    init_ipynb(NB, args, save_to=DIRS['csv_exp'])

    # Report the zero-based index of the run that just finished.
    progress(i)

In [None]:
# Summarise all ES runs: one row per result file, indexed by run id.
es_columns = ['max_avg_value', 'pop_size', 'mu', 'lambda', 'sigma', 'tau', 'chunk_size', 'recombination', 'individual_sigmas']
es_run_ids = []
es_rows = []

# sorted() makes the processing order independent of the file system's
# (arbitrary) directory listing order.
for file in sorted(os.listdir(DIRS['csv_exp'])):
    if not file.endswith('.csv'):
        continue
    df_ = pd.read_csv(os.path.join(DIRS['csv_exp'], file), index_col=0)
    # Row-wise mean over all columns, taken at the last row of the file.
    # NOTE(review): presumably columns are repetitions and rows are budget
    # steps, so this is the final average value -- confirm against init_ipynb.
    max_val = df_.mean(axis=1).iloc[-1]
    run_id = file[3:-4]  # trim 'ES_' and '.csv'
    # run_id looks like 'P<ps>_M<mu>_L<lm>_S<sig>_T<tau>_C<chsz>_REC<rec>[_IS]';
    # the optional trailing 'IS' part lands in the starred list.
    ps, mu, lm, sig, tau, chsz, rec, *isig = run_id.split('_')
    es_run_ids.append(run_id)
    # Strip the field tags; values stay strings, the IS flag becomes a bool.
    es_rows.append([max_val, ps[1:], mu[1:], lm[1:], sig[1:], tau[1:], chsz[1:], rec[3:], bool(isig)])

# Build the frame once from the collected rows instead of enlarging it one
# row at a time, which re-allocates the frame on every insertion.
df_ES = pd.DataFrame(es_rows, columns=es_columns, index=pd.Index(es_run_ids, name='run_id'))

# sort by highest value; the stable sort keeps tied runs in run-id order
df_ES = df_ES.sort_values(by='max_avg_value', ascending=False, kind='stable')

# save to csv with same name as dir where all individual csv files are stored
df_ES.to_csv(os.path.join(DIRS['csv'], f'{exp_id}.csv'))