# Final

In this notebook we run the configurations we have selected based on the results of our `experiment` notebook.
The main difference here is that we now attach an `IOH` logger to the problem object so we can export and visualize the results of these final runs in IOanalyzer.
After the experiments have been done, we also visualize the results with some learning curves.

Imports

In [None]:
import os
from itertools import product

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from nasbench.api import NASBench

from main import init_ipynb
from utils import get_directories, ProgressBar

In [None]:
# Look up the directories used by this notebook. The helper is given the path of
# 'final.ipynb' inside the current working directory (os.path.abspath('') is the cwd).
# The result is indexed by name below ('data', 'csv', 'plots', ...).
DIRS = get_directories(os.path.join(os.path.abspath(''), 'final.ipynb'))

In [None]:
# Create the NASBench API object from the 'nasbench_only108.tfrecord' file in the
# data directory, with a fixed seed. The same object is passed to every run below.
NB = NASBench(DIRS['data'] + 'nasbench_only108.tfrecord', seed=42)

In [None]:
# Arguments shared by every run in this notebook; the GA and ES sections each
# copy this dict and extend it with their own constants.
default_args = dict(budget=5000, verbose=0, repetitions=20, log=True, seed=42)

---

## Genetic Algorithm

In [None]:
# the results of the final GA runs get their own sub-directory of the csv directory
exp_id = 'GA_fin'
DIRS['csv_exp'] = DIRS['csv'] + exp_id + os.sep
# exist_ok makes the cell safe to re-run without a separate existence check,
# and makedirs also creates a missing parent directory
os.makedirs(DIRS['csv_exp'], exist_ok=True)

# show all directories that are in use
for n, p in DIRS.items():
    print(f'{n}: {p}')

In [None]:
# settings that are identical for all final GA runs
constants_GA = dict(optimizer='GA', population_size=100, recombination='u', mutation='u', xp=1, mut_b=1)
default_args_GA = default_args.copy()
default_args_GA.update(constants_GA)

# the configurations that are run, one tuple per run
#    lm, mu, sel , mut_r
configs_GA = [
    (80, 20, 'ts', 0.04),
    (80, 20, 'rk', 0.01),
    (90, 10, 'ts', 0.04),
    (90, 10, 'rk', 0.01)
]

# take the number of runs from the config list instead of repeating it as a literal,
# so adding or removing a configuration cannot leave the progress bar out of sync
progress = ProgressBar(len(configs_GA), exp_id)
for i, (lambda_, mu_, selection, mut_r) in enumerate(configs_GA):
    # the run_id doubles as the name of the result file and is parsed again for the plot labels
    run_id = f'GA_P100_M{mu_}_L{lambda_}_SEL{selection}_RECu_MUTu-{mut_r}'

    # set the variable arguments on top of the shared ones
    args = dict(
        default_args_GA,
        lambda_=lambda_,
        mu_=mu_,
        selection=selection,
        mut_r=mut_r,
        run_id=run_id,
    )

    # run the experiment
    init_ipynb(NB, args, save_to=DIRS['csv_exp'])
    progress(i)

---

## Evolution Strategy

In [None]:
# the results of the final ES runs get their own sub-directory of the csv directory
exp_id = 'ES_fin'
DIRS['csv_exp'] = DIRS['csv'] + exp_id + os.sep
# exist_ok makes the cell safe to re-run without a separate existence check,
# and makedirs also creates a missing parent directory
os.makedirs(DIRS['csv_exp'], exist_ok=True)

# show all directories that are in use
for n, p in DIRS.items():
    print(f'{n}: {p}')

In [None]:
# settings that are identical for all final ES runs
constants_ES = dict(optimizer='ES', population_size=100, tau_=0.2, individual_sigmas=True, chunk_size=3)
default_args_ES = default_args.copy()
default_args_ES.update(constants_ES)

# the configurations that are run, one tuple per run
#    lm, mu, sig, rec
configs_ES = [
    (80, 20, 0.5, 'd'),
    (80, 20, 0.1, 'dg'),
    (90, 10, 0.5, 'd'),
    (90, 10, 0.1, 'dg')
]

# take the number of runs from the config list instead of repeating it as a literal,
# so adding or removing a configuration cannot leave the progress bar out of sync
progress = ProgressBar(len(configs_ES), exp_id)
for i, (lambda_, mu_, sigma_, recombination) in enumerate(configs_ES):
    # the run_id doubles as the name of the result file and is parsed again for the plot labels
    run_id = f'ES_P100_M{mu_}_L{lambda_}_S{sigma_}_T0.2_C3_REC{recombination}_IS'

    # set the variable arguments on top of the shared ones
    args = dict(
        default_args_ES,
        lambda_=lambda_,
        mu_=mu_,
        sigma_=sigma_,
        recombination=recombination,
        run_id=run_id,
    )

    # run the experiment
    init_ipynb(NB, args, save_to=DIRS['csv_exp'])
    progress(i)

---

## Plots

In [None]:
def get_data(data_path: str, best_so_far: bool = False):
    """
    Load all csv files in data_path and return a dictionary with the data.
    db[run_id] = np.array with the values of that file, where run_id is the file name
    without its '.csv' extension. The first csv column is used as the index and is
    therefore not part of the array.
    The plotting code in this notebook treats the rows of these arrays as generations
    and the columns as repetitions.
    The entries are ordered by the modification time of the files (oldest first);
    files with the same modification time are ordered by name.
    If best_so_far is True, every value is replaced by the running maximum of its
    column, i.e. the best value so far for each time step.
    """
    # only csv files are of interest, so other entries are never stat'ed or read
    csv_files = [name for name in os.listdir(data_path) if name.endswith('.csv')]
    # sort by modification time to keep the order in which the runs were written;
    # the name breaks ties so the result does not depend on the listing order
    csv_files.sort(key=lambda name: (os.path.getmtime(os.path.join(data_path, name)), name))

    db = {}
    for name in csv_files:
        # os.path.join also handles an empty data_path or one that already ends in a separator
        df = pd.read_csv(os.path.join(data_path, name), index_col=0)
        if best_so_far:
            df = df.cummax()
        db[name[:-len('.csv')]] = df.values
    return db

In [None]:
def extract_settings_from_run_id(run_id: str, params: list[str]) -> dict[str, str]:
    """
    Small helper function to reduce complexity in plot function.

    The run_id is split on '_'. For every part that starts with one of the prefixes in
    params, the display name of that prefix is mapped to the rest of the part,
    e.g. 'M20' -> {'$\\mu$': '20'} and 'SELts' -> {'sel': 'ts'}.
    A part is never assigned to a prefix when a longer known prefix also matches it
    ('MUTu-0.04' belongs to 'MUT', not to 'M'; 'SELts' belongs to 'SEL', not to 'S').
    Raises a KeyError if a matching prefix in params has no display name.
    """
    # raw strings keep the backslashes for mathtext; in a normal literal '\t' is a TAB,
    # which turned the label for tau into '$<TAB>au$'
    param_replacements = {
        'M': r'$\mu$',
        'L': r'$\lambda$',
        'S': r'$\sigma$',
        'T': r'$\tau$',
        'C': 'chunksize',
        'SEL': 'sel',
        'REC': 'rec',
        'MUT': 'mut',
    }
    settings = {}
    for setting in run_id.split('_'):
        for param in params:
            if not setting.startswith(param):
                continue
            # a short prefix must not claim a part that belongs to a longer known prefix
            claimed_by_longer_prefix = any(
                len(known) > len(param) and setting.startswith(known)
                for known in param_replacements
            )
            if claimed_by_longer_prefix:
                continue
            # strip only the leading prefix, the value itself may contain the same letters
            settings[param_replacements[param]] = setting[len(param):]
    return settings

In [None]:
def plot_learning_curve(
    db: dict[str, np.ndarray],
    title: str,
    params: list[str] = None,
    best_so_far: bool = False,
    custom_labels: dict[str, str] = None,
    ) -> plt.Figure:
    """
    Plots the learning curves for all experiments in db.
    db is a dict that maps the run_id to the data (see get_data).
    The columns in this data represent independent repetitions, and their rows represent best fitness found per generation,
    or the best fitness found so far if the data was accumulated before it was passed in.
    The run_id will determine their labels in the legend, unless custom_labels is provided;
    params lists the run_id prefixes that end up in those labels.
    The data will be shown as thin lines for each repetition, and the mean will be shown as a thick line.
    The standard deviation will be shown as a shaded area.
    best_so_far is not read by this function: it only documents at the call site what kind of data is passed.
    Raises a TypeError if there is data to plot but neither params nor custom_labels is given.
    Returns the created figure.
    """
    # without either of the two there is no way to build the legend labels;
    # fail with a clear message before any figure is created
    if db and custom_labels is None and params is None:
        raise TypeError('either params or custom_labels must be provided to label the curves')

    fig, ax = plt.subplots(figsize=(10, 6))
    for i, (run_id, data) in enumerate(db.items()):
        # every run gets its own color from the default color cycle
        color = f'C{i}'
        # create the label
        if custom_labels is not None:
            label = custom_labels[run_id]
        else:
            settings = extract_settings_from_run_id(run_id, params)
            label = ', '.join([f'{param}={setting}' for param, setting in settings.items()])
        # statistics over the repetitions, computed once per run
        mean = data.mean(axis=1)
        std = data.std(axis=1)
        # raw data from each repetition
        for j in range(data.shape[1]):
            ax.plot(data[:, j], color=color, alpha=0.2, linewidth=0.5)
        # standard deviation
        ax.fill_between(np.arange(data.shape[0]), mean - std, mean + std, color=color, alpha=0.1)
        # mean
        ax.plot(mean, color=color, alpha=0.8, linewidth=2, label=label)
    # reference line for f_opt
    ax.axhline(0.950554, color='gray', linestyle='--', alpha=1, linewidth=1)
    # add text in upper left corner
    ax.text(0.05, 0.95, '$f^*$', transform=ax.transAxes, fontsize=14, verticalalignment='top', color='gray')

    # finishing touches
    ax.set_xlim(-1, 50)
    # the data is 0-indexed, the tick labels count generations from 1
    ax.set_xticks([0] + list(range(9, 50, 10)), [1] + list(range(10, 51, 10)))
    ax.set_xlabel('generation', fontsize=12)
    ax.set_ylabel(r'$f^*_{\mathrm{found}}$', fontsize=12)
    ax.legend(fontsize=12, loc='lower right')
    ax.set_title(title, weight='bold', fontsize=16)
    fig.tight_layout()

    return fig

### GA plots

In [None]:
# learning curves of the final GA runs with the raw values from the csv files;
# the figure is saved to the plots directory
data_GA_1 = get_data(DIRS['csv'] + 'GA_fin')
fig_GA_1 = plot_learning_curve(
    db = data_GA_1,
    title = r'Best $\mathbf{f}$ found per generation (GA)',
    params = ['M', 'L', 'SEL', 'MUT']
)
fig_GA_1.savefig(DIRS['plots'] + 'GA_f1.png', dpi=500)

In [None]:
# same GA runs, but with the running maximum of every repetition (best so far);
# the figure is saved to the plots directory
data_GA_2 = get_data(DIRS['csv'] + 'GA_fin', best_so_far=True)
fig_GA_2 = plot_learning_curve(
    db = data_GA_2,
    # raw string, like the other titles: '\m' is not a valid escape sequence
    title = r'Best $\mathbf{f}$ found so far (GA)',
    params = ['M', 'L', 'SEL', 'MUT'],
    best_so_far = True
)
fig_GA_2.savefig(DIRS['plots'] + 'GA_f2.png', dpi=500)

### ES plot

In [None]:
# learning curves of the final ES runs with the raw values from the csv files;
# the figure is saved to the plots directory
data_ES = get_data(DIRS['csv'] + 'ES_fin')
fig_ES = plot_learning_curve(
    db = data_ES,
    title = r'Best $\mathbf{f}$ found per generation (ES)',
    params = ['M', 'L', 'S', 'REC'],
    best_so_far = False
)
fig_ES.savefig(DIRS['plots'] + 'ES_f.png', dpi=500)

### Comparison plot

In [None]:
# best GA run is M=20 L=80, SEL=ts, MUT=u-0.04
# best ES run is M=20 L=80, S=0.1, REC=dg

# run_ids of the two selected runs, without the leading optimizer prefix
GA_id = 'P100_M20_L80_SELts_RECu_MUTu-0.04'
ES_id = 'P100_M20_L80_S0.1_T0.2_C3_RECdg_IS'

# NOTE(review): only the GA data is turned into a running maximum here, the ES data is
# used as stored, while the plot is requested with best_so_far=True - presumably the
# stored ES values are already non-decreasing; confirm.
GA_data = pd.read_csv(DIRS['csv'] + 'GA_fin' + os.sep + f'GA_{GA_id}.csv', index_col=0).cummax().values
ES_data = pd.read_csv(DIRS['csv'] + 'ES_fin' + os.sep + f'ES_{ES_id}.csv', index_col=0).values

# hand-written legend labels, used instead of labels derived from the run_id
GA_label = r'GA | $\mu=20$, $\lambda=80$, sel=ts, mut=u-0.04'
ES_label = r'ES | $\mu=20$, $\lambda=80$, $\sigma=0.1$, rec=dg'

# the keys are the literal strings 'GA_id' and 'ES_id' (not the ids above);
# they only have to match between the two dicts
comp_db = {'GA_id': GA_data, 'ES_id': ES_data}
comp_labels = {'GA_id': GA_label, 'ES_id': ES_label}

fig_comp = plot_learning_curve(
    db = comp_db,
    title = 'Comparison of GA and ES',
    params = None,
    best_so_far = True,
    custom_labels = comp_labels
)
fig_comp.savefig(DIRS['plots'] + 'GA_vs_ES.png', dpi=500)