In [None]:
import os
from itertools import product

import pandas as pd
import matplotlib.pyplot as plt
from nasbench.api import NASBench

from main import init_ipynb, main
from utils import get_directories, ProgressBar

In [None]:
# Load the NAS-Bench data from the local tfrecord file (path is relative to the
# working directory). The resulting object is handed to init_ipynb() for every
# experiment run further down in this notebook.
NB = NASBench('../data/nasbench_only108.tfrecord')

In [None]:
# Look up the project directories (get_directories is given the path of
# 'experiment.ipynb' in the current working directory) and add a dedicated
# sub-directory that collects the per-run CSV files of this experiment.
DIRS = get_directories(os.path.join(os.path.abspath(''), 'experiment.ipynb'))
exp_id = 'GA_exp1'
DIRS['csv_exp'] = DIRS['csv'] + exp_id + os.sep
# makedirs(exist_ok=True) is safe to re-run and also creates missing parent
# directories, unlike the exists()/mkdir() pair it replaces.
os.makedirs(DIRS['csv_exp'], exist_ok=True)

# Print every resolved directory so a wrong location is spotted before running.
for n, p in DIRS.items():
    print(f'{n}: {p}')

In [None]:
# Baseline GA configuration. The grid search copies this mapping and overrides
# individual entries for each run.
default_args_GA = {
    'optimizer': 'GA',
    'population_size': 100,
    'mu_': 40,
    'lambda_': 60,
    'budget': 5000,
    'recombination': 'kp',
    'selection': 'rw',
    'mutation': 'uniform',
    'xp': 1,
    'mut_r': None,
    'mut_b': None,
    'run_id': None,
    'verbose': 0,
    'seed': 42,
    'repetitions': 20,
    'log': False,
}

In [None]:
# Hyper-parameter grid of the GA experiment.
selections = ['rw', 'ts', 'rk', 'su']
# (mutation kind, parameter): 'u' entries fill 'mut_r', 'b' entries fill 'mut_b'.
mutations = [('u', r) for r in (0.001, 0.005, 0.1)] + [('b', b) for b in (1, 2, 3)]
# (recombination kind, value stored in 'xp'); 'u' carries no parameter.
recombinations = [('kp', k) for k in (1, 2, 3)] + [('u', None)]
# (population_size, mu_, lambda_) triples.
pop_divisions = [
    (100, 40, 60), (100, 40, 100), (100, 20, 80), (100, 20, 100),
    (40, 20, 20), (40, 20, 40), (40, 10, 30), (40, 10, 40),
]

# Full Cartesian product of all settings above.
combinations = list(product(selections, mutations, recombinations, pop_divisions))
n_combs = len(combinations)
progress = ProgressBar(n_combs, exp_id)

for i, (sel, mut, rec, (ps, mu, lm)) in enumerate(combinations):
    mut_kind, mut_value = mut
    rec_kind, rec_value = rec

    # The run id encodes the whole configuration; '(None)' is stripped so a
    # recombination without parameter appears as plain 'u'.
    run_id = f'GA_{ps}_{mu}_{lm}_{sel}_{mut[0]}({mut[1]})_{rec[0]}({rec[1]})'.replace('(None)', '')

    # Start from the defaults and override only what this combination changes.
    args = {
        **default_args_GA,
        'selection': sel,
        'mutation': mut_kind,
        ('mut_r' if mut_kind == 'u' else 'mut_b'): mut_value,
        'recombination': rec_kind,
        'xp': rec_value,
        'population_size': ps,
        'mu_': mu,
        'lambda_': lm,
        'run_id': run_id,
    }

    # presumably init_ipynb() installs the configuration that main() then runs
    # with — confirm against main.py
    init_ipynb(NB, args)
    main(save_to=DIRS['csv_exp'])

    progress(i)

In [None]:
# Build one summary table with a row per GA run found in the experiment directory.
ga_columns = ['max_avg_value', 'pop_size', 'mu', 'lambda', 'selection', 'mutation', 'recombination']
ga_run_ids, ga_rows = [], []

for file in sorted(os.listdir(DIRS['csv_exp'])):
    # Only per-run files named 'GA_<...>.csv' can be parsed below; anything else
    # (other experiments, stray files) is skipped instead of being mis-trimmed.
    if not (file.startswith('GA_') and file.endswith('.csv')):
        continue
    df_ = pd.read_csv(os.path.join(DIRS['csv_exp'], file), index_col=0)
    # Row-wise mean over all columns, taken at the last row of the file.
    # presumably columns are repetitions and rows are evaluation steps — confirm
    # against what main() writes.
    max_val = df_.mean(axis=1).iloc[-1]
    run_id = file[len('GA_'):-len('.csv')]
    # The remaining parts stay strings, exactly as encoded in the file name.
    ps, mu, lm, sel, mut, rec = run_id.split('_')
    ga_run_ids.append(run_id)
    ga_rows.append([max_val, ps, mu, lm, sel, mut, rec])

# Construct the frame once instead of enlarging it row by row, and pin the value
# column to float so sorting and histogram plotting never see an object column.
df_GA = pd.DataFrame(ga_rows, index=pd.Index(ga_run_ids, name='run_id'), columns=ga_columns)
df_GA['max_avg_value'] = df_GA['max_avg_value'].astype(float)

# sort by highest value
df_GA = df_GA.sort_values(by='max_avg_value', ascending=False)

# save to csv with same name as dir where all individual csv files are stored
df_GA.to_csv(os.path.join(DIRS['csv'], f'{exp_id}.csv'))

In [None]:
# Display the aggregated GA summary table (sorted by max_avg_value, best first).
df_GA

Proof of concept (POC): $\mu,\lambda$ vs. $\mu+\lambda$ selection

In [None]:
def plot_elitism(df: pd.DataFrame, title: str) -> plt.Figure:
    """Plot the ranked ``max_avg_value`` of all runs, coloured by selection scheme.

    One tick mark is drawn per run (blue: mu,lambda / orange: mu+lambda). On a
    twin x-axis that shares the y-axis, a horizontal histogram of the values is
    overlaid for each of the two schemes.

    Args:
        df: Summary table with the columns ``max_avg_value``, ``pop_size`` and
            ``lambda``. Rows are drawn in reverse order, so a frame sorted
            best-first produces an ascending ranking from left to right.
            The frame itself is not modified.
        title: Title shown above the plot.

    Returns:
        The matplotlib figure holding the plot.
    """
    fig, ax = plt.subplots(figsize=(5, 5))

    # Reverse whole rows (not just the values) so that every run keeps its own
    # colour; reversing only the y-values would pair colours with the wrong runs.
    ranked = df.iloc[::-1].copy()
    ranked['max_avg_value'] = pd.to_numeric(ranked['max_avg_value'])

    # observations
    # NOTE(review): runs with pop_size == lambda are tagged as mu+lambda
    # (elitism). With plus-selection one would rather expect
    # pop_size == mu + lambda — confirm against the optimizer implementation.
    ranked['color'] = 'tab:blue'  # μ,λ
    ranked.loc[ranked['pop_size'] == ranked['lambda'], 'color'] = 'tab:orange'  # μ+λ (elitism)
    ax.scatter(
        range(len(ranked)),
        ranked['max_avg_value'].tolist(),
        c=ranked['color'].tolist(),
        marker='|',
        s=50,
        alpha=1,
    )
    ax.set_ylabel('Validation accuracy')
    ax.set_xlabel('Frequency / Ranking')
    ax.set_xticks([])
    ax.set_title(title, weight='bold')

    # distribution
    twax = ax.twiny()
    # Raw strings keep the LaTeX backslashes from being read as Python escapes.
    schemes = (
        ('tab:blue', r'$\mu,\lambda$'),
        ('tab:orange', r'$\mu+\lambda$'),
    )
    for color, label in schemes:
        subset = ranked[ranked['color'] == color]
        if subset.empty:
            # Nothing to draw for this scheme.
            continue
        subset.hist(
            column='max_avg_value',
            ax=twax,
            color=color,
            alpha=0.5,
            bins=100,
            orientation='horizontal',
            label=label,
        )
    # DataFrame.hist switches the grid on and titles the axis with the column
    # name; undo both so only the scatter's decorations remain.
    twax.grid(False)
    twax.set_ylabel('')
    twax.set_xticks([])
    twax.set_title('')
    twax.legend()

    fig.tight_layout()
    return fig

In [None]:
# Raw string: the LaTeX commands (\mathbf, \mu, \lambda) must reach matplotlib
# unchanged instead of being parsed as invalid Python escape sequences.
fig_elitism_GA = plot_elitism(df_GA, title=r'$\mathbf{\mu,\lambda}$ vs $\mathbf{\mu+\lambda}$ in GA')
fig_elitism_GA.savefig(os.path.join(DIRS['plots'], f'{exp_id}_elitism.png'), dpi=300)

In [None]:
# Switch to the ES experiment. This overwrites exp_id and DIRS['csv_exp'], so
# cells that read them refer to the ES experiment from here on.
exp_id = 'ES_exp1'
DIRS['csv_exp'] = DIRS['csv'] + exp_id + os.sep
# makedirs(exist_ok=True) is safe to re-run and also creates missing parent
# directories, unlike the exists()/mkdir() pair it replaces.
os.makedirs(DIRS['csv_exp'], exist_ok=True)

# Print every resolved directory so a wrong location is spotted before running.
for n, p in DIRS.items():
    print(f'{n}: {p}')

In [None]:
# Baseline ES configuration. The grid search copies this mapping and overrides
# individual entries for each run.
default_args_ES = {
    'optimizer': 'ES',
    'population_size': 100,
    'mu_': 40,
    'lambda_': 60,
    'budget': 5000,
    'recombination': 'd',
    'tau_': 0.1,
    'sigma_': 0.01,
    'chunk_size': 3,
    'individual_sigmas': False,
    'run_id': None,
    'verbose': 0,
    'seed': 42,
    'repetitions': 20,
    'log': False,
}

In [None]:
# Hyper-parameter grid of the ES experiment.
recombinations = ['d', 'i', 'dg', 'ig']
taus = [0.001, 0.01, 0.1, 0.99]
sigmas = [0.01, 0.1, 0.5]
chunk_sizes = [3, 7]
individual_sigmas = [True, False]
# (population_size, mu_, lambda_) triples.
pop_divisions = [
    (100, 40, 60), (100, 40, 100), (100, 20, 80), (100, 20, 100),
    (40, 20, 20), (40, 20, 40), (40, 10, 30), (40, 10, 40),
]

# Full Cartesian product of all settings above.
combinations = list(product(recombinations, taus, sigmas, chunk_sizes, individual_sigmas, pop_divisions))
n_combs = len(combinations)
progress = ProgressBar(n_combs, exp_id)

for i, combo in enumerate(combinations):
    rec, tau, sig, chsz, isig, (ps, mu, lm) = combo

    # The run id encodes the whole configuration of this run.
    run_id = f'ES_{ps}_{mu}_{lm}_{rec}_{sig}_{tau}_{chsz}_{isig}'

    # Start from the defaults and override only what this combination changes.
    args = dict(
        default_args_ES,
        recombination=rec,
        tau_=tau,
        sigma_=sig,
        chunk_size=chsz,
        individual_sigmas=isig,
        population_size=ps,
        mu_=mu,
        lambda_=lm,
        run_id=run_id,
    )

    # presumably init_ipynb() installs the configuration that main() then runs
    # with — confirm against main.py
    init_ipynb(NB, args)
    main(save_to=DIRS['csv_exp'])

    progress(i)

In [None]:
# Build one summary table with a row per ES run found in the experiment directory.
es_columns = ['max_avg_value', 'pop_size', 'mu', 'lambda', 'recombination', 'sigma', 'tau', 'chunk_size', 'individual_sigmas']
es_run_ids, es_rows = [], []

for file in sorted(os.listdir(DIRS['csv_exp'])):
    # Only per-run files named 'ES_<...>.csv' can be parsed below; anything else
    # (other experiments, stray files) is skipped instead of being mis-trimmed.
    if not (file.startswith('ES_') and file.endswith('.csv')):
        continue
    df_ = pd.read_csv(os.path.join(DIRS['csv_exp'], file), index_col=0)
    # Row-wise mean over all columns, taken at the last row of the file.
    # presumably columns are repetitions and rows are evaluation steps — confirm
    # against what main() writes.
    max_val = df_.mean(axis=1).iloc[-1]
    run_id = file[len('ES_'):-len('.csv')]
    # The remaining parts stay strings, exactly as encoded in the file name.
    ps, mu, lm, rec, sig, tau, chsz, isig = run_id.split('_')
    es_run_ids.append(run_id)
    es_rows.append([max_val, ps, mu, lm, rec, sig, tau, chsz, isig])

# Construct the frame once instead of enlarging it row by row, and pin the value
# column to float so sorting and histogram plotting never see an object column.
df_ES = pd.DataFrame(es_rows, index=pd.Index(es_run_ids, name='run_id'), columns=es_columns)
df_ES['max_avg_value'] = df_ES['max_avg_value'].astype(float)

# sort by highest value
df_ES = df_ES.sort_values(by='max_avg_value', ascending=False)

# save to csv with same name as dir where all individual csv files are stored
df_ES.to_csv(os.path.join(DIRS['csv'], f'{exp_id}.csv'))

In [None]:
# Raw string: the LaTeX commands (\mathbf, \mu, \lambda) must reach matplotlib
# unchanged instead of being parsed as invalid Python escape sequences.
fig_elitism_ES = plot_elitism(df_ES, title=r'$\mathbf{\mu,\lambda}$ vs $\mathbf{\mu+\lambda}$ in ES')
fig_elitism_ES.savefig(os.path.join(DIRS['plots'], f'{exp_id}_elitism.png'), dpi=300)