# Explore

In this notebook we create the distribution plots of the runs obtained in our `experiment` notebook.

Imports

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from utils import get_directories

In [None]:
# get_directories receives the absolute path <cwd>/explore.ipynb; the mapping it returns
# is indexed with 'csv' and 'plots' further down
# NOTE(review): presumably it resolves the project folders relative to this notebook -- confirm in utils
DIRS = get_directories(
    os.path.join(os.path.abspath(''), 'explore.ipynb')
)

This function will be used to create all of the following plots.

In [None]:
def plot_split(dfs: list[pd.DataFrame], headers: list[str], title: str) -> plt.Figure:
    """
    Plot the "raw" results of 2 to 4 dataframes side by side, each overlaid with a histogram.

    Every dataframe gets its own subplot: its `max_avg_value` column is drawn as a scatter of
    '|' markers against the dataframe index (values in reversed row order), and a horizontal,
    semi-transparent histogram of the same column is laid over it on a twin axis.
    2 & 3 dfs are shown in one row (shared y-axis), 4 result in a 2x2 grid.
    All subplots use the same y-limits and the same 50 histogram bins, both derived from the
    combined value range of all dataframes.

    Args:
        dfs: 2, 3 or 4 dataframes, each providing a `max_avg_value` column.
        headers: titles of the subplots, in the same order as `dfs`.
        title: title of the whole figure.

    Returns:
        The created figure.

    Raises:
        AssertionError: if not 2-4 dataframes are given or the number of headers differs.
        ValueError: if the dataframes contain no `max_avg_value` data at all.
    """
    assert 1 < len(dfs) < 5, "Please only give 2, 3 or 4 dataframes"
    assert len(dfs) == len(headers), "Please give as many headers as dataframes"
    ndfs = len(dfs)

    # common value range of all dataframes; used for the y-limits and the histogram bins
    all_values = pd.concat(dfs, ignore_index=True).max_avg_value
    lower, upper = all_values.min(), all_values.max()
    if pd.isna(lower) or pd.isna(upper):
        # fail early with a clear message instead of a NaN axis-limit error from matplotlib
        raise ValueError("Cannot plot: the given dataframes contain no max_avg_value data")

    # small margin to avoid clipping; if all values are identical fall back to a fixed
    # margin, otherwise the limits collapse and the bins would have zero width
    span = upper - lower
    delta = span * 0.01 if span > 0 else 0.01
    bounds = (lower - delta, upper + delta)

    # 50 equal-width bins that include the upper bound; np.arange would stop one step
    # short of it and silently drop the highest values from the histogram
    bins = np.linspace(bounds[0], bounds[1], 51)

    if ndfs != 4:
        fig, axes = plt.subplots(1, ndfs, figsize=(5*ndfs, 5), sharey=True)
    else:
        fig, axes = plt.subplots(2, 2, figsize=(10, 10))
        # flatten the 2x2 grid (row by row) so we can loop over it
        axes = axes.flatten()

    for i, (df, ax, header) in enumerate(zip(dfs, axes, headers)):
        color = f'C{i}'

        # observations: values in reversed row order, plotted against the index
        ax.scatter(df.index, df.max_avg_value.to_numpy()[::-1], c=color, marker='|', s=10)
        ax.set_title(header)
        if i == 0:
            ax.set_ylabel('validation accuracy')
        ax.set_xlabel('frequency / ranking')
        ax.set_xticks([])
        ax.set_ylim(bounds)

        # histogram on a twin axis, so its counts get an x-scale independent of the scatter
        ax2 = ax.twinx()
        ax2.hist(df.max_avg_value, bins=bins, orientation='horizontal', color=color, alpha=0.5)
        ax2.grid(False)
        ax2.set_yticks([])
        ax2.set_xticks([])
        ax2.set_title('')
        ax2.set_ylim(bounds)

    fig.suptitle(title, fontsize=16, weight='bold')
    fig.tight_layout()
    return fig

---

## Genetic Algorithm

In [None]:
# results of the GA runs; the first CSV column is used as the index
df_GA = pd.read_csv(
    os.path.join(DIRS['csv'], 'GA_exp.csv'),
    index_col=0,
)

In [None]:
# mu+lambda vs. mu,lambda
# runs with lambda == pop_size are treated as mu,lambda; all other runs as mu+lambda
df_comma = df_GA[df_GA['lambda'] == df_GA['pop_size']]
df_plus = df_GA[df_GA['lambda'] != df_GA['pop_size']]
fig = plot_split(
    [df_comma, df_plus],
    ['\u03bc,\u03bb', '\u03bc+\u03bb'],
    '\u03bc,\u03bb vs. \u03bc+\u03bb in GA',
)
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'elitism_GA.png'), dpi=300)

In [None]:
# pop_size 40 vs 100
df_40 = df_GA[df_GA['pop_size'] == 40]
df_100 = df_GA[df_GA['pop_size'] == 100]
fig = plot_split([df_40, df_100], ['40', '100'], 'population size in GA')
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'pop_size_GA.png'), dpi=300)

In [None]:
# different selections (rw, ts, rk, su)
df_rw = df_GA[df_GA['selection'] == 'rw']
df_ts = df_GA[df_GA['selection'] == 'ts']
df_rk = df_GA[df_GA['selection'] == 'rk']
df_su = df_GA[df_GA['selection'] == 'su']
fig = plot_split(
    [df_rw, df_ts, df_rk, df_su],
    ['roulette wheel', 'tournament', 'rank', 'stochastic universal'],
    'selection in GA',
)
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'selection_GA.png'), dpi=300)

In [None]:
# different mutation methods: uniform ('u(...)' entries) vs. bitflip ('b(...)' entries)
# only the explicitly listed mutation settings are included in either group
df_u = df_GA[df_GA['mutation'].isin(['u(0.001)', 'u(0.005)', 'u(0.1)'])]
df_b = df_GA[df_GA['mutation'].isin(['b(1)', 'b(2)', 'b(3)'])]
fig = plot_split([df_u, df_b], ['uniform', 'bitflip'], 'mutation in GA')
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'mutation_GA.png'), dpi=300)

In [None]:
# different mutation rates of the uniform mutation (0.001, 0.005, 0.1)
df_mu1 = df_GA[df_GA['mutation'] == 'u(0.001)']
df_mu5 = df_GA[df_GA['mutation'] == 'u(0.005)']
df_mu10 = df_GA[df_GA['mutation'] == 'u(0.1)']
fig = plot_split(
    [df_mu1, df_mu5, df_mu10],
    ['0.001', '0.005', '0.1'],
    'mutation rate in GA',
)
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'mutation_rate_GA.png'), dpi=300)

In [None]:
# different bit flips: the 'b(1)', 'b(2)' and 'b(3)' mutation settings
df_b1 = df_GA[df_GA['mutation'] == 'b(1)']
df_b2 = df_GA[df_GA['mutation'] == 'b(2)']
df_b3 = df_GA[df_GA['mutation'] == 'b(3)']
fig = plot_split([df_b1, df_b2, df_b3], ['1', '2', '3'], 'bit flips in GA')
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'bit_flips_GA.png'), dpi=300)

In [None]:
# different recombinations (kp1, kp2, kp3, u)
df_kp1 = df_GA[df_GA['recombination'] == 'kp(1)']
df_kp2 = df_GA[df_GA['recombination'] == 'kp(2)']
df_kp3 = df_GA[df_GA['recombination'] == 'kp(3)']
# note: this rebinds df_u, which the mutation cell above used for the uniform-mutation runs
df_u = df_GA[df_GA['recombination'] == 'u']
fig = plot_split(
    [df_kp1, df_kp2, df_kp3, df_u],
    ['kp(1)', 'kp(2)', 'kp(3)', 'uniform'],
    'recombination in GA',
)
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'recombination_GA.png'), dpi=300)

---

## Evolutionary Strategy

In [None]:
# results of the ES runs; the first CSV column is used as the index
# (path built with os.path.join, like the GA results, so a missing trailing separator
# in DIRS['csv'] cannot break it)
df_ES = pd.read_csv(os.path.join(DIRS['csv'], 'ES_exp.csv'), index_col=0)

In [None]:
# remove "extreme" outliers: keep only runs with max_avg_value above 0.92
# NOTE(review): this comment previously said 0.93 while the code filters at 0.92 -- confirm the intended threshold
df_ES = df_ES[df_ES.max_avg_value > 0.92]

In [None]:
# mu+lambda vs. mu,lambda
# runs with lambda == pop_size are treated as mu,lambda; all other runs as mu+lambda
df_comma = df_ES[df_ES['lambda'] == df_ES['pop_size']]
df_plus = df_ES[df_ES['lambda'] != df_ES['pop_size']]
fig = plot_split(
    [df_comma, df_plus],
    ['\u03bc,\u03bb', '\u03bc+\u03bb'],
    '\u03bc,\u03bb vs. \u03bc+\u03bb in ES',
)
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'elitism_ES1.png'), dpi=300)

In [None]:
# pop_size 40 vs 100
# compare numerically (as in the GA section): if read_csv parsed the column as numbers,
# matching against the strings '40'/'100' would select no rows; to_numeric also copes
# with a string-typed column
es_pop_size = pd.to_numeric(df_ES['pop_size'], errors='coerce')
df_40 = df_ES[es_pop_size == 40]
df_100 = df_ES[es_pop_size == 100]
fig = plot_split([df_40, df_100], ['40', '100'], 'population size in ES')
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'pop_size_ES1.png'), dpi=300)

In [None]:
# different taus (0.1, 0.2, 0.5, 0.99)
# compare numerically: if read_csv parsed the column as floats, matching against the
# strings '0.1', ... would select no rows; to_numeric also copes with a string-typed
# column and np.isclose guards against float round-off (NaN never matches)
es_tau = pd.to_numeric(df_ES['tau'], errors='coerce')
df_tau1 = df_ES[np.isclose(es_tau, 0.1)]
df_tau2 = df_ES[np.isclose(es_tau, 0.2)]
df_tau5 = df_ES[np.isclose(es_tau, 0.5)]
df_tau99 = df_ES[np.isclose(es_tau, 0.99)]
fig = plot_split(
    [df_tau1, df_tau2, df_tau5, df_tau99],
    ['0.1', '0.2', '0.5', '0.99'],
    r'$\mathbf{\tau}$ in ES',
)
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'tau_ES1.png'), dpi=300)

In [None]:
# different sigmas (0.01, 0.1, 0.5)
# compare numerically: if read_csv parsed the column as floats, matching against the
# strings '0.01', ... would select no rows; to_numeric also copes with a string-typed
# column and np.isclose guards against float round-off (NaN never matches)
es_sigma = pd.to_numeric(df_ES['sigma'], errors='coerce')
df_sig1 = df_ES[np.isclose(es_sigma, 0.01)]
df_sig2 = df_ES[np.isclose(es_sigma, 0.1)]
df_sig5 = df_ES[np.isclose(es_sigma, 0.5)]
fig = plot_split(
    [df_sig1, df_sig2, df_sig5],
    ['0.01', '0.1', '0.5'],
    r'$\mathbf{\sigma}$ in ES',
)
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'sigma_ES1.png'), dpi=300)

In [None]:
# different recombination methods (d, i, dg, ig)
df_d = df_ES[df_ES['recombination'] == 'd']
df_i = df_ES[df_ES['recombination'] == 'i']
df_dg = df_ES[df_ES['recombination'] == 'dg']
df_ig = df_ES[df_ES['recombination'] == 'ig']
fig = plot_split(
    [df_d, df_i, df_dg, df_ig],
    ['discrete', 'intermediate', 'discrete global', 'intermediate global'],
    'recombination in ES',
)
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'recombination_ES1.png'), dpi=300)

In [None]:
# individual sigmas
# compare on the string form: if read_csv parsed the column as booleans, matching the raw
# column against the strings 'True'/'False' would select no rows; astype(str) handles
# both a boolean and a string-typed column
es_individual_sigmas = df_ES['individual_sigmas'].astype(str)
df_isig = df_ES[es_individual_sigmas == 'True']
df_no_isig = df_ES[es_individual_sigmas == 'False']
fig = plot_split([df_isig, df_no_isig], ['True', 'False'], 'individual sigmas in ES')
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'individual_sigmas_ES1.png'), dpi=300)

In [None]:
# chunk sizes (3, 7)
# compare numerically: if read_csv parsed the column as numbers, matching against the
# strings '3'/'7' would select no rows; to_numeric also copes with a string-typed column
# (non-numeric or missing entries become NaN and match neither group)
es_chunk_size = pd.to_numeric(df_ES['chunk_size'], errors='coerce')
df_chsz3 = df_ES[es_chunk_size == 3]
df_chsz7 = df_ES[es_chunk_size == 7]
fig = plot_split([df_chsz3, df_chsz7], ['3', '7'], 'chunk size in ES')
# os.path.join works whether or not DIRS['plots'] ends with a path separator
fig.savefig(os.path.join(DIRS['plots'], 'chunk_size_ES1.png'), dpi=300)