In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from glob import glob

In [None]:
# Glob patterns selecting the per-iteration result files to analyse.
# Alternative selections that can be swapped in:
# paths = [
#     "../results/bo/ackley53/*_iters.csv",
#      "../results/*dim53Ackley*_iters.csv"
# ]
# paths = [
#     "../results/*/*/*_iters.csv"
# ]
paths = [
    "../results/*/*/*ackley*_iters.csv"
]
# Sort within each pattern so the row order of `res` does not depend on
# the (arbitrary) directory listing order returned by glob.
result_files = [f for p in paths for f in sorted(glob(p))]
if not result_files:
    # Without this guard pd.concat fails with an opaque
    # "No objects to concatenate" error that hides the real cause.
    raise FileNotFoundError(f"No result files matched any of the patterns: {paths}")
# ignore_index gives `res` a single unique row index instead of repeating
# every file's own 0..n-1 index.
res = pd.concat((pd.read_csv(f) for f in result_files), ignore_index=True)
res.head()

In [None]:
# Total time of an iteration = evaluation time + model time (element-wise per row).
res['iter_total_time'] = res['iter_eval_time'].add(res['iter_model_time'])

In [None]:
# Summarise the runs per (approach, problem, iteration): mean, standard
# deviation and the 5% / 95% quantiles of model time, total time and best fitness.
#
# 'mean' / 'std' are passed by name rather than as np.mean / np.std: pandas
# has deprecated NumPy callables here. It currently substitutes its own
# implementations (so 'std' is the sample standard deviation, ddof=1) and
# will call the NumPy function directly in the future, which would silently
# change the std values. The string names pin the current behaviour.
resm = res.groupby(['approach', 'problem', 'iter_idx']).agg(
    iter_model_time_mean=pd.NamedAgg(column='iter_model_time', aggfunc='mean'),
    iter_model_time_std=pd.NamedAgg(column='iter_model_time', aggfunc='std'),
    iter_model_time_q05=pd.NamedAgg(column='iter_model_time', aggfunc=lambda x: np.quantile(x, 0.05)),
    iter_model_time_q95=pd.NamedAgg(column='iter_model_time', aggfunc=lambda x: np.quantile(x, 0.95)),
    iter_total_time_mean=pd.NamedAgg(column='iter_total_time', aggfunc='mean'),
    iter_total_time_std=pd.NamedAgg(column='iter_total_time', aggfunc='std'),
    iter_total_time_q05=pd.NamedAgg(column='iter_total_time', aggfunc=lambda x: np.quantile(x, 0.05)),
    iter_total_time_q95=pd.NamedAgg(column='iter_total_time', aggfunc=lambda x: np.quantile(x, 0.95)),
    iter_best_fitness_mean=pd.NamedAgg(column='iter_best_fitness', aggfunc='mean'),
    iter_best_fitness_std=pd.NamedAgg(column='iter_best_fitness', aggfunc='std'),
    iter_best_fitness_q05=pd.NamedAgg(column='iter_best_fitness', aggfunc=lambda x: np.quantile(x, 0.05)),
    iter_best_fitness_q95=pd.NamedAgg(column='iter_best_fitness', aggfunc=lambda x: np.quantile(x, 0.95)),
)
# Turn the group keys back into ordinary columns.
resm = resm.reset_index()
# Running sum of the mean per-iteration total time within each
# (approach, problem). groupby sorts its keys by default, so the rows of
# each group are already in ascending iter_idx order here.
resm['cum_total_time_mean'] = resm.groupby(['approach', 'problem'])['iter_total_time_mean'].cumsum()

resm.head()

In [None]:
# For every problem, walk through all (approach, iteration) rows in order of
# mean cumulative time and keep only the rows that hold the lowest mean
# fitness seen so far for that problem. Each kept row is then given a time
# interval [best_start, best_end].
resmbest = resm.sort_values(by='cum_total_time_mean')
# Earliest / latest mean cumulative time per problem, over all approaches.
minima = resm.groupby('problem')['cum_total_time_mean'].min()
maxima = resm.groupby('problem')['cum_total_time_mean'].max()

# Running minimum of the mean best fitness per problem, in time order.
resmbest['temporal_best_mean'] = resmbest.groupby('problem')['iter_best_fitness_mean'].cummin()
# Keep the rows that equal the running minimum. The explicit .copy() makes
# the result an independent frame, so the column assignments below are not
# writes into a slice of the sorted frame.
resmbest = resmbest[resmbest['temporal_best_mean'] == resmbest['iter_best_fitness_mean']].copy()
# minima / maxima are indexed by problem label; map looks each row's problem
# up and returns the values aligned with resmbest's own index.
resmbest['ct_min'] = resmbest['problem'].map(minima)
resmbest['ct_max'] = resmbest['problem'].map(maxima)

# Time of the previous / next kept row of the same problem (NaN at the ends).
resmbest['cum_total_time_mean_prev'] = resmbest.groupby('problem')['cum_total_time_mean'].shift(1)
resmbest['cum_total_time_mean_next'] = resmbest.groupby('problem')['cum_total_time_mean'].shift(-1)

# An interval starts halfway between this row and the previous kept row;
# the first kept row of a problem starts at the problem's earliest time.
resmbest['best_start'] = np.nanmean([resmbest['cum_total_time_mean'], resmbest['cum_total_time_mean_prev']], axis=0)
starts = resmbest['cum_total_time_mean_prev'].isna()
# .loc writes into resmbest directly; chained indexing
# (resmbest['best_start'][starts] = ...) may write to a temporary instead.
resmbest.loc[starts, 'best_start'] = resmbest.loc[starts, 'ct_min']

# An interval ends halfway between this row and the next kept row;
# the last kept row of a problem ends at the problem's latest time.
resmbest['best_end'] = np.nanmean([resmbest['cum_total_time_mean'], resmbest['cum_total_time_mean_next']], axis=0)
ends = resmbest['cum_total_time_mean_next'].isna()
resmbest.loc[ends, 'best_end'] = resmbest.loc[ends, 'ct_max']
resmbest.head(100)

In [None]:
# Pair each row's (best_start, best_end), then explode the pairs so that every
# interval contributes two rows (its start and its end) in a float column 'x'.
interval_bounds = list(zip(resmbest['best_start'], resmbest['best_end']))
resmbests = (
    pd.DataFrame({
        'problem': resmbest['problem'],
        'approach': resmbest['approach'],
        'x': interval_bounds,
    })
    .explode('x')
    .astype({'x': float})
)

In [None]:
# One figure per problem: mean best fitness against mean cumulative time for
# every approach, an optional uncertainty band, and a strip along the bottom
# of the axes marking the intervals stored in resmbests for each approach.
resps = resm.groupby('problem')
resbestps = {p: d for (p, d) in resmbests.groupby('problem')}


# plt.get_cmap replaces matplotlib.cm.get_cmap, which is deprecated and
# removed in recent Matplotlib releases.
colors = plt.get_cmap('tab10')
uniq_approaches = resm['approach'].unique()
n_approaches = len(uniq_approaches)
# Spread the approaches over the colormap range [0, 1]; the max() guard
# avoids a division by zero when there is only a single approach.
color_denominator = max(n_approaches - 1, 1)
approachtocolor = {a: colors(i / color_denominator) for (i, a) in enumerate(uniq_approaches)}
# Band drawn around each curve: 'std' (mean +/- one standard deviation),
# 'q5-95' (5% to 95% quantile range); any other value disables the band.
# margintype = 'std'
margintype = 'q5-95'

for problem, respsi in resps:
    fig, ax = plt.subplots()

    ax.set_title(problem)
    ax.set_xscale('log')
    shift_fitness = "SteelFoldPlate" in problem
    if shift_fitness:
        ax.set_yscale('log')
    ax.set_ylabel("Fitness")
    ax.set_xlabel("Time (s)")

    # Fitness for these problems is plotted shifted by +1.0 on a log axis.
    # The same offset must be applied to the curve and to its band.
    y_offset = 1.0 if shift_fitness else 0.0

    bests = resbestps[problem]

    for approach, sr in respsi.groupby('approach'):
        # x = np.cumsum(sr['iter_model_time_mean'])
        x = np.cumsum(sr['iter_total_time_mean'])
        y = sr['iter_best_fitness_mean'] + y_offset

        color = approachtocolor[approach]
        ax.plot(x, y, label=approach, color=color)

        # Only the fitness (y) spread is drawn; no band is drawn along x.
        if margintype == 'std':
            ystd = sr['iter_best_fitness_std']
            ymin = y - ystd
            ymax = y + ystd
        elif margintype == 'q5-95':
            # The quantiles are unshifted fitness values, so shift them the
            # same way as the mean curve to keep the band aligned with it.
            ymin = sr['iter_best_fitness_q05'] + y_offset
            ymax = sr['iter_best_fitness_q95'] + y_offset
        else:
            ymin = ymax = None

        if ymin is not None:
            ax.fill_between(x, ymin, ymax, alpha=0.1, color=color)

        # Strip over the bottom 2.5% of the axes height (x in data
        # coordinates, y in axes coordinates via the x-axis transform),
        # filled where the interval rows belong to this approach.
        ax.fill_between(np.array(bests['x']), 0, 0.025, color=color, alpha=0.5, where=np.array(bests['approach'] == approach), interpolate=False, transform=ax.get_xaxis_transform())

    ax.legend()