In [None]:
import numpy as np
import os
# Specify the directory of the data.
datadir = ''
import pandas as pd
import yaml
import json
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from mpl_toolkits.axes_grid1 import make_axes_locatable
%matplotlib widget
import seaborn as sns
sns.set()

In [None]:
# Collect the run sub-directories located directly beneath `datadir`.
# Only names made of decimal digits are kept, so a stray folder (for example
# `.ipynb_checkpoints`) cannot make the integer sort below raise.
run_names = [name for name in next(os.walk(datadir))[1] if name.isdecimal()]
# Sort numerically (so that 10 comes after 9). Sorting the bare folder names,
# rather than splitting the joined path on '/', keeps this independent of the
# platform's path separator.
run_names.sort(key=int)
dirs = [os.path.join(datadir, name) for name in run_names]

In [None]:
# Peek at the first run: load its stored result arrays and parse its Hydra
# settings file.
first_run = dirs[0]
a = np.load(os.path.join(first_run, 'result.npz'))
conf = os.path.join(first_run, '.hydra', 'hydra.yaml')
with open(conf, mode="r") as file:
    d = yaml.safe_load(file)

### Read experiment results

In [None]:
import pdb
def read_results(dirs, sweeped):
    """Collect the stored metrics of every run directory, grouped by algorithm.

    For each directory the function parses ``.hydra/hydra.yaml``, loads
    ``result.npz`` and ``aresult.npz`` and appends every array they contain to
    a per-algorithm list. Arrays from ``aresult.npz`` are stored under the
    algorithm name prefixed with ``'A'``.

    The algorithm name is the text after the last ``=`` of the last task
    override containing ``'alg='``. When no such override exists, it is the
    last dotted component of ``alg._target_`` in ``.hydra/config.yaml``.

    Parameters
    ----------
    dirs : iterable of str
        Run directories, visited in the given order.
    sweeped : object
        Not used by the function; kept so that existing calls keep working.

    Returns
    -------
    dict
        ``{name: {key: [array, ...]}}`` with one list entry per loaded run.

    Notes
    -----
    A run whose result files cannot be loaded is skipped with a warning rather
    than silently. Only ``Exception`` subclasses are caught, so
    ``KeyboardInterrupt`` and ``SystemExit`` still propagate.
    """
    import warnings  # local import: keeps this cell independent of the import cell

    results = {}
    for path in dirs:
        hydra_dir = os.path.join(path, '.hydra')
        with open(os.path.join(hydra_dir, 'hydra.yaml'), "r") as file:
            hydra_conf = yaml.safe_load(file)
        try:
            # The context managers close the .npz archives once their arrays
            # have been copied into plain dicts.
            with np.load(os.path.join(path, 'result.npz')) as archive:
                data = dict(archive)
            with np.load(os.path.join(path, 'aresult.npz')) as archive:
                adata = dict(archive)
        except Exception as err:
            warnings.warn(
                "Skipping {}: could not load results ({!r})".format(path, err),
                stacklevel=2,
            )
            continue

        # The last matching override wins.
        alg = None
        for override in hydra_conf['hydra']['overrides']['task']:
            if 'alg=' in override:
                alg = override.split("=")[-1]
        if alg is None:
            # No override given: fall back to the composed config of the run.
            with open(os.path.join(hydra_dir, 'config.yaml'), "r") as file:
                run_conf = yaml.safe_load(file)
            alg = run_conf['alg']['_target_'].split('.')[-1]
        aalg = 'A' + alg

        if alg in results:
            for key, val in data.items():
                results[alg][key].append(val)
            for key, val in adata.items():
                results[aalg][key].append(val)
        else:
            results[alg] = {key: [val] for key, val in data.items()}
            results[aalg] = {key: [val] for key, val in adata.items()}
    return results

### Different fractions

In [None]:
# Gather the metrics of all runs, grouped per algorithm; "fraction" is passed
# as the `sweeped` argument.
# Post-mortem debugging is deliberately not switched on here: it applies to the
# whole kernel and makes an unattended "Restart & Run All" hang at the first
# error. Run `%pdb on` by hand when debugging interactively.
results = read_results(dirs, "fraction")

In [None]:
# Stack the per-run lists of every algorithm into arrays. `m` is the length of
# the first 'fraction' entry of AHGP. Algorithms that have no 'fraction' entry
# get each value repeated `m` times and reshaped to rows of length `m`.
alg = 'AHGP'
m = len(results[alg]['fraction'][0])
d_results = {}
for alg, per_metric in results.items():
    has_fraction = "fraction" in per_metric
    stacked = {}
    for metric, runs in per_metric.items():
        values = np.array(runs)
        if not has_fraction:
            values = values.repeat(m).reshape(-1, m)
        stacked[metric] = values
    d_results[alg] = stacked

### Save results to csv
This cell writes the data needed to recreate Figure 2 of the paper to a .csv file -- the figure itself is constructed in pgfplots.

In [None]:
# Assemble the CSV columns: the fraction grid ('x'), the 'm' arrays of AHGP and
# of the algorithm named by dropping the leading 'A' ('M'), and the AHGP-to-
# baseline ratio of each metric. Every array is transposed and flattened into
# one column.
alg = 'AHGP'
baseline_alg = alg[1:]
csv_columns = {
    'x': d_results[alg]['fraction'],
    'm': d_results[alg]['m'],
    'M': d_results[baseline_alg]['m'],
}
for key in ['kl', 'rmse', 'nlpd', 'time_max', 'time_min']:
    csv_columns[key] = d_results[alg][key] / d_results[baseline_alg][key]
to_save = pd.DataFrame(
    {name: values.T.flatten() for name, values in csv_columns.items()}
)
with open("adaptiveselection.csv", "w") as file:
    to_save.to_csv(file, index=False)

### Recreate Additional Table
The additional material contains a table -- this is recreated below. The table itself is typeset with TeX primitives.

In [None]:
# Positions of the table entries inside the arrays of `d_results`:
# `Ls` indexes the first axis and `fractions` the second axis.
fractions = np.array([4, 7, 9])
Ls = np.array([3, 6, 12, 18])

# Pick the Ls x fractions sub-grid of every metric and flatten it row by row.
t_results = {}
for alg in d_results.keys():
    t_results[alg] = {metric: d_results[alg][metric][Ls[:, None], fractions].flatten()
                      for metric in d_results[alg].keys()}
# Give both algorithms identical 'fraction' and 'm' columns, so that their rows
# end up in the same (m, fraction) group below.
t_results['HGP']['fraction'] = t_results['AHGP']['fraction']
t_results['AHGP']['m'] = t_results['HGP']['m']

# A scalar passed to `assign` is broadcast to every row, so the label column
# always matches the number of selected entries (no hard-coded row count).
ahgp_df = pd.DataFrame(t_results['AHGP']).assign(alg='AHGP')
hgp_df = pd.DataFrame(t_results['HGP']).assign(alg='HGP')
df = pd.concat([ahgp_df, hgp_df]).set_index(['m', 'fraction', 'alg']).sort_index()


def aggf(x, scale):
    """Format one (m, fraction) group of a metric as ``"AHGP/HGP"``.

    Both values are divided by ``scale`` and printed with three decimals.
    """
    return "{:.3f}/{:.3f}".format(x.loc[:, :, 'AHGP'].values[0]/scale, x.loc[:, :, 'HGP'].values[0]/scale)


# The scales match the powers of ten shown in the column headers of the table
# figure further down (kl: 1e4, nlpd: 1e2, rmse: 1e-1).
grouped = df.groupby(by=['m', 'fraction'], group_keys=False).agg({'kl': lambda x: aggf(x, 1e4),
                                                       'nlpd': lambda x: aggf(x, 1e2),
                                                       'rmse': lambda x: aggf(x, 1e-1),
                                                       'time_min': lambda x: aggf(x, 1),
                                                       'time_max': lambda x: aggf(x, 1)})

##### Map the colors from Figure 2 to the Table

In [None]:
import matplotlib as mpl

# Five-colour palette (RGBA, 0-255) scaled to [0, 1]. The rows are flipped so
# the blue end comes first and the red end last, then the map is resampled to
# five discrete levels.
palette_rgba = np.array([[202, 0, 32, 255],
                         [244, 165, 130, 255],
                         [247, 247, 247, 255],
                         [146, 197, 222, 255],
                         [5, 113, 176, 255]]) / 255
cmap = mpl.colors.LinearSegmentedColormap.from_list("", np.flip(palette_rgba, axis=0)).resampled(5)


def _ratio_from_label(label):
    """Return numerator / denominator of a table label formatted as ``"a/b"``."""
    numerator, denominator = label.split("/")
    return float(numerator) / float(denominator)


# `A` holds the "AHGP/HGP" strings of the table; `B` holds the corresponding
# numeric ratios. The labels are parsed explicitly instead of being passed to
# `eval`, which also copes with "nan"/"inf" entries. `otypes` fixes the output
# dtype so that an empty table works too.
A = grouped.to_numpy()
B = np.vectorize(_ratio_from_label, otypes=[float])(A)

# Colour ranges per column group: kl and nlpd (columns 0-1), rmse (column 2),
# and the two timing columns (3 onwards).
time_norm = mpl.colors.Normalize(vmin=0, vmax=2)
prob_norm = mpl.colors.Normalize(vmin=1, vmax=3)
rms_norm = mpl.colors.Normalize(vmin=1, vmax=2)
C_prob = cmap(prob_norm(B[:, :2]))
C_rms = cmap(rms_norm(B[:, [2]]))
C_time = cmap(time_norm(B[:, 3:]))
# RGBA colour of every table cell, in the same column order as `grouped`.
C = np.concatenate([C_prob, C_rms, C_time], axis=1)

##### Recreate the Table in Figure form
Note that this is *not* how the table in the paper was created, but the result is more or less the same.

In [None]:
# tikzplotlib is only needed for the commented-out TikZ export at the end of
# this cell, so a missing installation must not stop the figure being drawn.
try:
    import tikzplotlib
except ImportError:
    tikzplotlib = None
plt.close("all")
with sns.axes_style("white"):
    fig, ax = plt.subplots(1, 1, figsize=(9, 6))
    # pc = plt.pcolor(B, edgecolors='k', linestyle="dashed", linewidth=0.2, vmin=0, vmax=2, cmap=cmap.resampled(5))
    # `C` already contains one RGBA colour per cell.
    pc = plt.pcolormesh(C, edgecolor='k', linestyle="dashed", linewidth=.2)
    # ax.set_aspect('equal', 'box')
    # One tick at the centre of every row and column.
    ax.set_yticks(np.arange(B.shape[0]) + 0.5, minor=False)
    ax.set_xticks(np.arange(B.shape[1]) + 0.5, minor=False)
    # Presumably needed so that get_facecolors() below returns the final
    # colours -- TODO confirm against the installed matplotlib version.
    pc.update_scalarmappable()
    ax = pc.axes
    for p, color, value in zip(pc.get_paths(), pc.get_facecolors().reshape(-1, 4), A.flatten()):
        # Anchor point taken from the cell outline; the +/- .166 offsets in
        # ax.text presumably move it onto the cell centre.
        x, y = p.vertices[:-2, :].mean(0)
        # Black text on light cells, white text on dark cells.
        if np.all(color[:3] > 0.5):
            color = (0.0, 0.0, 0.0)
        else:
            color = (1.0, 1.0, 1.0)
        # Show "a/b" as "a (b)".
        s = value.replace("/", " (") + ")"
        ax.text(x-.166, y+.166, s, ha="center", va="center", color=color, fontsize=10)
    ax.tick_params(top=False, labeltop=True, bottom=False, labelbottom=False, left=False)
    ax.set_xticklabels([r"kl $(10^4)$", r"nlpd $(10^2)$", r"rmse $(10^{-1})$", r"$t_{min}$", r"$t_{max}$"])
    ax.invert_yaxis()
    # Second index level ('fraction') repeated four times for the row labels.
    ax.set_yticklabels(list(grouped.index.levels[1].to_numpy()) * 4)
    # A twin axis carries the labels of the first index level ('m') at fixed
    # row positions.
    ax2 = ax.twinx()
    ax2.set(ylim=ax.get_ylim(), yticks=[0.5, 3.5, 6.5, 9.5])
    ax2.set_yticklabels(list(grouped.index.levels[0].to_numpy()))
    ax2.tick_params(left=False, right=False, labelleft=True, labelright=False, pad=45)
    # tikzplotlib.save("timing_table.tex", standalone=True)
    plt.show()

## Mock-ups of Figure 2
The figure will likely not be identical to the one in the paper -- the underlying function is random and the prediction time depends on the platform.

In [None]:
# One panel per metric. Each shows the ratio between AHGP and the algorithm
# named by dropping the leading 'A', drawn over the fraction / m grid on
# log-log axes. `minmaxes` gives the colour range of each panel.
alg = 'AHGP'
minmaxes = [(1, 3), (1, 3), (1, 2), (0, 2)]
plt.close("all")
fig, ax = plt.subplots(2, 2, figsize=(8, 8))
adaptive, baseline = d_results[alg], d_results[alg[1:]]
fraction_grid, m_grid = adaptive['fraction'], baseline['m']
for i, key in enumerate(['kl', 'nlpd', 'rmse', 'time_min']):
    axi = ax[divmod(i, 2)]
    lower, upper = minmaxes[i]
    C = adaptive[key] / baseline[key]
    pc = axi.pcolormesh(
        fraction_grid, m_grid, C,
        edgecolor='gray', linewidth=.1,
        cmap=cmap, vmin=lower, vmax=upper,
    )
    axi.set(
        yscale="log",
        xscale="log",
        ylim=[m_grid.min(), m_grid.max()],
        xlim=[fraction_grid.min(), fraction_grid.max()],
        title=key,
    )
plt.show()