In [4]:
import wandb
from tqdm import tqdm
import numpy as np

api = wandb.Api()

def load_lcbench_varying_cost_runs(data, acq):
    """Fetch the logged history of every run matching a dataset and acquisition function.

    Runs are queried from the "ziv-scully-group/StoppingBayesOptEmpirical"
    project, restricted to sweep "0ajk7zy5" and to runs whose config has
    ``dataset_name == data`` and ``acquisition_function == acq``.

    Args:
        data: Value matched against ``config.dataset_name``.
        acq: Value matched against ``config.acquisition_function``.

    Returns:
        A list with one ``(run.config, metrics)`` tuple per matching run, where
        ``metrics`` maps each requested history key to the list of values read
        from the run's scanned history rows, in scan order.
    """
    # History keys pulled for each run; the same list is used for every run.
    # NOTE(review): W&B's scan_history(keys=...) is documented to yield only
    # rows that contain all requested keys -- confirm this is the intent.
    logged_keys = [
        "config id",
        "cumulative cost",
        "estimated cumulative cost",
        "current best id",
        "current best observed",
        "StablePBGI(1e-3) acq",
        "StablePBGI(1e-4) acq",
        "StablePBGI(1e-5) acq",
        "LogEIC-inv acq",
        "LogEIC-exp acq",
        "regret upper bound",
        "exp min regret gap",
        "PRB",
    ]

    matching_runs = api.runs(
        path="ziv-scully-group/StoppingBayesOptEmpirical",
        filters={
            "sweep": "0ajk7zy5",
            "config.dataset_name": data,
            "config.acquisition_function": acq,
        },
    )

    results = []
    for run in tqdm(matching_runs):
        rows = list(run.scan_history(keys=logged_keys, page_size=1_000_000_000))
        # Pivot the list of row dicts into one list of values per key.
        per_key = {}
        for key in logged_keys:
            per_key[key] = [row[key] for row in rows]
        results.append((run.config, per_key))

    return results

In [5]:
# Dataset names; each one is matched against `config.dataset_name` when runs are fetched.
dataset_names = ["Fashion-MNIST", "adult", "higgs", "volkert"]

# (name used to look up runs via `config.acquisition_function`,
#  label under which that method's processed metrics are stored)
acquisition_functions = dict(
    [
        ("LogEIC-inv", "LogEIC-inv"),
        ("LogEIC-exp", "LogEIC-exp"),
        ("StablePBGI(1e-3)", "PBGI(1e-3)"),
        ("StablePBGI(1e-4)", "PBGI(1e-4)"),
        ("StablePBGI(1e-5)", "PBGI(1e-5)"),
        ("LCB", "LCB"),
        ("TS", "TS"),
    ]
)

In [None]:
# One download per (dataset, acquisition function) pair; iterating the
# acquisition_functions dict yields its keys, i.e. the names used in the run config.
grouped_runs = {
    (dataset, acq_name): load_lcbench_varying_cost_runs(dataset, acq_name)
    for dataset in dataset_names
    for acq_name in acquisition_functions
}

In [None]:
from collections import defaultdict
from functools import partial
import numpy as np

# (key in the downloaded run history, key under which the stacked array is stored).
# Keeping both names in one table removes the need to keep two parallel
# orderings in sync by hand.
METRIC_KEY_PAIRS = [
    ("cumulative cost", "cumulative cost"),
    ("estimated cumulative cost", "estimated cumulative cost"),
    ("config id", "config id"),
    ("current best id", "current best id"),
    ("current best observed", "current best observed"),
    ("StablePBGI(1e-3) acq", "PBGI(1e-3) acq"),
    ("StablePBGI(1e-4) acq", "PBGI(1e-4) acq"),
    ("StablePBGI(1e-5) acq", "PBGI(1e-5) acq"),
    ("LogEIC-inv acq", "LogEIC-inv acq"),
    ("LogEIC-exp acq", "LogEIC-exp acq"),
    ("exp min regret gap", "exp min regret gap"),
    ("regret upper bound", "regret upper bound"),
    ("PRB", "PRB"),
]
# Output keys, in storage order.
metric_keys = [output_key for _, output_key in METRIC_KEY_PAIRS]

# Nested mapping: dataset -> acquisition label -> metric key -> array.
# The factory is partial(defaultdict, dict) rather than a lambda: it creates
# the same inner defaultdict(dict), but unlike a lambda it can be pickled,
# so the whole structure can be passed to pickle.dump.
metrics_per_acq = defaultdict(partial(defaultdict, dict))

for d in dataset_names:
    for a, acq_label in acquisition_functions.items():
        # Drop runs that logged nothing, then order the rest by seed
        # (sorted() is stable, so equal seeds keep their download order).
        runs_by_seed = sorted(
            (
                (config, m)
                for config, m in grouped_runs[(d, a)]
                if len(m["current best observed"]) > 0
            ),
            key=lambda config_and_m: config_and_m[0]["seed"],
        )

        # One array per metric, with one entry per retained run. Keys
        # containing "id" are stored as int, everything else as float.
        # NOTE(review): assumes all runs have equally long histories so each
        # array is rectangular; np.array raises on ragged input -- confirm.
        arrays = {
            output_key: np.array(
                [m[source_key] for _, m in runs_by_seed],
                dtype=int if "id" in output_key else float,
            )
            for source_key, output_key in METRIC_KEY_PAIRS
        }

        # Look the array up by name instead of by position so the printed
        # shape really is the config-id array.
        print(d, a, arrays["config id"].shape)

        metrics_per_acq[d][acq_label].update(arrays)

In [None]:
import pickle
import os

# Save processed metrics to disk
save_path = "lcbench_unknown_cost_metrics_per_acq.pkl"

# Store plain dicts: a defaultdict whose factory is a lambda cannot be pickled,
# and plain dicts load without needing any factory to be importable.
plain_metrics = {
    dataset: {acq_label: dict(metrics) for acq_label, metrics in per_acq.items()}
    for dataset, per_acq in metrics_per_acq.items()
}

# Serialize before opening the file, so that a pickling error cannot leave a
# truncated/empty file behind that would later be mistaken for a valid cache.
payload = pickle.dumps(plain_metrics)
with open(save_path, "wb") as f:
    f.write(payload)
print(f"Saved lcbench_unknown_cost_metrics_per_acq to {save_path}")

In [None]:
import pickle
import os

save_path = "lcbench_unknown_cost_metrics_per_acq.pkl"

# Reuse previously saved metrics when the file is present.
# NOTE(review): skip_download is only set here; nothing in the visible cells reads it.
if not os.path.exists(save_path):
    skip_download = False
else:
    # pickle.load can execute arbitrary code -- only load files you created yourself.
    with open(save_path, "rb") as cache_file:
        metrics_per_acq = pickle.load(cache_file)
    print(f"Loaded lcbench_unknown_cost_metrics_per_acq from {save_path}")
    skip_download = True