In [1]:
import wandb
from tqdm import tqdm
import numpy as np

# Shared Weights & Biases API client; the run-loading helper below queries runs through it.
api = wandb.Api()

def load_impact_of_lmbda_runs(acq):
    """Load config and logged metrics for every run of sweep ``rsc6fhos`` using policy ``acq``.

    Runs are fetched from the ``ziv-scully-group/PandoraBayesOPT`` project and
    restricted to amplitude 1, dim 8, kernel ``Matern52`` and lengthscale 0.1.

    Parameters
    ----------
    acq : str
        Value matched against ``config.policy`` in the run filter.

    Returns
    -------
    list of (config, metrics) tuples
        One tuple per run. ``metrics`` maps each of ``"cumulative cost"``,
        ``"best observed"``, ``"lmbda"`` and ``"global optimum value"`` to the
        list of values returned by the corresponding history scan.
    """
    runs = api.runs(path="ziv-scully-group/PandoraBayesOPT", filters={
        "sweep": "rsc6fhos",
        "config.amplitude": 1,
        "config.dim": 8,
        "config.kernel": "Matern52",
        "config.lengthscale": 0.1,
        "config.policy": acq})

    # Loop-invariant key lists, hoisted out of the per-run loop.
    metric_keys = ["cumulative cost", "best observed", "lmbda"]
    summary_metric_keys = ["global optimum value"]

    configs_and_metrics = []
    for run in tqdm(runs):
        # Materialize each scan exactly once. Iterating the scan object again
        # for every key would walk (and presumably re-download) the same
        # history once per key.
        history_rows = list(run.scan_history(keys=metric_keys, page_size=1_000_000_000))
        metrics = {k: [row[k] for row in history_rows] for k in metric_keys}

        # "global optimum value" is scanned separately from the per-step keys
        # (presumably because it is not logged on the same rows -- TODO confirm).
        summary_rows = list(run.scan_history(keys=summary_metric_keys, page_size=1_000_000_000))
        metrics.update({k: [row[k] for row in summary_rows] for k in summary_metric_keys})

        configs_and_metrics.append((run.config, metrics))

    return configs_and_metrics

In [2]:
# Policy names for the step-divide study: every initial-lambda tag paired with
# every step divisor, ordered lambda-major (all divisors for one lambda first).
acquisition_functions = [
    f'Gittins_Lmbda_{lmbda_tag}_Step_Divide{divisor}'
    for lmbda_tag in ('1', '01', '001', '0001')
    for divisor in (2, 10, 100)
]

In [3]:
# One (config, metrics) list per policy name, keyed by that name.
grouped_runs = {
    policy: load_impact_of_lmbda_runs(policy)
    for policy in acquisition_functions
}

100%|██████████| 16/16 [00:14<00:00,  1.07it/s]
100%|██████████| 16/16 [00:15<00:00,  1.04it/s]
100%|██████████| 16/16 [00:15<00:00,  1.04it/s]
100%|██████████| 16/16 [00:16<00:00,  1.04s/it]
100%|██████████| 16/16 [00:39<00:00,  2.48s/it]
100%|██████████| 16/16 [00:21<00:00,  1.32s/it]
100%|██████████| 16/16 [00:15<00:00,  1.02it/s]
100%|██████████| 16/16 [00:15<00:00,  1.03it/s]
100%|██████████| 16/16 [00:16<00:00,  1.03s/it]
100%|██████████| 16/16 [00:15<00:00,  1.02it/s]
100%|██████████| 16/16 [00:15<00:00,  1.02it/s]
100%|██████████| 16/16 [00:16<00:00,  1.00s/it]


In [5]:
# For each policy: compute per-step regret quantiles across seeds and write
# them, together with the mean cumulative cost, to one CSV per policy.
for a in acquisition_functions:
    config_and_metrics_per_seed = grouped_runs[a]

    # Use the SAME set of runs for all three arrays. Runs with an empty history
    # are dropped up front; filtering only the global-optimum array (as before)
    # would leave the arrays with mismatched or ragged shapes.
    complete_metrics = [
        m for (_, m) in config_and_metrics_per_seed
        if len(m['cumulative cost']) > 0 and len(m['best observed']) > 0
    ]

    # Transposed so that rows index steps and columns index seeds.
    cumulative_cost_per_seed = np.array([m['cumulative cost'] for m in complete_metrics]).T
    best_observed_per_seed = np.array([m['best observed'] for m in complete_metrics]).T
    # Handling potential empty arrays
    if cumulative_cost_per_seed.size == 0 or best_observed_per_seed.size == 0:
        continue  # Skip this iteration if there's no data
    global_optimum_per_seed = np.array([m['global optimum value'][0] for m in complete_metrics])

    # Broadcasts the per-seed optimum across every step (row).
    regret_per_seed = global_optimum_per_seed - best_observed_per_seed
    print(a, regret_per_seed.shape)

    # Quantiles are taken across seeds (axis=1), giving one value per step.
    regret_25 = np.quantile(regret_per_seed, 0.25, axis=1)
    regret_50 = np.quantile(regret_per_seed, 0.5, axis=1)
    regret_75 = np.quantile(regret_per_seed, 0.75, axis=1)

    output = np.stack((cumulative_cost_per_seed.mean(axis=1), regret_25, regret_50, regret_75),axis=-1)

    np.savetxt(f"results/gittins_step_divide/{a}.csv", output, header="cc, q25, q50, q75", delimiter=', ', comments='')

Gittins_Lmbda_1_Step_Divide2 (200, 16)
Gittins_Lmbda_1_Step_Divide10 (200, 16)
Gittins_Lmbda_1_Step_Divide100 (200, 16)
Gittins_Lmbda_01_Step_Divide2 (200, 16)
Gittins_Lmbda_01_Step_Divide10 (200, 16)
Gittins_Lmbda_01_Step_Divide100 (200, 16)
Gittins_Lmbda_001_Step_Divide2 (200, 16)
Gittins_Lmbda_001_Step_Divide10 (200, 16)
Gittins_Lmbda_001_Step_Divide100 (200, 16)
Gittins_Lmbda_0001_Step_Divide2 (200, 16)
Gittins_Lmbda_0001_Step_Divide10 (200, 16)
Gittins_Lmbda_0001_Step_Divide100 (200, 16)


In [7]:
# For each policy: compute per-step quantiles of lambda across seeds and write
# them, together with that policy's mean cumulative cost, to one CSV per policy.
for a in acquisition_functions:
    config_and_metrics_per_seed = grouped_runs[a]

    # Drop runs with an empty history so the per-seed arrays stay rectangular
    # and the lambda and cost arrays cover the same runs.
    complete_metrics = [
        m for (_, m) in config_and_metrics_per_seed
        if len(m['lmbda']) > 0 and len(m['cumulative cost']) > 0
    ]

    # Transposed so that rows index steps and columns index seeds.
    lmbda_per_seed = np.array([m['lmbda'] for m in complete_metrics]).T
    # Compute the cost for THIS policy here. Previously the "cc" column relied
    # on a `cumulative_cost_per_seed` left over from whichever cell ran before,
    # i.e. a different policy's data (or a NameError on a fresh kernel).
    cumulative_cost_per_seed = np.array([m['cumulative cost'] for m in complete_metrics]).T
    # Handling potential empty arrays
    if lmbda_per_seed.size == 0:
        continue  # Skip this iteration if there's no data

    print(a, lmbda_per_seed.shape)

    # Quantiles are taken across seeds (axis=1), giving one value per step.
    lmbda_25 = np.quantile(lmbda_per_seed, 0.25, axis=1)
    lmbda_50 = np.quantile(lmbda_per_seed, 0.5, axis=1)
    lmbda_75 = np.quantile(lmbda_per_seed, 0.75, axis=1)

    output = np.stack((cumulative_cost_per_seed.mean(axis=1), lmbda_25, lmbda_50, lmbda_75),axis=-1)

    np.savetxt(f"results/lmbda_history/{a}.csv", output, header="cc, q25, q50, q75", delimiter=', ', comments='')

Gittins_Lmbda_1_Step_Divide2 (200, 16)
Gittins_Lmbda_1_Step_Divide10 (200, 16)
Gittins_Lmbda_1_Step_Divide100 (200, 16)
Gittins_Lmbda_01_Step_Divide2 (200, 16)
Gittins_Lmbda_01_Step_Divide10 (200, 16)
Gittins_Lmbda_01_Step_Divide100 (200, 16)
Gittins_Lmbda_001_Step_Divide2 (200, 16)
Gittins_Lmbda_001_Step_Divide10 (200, 16)
Gittins_Lmbda_001_Step_Divide100 (200, 16)
Gittins_Lmbda_0001_Step_Divide2 (200, 16)
Gittins_Lmbda_0001_Step_Divide10 (200, 16)
Gittins_Lmbda_0001_Step_Divide100 (200, 16)


In [2]:
# Maps the policy name stored in the run config (key) to the file stem used
# for the exported CSV (value); the two differ only by one underscore.
acquisition_functions = {
    f'Gittins_Lambda_{lmbda_tag}': f'Gittins_Lambda{lmbda_tag}'
    for lmbda_tag in ('1', '01', '001', '0001', '00001')
}

In [3]:
# One (config, metrics) list per policy name; iterating the dict yields its keys.
grouped_runs = {
    policy: load_impact_of_lmbda_runs(policy)
    for policy in acquisition_functions
}

100%|██████████| 16/16 [00:20<00:00,  1.25s/it]
100%|██████████| 16/16 [00:20<00:00,  1.27s/it]
100%|██████████| 16/16 [00:19<00:00,  1.20s/it]
100%|██████████| 16/16 [00:18<00:00,  1.19s/it]
100%|██████████| 16/16 [00:19<00:00,  1.21s/it]


In [4]:
# Regret quantiles per step for each fixed-lambda policy, exported as one CSV
# per policy under the file stem given by the mapping's value.
for a, csv_stem in acquisition_functions.items():
    config_and_metrics_per_seed = grouped_runs[a]

    # Runs that logged both series; evaluated once and reused for every array.
    usable_metrics = [
        metrics
        for _, metrics in config_and_metrics_per_seed
        if len(metrics['cumulative cost']) > 0 and len(metrics['best observed']) > 0
    ]

    # Rows index steps, columns index seeds.
    cumulative_cost_per_seed = np.array([metrics['cumulative cost'] for metrics in usable_metrics]).T
    best_observed_per_seed = np.array([metrics['best observed'] for metrics in usable_metrics]).T
    if cumulative_cost_per_seed.size == 0 or best_observed_per_seed.size == 0:
        # Nothing to export for this policy.
        continue
    global_optimum_per_seed = np.array([metrics['global optimum value'][0] for metrics in usable_metrics])

    regret_per_seed = global_optimum_per_seed - best_observed_per_seed
    print(a, regret_per_seed.shape)

    # All three quantiles across seeds in a single call; one row per quantile.
    regret_25, regret_50, regret_75 = np.quantile(regret_per_seed, [0.25, 0.5, 0.75], axis=1)

    mean_cumulative_cost = cumulative_cost_per_seed.mean(axis=1)
    output = np.column_stack((mean_cumulative_cost, regret_25, regret_50, regret_75))

    np.savetxt(
        f"results/gittins_step_divide/{csv_stem}.csv",
        output,
        header="cc, q25, q50, q75",
        delimiter=', ',
        comments='',
    )

Gittins_Lambda_1 (200, 16)
Gittins_Lambda_01 (200, 16)
Gittins_Lambda_001 (200, 16)
Gittins_Lambda_0001 (200, 16)
Gittins_Lambda_00001 (200, 16)
