In [1]:
import wandb
from tqdm import tqdm
import numpy as np

# Shared W&B client; load_timing_runs uses it to query runs.
api = wandb.Api()

def load_timing_runs(acq, dim, sweep="equk6hvr"):
    """Fetch logged timing metrics for every run of one policy/dimension pair.

    Queries the ``ziv-scully-group/PandoraBayesOPT`` project through the
    module-level ``api`` client for runs in ``sweep`` whose config has
    ``problem == "Ackley"``, ``dim == dim`` and ``policy == acq``.

    Args:
        acq: Value matched against ``config.policy``.
        dim: Value matched against ``config.dim``.
        sweep: Sweep ID to filter on. Defaults to the sweep this cell was
            originally hard-coded to, so existing two-argument calls behave
            as before.

    Returns:
        A list with one ``(run.config, metrics)`` tuple per matching run.
        ``metrics`` maps each of ``"cumulative cost"``, ``"runtime"`` and
        ``"global optimum value"`` to the list of values found in the run's
        scanned history for that key.
    """
    # Loop-invariant key lists, defined once instead of once per run.
    metric_keys = ["cumulative cost", "runtime"]
    summary_metric_keys = ["global optimum value"]

    runs = api.runs(path="ziv-scully-group/PandoraBayesOPT", filters={
        "sweep": sweep,
        "config.dim": dim,
        "config.problem": "Ackley",
        "config.policy": acq})

    configs_and_metrics = []
    for run in tqdm(runs):
        # Materialise the scan once so the history is read a single time
        # rather than once per metric key.
        rows = list(run.scan_history(keys=metric_keys, page_size=1_000_000_000))
        metrics = {k: [row[k] for row in rows] for k in metric_keys}

        # Scanned separately from the per-step metrics.
        # NOTE(review): presumably because this key is not logged on the same
        # rows as the metrics above -- confirm.
        summary_rows = list(run.scan_history(keys=summary_metric_keys, page_size=1_000_000_000))
        metrics.update({k: [row[k] for row in summary_rows] for k in summary_metric_keys})

        configs_and_metrics.append((run.config, metrics))

    return configs_and_metrics

In [2]:
# Key: policy name passed to load_timing_runs (matched against `config.policy`).
# Value: tag used for that policy in the output CSV filename.
acquisition_functions = dict(
    ExpectedImprovement="ExpectedImprovement",
    ThompsonSampling="ThompsonSampling",
    KnowledgeGradient="KnowledgeGradient",
    MultiStepLookaheadEI="MultiStepLookaheadEI",
    Gittins_Lambda_0001="Gittins_Lambda0001",
)

# Problem dimensions to load (matched against `config.dim`).
dimensions = [4, 8, 16]

In [3]:
# Download the runs for every (policy, dimension) combination, keyed by that pair.
grouped_runs = {
    (policy, dim): load_timing_runs(policy, dim)
    for policy in acquisition_functions
    for dim in dimensions
}

100%|██████████| 16/16 [00:20<00:00,  1.28s/it]
100%|██████████| 16/16 [00:19<00:00,  1.25s/it]
100%|██████████| 16/16 [00:21<00:00,  1.32s/it]
100%|██████████| 16/16 [00:20<00:00,  1.28s/it]
100%|██████████| 16/16 [00:22<00:00,  1.40s/it]
100%|██████████| 16/16 [00:22<00:00,  1.38s/it]
100%|██████████| 16/16 [00:21<00:00,  1.35s/it]
100%|██████████| 16/16 [00:20<00:00,  1.29s/it]
100%|██████████| 16/16 [00:19<00:00,  1.21s/it]
100%|██████████| 16/16 [00:22<00:00,  1.38s/it]
100%|██████████| 16/16 [00:21<00:00,  1.32s/it]
100%|██████████| 16/16 [00:25<00:00,  1.61s/it]
100%|██████████| 16/16 [00:20<00:00,  1.26s/it]
100%|██████████| 16/16 [00:20<00:00,  1.26s/it]
100%|██████████| 16/16 [00:20<00:00,  1.30s/it]


In [4]:
# For each (policy, dimension) pair, summarise runtime across runs as quartiles
# and write one CSV alongside the mean cumulative cost.
for a in acquisition_functions:
    for d in dimensions:
        seed_metrics = [metrics for _, metrics in grouped_runs[a, d]]

        # After the transpose, axis 0 indexes logged history entries and
        # axis 1 indexes runs; runs with nothing logged for a key are skipped.
        cumulative_cost_per_seed = np.array(
            [m['cumulative cost'] for m in seed_metrics if len(m['cumulative cost']) > 0]
        ).T
        runtime_per_seed = np.array(
            [m['runtime'] for m in seed_metrics if len(m['runtime']) > 0]
        ).T

        print(a, d, runtime_per_seed.shape)

        # Quartiles are taken across runs (axis 1), one value per history entry.
        runtime_25, runtime_50, runtime_75 = (
            np.quantile(runtime_per_seed, q, axis=1) for q in (0.25, 0.5, 0.75)
        )
        mean_cost = cumulative_cost_per_seed.mean(axis=1)

        output = np.stack((mean_cost, runtime_25, runtime_50, runtime_75), axis=-1)

        np.savetxt(
            f"results/quartiles/timing/Timing_d{d}_{acquisition_functions[a]}.csv",
            output,
            header="cc, q25, q50, q75",
            delimiter=', ',
            comments='',
        )

ExpectedImprovement 4 (40, 16)
ExpectedImprovement 8 (80, 16)
ExpectedImprovement 16 (160, 16)
ThompsonSampling 4 (40, 16)
ThompsonSampling 8 (80, 16)
ThompsonSampling 16 (160, 16)
KnowledgeGradient 4 (40, 16)
KnowledgeGradient 8 (80, 16)
KnowledgeGradient 16 (160, 16)
MultiStepLookaheadEI 4 (40, 16)
MultiStepLookaheadEI 8 (80, 16)
MultiStepLookaheadEI 16 (160, 16)
Gittins_Lambda_0001 4 (40, 16)
Gittins_Lambda_0001 8 (80, 16)
Gittins_Lambda_0001 16 (160, 16)


In [1]:
import wandb
from tqdm import tqdm
import numpy as np

# Shared W&B client; load_timing_runs uses it to query runs.
api = wandb.Api()

def load_timing_runs(acq, dim, sweep="xii0k0tg"):
    """Fetch logged timing metrics for every run of one policy/dimension pair.

    Queries the ``ziv-scully-group/PandoraBayesOPT`` project through the
    module-level ``api`` client for runs in ``sweep`` whose config has
    ``problem == "Ackley"``, ``dim == dim`` and ``policy == acq``.

    Args:
        acq: Value matched against ``config.policy``.
        dim: Value matched against ``config.dim``.
        sweep: Sweep ID to filter on. Defaults to the sweep this cell was
            originally hard-coded to, so existing two-argument calls behave
            as before.

    Returns:
        A list with one ``(run.config, metrics)`` tuple per matching run.
        ``metrics`` maps each of ``"cumulative cost"``, ``"runtime"`` and
        ``"global optimum value"`` to the list of values found in the run's
        scanned history for that key.
    """
    # Loop-invariant key lists, defined once instead of once per run.
    metric_keys = ["cumulative cost", "runtime"]
    summary_metric_keys = ["global optimum value"]

    runs = api.runs(path="ziv-scully-group/PandoraBayesOPT", filters={
        "sweep": sweep,
        "config.dim": dim,
        "config.problem": "Ackley",
        "config.policy": acq})

    configs_and_metrics = []
    for run in tqdm(runs):
        # Materialise the scan once so the history is read a single time
        # rather than once per metric key.
        rows = list(run.scan_history(keys=metric_keys, page_size=1_000_000_000))
        metrics = {k: [row[k] for row in rows] for k in metric_keys}

        # Scanned separately from the per-step metrics.
        # NOTE(review): presumably because this key is not logged on the same
        # rows as the metrics above -- confirm.
        summary_rows = list(run.scan_history(keys=summary_metric_keys, page_size=1_000_000_000))
        metrics.update({k: [row[k] for row in summary_rows] for k in summary_metric_keys})

        configs_and_metrics.append((run.config, metrics))

    return configs_and_metrics

In [2]:
# Policy names passed to load_timing_runs (matched against `config.policy`);
# each name is also used verbatim in the output CSV filenames.
acquisition_functions = ['LogExpectedImprovement']
# Problem dimensions to load (matched against `config.dim`).
dimensions = [4, 8, 16]

In [3]:
# Download the runs for every (policy, dimension) combination, keyed by that pair.
grouped_runs = {
    (policy, dim): load_timing_runs(policy, dim)
    for policy in acquisition_functions
    for dim in dimensions
}

100%|██████████| 16/16 [00:13<00:00,  1.20it/s]
100%|██████████| 16/16 [00:13<00:00,  1.17it/s]
100%|██████████| 16/16 [00:13<00:00,  1.15it/s]


In [4]:
# For each (policy, dimension) pair, summarise runtime across runs as quartiles
# and write one CSV alongside the mean cumulative cost.
for a in acquisition_functions:
    for d in dimensions:
        seed_metrics = [metrics for _, metrics in grouped_runs[a, d]]

        # After the transpose, axis 0 indexes logged history entries and
        # axis 1 indexes runs; runs with nothing logged for a key are skipped.
        cumulative_cost_per_seed = np.array(
            [m['cumulative cost'] for m in seed_metrics if len(m['cumulative cost']) > 0]
        ).T
        runtime_per_seed = np.array(
            [m['runtime'] for m in seed_metrics if len(m['runtime']) > 0]
        ).T

        print(a, d, runtime_per_seed.shape)

        # Quartiles are taken across runs (axis 1), one value per history entry.
        runtime_25, runtime_50, runtime_75 = (
            np.quantile(runtime_per_seed, q, axis=1) for q in (0.25, 0.5, 0.75)
        )
        mean_cost = cumulative_cost_per_seed.mean(axis=1)

        output = np.stack((mean_cost, runtime_25, runtime_50, runtime_75), axis=-1)

        np.savetxt(
            f"results/quartiles/timing/Timing_d{d}_{a}.csv",
            output,
            header="cc, q25, q50, q75",
            delimiter=', ',
            comments='',
        )

LogExpectedImprovement 4 (40, 16)
LogExpectedImprovement 8 (80, 16)
LogExpectedImprovement 16 (160, 16)


In [6]:
# For each (policy, dimension) pair, summarise runtime across runs as
# mean +/- 2 standard errors and write one CSV alongside the mean cumulative cost.
for a in acquisition_functions:
    for d in dimensions:
        seed_metrics = [metrics for _, metrics in grouped_runs[a, d]]

        # After the transpose, axis 0 indexes logged history entries and
        # axis 1 indexes runs; runs with nothing logged for a key are skipped.
        cumulative_cost_per_seed = np.array(
            [m['cumulative cost'] for m in seed_metrics if len(m['cumulative cost']) > 0]
        ).T
        runtime_per_seed = np.array(
            [m['runtime'] for m in seed_metrics if len(m['runtime']) > 0]
        ).T

        print(a, d, runtime_per_seed.shape)

        n_runs = runtime_per_seed.shape[1]
        mean = runtime_per_seed.mean(axis=1)
        # Standard error of the mean, using the sample standard deviation (ddof=1).
        se = runtime_per_seed.std(axis=1, ddof=1) / np.sqrt(n_runs)

        mean_minus_2se = mean - 2 * se
        mean_plus_2se = mean + 2 * se
        mean_cost = cumulative_cost_per_seed.mean(axis=1)

        output = np.stack((mean_cost, mean_minus_2se, mean, mean_plus_2se), axis=-1)

        np.savetxt(
            f"results/SE/timing/Timing_d{d}_{a}.csv",
            output,
            header="cc, mean-2se, mean, mean+2se",
            delimiter=', ',
            comments='',
        )

LogExpectedImprovement 4 (40, 16)
LogExpectedImprovement 8 (80, 16)
LogExpectedImprovement 16 (160, 16)
