In [1]:
%%capture
from pprint import pprint

In [1]:
from LCBench.api import Benchmark
import os

# Path of the LCBench JSON dump; make sure its parent folder exists before
# handing the path to the benchmark loader.
bench_dir = "LCBench/cached/six_datasets_lw.json"
os.makedirs(os.path.dirname(bench_dir), exist_ok=True)

# Load the benchmark with caching disabled (cache=False).
bench = Benchmark(bench_dir, cache=False)

==> Loading data...
==> No cached data found or cache set to False.
==> Reading json data...
==> Done.


In [2]:
import torch

# Make float64 the default floating-point dtype, so tensors created from
# Python floats in the cells below are double precision.
torch.set_default_dtype(torch.float64)

In [3]:
def normalize_config(config):
    """Rescale one hyperparameter configuration into a 7-element tensor.

    Args:
        config: Mapping with the keys ``batch_size``, ``learning_rate``,
            ``max_units``, ``momentum``, ``weight_decay``, ``num_layers``
            and ``max_dropout``.

    Returns:
        A 1-D tensor ordered as (batch_size, learning_rate, momentum,
        weight_decay, num_layers, max_units, max_dropout). Batch size,
        learning rate and max units are rescaled on a log scale over
        [16, 512], [1e-4, 1e-1] and [64, 1024]; momentum, weight decay and
        number of layers are rescaled linearly over [0.1, 0.99],
        [1e-5, 1e-1] and [1, 4]; dropout is passed through unchanged.
    """

    def _log_unit(raw, low, high):
        # Position of ``raw`` between ``low`` and ``high`` on a log scale.
        # Integer inputs become floating point when torch.log is applied.
        log_low = torch.log(torch.tensor(low))
        log_high = torch.log(torch.tensor(high))
        return (torch.log(torch.tensor(raw)) - log_low) / (log_high - log_low)

    def _linear_unit(raw, low, high):
        # Position of ``raw`` between ``low`` and ``high`` on a linear scale.
        return (torch.tensor(raw) - low) / (high - low)

    features = [
        _log_unit(config["batch_size"], 16.0, 512.0),
        _log_unit(config["learning_rate"], 1e-4, 1e-1),
        _linear_unit(config["momentum"], 0.1, 0.99),
        _linear_unit(config["weight_decay"], 1e-5, 1e-1),
        _linear_unit(float(config["num_layers"]), 1, 4),
        _log_unit(config["max_units"], 64.0, 1024.0),
        # Dropout already lives in [0, 1]; no rescaling needed.
        torch.tensor(config["max_dropout"]),
    ]
    return torch.stack(features)

In [4]:
# Collect, for every configuration of one dataset, its normalized
# hyperparameters, final validation cross-entropy and last recorded time.
dataset_name = "higgs"
all_x, all_y, all_c = [], [], []

for config_id in bench.data[dataset_name].keys():
    config = bench.query(dataset_name, "config", config_id)
    val_ce = bench.query(dataset_name, "final_val_cross_entropy", config_id)
    # The last entry of the "time" series is used as the configuration's cost.
    runtime = bench.query(dataset_name, "time", config_id)[-1]

    all_x.append(normalize_config(config))
    all_y.append(val_ce)
    all_c.append(runtime)

# Turn the Python lists into tensors: all_x has one row per configuration,
# all_y and all_c become column vectors.
all_x = torch.stack(all_x)
all_y = torch.tensor(all_y).unsqueeze(1)
all_c = torch.tensor(all_c).unsqueeze(1)

In [5]:
# Show the assembled inputs, objective values and costs.
all_x, all_y, all_c

(tensor([[0.4299, 0.4204, 0.1272,  ..., 0.6667, 0.5487, 0.0259],
         [0.9672, 0.6977, 0.0720,  ..., 1.0000, 0.9729, 0.5472],
         [0.8919, 0.1077, 0.3272,  ..., 0.0000, 0.8208, 0.3320],
         ...,
         [0.6750, 0.8598, 0.4454,  ..., 0.6667, 0.4707, 0.3635],
         [0.9691, 0.3290, 0.0093,  ..., 0.3333, 0.8684, 0.0437],
         [0.3666, 0.9906, 0.2041,  ..., 1.0000, 0.6681, 0.4045]]),
 tensor([[0.6380],
         [0.6931],
         [0.7014],
         ...,
         [0.6090],
         [0.6511],
         [0.6931]]),
 tensor([[215.9746],
         [876.6520],
         [126.5395],
         ...,
         [186.1553],
         [165.5783],
         [877.6618]]))

In [None]:
from pandora_automl.utils import fit_gp_model
import numpy as np
import math
from botorch.acquisition import LogExpectedImprovement
from pandora_automl.acquisition.log_ei_puc import LogExpectedImprovementWithCost
from botorch.acquisition import UpperConfidenceBound
from pandora_automl.acquisition.lcb import LowerConfidenceBound
from pandora_automl.acquisition.gittins import GittinsIndex
from pandora_automl.acquisition.stable_gittins import StableGittinsIndex

In [None]:
# --- Experiment configuration -------------------------------------------------
dim = 7
n_iter = 3
maximize = False
output_standardize = True
acq = "StablePBGI(1e-6)"

# Selection rule for every supported policy name: True means the candidate with
# the smallest acquisition value is chosen, False means the largest.
# The lmbda=3e-6 and lmbda=3e-7 StablePBGI variants are currently disabled;
# re-enable them by adding analogous entries here and in the loop below.
acq_select_min = {
    "StablePBGI(1e-5)": True,
    "StablePBGI(1e-6)": True,
    "StablePBGI(1e-7)": True,
    "LogEIC-inv": False,
    "LogEIC-exp": False,
    "LCB": True,
}
# Fail fast on an unknown policy name. Otherwise no branch would assign
# new_config_id, and a re-run of this cell could silently reuse a stale value
# left in the kernel by a previous run.
if acq not in acq_select_min:
    raise ValueError(
        f"Unknown acquisition policy {acq!r}; expected one of {sorted(acq_select_min)}"
    )

# --- Candidate pool (loop-invariant) -------------------------------------------
# The pool size is taken from the data instead of being hard-coded.
num_configs = all_x.shape[0]
all_ids = torch.arange(num_configs)
# Extra dimension added before passing the candidates to the acquisition functions.
candidate_x = all_x.unsqueeze(1)

# --- Initial design --------------------------------------------------------------
torch.manual_seed(15)
# NOTE(review): torch.randint samples with replacement, so the initial design
# may contain duplicate config ids. Kept as-is to preserve the seeded draw.
init_config_id = torch.randint(low=0, high=num_configs, size=(2*(dim+1),))
config_id_history = init_config_id.tolist()
print(f"  Initial config id: {config_id_history}")
x = all_x[init_config_id]
y = all_y[init_config_id]
c = all_c[init_config_id]

# --- Histories -------------------------------------------------------------------
# Index 0 describes the state after the initial design; acquisition-based
# histories have no value there, hence the NaN placeholder.
best_y_history = [y.min().item()]
best_id_history = [config_id_history[y.argmin().item()]]
cost_history = [0]
StablePBGI_1e_5_acq_history = [np.nan]
StablePBGI_1e_6_acq_history = [np.nan]
StablePBGI_1e_7_acq_history = [np.nan]
LogEIC_inv_acq_history = [np.nan]
LogEIC_exp_acq_history = [np.nan]
regret_upper_bound_history = [np.nan]

for i in range(n_iter):
    # 1. Fit a GP model (objective and cost) on the current data.
    model = fit_gp_model(X=x, objective_X=y, cost_X=c, unknown_cost=True, output_standardize=output_standardize)

    # 2. Best objective value observed so far (before this iteration's query).
    best_f = y.min()

    # 3. Build all acquisition functions; every one is evaluated each iteration
    #    so its value can be recorded, even though only `acq` drives selection.
    StablePBGI_1e_5 = StableGittinsIndex(model=model, maximize=maximize, lmbda=1e-5, unknown_cost=True)
    StablePBGI_1e_6 = StableGittinsIndex(model=model, maximize=maximize, lmbda=1e-6, unknown_cost=True)
    StablePBGI_1e_7 = StableGittinsIndex(model=model, maximize=maximize, lmbda=1e-7, unknown_cost=True)
    LogEIC_inv = LogExpectedImprovementWithCost(model=model, best_f=best_f, maximize=maximize, unknown_cost=True, inverse_cost=True)
    LogEIC_exp = LogExpectedImprovementWithCost(model=model, best_f=best_f, maximize=maximize, unknown_cost=True, inverse_cost=False)
    single_outcome_model = fit_gp_model(X=x, objective_X=y, output_standardize=output_standardize)
    # Iteration-dependent confidence parameter shared by UCB and LCB.
    ucb_beta = 2 * np.log(dim * ((i + 1) ** 2) * (math.pi ** 2) / (6 * 0.1)) / 5
    UCB = UpperConfidenceBound(model=single_outcome_model, maximize=maximize, beta=ucb_beta)
    LCB = LowerConfidenceBound(model=single_outcome_model, maximize=maximize, beta=ucb_beta)

    # 4. Evaluate every acquisition function on all candidate x's.
    StablePBGI_1e_5_acq = StablePBGI_1e_5.forward(candidate_x)
    StablePBGI_1e_6_acq = StablePBGI_1e_6.forward(candidate_x)
    StablePBGI_1e_7_acq = StablePBGI_1e_7.forward(candidate_x)
    LogEIC_inv_acq = LogEIC_inv.forward(candidate_x)
    LogEIC_exp_acq = LogEIC_exp.forward(candidate_x)
    UCB_acq = UCB.forward(candidate_x)
    LCB_acq = LCB.forward(candidate_x)

    # 5. Record information for stopping, restricted to not-yet-evaluated configs.
    mask = torch.ones(num_configs, dtype=torch.bool)
    mask[config_id_history] = False

    StablePBGI_1e_5_acq_history.append(torch.min(StablePBGI_1e_5_acq[mask]).item())
    StablePBGI_1e_6_acq_history.append(torch.min(StablePBGI_1e_6_acq[mask]).item())
    StablePBGI_1e_7_acq_history.append(torch.min(StablePBGI_1e_7_acq[mask]).item())
    LogEIC_inv_acq_history.append(torch.max(LogEIC_inv_acq[mask]).item())
    LogEIC_exp_acq_history.append(torch.max(LogEIC_exp_acq[mask]).item())
    # The bound is computed over the full pool (no mask).
    # NOTE(review): per the BoTorch docs, UpperConfidenceBound negates the
    # posterior mean when maximize=False; confirm that min(UCB) - min(LCB) is
    # the intended bound under that sign convention.
    regret_upper_bound_history.append(torch.min(UCB_acq).item() - torch.min(LCB_acq).item())

    # 6. Select the unevaluated candidate with the optimal acquisition value.
    candidate_ids = all_ids[mask]
    acq_values_by_name = {
        "StablePBGI(1e-5)": StablePBGI_1e_5_acq,
        "StablePBGI(1e-6)": StablePBGI_1e_6_acq,
        "StablePBGI(1e-7)": StablePBGI_1e_7_acq,
        "LogEIC-inv": LogEIC_inv_acq,
        "LogEIC-exp": LogEIC_exp_acq,
        "LCB": LCB_acq,
    }
    candidate_acqs = acq_values_by_name[acq][mask]
    if acq_select_min[acq]:
        new_config_id = candidate_ids[torch.argmin(candidate_acqs)]
        new_config_acq = torch.min(candidate_acqs)
    else:
        new_config_id = candidate_ids[torch.argmax(candidate_acqs)]
        new_config_acq = torch.max(candidate_acqs)

    new_config_x = all_x[new_config_id]

    # 7. Query the objective and cost for the new configuration.
    new_config_y = all_y[new_config_id]
    new_config_c = all_c[new_config_id]

    # 8. Append the new data to our training set.
    x = torch.cat([x, new_config_x.unsqueeze(0)], dim=0)
    y = torch.cat([y, new_config_y.unsqueeze(0)], dim=0)
    c = torch.cat([c, new_config_c.unsqueeze(0)], dim=0)
    config_id_history.append(new_config_id.item())
    # best_f is the incumbent from *before* this iteration's observation, while
    # best_id_history below already includes it.
    # NOTE(review): together with the append after the loop, best_y_history ends
    # up one entry longer than the other histories; confirm consumers expect it.
    best_y_history.append(best_f.item())
    best_id_history.append(config_id_history[y.argmin().item()])
    cost_history.append(new_config_c.item())

    print(f"Iteration {i + 1}:")
    print(f"  Selected config_id: {new_config_id}")
    print(f"  Acquisition value: {new_config_acq.item():.4f}")
    print(f"  Objective (final_val_cross_entropy): {new_config_y.item():.4f}")
    print(f"  Cost (time): {new_config_c.item():.4f}")
    print(f"  Current best observed: {best_f.item():.4f}")
    print()


# Final incumbent after the last observation has been added.
best_y_history.append(y.min().item())

  Initial config id: [232, 1429, 140, 373, 247, 624, 1804, 811, 1959, 619, 1653, 470, 1727, 1453, 1409, 765]
Iteration 1:
  Selected config_id: 1897
  Acquisition value: 0.5932
  Objective (final_val_cross_entropy): 0.6171
  Cost (time): 186.8864
  Current best observed: 0.6185

Iteration 2:
  Selected config_id: 1691
  Acquisition value: 0.5886
  Objective (final_val_cross_entropy): 0.6150
  Cost (time): 163.1292
  Current best observed: 0.6171

Iteration 3:
  Selected config_id: 1578
  Acquisition value: 0.5881
  Objective (final_val_cross_entropy): 0.5781
  Cost (time): 182.8854
  Current best observed: 0.6150

