In [None]:
import numpy as np
import pandas as pd
from hpobench.benchmarks.ml.tabular_benchmark import TabularBenchmark
from ConfigSpace.hyperparameters import OrdinalHyperparameter
from ConfigSpace.configuration_space import ConfigurationSpace

In [None]:
# Instantiate the HPOBench tabular benchmark that every later cell queries.
# NOTE(review): 'nn' presumably selects the neural-network model family and 31
# the task id -- confirm against the TabularBenchmark signature.
benchmark = TabularBenchmark('nn', 31)

In [None]:
def find_nearest_ordinal(value: float, hyperparameter_type: OrdinalHyperparameter):
    """Snap ``value`` to the closest entry of an ordinal hyperparameter.

    Closeness is measured by squared difference against every entry of
    ``hyperparameter_type.sequence``; when two entries are equally close the
    one that appears first in the sequence wins (``argmin`` semantics).

    Args:
        value: Number to be snapped onto the hyperparameter's grid.
        hyperparameter_type: Ordinal hyperparameter providing ``sequence``,
            ``get_seq_order()`` and ``get_value()``.

    Returns:
        The hyperparameter value obtained from ``get_value`` for the closest
        position.
    """
    candidates = np.array(hyperparameter_type.sequence)
    squared_gaps = np.square(candidates - value)
    closest_position = int(squared_gaps.argmin())
    # Translate the position within the sequence into the hyperparameter's own
    # ordering before asking it for the concrete value.
    seq_order = hyperparameter_type.get_seq_order()
    return hyperparameter_type.get_value(seq_order[closest_position])

def round_to_valid_config(values: dict, space: ConfigurationSpace):
    """Snap every entry of ``values`` onto the grid defined by ``space``.

    Args:
        values: Mapping from hyperparameter name to a raw numeric value. It
            must contain a key for every hyperparameter in ``space``.
        space: Configuration space whose hyperparameters are each passed to
            ``find_nearest_ordinal``.

    Returns:
        A new dict, keyed by hyperparameter name, holding the snapped values.
    """
    snapped = {}
    for hp in space.get_hyperparameters():
        snapped[hp.name] = find_nearest_ordinal(values[hp.name], hp)
    return snapped

In [None]:
# Raw hyperparameter values; these are snapped onto the benchmark's grid below
# before the benchmark is queried.
invalid_config = {"alpha": 0.001, "batch_size": 32, "depth": 2.0, "learning_rate_init": 0.001, "width": 64}
# Reuse invalid_config instead of repeating the literal so the two cannot drift apart.
valid_config = round_to_valid_config(invalid_config, benchmark.configuration_space)
result = benchmark.objective_function(valid_config)
# Display the objective value and its cost for the snapped configuration.
(result['function_value'], result['cost'])

In [None]:
# Columns of the benchmark table used as regression features.
feature_columns = [
    "alpha",
    "batch_size",
    "depth",
    "learning_rate_init",
    "width",
    "iter",
]
inputs = benchmark.table[feature_columns]
# Flatten the per-row "result" records into one column per field.
result_records = benchmark.table["result"]
results = pd.json_normalize(result_records)

# Distillation of benchmark into an MLP

In [None]:
from collections import OrderedDict
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Training configuration for the distilled MLPs.
batch_size = 128        # mini-batch size
num_epochs = 1          # number of passes over the benchmark table
num_hidden_units = 256  # width of the single hidden layer
learning_rate = 0.005   # Adam step size

# Weight initialisation and DataLoader shuffling both draw from torch's global
# RNG; seed it once so Restart & Run All reproduces the same models.
seed = 0
torch.manual_seed(seed);

In [None]:
# Regressor for the benchmark's objective value: 6 inputs -> hidden ELU layer -> 1 output.
model_y = nn.Sequential()
model_y.add_module('W1b', nn.Linear(6, num_hidden_units, bias=True))
model_y.add_module('activations', nn.ELU())
model_y.add_module('W2b', nn.Linear(num_hidden_units, 1, bias=True))

# Regressor for the log of the cost; same architecture, independent weights.
model_log_c = nn.Sequential()
model_log_c.add_module('W1b', nn.Linear(6, num_hidden_units, bias=True))
model_log_c.add_module('activations', nn.ELU())
model_log_c.add_module('W2b', nn.Linear(num_hidden_units, 1, bias=True))

In [None]:
# Convert through NumPy explicitly: handing a pandas Series straight to
# torch.tensor depends on the Series' index and is inconsistent with how the
# feature frame is converted.
X_tensor = torch.tensor(inputs.to_numpy(), dtype=torch.float32)
y_tensor = torch.tensor(results["function_value"].to_numpy(), dtype=torch.float32)
# The cost model is trained on log(cost).
# NOTE(review): assumes every cost is strictly positive, otherwise log yields -inf/nan -- confirm.
log_c_tensor = torch.log(torch.tensor(results["cost"].to_numpy(), dtype=torch.float32))

In [None]:
# Summary of the objective targets: (mean, std, min, max).
tuple(getattr(y_tensor, stat)() for stat in ("mean", "std", "min", "max"))

In [None]:
# Summary of the log-cost targets: (mean, std, min, max).
tuple(
    reduce()
    for reduce in (log_c_tensor.mean, log_c_tensor.std, log_c_tensor.min, log_c_tensor.max)
)

In [None]:
# Both regressors are trained with mean-squared error (averaged over the batch).
loss_fn = nn.MSELoss(reduction='mean')

# One independent Adam optimiser per model, sharing the configured step size.
optimizer_y = torch.optim.Adam(params=model_y.parameters(), lr=learning_rate)
optimizer_log_c = torch.optim.Adam(params=model_log_c.parameters(), lr=learning_rate)

In [None]:
# Pair the shared feature tensor with each regression target.
dataset_y = TensorDataset(X_tensor, y_tensor)
dataset_log_c = TensorDataset(X_tensor, log_c_tensor)
# Use the configured batch_size rather than a second hard-coded copy of it.
dataloader_y = DataLoader(dataset_y, batch_size=batch_size, shuffle=True)
dataloader_log_c = DataLoader(dataset_log_c, batch_size=batch_size, shuffle=True)
# Total number of samples; only used for progress reporting in the training loops.
size = X_tensor.shape[0]

In [None]:
# Fit model_y to the objective values for num_epochs passes over the data.
for epoch in range(num_epochs):
    for batch, (X, y) in enumerate(dataloader_y):
        # The model emits shape (batch, 1) while the targets are (batch,).
        # Without dropping the trailing axis MSELoss broadcasts the pair to
        # (batch, batch) and optimises the wrong objective.
        pred = model_y(X).squeeze(-1)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer_y.step()
        optimizer_y.zero_grad()

        # Report progress every 100 mini-batches.
        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:
# Fit model_log_c to the log-cost targets for num_epochs passes over the data.
for epoch in range(num_epochs):
    for batch, (X, c) in enumerate(dataloader_log_c):
        # The model emits shape (batch, 1) while the targets are (batch,).
        # Without dropping the trailing axis MSELoss broadcasts the pair to
        # (batch, batch) and optimises the wrong objective.
        pred = model_log_c(X).squeeze(-1)
        loss = loss_fn(pred, c)
        loss.backward()
        optimizer_log_c.step()
        optimizer_log_c.zero_grad()

        # Report progress every 100 mini-batches.
        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:
# Persist only the learned parameters (state dicts) of both distilled models.
for trained_model, checkpoint_path in (
    (model_y, "distilled_nn_y_model.pickle"),
    (model_log_c, "distilled_nn_log_c_model.pickle"),
):
    torch.save(trained_model.state_dict(), checkpoint_path)

## Test loading the models

In [None]:
# Must match the hidden width the saved models were trained with.
test_num_hidden_units = 256


def _fresh_distilled_mlp(hidden_units):
    """Return an untrained 6 -> hidden_units -> 1 ELU network.

    Layer names ('W1b', 'activations', 'W2b') match those used when the
    checkpoints were written so that the state-dict keys line up.
    """
    return nn.Sequential(OrderedDict(
        W1b=nn.Linear(6, hidden_units, bias=True),
        activations=nn.ELU(),
        W2b=nn.Linear(hidden_units, 1, bias=True),
    ))


test_model_y = _fresh_distilled_mlp(test_num_hidden_units)
test_model_log_c = _fresh_distilled_mlp(test_num_hidden_units)

# Restore the saved parameters into the freshly built networks; load_state_dict
# raises if the checkpoint keys or shapes do not match the architecture.
test_model_y.load_state_dict(torch.load("distilled_nn_y_model.pickle"))
test_model_log_c.load_state_dict(torch.load("distilled_nn_log_c_model.pickle"))