In [None]:
import numpy as np
import pandas as pd
from hpobench.benchmarks.ml.tabular_benchmark import TabularBenchmark
from ConfigSpace.hyperparameters import OrdinalHyperparameter
from ConfigSpace.configuration_space import ConfigurationSpace

In [None]:
# Instantiate the HPOBench tabular benchmark. The positional arguments are the
# string 'nn' and the integer 31 (presumably the model family and the task id --
# confirm against TabularBenchmark's signature).
benchmark_model, benchmark_task = 'nn', 31
benchmark = TabularBenchmark(benchmark_model, benchmark_task)

In [None]:
def find_nearest_ordinal(value: float, hyperparameter_type: OrdinalHyperparameter):
    """Snap ``value`` onto the closest entry of an ordinal hyperparameter.

    Closeness is measured by squared difference against every entry of
    ``hyperparameter_type.sequence``. When several entries are equally close,
    the one that appears first in the sequence wins (``argmin`` returns the
    first minimum).

    Args:
        value: Numeric value to snap.
        hyperparameter_type: Ordinal hyperparameter exposing ``sequence``,
            ``get_seq_order()`` and ``get_value()``.

    Returns:
        Whatever ``hyperparameter_type.get_value`` yields for the closest
        position in the sequence order.
    """
    candidates = np.array(hyperparameter_type.sequence)
    squared_gap = np.square(candidates - value)
    # Plain Python int so it can index any kind of order container.
    closest_idx = int(squared_gap.argmin())
    seq_order = hyperparameter_type.get_seq_order()
    return hyperparameter_type.get_value(seq_order[closest_idx])

def round_to_valid_config(values: dict, space: ConfigurationSpace):
    """Snap every entry of a raw configuration onto the grid of ``space``.

    For each hyperparameter returned by ``space.get_hyperparameters()``, the
    value stored under the hyperparameter's name in ``values`` is passed to
    ``find_nearest_ordinal``. Every hyperparameter is handled as an ordinal
    one (assumption -- the space is not checked for other kinds).

    Args:
        values: Mapping from hyperparameter name to a numeric value.
        space: Configuration space whose hyperparameters define the grid.

    Returns:
        A new dict keyed by hyperparameter name holding the snapped values.

    Raises:
        KeyError: If ``values`` has no entry for one of the hyperparameters.
    """
    snapped = {}
    for hp in space.get_hyperparameters():
        snapped[hp.name] = find_nearest_ordinal(values[hp.name], hp)
    return snapped

In [None]:
# Raw hyperparameter values that are not guaranteed to lie on the benchmark's
# grid; they are snapped to the nearest grid values before querying.
invalid_config = {"alpha": 0.001, "batch_size": 32, "depth": 2.0, "learning_rate_init": 0.001, "width": 64}
# Reuse `invalid_config` instead of repeating the literal so the two cannot drift apart.
valid_config = round_to_valid_config(invalid_config, benchmark.configuration_space)
result = benchmark.objective_function(valid_config)
(result['function_value'], result['cost'])

In [None]:
# Regression inputs: the five hyperparameter columns plus the "iter" column of
# the benchmark's lookup table.
inputs = benchmark.table.loc[:, [
    "alpha",
    "batch_size",
    "depth",
    "learning_rate_init",
    "width",
    "iter",
]]
# Expand the dict-valued "result" column into one flat column per key.
results = pd.json_normalize(benchmark.table["result"])

# Distillation of benchmark into an MLP

In [None]:
from collections import OrderedDict
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from tqdm.notebook import tqdm

In [None]:
# Training configuration for the two distilled MLPs.
seed = 0                 # fixed seed so weight init and batch shuffling are reproducible
batch_size = 64          # mini-batch size used by both DataLoaders
num_hidden_units = 128   # width of the single hidden layer
weight_decay = 0.0001    # weight_decay passed to Adam
num_epochs = 10          # passes over the full table per model
learning_rate = 0.001    # lr passed to Adam

# Seed torch's global RNG before any model or DataLoader is created, so that a
# fresh "Restart Kernel -> Run All" reproduces the same trained weights.
torch.manual_seed(seed)

In [None]:
def build_mlp(hidden_units: int, in_features: int = 6) -> nn.Sequential:
    """Create a one-hidden-layer MLP: Linear -> ELU -> Linear(1).

    The layer names ('W1b', 'activations', 'W2b') determine the keys of the
    model's state_dict, so they must stay in sync with any code that loads the
    saved weights.

    Args:
        hidden_units: Width of the hidden layer.
        in_features: Number of input features (6 by default).

    Returns:
        A freshly initialised ``nn.Sequential`` with a single scalar output.
    """
    return nn.Sequential(OrderedDict([
        ('W1b', nn.Linear(in_features, hidden_units, bias=True)),
        ('activations', nn.ELU()),
        ('W2b', nn.Linear(hidden_units, 1, bias=True)),
    ]))


# Two independent networks with the same architecture: one is fitted to the
# benchmark's function value, the other to the log of its cost.
model_y = build_mlp(num_hidden_units)
model_log_c = build_mlp(num_hidden_units)

In [None]:
# Convert the benchmark table into float32 tensors for training.
# Every pandas object is turned into a NumPy array first: handing a Series
# directly to torch.tensor relies on torch treating it as a generic sequence,
# which is slow and can break when the Series index is not 0..n-1.
X_tensor = torch.tensor(inputs.to_numpy(), dtype=torch.float32)
# unsqueeze(1) adds a trailing dimension so targets line up with the
# single-output final Linear layer of the models.
y_tensor = torch.tensor(results["function_value"].to_numpy(), dtype=torch.float32).unsqueeze(1)
# Cost is modelled in log space; assumes every recorded cost is strictly
# positive (a zero cost would yield -inf here) -- TODO confirm.
log_c_tensor = torch.log(torch.tensor(results["cost"].to_numpy(), dtype=torch.float32)).unsqueeze(1)

In [None]:
# Summary statistics of the function-value targets: (mean, std, min, max).
tuple(getattr(y_tensor, stat)() for stat in ("mean", "std", "min", "max"))

In [None]:
# Summary statistics of the log-cost targets: (mean, std, min, max).
tuple(getattr(log_c_tensor, stat)() for stat in ("mean", "std", "min", "max"))

In [None]:
# Both regressions share one mean-squared-error criterion.
loss_fn = nn.MSELoss()
# One Adam optimiser per network, built with identical settings; model_y's
# optimiser is created first, then model_log_c's.
optimizer_y, optimizer_log_c = (
    torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=weight_decay)
    for net in (model_y, model_log_c)
)

In [None]:
# Number of rows in the training table (used for progress reporting).
size = X_tensor.shape[0]

# Pair the shared inputs with each regression target.
dataset_y, dataset_log_c = (
    TensorDataset(X_tensor, target) for target in (y_tensor, log_c_tensor)
)

# Shuffled mini-batch loaders, one per target.
dataloader_y, dataloader_log_c = (
    DataLoader(dataset, batch_size=batch_size, shuffle=True)
    for dataset in (dataset_y, dataset_log_c)
)

In [None]:
# Fit model_y to the function-value targets with mini-batch Adam.
for epoch in tqdm(range(num_epochs)):
    # Wrap the DataLoader itself rather than enumerate(...): enumerate has no
    # length, so tqdm(enumerate(loader)) cannot show a total or an ETA.
    pbar = tqdm(dataloader_y)
    for batch, (X, y) in enumerate(pbar):
        pred = model_y(X)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer_y.step()
        # Clear gradients after the step so the next batch starts from zero.
        optimizer_y.zero_grad()

        # Refresh the bar's description every 100 batches.
        if batch % 100 == 0:
            # Separate names keep `loss` a tensor instead of rebinding it to a float.
            batch_loss, current = loss.item(), (batch + 1) * len(X)
            pbar.set_description(f"loss: {batch_loss:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:
# Compare the target range with the fitted model's prediction range and report
# the full-dataset MSE. The forward pass runs once, without autograd tracking,
# instead of three separate graph-building passes.
with torch.no_grad():
    y_fit = model_y(X_tensor)
    y_fit_mse = loss_fn(y_fit, y_tensor)
((y_tensor.min(), y_fit.min()), (y_tensor.max(), y_fit.max()), y_fit_mse)

In [None]:
# Fit model_log_c to the log-cost targets with mini-batch Adam.
for epoch in tqdm(range(num_epochs)):
    # Wrap the DataLoader itself rather than enumerate(...): enumerate has no
    # length, so tqdm(enumerate(loader)) cannot show a total or an ETA.
    pbar = tqdm(dataloader_log_c)
    for batch, (X, log_c) in enumerate(pbar):
        pred = model_log_c(X)
        loss = loss_fn(pred, log_c)
        loss.backward()
        optimizer_log_c.step()
        # Clear gradients after the step so the next batch starts from zero.
        optimizer_log_c.zero_grad()

        # Refresh the bar's description every 100 batches.
        if batch % 100 == 0:
            # Separate names keep `loss` a tensor instead of rebinding it to a float.
            batch_loss, current = loss.item(), (batch + 1) * len(X)
            pbar.set_description(f"loss: {batch_loss:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:
# Compare the log-cost target range with the fitted model's prediction range
# and report the full-dataset MSE. The forward pass runs once, without
# autograd tracking, instead of three separate graph-building passes.
with torch.no_grad():
    log_c_fit = model_log_c(X_tensor)
    log_c_fit_mse = loss_fn(log_c_fit, log_c_tensor)
((log_c_tensor.min(), log_c_fit.min()), (log_c_tensor.max(), log_c_fit.max()), log_c_fit_mse)

In [None]:
# Persist both distilled models together with the min/max of their predictions
# over the full training table.
with torch.no_grad():
    # Run each model over the table once and reuse the output for min and max.
    y_out = model_y(X_tensor)
    log_c_out = model_log_c(X_tensor)

    torch.save(model_y.state_dict(), "distilled_nn_y_model.pickle")
    torch.save({"min": y_out.min(), "max": y_out.max()}, "distilled_nn_y_model_min_max.pickle")
    torch.save(model_log_c.state_dict(), "distilled_nn_log_c_model.pickle")
    torch.save({"min": log_c_out.min(), "max": log_c_out.max()}, "distilled_nn_log_c_model_min_max.pickle")

## Test loading the models

In [None]:
# Hidden width used when rebuilding the networks; it must equal the width the
# saved models were trained with, otherwise load_state_dict raises a size
# mismatch error.
test_num_hidden_units = 128


def load_distilled_mlp(state_dict_path: str, hidden_units: int) -> nn.Sequential:
    """Rebuild the distilled MLP architecture and load saved weights into it.

    Args:
        state_dict_path: File written by ``torch.save(model.state_dict(), ...)``.
        hidden_units: Width of the hidden layer of the saved model.

    Returns:
        An ``nn.Sequential`` (Linear -> ELU -> Linear) carrying the loaded weights.

    Raises:
        RuntimeError: If the stored keys or tensor shapes do not match the
            rebuilt architecture (raised by ``load_state_dict``).
    """
    model = nn.Sequential(OrderedDict([
        ('W1b', nn.Linear(6, hidden_units, bias=True)),
        ('activations', nn.ELU()),
        ('W2b', nn.Linear(hidden_units, 1, bias=True)),
    ]))
    model.load_state_dict(torch.load(state_dict_path))
    return model


test_model_y = load_distilled_mlp("distilled_nn_y_model.pickle", test_num_hidden_units)
test_model_log_c = load_distilled_mlp("distilled_nn_log_c_model.pickle", test_num_hidden_units)
# Display the stored prediction ranges as a sanity check that the files round-trip.
(torch.load("distilled_nn_y_model_min_max.pickle"), torch.load("distilled_nn_log_c_model_min_max.pickle"))