In [1]:
import time

import numpy as np

# BO import
from bayes_opt import BayesianOptimization
from bayes_opt.util import acq_max
from bayes_opt import UtilityFunction
from sklearn.gaussian_process.kernels import RBF

# Kendall distance import
from scipy.stats._stats import _kendall_dis

# Kernel imports
from sklearn.gaussian_process.kernels import StationaryKernelMixin, NormalizedKernelMixin, Kernel, Hyperparameter, _check_length_scale, _approx_fprime

# Distance imports
from scipy.spatial.distance import squareform

# Other imports
import warnings

In [2]:
# Imports with the problem
from scipy.spatial.distance import pdist, cdist

# Some definitions necessary for the program

In [3]:
def kendall_distance(x,y):
    perm = np.argsort(y)  # sort on y and convert y to dense ranks
    x, y = x[perm], y[perm]
    y = np.r_[True, y[1:] != y[:-1]].cumsum(dtype=np.intp)

    # stable sort on x and convert x to dense ranks
    perm = np.argsort(x, kind='mergesort')
    x, y = x[perm], y[perm]
    x = np.r_[True, x[1:] != x[:-1]].cumsum(dtype=np.intp)

    dis = _kendall_dis(x, y)  # discordant pairs
    return dis

In [4]:
def random_key(v):
    permutation = np.argsort(v)
    return permutation

In [5]:
def black_box_function(**kwargs):
    data = np.fromiter(kwargs.values(), dtype=float)
    return np.sum(data)

In [6]:
def generate_bounds(n, lower_bound=0, upper_bound=1):
    i = 0
    pbounds = {}
    while i < n:
        xi = 'x' + str(i)
        pbounds[xi] = (lower_bound, upper_bound)
        i += 1
    return pbounds

# Definition of kernel.
### Here the problem appears

In [7]:
class PermutationRBF(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
    def __init__(self, alpha=1.0, alpha_bounds=(1e-5, 1e5)):
        self.alpha = alpha
        self.alpha_bounds = alpha_bounds

    @property
    def anisotropic(self):
        return np.iterable(self.alpha) and len(self.alpha) > 1

    @property
    def hyperparameter_length_scale(self):
        if self.anisotropic:
            return Hyperparameter("length_scale", "numeric",
                                  self.alpha_bounds,
                                  len(self.alpha))
        return Hyperparameter(
            "alpha", "numeric", self.alpha_bounds)

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        alpha = _check_length_scale(X, self.alpha)
        if Y is None:
            dists = pdist(X / alpha, kendall_distance)
            K = np.exp(-.5 * dists)
            # convert from upper-triangular matrix to square matrix
            K = squareform(K)
            np.fill_diagonal(K, 1)
        else:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated when Y is None.")
            dists = cdist(X / alpha, Y / alpha, kendall_distance)
            K = np.exp(-.5 * dists)
        if eval_gradient:
            if self.hyperparameter_length_scale.fixed:
                # Hyperparameter l kept fixed
                return K, np.empty((X.shape[0], X.shape[0], 0))
            elif not self.anisotropic or alpha.shape[0] == 1:
                K_gradient = \
                    (K * squareform(dists))[:, :, np.newaxis]
                return K, K_gradient
            elif self.anisotropic:
                # We need to recompute the pairwise dimension-wise distances
                K_gradient = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 \
                    / (alpha ** 2)
                K_gradient *= K[..., np.newaxis]
                return K, K_gradient
        else:
            return K

In [8]:
class RBF(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)):
        self.length_scale = length_scale
        self.length_scale_bounds = length_scale_bounds

    @property
    def anisotropic(self):
        return np.iterable(self.length_scale) and len(self.length_scale) > 1

    @property
    def hyperparameter_length_scale(self):
        if self.anisotropic:
            return Hyperparameter("length_scale", "numeric",
                                  self.length_scale_bounds,
                                  len(self.length_scale))
        return Hyperparameter(
            "length_scale", "numeric", self.length_scale_bounds)

    def __call__(self, X, Y=None, eval_gradient=False):
        X = np.atleast_2d(X)
        length_scale = _check_length_scale(X, self.length_scale)
        if Y is None:
            dists = pdist(X / length_scale, metric='sqeuclidean')
            K = np.exp(-.5 * dists)
            # convert from upper-triangular matrix to square matrix
            K = squareform(K)
            np.fill_diagonal(K, 1)
        else:
            if eval_gradient:
                raise ValueError(
                    "Gradient can only be evaluated when Y is None.")
            dists = cdist(X / length_scale, Y / length_scale,
                          metric='sqeuclidean')
            K = np.exp(-.5 * dists)

        if eval_gradient:
            if self.hyperparameter_length_scale.fixed:
                # Hyperparameter l kept fixed
                return K, np.empty((X.shape[0], X.shape[0], 0))
            elif not self.anisotropic or length_scale.shape[0] == 1:
                K_gradient = \
                    (K * squareform(dists))[:, :, np.newaxis]
                return K, K_gradient
            elif self.anisotropic:
                # We need to recompute the pairwise dimension-wise distances
                K_gradient = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 \
                    / (length_scale ** 2)
                K_gradient *= K[..., np.newaxis]
                return K, K_gradient
        else:
            return K

    def __repr__(self):
        if self.anisotropic:
            return "{0}(length_scale=[{1}])".format(
                self.__class__.__name__, ", ".join(map("{0:.3g}".format,
                                                   self.length_scale)))
        else:  # isotropic
            return "{0}(length_scale={1:.3g})".format(
                self.__class__.__name__, np.ravel(self.length_scale)[0])

In [9]:
class MyBayesianOptimization(BayesianOptimization):
    def suggest(self, utility_function):
        """Most promissing point to probe next"""
        if len(self._space) == 0:
            return self._space.array_to_params(self._space.random_sample())

        # Sklearn's GP throws a large number of warnings at times, but
        # we don't really need to see them here.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self._gp.fit(self._space.params, self._space.target)

        # Finding argmax of the acquisition function.
        suggestion = acq_max(
            ac=utility_function.utility,
            gp=self._gp,
            y_max=self._space.target.max(),
            bounds=self._space.bounds,
            random_state=self._random_state,
            n_warmup=10000,
            n_iter=0 # This is the only change of the method
        )

        return self._space.array_to_params(suggestion)

# Program starts here

### Using my custom kernel (slow)

In [10]:
n = 20
seed = 0
it = 5
kappa = 2.5
xi = 0.0

In [11]:
# Bounds of each variable
pbounds = generate_bounds(n)

# Bayesian Optimizer
optimizer = MyBayesianOptimization(
    f=None,
    pbounds=pbounds,
    verbose=2,
    random_state=seed,
)

# Set the Kernel
optimizer.set_gp_params(kernel=PermutationRBF())

# Set the Acquisition function
utility = UtilityFunction(kind="ucb", kappa=kappa, xi=xi)

# Bayesian Optimization with Gaussian Process
for i in range(it):
    t_ini = time.time()
    next_point = optimizer.suggest(utility)
    t_end = time.time() - t_ini
    print('iteration: ',i)
    print('time: ', t_end)
    target = black_box_function(**next_point)
    optimizer.register(params=next_point, target=target)

iteration:  0
time:  0.0008704662322998047
iteration:  1
time:  1.2141697406768799
iteration:  2
time:  2.3469510078430176
iteration:  3
time:  3.5015127658843994
iteration:  4
time:  4.695566892623901


### Using RBF kernel (very fast)

In [12]:
n = 20
seed = 0
it = 5
kappa = 2.5
xi = 0.0

In [13]:
# Bounds of each variable
pbounds = generate_bounds(n)

# Bayesian Optimizer
optimizer = MyBayesianOptimization(
    f=None,
    pbounds=pbounds,
    verbose=2,
    random_state=seed,
)

# Set the Kernel
optimizer.set_gp_params(kernel=RBF())

# Set the Acquisition function
utility = UtilityFunction(kind="ucb", kappa=kappa, xi=xi)

# Bayesian Optimization with Gaussian Process
for i in range(it):
    t_ini = time.time()
    next_point = optimizer.suggest(utility)
    t_end = time.time() - t_ini
    print('iteration: ',i)
    print('time: ', t_end)
    target = black_box_function(**next_point)
    optimizer.register(params=next_point, target=target)

iteration:  0
time:  0.000415802001953125
iteration:  1
time:  0.020179033279418945
iteration:  2
time:  0.033345937728881836
iteration:  3
time:  0.033483028411865234
iteration:  4
time:  0.0286252498626709
