From 9a7179455a69a593fe3e5b5720bf7f4957075fa4 Mon Sep 17 00:00:00 2001 From: Contramundum Date: Fri, 2 Feb 2024 21:39:51 +0900 Subject: [PATCH 01/11] Add UCB and add tests --- optuna/_gp/acqf.py | 66 +++++++++++---------- optuna/_gp/gp.py | 12 ++-- optuna/samplers/_gp/sampler.py | 1 + tests/gp_tests/test_acqf.py | 103 +++++++++++++++++++++++++++++++++ 4 files changed, 147 insertions(+), 35 deletions(-) create mode 100644 tests/gp_tests/test_acqf.py diff --git a/optuna/_gp/acqf.py b/optuna/_gp/acqf.py index df6177e8bf..124790f0c6 100644 --- a/optuna/_gp/acqf.py +++ b/optuna/_gp/acqf.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING import numpy as np +from enum import IntEnum from optuna._gp.gp import kernel from optuna._gp.gp import kernel_at_zero_distance @@ -45,7 +46,7 @@ def standard_logei(z: torch.Tensor) -> torch.Tensor: return vals -def logei(mean: torch.Tensor, var: torch.Tensor, f0: torch.Tensor) -> torch.Tensor: +def logei(mean: torch.Tensor, var: torch.Tensor, f0: float) -> torch.Tensor: # Return E_{y ~ N(mean, var)}[max(0, y-f0)] sigma = torch.sqrt(var) st_val = standard_logei((mean - f0) / sigma) @@ -53,44 +54,35 @@ def logei(mean: torch.Tensor, var: torch.Tensor, f0: torch.Tensor) -> torch.Tens return val -def eval_logei( - kernel_params: KernelParamsTensor, - X: torch.Tensor, - is_categorical: torch.Tensor, - cov_Y_Y_inv: torch.Tensor, - cov_Y_Y_inv_Y: torch.Tensor, - max_Y: torch.Tensor, - x: torch.Tensor, - # Additional noise to prevent numerical instability. - # Usually this is set to a very small value. - stabilizing_noise: float, -) -> torch.Tensor: - cov_fx_fX = kernel(is_categorical, kernel_params, x[..., None, :], X)[..., 0, :] - cov_fx_fx = kernel_at_zero_distance(kernel_params) - (mean, var) = posterior(cov_Y_Y_inv, cov_Y_Y_inv_Y, cov_fx_fX, cov_fx_fx) - val = logei(mean, var + stabilizing_noise, max_Y) - - return val +def ucb(mean: torch.Tensor, var: torch.Tensor, beta: float) -> torch.Tensor: + return mean + torch.sqrt(beta * var) # TODO(contramundum53): consider abstraction for acquisition functions. +class AcquisitionFunctionType(IntEnum): + LOG_EI = 0 + UCB = 1 + @dataclass(frozen=True) class AcquisitionFunctionParams: - # Currently only logEI is supported. + acqf_type: AcquisitionFunctionType kernel_params: KernelParamsTensor X: np.ndarray search_space: SearchSpace cov_Y_Y_inv: np.ndarray cov_Y_Y_inv_Y: np.ndarray - max_Y: np.ndarray + max_Y: float + beta: float | None acqf_stabilizing_noise: float def create_acqf_params( + acqf_type: AcquisitionFunctionType, kernel_params: KernelParamsTensor, search_space: SearchSpace, X: np.ndarray, Y: np.ndarray, + beta: float | None = None, acqf_stabilizing_noise: float = 1e-12, ) -> AcquisitionFunctionParams: X_tensor = torch.from_numpy(X) @@ -102,30 +94,42 @@ def create_acqf_params( cov_Y_Y_inv = np.linalg.inv(cov_Y_Y) return AcquisitionFunctionParams( + acqf_type=acqf_type, kernel_params=kernel_params, X=X, search_space=search_space, cov_Y_Y_inv=cov_Y_Y_inv, cov_Y_Y_inv_Y=cov_Y_Y_inv @ Y, max_Y=np.max(Y), + beta=beta, acqf_stabilizing_noise=acqf_stabilizing_noise, ) def eval_acqf(acqf_params: AcquisitionFunctionParams, x: torch.Tensor) -> torch.Tensor: - return eval_logei( - kernel_params=acqf_params.kernel_params, - X=torch.from_numpy(acqf_params.X), - is_categorical=torch.from_numpy( + mean, var = posterior( + acqf_params.kernel_params, + torch.from_numpy(acqf_params.X), + torch.from_numpy( acqf_params.search_space.scale_types == ScaleType.CATEGORICAL - ), - cov_Y_Y_inv=torch.from_numpy(acqf_params.cov_Y_Y_inv), - cov_Y_Y_inv_Y=torch.from_numpy(acqf_params.cov_Y_Y_inv_Y), - max_Y=torch.tensor(acqf_params.max_Y, dtype=torch.float64), - x=x, - stabilizing_noise=acqf_params.acqf_stabilizing_noise, + ), + torch.from_numpy(acqf_params.cov_Y_Y_inv), + torch.from_numpy(acqf_params.cov_Y_Y_inv_Y), + x, ) + if acqf_params.acqf_type == AcquisitionFunctionType.LOG_EI: + return logei( + mean=mean, + var=var + acqf_params.acqf_stabilizing_noise, + f0=acqf_params.max_Y + ) + elif acqf_params.acqf_type == AcquisitionFunctionType.UCB: + assert acqf_params.beta is not None + return ucb(mean=mean, var=var, beta=acqf_params.beta) + else: + assert False # Unknown acquisition function type. + def eval_acqf_no_grad(acqf_params: AcquisitionFunctionParams, x: np.ndarray) -> np.ndarray: with torch.no_grad(): diff --git a/optuna/_gp/gp.py b/optuna/_gp/gp.py index 0034a1a234..ec14181749 100644 --- a/optuna/_gp/gp.py +++ b/optuna/_gp/gp.py @@ -97,14 +97,18 @@ def kernel_at_zero_distance( def posterior( + kernel_params: KernelParamsTensor, + X: torch.Tensor, # [len(trials), len(params)] + is_categorical: torch.Tensor, # bool[len(params)] cov_Y_Y_inv: torch.Tensor, # [len(trials), len(trials)] cov_Y_Y_inv_Y: torch.Tensor, # [len(trials)] - cov_fx_fX: torch.Tensor, # [(batch,) len(trials)] - cov_fx_fx: torch.Tensor, # Scalar or [(batch,)] -) -> tuple[torch.Tensor, torch.Tensor]: # [(batch,)], [(batch,)] + x: torch.Tensor, # [(batch,) len(params)] +) -> tuple[torch.Tensor, torch.Tensor]: # (mean: [(batch,)], var: [(batch,)]) + cov_fx_fX = kernel(is_categorical, kernel_params, x[..., None, :], X)[..., 0, :] + cov_fx_fx = kernel_at_zero_distance(kernel_params) + # mean = cov_fx_fX @ inv(cov_fX_fX + noise * I) @ Y # var = cov_fx_fx - cov_fx_fX @ inv(cov_fX_fX + noise * I) @ cov_fx_fX.T - mean = cov_fx_fX @ cov_Y_Y_inv_Y # [batch] var = cov_fx_fx - (cov_fx_fX * (cov_fx_fX @ cov_Y_Y_inv)).sum(dim=-1) # [batch] # We need to clamp the variance to avoid negative values due to numerical errors. diff --git a/optuna/samplers/_gp/sampler.py b/optuna/samplers/_gp/sampler.py index a195750052..8e73fd88ec 100644 --- a/optuna/samplers/_gp/sampler.py +++ b/optuna/samplers/_gp/sampler.py @@ -172,6 +172,7 @@ def sample_relative( self._kernel_params_cache = kernel_params acqf_params = acqf.create_acqf_params( + acqf_type=acqf.AcquisitionFunctionType.LOG_EI, kernel_params=kernel_params, search_space=internal_search_space, X=normalized_params, diff --git a/tests/gp_tests/test_acqf.py b/tests/gp_tests/test_acqf.py new file mode 100644 index 0000000000..428ffd9d1e --- /dev/null +++ b/tests/gp_tests/test_acqf.py @@ -0,0 +1,103 @@ +from optuna._gp.gp import KernelParamsTensor, kernel, posterior +from optuna._gp.acqf import AcquisitionFunctionType, eval_acqf, AcquisitionFunctionParams, create_acqf_params +from optuna._gp.search_space import SearchSpace, ScaleType +import numpy as np +import pytest + +from botorch.models.model import Model +from botorch.models import SingleTaskGP +from gpytorch.kernels import MaternKernel, ScaleKernel +from gpytorch.likelihoods import GaussianLikelihood +from gpytorch.likelihoods.noise_models import HomoskedasticNoise +from gpytorch.means import ZeroMean +from botorch.acquisition.analytic import LogExpectedImprovement, UpperConfidenceBound +from typing import Callable, Any +import torch + +@pytest.mark.parametrize( + "acqf_type, beta, botorch_acqf_gen", + [ + (AcquisitionFunctionType.LOG_EI, None, lambda model, acqf_params: LogExpectedImprovement(model, best_f=acqf_params.max_Y)), + (AcquisitionFunctionType.UCB, 2.0, lambda model, acqf_params: UpperConfidenceBound(model, beta=acqf_params.beta)) + ] +) +@pytest.mark.parametrize("x",[ + np.array([0.15, 0.12]), # unbatched + np.array([[0.15, 0.12], [0.0, 1.0]]) # batched +]) +def test_posterior_and_eval_acqf(acqf_type: AcquisitionFunctionType, beta: float | None, botorch_acqf_gen: Callable[[Model, AcquisitionFunctionParams], Any], x: np.ndarray) -> None: + + n_dims = 2 + X = np.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.1]]) + Y = np.array([1.0, 2.0, 3.0]) + kernel_params = KernelParamsTensor( + inverse_squared_lengthscales=torch.tensor([2.0, 3.0], dtype=torch.float64), + kernel_scale=torch.tensor(4.0, dtype=torch.float64), + noise_var=torch.tensor(0.1, dtype=torch.float64), + ) + search_space = SearchSpace( + scale_types=np.full(n_dims, ScaleType.LINEAR), + bounds=np.array([[0.0, 1.0] * n_dims]), + steps=np.zeros(n_dims) + ) + + + acqf_params = create_acqf_params( + acqf_type=acqf_type, + kernel_params=kernel_params, + search_space=search_space, + X=X, + Y=Y, + beta=beta, + acqf_stabilizing_noise=0.0, + ) + + x_tensor = torch.from_numpy(x) + x_tensor.requires_grad_(True) + + prior_cov_fX_fX = kernel(torch.zeros(n_dims, dtype=torch.bool), kernel_params, torch.from_numpy(X), torch.from_numpy(X)) + posterior_mean_fx, posterior_var_fx = posterior(kernel_params, torch.from_numpy(X), torch.zeros(n_dims, dtype=torch.bool), acqf_params.cov_Y_Y_inv, acqf_params.cov_Y_Y_inv_Y, torch.from_numpy(x)) + + acqf_value = eval_acqf(acqf_params, x_tensor) + acqf_value.sum().backward() + acqf_grad = x_tensor.grad + assert acqf_grad is not None + + + gpytorch_likelihood = GaussianLikelihood() + gpytorch_likelihood.noise_covar.noise = kernel_params.noise_var + matern_kernel = MaternKernel(nu=2.5, ard_num_dims=n_dims) + matern_kernel.lengthscale = kernel_params.inverse_squared_lengthscales.rsqrt() + covar_module = ScaleKernel(matern_kernel) + covar_module.outputscale = kernel_params.kernel_scale + + botorch_model = SingleTaskGP( + train_X = torch.from_numpy(X), + train_Y = torch.from_numpy(Y)[:, None], + likelihood=gpytorch_likelihood, + covar_module=covar_module, + mean_module=ZeroMean(), + ) + botorch_prior_fX = botorch_model(torch.from_numpy(X)) + assert torch.allclose(botorch_prior_fX.covariance_matrix, prior_cov_fX_fX) + + botorch_model.eval() + + botorch_acqf = botorch_acqf_gen(botorch_model, acqf_params) + + x_tensor = torch.from_numpy(x) + x_tensor.requires_grad_(True) + botorch_posterior_fx = botorch_model.posterior(x_tensor[..., None, :]) + assert torch.allclose(posterior_mean_fx, botorch_posterior_fx.mean[..., 0, 0]) + assert torch.allclose(posterior_var_fx, botorch_posterior_fx.variance[..., 0, 0]) + + botorch_acqf_value = botorch_acqf(x_tensor[..., None, :]) + botorch_acqf_value.sum().backward() + botorch_acqf_grad = x_tensor.grad + assert botorch_acqf_grad is not None + assert torch.allclose(acqf_value, botorch_acqf_value) + assert torch.allclose(acqf_grad, botorch_acqf_grad) + + + + From b2cec20da5b12baddf62fbee0fc6c5bd67361fa8 Mon Sep 17 00:00:00 2001 From: Contramundum Date: Fri, 2 Feb 2024 21:49:25 +0900 Subject: [PATCH 02/11] format --- optuna/_gp/acqf.py | 16 ++----- optuna/_gp/gp.py | 2 +- tests/gp_tests/test_acqf.py | 95 ++++++++++++++++++++++++------------- 3 files changed, 67 insertions(+), 46 deletions(-) diff --git a/optuna/_gp/acqf.py b/optuna/_gp/acqf.py index 124790f0c6..28b0984920 100644 --- a/optuna/_gp/acqf.py +++ b/optuna/_gp/acqf.py @@ -1,14 +1,13 @@ from __future__ import annotations from dataclasses import dataclass +from enum import IntEnum import math from typing import TYPE_CHECKING import numpy as np -from enum import IntEnum from optuna._gp.gp import kernel -from optuna._gp.gp import kernel_at_zero_distance from optuna._gp.gp import KernelParamsTensor from optuna._gp.gp import posterior from optuna._gp.search_space import ScaleType @@ -63,6 +62,7 @@ class AcquisitionFunctionType(IntEnum): LOG_EI = 0 UCB = 1 + @dataclass(frozen=True) class AcquisitionFunctionParams: acqf_type: AcquisitionFunctionType @@ -108,22 +108,16 @@ def create_acqf_params( def eval_acqf(acqf_params: AcquisitionFunctionParams, x: torch.Tensor) -> torch.Tensor: mean, var = posterior( - acqf_params.kernel_params, + acqf_params.kernel_params, torch.from_numpy(acqf_params.X), - torch.from_numpy( - acqf_params.search_space.scale_types == ScaleType.CATEGORICAL - ), + torch.from_numpy(acqf_params.search_space.scale_types == ScaleType.CATEGORICAL), torch.from_numpy(acqf_params.cov_Y_Y_inv), torch.from_numpy(acqf_params.cov_Y_Y_inv_Y), x, ) if acqf_params.acqf_type == AcquisitionFunctionType.LOG_EI: - return logei( - mean=mean, - var=var + acqf_params.acqf_stabilizing_noise, - f0=acqf_params.max_Y - ) + return logei(mean=mean, var=var + acqf_params.acqf_stabilizing_noise, f0=acqf_params.max_Y) elif acqf_params.acqf_type == AcquisitionFunctionType.UCB: assert acqf_params.beta is not None return ucb(mean=mean, var=var, beta=acqf_params.beta) diff --git a/optuna/_gp/gp.py b/optuna/_gp/gp.py index ec14181749..72b1ddb8f6 100644 --- a/optuna/_gp/gp.py +++ b/optuna/_gp/gp.py @@ -106,7 +106,7 @@ def posterior( ) -> tuple[torch.Tensor, torch.Tensor]: # (mean: [(batch,)], var: [(batch,)]) cov_fx_fX = kernel(is_categorical, kernel_params, x[..., None, :], X)[..., 0, :] cov_fx_fx = kernel_at_zero_distance(kernel_params) - + # mean = cov_fx_fX @ inv(cov_fX_fX + noise * I) @ Y # var = cov_fx_fx - cov_fx_fX @ inv(cov_fX_fX + noise * I) @ cov_fx_fX.T mean = cov_fx_fX @ cov_Y_Y_inv_Y # [batch] diff --git a/tests/gp_tests/test_acqf.py b/tests/gp_tests/test_acqf.py index 428ffd9d1e..bac92b47a2 100644 --- a/tests/gp_tests/test_acqf.py +++ b/tests/gp_tests/test_acqf.py @@ -1,32 +1,53 @@ -from optuna._gp.gp import KernelParamsTensor, kernel, posterior -from optuna._gp.acqf import AcquisitionFunctionType, eval_acqf, AcquisitionFunctionParams, create_acqf_params -from optuna._gp.search_space import SearchSpace, ScaleType -import numpy as np -import pytest +from typing import Any +from typing import Callable -from botorch.models.model import Model +from botorch.acquisition.analytic import LogExpectedImprovement +from botorch.acquisition.analytic import UpperConfidenceBound from botorch.models import SingleTaskGP -from gpytorch.kernels import MaternKernel, ScaleKernel +from botorch.models.model import Model +from gpytorch.kernels import MaternKernel +from gpytorch.kernels import ScaleKernel from gpytorch.likelihoods import GaussianLikelihood -from gpytorch.likelihoods.noise_models import HomoskedasticNoise from gpytorch.means import ZeroMean -from botorch.acquisition.analytic import LogExpectedImprovement, UpperConfidenceBound -from typing import Callable, Any +import numpy as np +import pytest import torch +from optuna._gp.acqf import AcquisitionFunctionParams +from optuna._gp.acqf import AcquisitionFunctionType +from optuna._gp.acqf import create_acqf_params +from optuna._gp.acqf import eval_acqf +from optuna._gp.gp import kernel +from optuna._gp.gp import KernelParamsTensor +from optuna._gp.gp import posterior +from optuna._gp.search_space import ScaleType +from optuna._gp.search_space import SearchSpace + + @pytest.mark.parametrize( "acqf_type, beta, botorch_acqf_gen", [ - (AcquisitionFunctionType.LOG_EI, None, lambda model, acqf_params: LogExpectedImprovement(model, best_f=acqf_params.max_Y)), - (AcquisitionFunctionType.UCB, 2.0, lambda model, acqf_params: UpperConfidenceBound(model, beta=acqf_params.beta)) - ] + ( + AcquisitionFunctionType.LOG_EI, + None, + lambda model, acqf_params: LogExpectedImprovement(model, best_f=acqf_params.max_Y), + ), + ( + AcquisitionFunctionType.UCB, + 2.0, + lambda model, acqf_params: UpperConfidenceBound(model, beta=acqf_params.beta), + ), + ], ) -@pytest.mark.parametrize("x",[ - np.array([0.15, 0.12]), # unbatched - np.array([[0.15, 0.12], [0.0, 1.0]]) # batched -]) -def test_posterior_and_eval_acqf(acqf_type: AcquisitionFunctionType, beta: float | None, botorch_acqf_gen: Callable[[Model, AcquisitionFunctionParams], Any], x: np.ndarray) -> None: - +@pytest.mark.parametrize( + "x", [np.array([0.15, 0.12]), np.array([[0.15, 0.12], [0.0, 1.0]])] # unbatched # batched +) +def test_posterior_and_eval_acqf( + acqf_type: AcquisitionFunctionType, + beta: float | None, + botorch_acqf_gen: Callable[[Model, AcquisitionFunctionParams], Any], + x: np.ndarray, +) -> None: n_dims = 2 X = np.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.1]]) Y = np.array([1.0, 2.0, 3.0]) @@ -36,12 +57,11 @@ def test_posterior_and_eval_acqf(acqf_type: AcquisitionFunctionType, beta: float noise_var=torch.tensor(0.1, dtype=torch.float64), ) search_space = SearchSpace( - scale_types=np.full(n_dims, ScaleType.LINEAR), - bounds=np.array([[0.0, 1.0] * n_dims]), - steps=np.zeros(n_dims) + scale_types=np.full(n_dims, ScaleType.LINEAR), + bounds=np.array([[0.0, 1.0] * n_dims]), + steps=np.zeros(n_dims), ) - acqf_params = create_acqf_params( acqf_type=acqf_type, kernel_params=kernel_params, @@ -55,15 +75,26 @@ def test_posterior_and_eval_acqf(acqf_type: AcquisitionFunctionType, beta: float x_tensor = torch.from_numpy(x) x_tensor.requires_grad_(True) - prior_cov_fX_fX = kernel(torch.zeros(n_dims, dtype=torch.bool), kernel_params, torch.from_numpy(X), torch.from_numpy(X)) - posterior_mean_fx, posterior_var_fx = posterior(kernel_params, torch.from_numpy(X), torch.zeros(n_dims, dtype=torch.bool), acqf_params.cov_Y_Y_inv, acqf_params.cov_Y_Y_inv_Y, torch.from_numpy(x)) + prior_cov_fX_fX = kernel( + torch.zeros(n_dims, dtype=torch.bool), + kernel_params, + torch.from_numpy(X), + torch.from_numpy(X), + ) + posterior_mean_fx, posterior_var_fx = posterior( + kernel_params, + torch.from_numpy(X), + torch.zeros(n_dims, dtype=torch.bool), + torch.from_numpy(acqf_params.cov_Y_Y_inv), + torch.from_numpy(acqf_params.cov_Y_Y_inv_Y), + torch.from_numpy(x), + ) acqf_value = eval_acqf(acqf_params, x_tensor) - acqf_value.sum().backward() + acqf_value.sum().backward() # type: ignore acqf_grad = x_tensor.grad assert acqf_grad is not None - gpytorch_likelihood = GaussianLikelihood() gpytorch_likelihood.noise_covar.noise = kernel_params.noise_var matern_kernel = MaternKernel(nu=2.5, ard_num_dims=n_dims) @@ -72,8 +103,8 @@ def test_posterior_and_eval_acqf(acqf_type: AcquisitionFunctionType, beta: float covar_module.outputscale = kernel_params.kernel_scale botorch_model = SingleTaskGP( - train_X = torch.from_numpy(X), - train_Y = torch.from_numpy(Y)[:, None], + train_X=torch.from_numpy(X), + train_Y=torch.from_numpy(Y)[:, None], likelihood=gpytorch_likelihood, covar_module=covar_module, mean_module=ZeroMean(), @@ -92,12 +123,8 @@ def test_posterior_and_eval_acqf(acqf_type: AcquisitionFunctionType, beta: float assert torch.allclose(posterior_var_fx, botorch_posterior_fx.variance[..., 0, 0]) botorch_acqf_value = botorch_acqf(x_tensor[..., None, :]) - botorch_acqf_value.sum().backward() + botorch_acqf_value.sum().backward() # type: ignore botorch_acqf_grad = x_tensor.grad assert botorch_acqf_grad is not None assert torch.allclose(acqf_value, botorch_acqf_value) assert torch.allclose(acqf_grad, botorch_acqf_grad) - - - - From 5bdbae85047684ed3262627b834781131e8fc63a Mon Sep 17 00:00:00 2001 From: Contramundum Date: Fri, 2 Feb 2024 22:32:32 +0900 Subject: [PATCH 03/11] Remove botorch dependency from test --- tests/gp_tests/test_acqf.py | 79 ++++--------------------------------- 1 file changed, 7 insertions(+), 72 deletions(-) diff --git a/tests/gp_tests/test_acqf.py b/tests/gp_tests/test_acqf.py index bac92b47a2..34f8f1c766 100644 --- a/tests/gp_tests/test_acqf.py +++ b/tests/gp_tests/test_acqf.py @@ -1,42 +1,22 @@ -from typing import Any -from typing import Callable +from __future__ import annotations -from botorch.acquisition.analytic import LogExpectedImprovement -from botorch.acquisition.analytic import UpperConfidenceBound -from botorch.models import SingleTaskGP -from botorch.models.model import Model -from gpytorch.kernels import MaternKernel -from gpytorch.kernels import ScaleKernel -from gpytorch.likelihoods import GaussianLikelihood -from gpytorch.means import ZeroMean import numpy as np import pytest import torch -from optuna._gp.acqf import AcquisitionFunctionParams from optuna._gp.acqf import AcquisitionFunctionType from optuna._gp.acqf import create_acqf_params from optuna._gp.acqf import eval_acqf -from optuna._gp.gp import kernel from optuna._gp.gp import KernelParamsTensor -from optuna._gp.gp import posterior from optuna._gp.search_space import ScaleType from optuna._gp.search_space import SearchSpace @pytest.mark.parametrize( - "acqf_type, beta, botorch_acqf_gen", + "acqf_type, beta", [ - ( - AcquisitionFunctionType.LOG_EI, - None, - lambda model, acqf_params: LogExpectedImprovement(model, best_f=acqf_params.max_Y), - ), - ( - AcquisitionFunctionType.UCB, - 2.0, - lambda model, acqf_params: UpperConfidenceBound(model, beta=acqf_params.beta), - ), + (AcquisitionFunctionType.LOG_EI, None), + (AcquisitionFunctionType.UCB, 2.0), ], ) @pytest.mark.parametrize( @@ -45,7 +25,6 @@ def test_posterior_and_eval_acqf( acqf_type: AcquisitionFunctionType, beta: float | None, - botorch_acqf_gen: Callable[[Model, AcquisitionFunctionParams], Any], x: np.ndarray, ) -> None: n_dims = 2 @@ -75,56 +54,12 @@ def test_posterior_and_eval_acqf( x_tensor = torch.from_numpy(x) x_tensor.requires_grad_(True) - prior_cov_fX_fX = kernel( - torch.zeros(n_dims, dtype=torch.bool), - kernel_params, - torch.from_numpy(X), - torch.from_numpy(X), - ) - posterior_mean_fx, posterior_var_fx = posterior( - kernel_params, - torch.from_numpy(X), - torch.zeros(n_dims, dtype=torch.bool), - torch.from_numpy(acqf_params.cov_Y_Y_inv), - torch.from_numpy(acqf_params.cov_Y_Y_inv_Y), - torch.from_numpy(x), - ) - acqf_value = eval_acqf(acqf_params, x_tensor) acqf_value.sum().backward() # type: ignore acqf_grad = x_tensor.grad assert acqf_grad is not None - gpytorch_likelihood = GaussianLikelihood() - gpytorch_likelihood.noise_covar.noise = kernel_params.noise_var - matern_kernel = MaternKernel(nu=2.5, ard_num_dims=n_dims) - matern_kernel.lengthscale = kernel_params.inverse_squared_lengthscales.rsqrt() - covar_module = ScaleKernel(matern_kernel) - covar_module.outputscale = kernel_params.kernel_scale - - botorch_model = SingleTaskGP( - train_X=torch.from_numpy(X), - train_Y=torch.from_numpy(Y)[:, None], - likelihood=gpytorch_likelihood, - covar_module=covar_module, - mean_module=ZeroMean(), - ) - botorch_prior_fX = botorch_model(torch.from_numpy(X)) - assert torch.allclose(botorch_prior_fX.covariance_matrix, prior_cov_fX_fX) - - botorch_model.eval() - - botorch_acqf = botorch_acqf_gen(botorch_model, acqf_params) - - x_tensor = torch.from_numpy(x) - x_tensor.requires_grad_(True) - botorch_posterior_fx = botorch_model.posterior(x_tensor[..., None, :]) - assert torch.allclose(posterior_mean_fx, botorch_posterior_fx.mean[..., 0, 0]) - assert torch.allclose(posterior_var_fx, botorch_posterior_fx.variance[..., 0, 0]) + assert acqf_value.shape == x.shape[:-1] - botorch_acqf_value = botorch_acqf(x_tensor[..., None, :]) - botorch_acqf_value.sum().backward() # type: ignore - botorch_acqf_grad = x_tensor.grad - assert botorch_acqf_grad is not None - assert torch.allclose(acqf_value, botorch_acqf_value) - assert torch.allclose(acqf_grad, botorch_acqf_grad) + assert torch.all(torch.isfinite(acqf_value)) + assert torch.all(torch.isfinite(acqf_grad)) From 4fc4ae5e2856ec6df7b76754aa1f987d565e4bb3 Mon Sep 17 00:00:00 2001 From: Contramundum Date: Fri, 2 Feb 2024 22:34:00 +0900 Subject: [PATCH 04/11] Rename test --- tests/gp_tests/test_acqf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gp_tests/test_acqf.py b/tests/gp_tests/test_acqf.py index 34f8f1c766..9a0805afa0 100644 --- a/tests/gp_tests/test_acqf.py +++ b/tests/gp_tests/test_acqf.py @@ -22,7 +22,7 @@ @pytest.mark.parametrize( "x", [np.array([0.15, 0.12]), np.array([[0.15, 0.12], [0.0, 1.0]])] # unbatched # batched ) -def test_posterior_and_eval_acqf( +def test_eval_acqf( acqf_type: AcquisitionFunctionType, beta: float | None, x: np.ndarray, From 0d01eb67fb208363ecab27d6dfb0e47685499b4f Mon Sep 17 00:00:00 2001 From: Contramundum Date: Tue, 6 Feb 2024 14:59:51 +0900 Subject: [PATCH 05/11] Skip tests for python 3.12 --- tests/gp_tests/test_acqf.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/gp_tests/test_acqf.py b/tests/gp_tests/test_acqf.py index 9a0805afa0..7c7feadfae 100644 --- a/tests/gp_tests/test_acqf.py +++ b/tests/gp_tests/test_acqf.py @@ -10,7 +10,7 @@ from optuna._gp.gp import KernelParamsTensor from optuna._gp.search_space import ScaleType from optuna._gp.search_space import SearchSpace - +import sys @pytest.mark.parametrize( "acqf_type, beta", @@ -22,6 +22,10 @@ @pytest.mark.parametrize( "x", [np.array([0.15, 0.12]), np.array([[0.15, 0.12], [0.0, 1.0]])] # unbatched # batched ) +@pytest.mark.skipif( + sys.version_info >= (3, 12), + reason="PyTorch does not support Python 3.12 yet.", +) def test_eval_acqf( acqf_type: AcquisitionFunctionType, beta: float | None, From 167c3bf859f82eb0f19f2562f3343b2d26ab63ba Mon Sep 17 00:00:00 2001 From: Contramundum Date: Tue, 6 Feb 2024 16:09:28 +0900 Subject: [PATCH 06/11] Skip tests for python 3.12 --- tests/gp_tests/test_acqf.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/gp_tests/test_acqf.py b/tests/gp_tests/test_acqf.py index 7c7feadfae..f5df82f754 100644 --- a/tests/gp_tests/test_acqf.py +++ b/tests/gp_tests/test_acqf.py @@ -2,7 +2,13 @@ import numpy as np import pytest -import torch + + +# TODO(contramundum53): Remove this block after torch supports Python 3.12. +try: + import torch +except ImportError: + pytest.skip() from optuna._gp.acqf import AcquisitionFunctionType from optuna._gp.acqf import create_acqf_params @@ -10,7 +16,7 @@ from optuna._gp.gp import KernelParamsTensor from optuna._gp.search_space import ScaleType from optuna._gp.search_space import SearchSpace -import sys + @pytest.mark.parametrize( "acqf_type, beta", @@ -22,10 +28,6 @@ @pytest.mark.parametrize( "x", [np.array([0.15, 0.12]), np.array([[0.15, 0.12], [0.0, 1.0]])] # unbatched # batched ) -@pytest.mark.skipif( - sys.version_info >= (3, 12), - reason="PyTorch does not support Python 3.12 yet.", -) def test_eval_acqf( acqf_type: AcquisitionFunctionType, beta: float | None, From 1e61dbacb3ea4632283c96b1b42cfcdb5a5a01ae Mon Sep 17 00:00:00 2001 From: Contramundum Date: Tue, 6 Feb 2024 16:23:24 +0900 Subject: [PATCH 07/11] fix test --- tests/gp_tests/test_acqf.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/gp_tests/test_acqf.py b/tests/gp_tests/test_acqf.py index f5df82f754..a107e9b7bd 100644 --- a/tests/gp_tests/test_acqf.py +++ b/tests/gp_tests/test_acqf.py @@ -1,14 +1,16 @@ from __future__ import annotations +import sys + import numpy as np import pytest # TODO(contramundum53): Remove this block after torch supports Python 3.12. -try: - import torch -except ImportError: - pytest.skip() +if sys.version_info >= (3, 12): + pytest.skip("PyTorch does not support python 3.12.", allow_module_level=True) + +import torch from optuna._gp.acqf import AcquisitionFunctionType from optuna._gp.acqf import create_acqf_params From f535d8ae9b75734b16ce60d7630ff41a0bf43b3f Mon Sep 17 00:00:00 2001 From: contramundum53 Date: Tue, 6 Feb 2024 17:46:33 +0900 Subject: [PATCH 08/11] Update optuna/_gp/acqf.py Co-authored-by: Yoshihiko Ozaki <30489874+y0z@users.noreply.github.com> --- optuna/_gp/acqf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optuna/_gp/acqf.py b/optuna/_gp/acqf.py index 28b0984920..2c1f2955a5 100644 --- a/optuna/_gp/acqf.py +++ b/optuna/_gp/acqf.py @@ -122,7 +122,7 @@ def eval_acqf(acqf_params: AcquisitionFunctionParams, x: torch.Tensor) -> torch. assert acqf_params.beta is not None return ucb(mean=mean, var=var, beta=acqf_params.beta) else: - assert False # Unknown acquisition function type. + assert False, "Unknown acquisition function type." def eval_acqf_no_grad(acqf_params: AcquisitionFunctionParams, x: np.ndarray) -> np.ndarray: From 841e7cbe8cc44b79200268d51dd6507d3622a3a4 Mon Sep 17 00:00:00 2001 From: contramundum53 Date: Thu, 8 Feb 2024 13:23:29 +0900 Subject: [PATCH 09/11] Update optuna/_gp/acqf.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- optuna/_gp/acqf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optuna/_gp/acqf.py b/optuna/_gp/acqf.py index 2c1f2955a5..b43209e02e 100644 --- a/optuna/_gp/acqf.py +++ b/optuna/_gp/acqf.py @@ -58,6 +58,7 @@ def ucb(mean: torch.Tensor, var: torch.Tensor, beta: float) -> torch.Tensor: # TODO(contramundum53): consider abstraction for acquisition functions. +# NOTE: Acquisition function is not class on purpose to integrate numba in the future. class AcquisitionFunctionType(IntEnum): LOG_EI = 0 UCB = 1 From dd030cd227b41b49d263fe1611e1e0486c9b3e6e Mon Sep 17 00:00:00 2001 From: contramundum53 Date: Thu, 8 Feb 2024 13:23:37 +0900 Subject: [PATCH 10/11] Update optuna/_gp/acqf.py Co-authored-by: Shuhei Watanabe <47781922+nabenabe0928@users.noreply.github.com> --- optuna/_gp/acqf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optuna/_gp/acqf.py b/optuna/_gp/acqf.py index b43209e02e..82fa62873b 100644 --- a/optuna/_gp/acqf.py +++ b/optuna/_gp/acqf.py @@ -120,7 +120,7 @@ def eval_acqf(acqf_params: AcquisitionFunctionParams, x: torch.Tensor) -> torch. if acqf_params.acqf_type == AcquisitionFunctionType.LOG_EI: return logei(mean=mean, var=var + acqf_params.acqf_stabilizing_noise, f0=acqf_params.max_Y) elif acqf_params.acqf_type == AcquisitionFunctionType.UCB: - assert acqf_params.beta is not None + assert acqf_params.beta is not None, "beta must be given to UCB." return ucb(mean=mean, var=var, beta=acqf_params.beta) else: assert False, "Unknown acquisition function type." From e0ab667cd9cacaca00042ff3fad718e77f1bafec Mon Sep 17 00:00:00 2001 From: Contramundum Date: Thu, 8 Feb 2024 13:27:05 +0900 Subject: [PATCH 11/11] Remove default value on minimum noise --- optuna/_gp/gp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optuna/_gp/gp.py b/optuna/_gp/gp.py index 72b1ddb8f6..606f2b3294 100644 --- a/optuna/_gp/gp.py +++ b/optuna/_gp/gp.py @@ -146,7 +146,7 @@ def fit_kernel_params( Y: np.ndarray, # [len(trials)] is_categorical: np.ndarray, # [len(params)] log_prior: Callable[[KernelParamsTensor], torch.Tensor], - minimum_noise: float = 0.0, + minimum_noise: float, initial_kernel_params: KernelParamsTensor | None = None, ) -> KernelParamsTensor: n_params = X.shape[1]