From 49f538984cf1addeacf2f3a105cebbde085fb5b4 Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Wed, 17 Feb 2021 13:18:00 +0100 Subject: [PATCH 01/14] Add covariance between points gradient and vectorize covariance gradient calculation --- emukit/model_wrappers/gpy_model_wrappers.py | 72 ++++++++++++++++----- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/emukit/model_wrappers/gpy_model_wrappers.py b/emukit/model_wrappers/gpy_model_wrappers.py index cf891771..f832fd8e 100644 --- a/emukit/model_wrappers/gpy_model_wrappers.py +++ b/emukit/model_wrappers/gpy_model_wrappers.py @@ -61,6 +61,19 @@ def get_joint_prediction_gradients(self, X: np.ndarray) -> Tuple[np.ndarray, np. dvariance_dx = dSigma(X, self.model.X, self.model.kern, self.model.posterior.woodbury_inv) return dmean_dx, dvariance_dx + def get_covariance_between_points_gradients(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray: + """ + Computes and returns model gradients of the covariance between outputs at points X1 and X2 with respect + to X1. + + :param X1: points to compute gradients at, nd array of shape (q1, d) + :param X2: points for the covariance of which to compute the gradient, nd array of shape (q2, d) + :return: gradient of the covariance matrix of shape (q1, q2) between outputs at X1 and X2 + (return shape is (q1, q2, q1, d)). 
+ """ + dcov_dx1 = dCov(X1, X2, self.model.X, self.model.kern, self.model.posterior.woodbury_inv) + return dcov_dx1 + def set_data(self, X: np.ndarray, Y: np.ndarray) -> None: """ Sets training data in model @@ -164,24 +177,23 @@ def dSigma(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np :return: Gradient of the posterior covariance of shape (q, q, q, d) """ q, d, n = x_predict.shape[0], x_predict.shape[1], x_train.shape[0] - dkxX_dx = np.empty((q, n, d)) - dkxx_dx = np.empty((q, q, d)) + # Tensor for the gradients of (q, n) covariance matrix between x_predict and x_train with respect to + # x_predict (of shape (q, d)) + dkxX_dx = np.zeros((d, q*q, n)) + # Tensor for the gradients of full covariance matrix at points x_predict (of shape (q, q) with respect to + # x_predict (of shape (q, d)) + dkxx_dx = np.zeros((d, q*q, q)) for i in range(d): - dkxX_dx[:, :, i] = kern.dK_dX(x_predict, x_train, i) - dkxx_dx[:, :, i] = kern.dK_dX(x_predict, x_predict, i) + dkxX_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_train, i) + dkxx_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_predict, i) + dkxX_dx = dkxX_dx.reshape((d, q, q, n)) + dkxx_dx = dkxx_dx.reshape((d, q, q, q)) + dkxx_dx += dkxx_dx.transpose((0, 1, 3, 2)) + dkxx_dx.reshape((d, q, -1))[:, :, ::q + 1] = 0. 
+ K = kern.K(x_predict, x_train) - - dsigma = np.zeros((q, q, q, d)) - for i in range(q): - for j in range(d): - Ks = np.zeros((q, n)) - Ks[i, :] = dkxX_dx[i, :, j] - dKss_dxi = np.zeros((q, q)) - dKss_dxi[i, :] = dkxx_dx[i, :, j] - dKss_dxi[:, i] = dkxx_dx[i, :, j].T - dKss_dxi[i, i] = 0 - dsigma[:, :, i, j] = dKss_dxi - Ks @ w_inv @ K.T - K @ w_inv @ Ks.T - return dsigma + dsigma = dkxx_dx - K @ w_inv @ dkxX_dx.transpose((0, 1, 3, 2)) - dkxX_dx @ w_inv @ K.T + return dsigma.transpose((2, 3, 1, 0)) def dmean(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_vec: np.ndarray) -> np.ndarray: @@ -203,6 +215,34 @@ def dmean(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_vec: np. dmu[j, j, i] = (dkxX_dx[j, :, i][None, :] @ w_vec[:, None]).flatten() return dmu + +def dCov(x1: np.ndarray, x2: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np.ndarray) -> np.ndarray: + """ + Compute the derivative of the posterior covariance matrix between prediction inputs x1 and x2 + (of shape (q1, q2)) with respect to x1 + + :param x1: Prediction inputs of shape (q1, d) + :param x2: Prediction inputs of shape (q2, d) + :param x_train: Training inputs of shape (n, d) + :param kern: Covariance of the GP model + :param w_inv: Woodbury inverse of the posterior fit of the GP + :return: nd array of shape (q1, q2, q1, d) representing the gradient of the posterior covariance between x1 and x2, + where res[:, :, i, j] is the gradient of the covariance between outputs at x1 and x2 with respect to x1[i, j] + """ + q1, q2, d, n = x1.shape[0], x2.shape[0], x1.shape[1], x_train.shape[0] + dkx1X_dx = np.zeros((d, q1*q1, n)) + dkx1x2_dx = np.zeros((d, q1*q1, q2)) + for i in range(d): + dkx1X_dx[i, ::q1 + 1, :] = kern.dK_dX(x1, x_train, i) + dkx1x2_dx[i, ::q1 + 1, :] = kern.dK_dX(x1, x2, i) + dkx1X_dx = dkx1X_dx.reshape((d, q1, q1, n)) + dkx1x2_dx = dkx1x2_dx.reshape((d, q1, q1, q2)) + + K_Xx2 = kern.K(x_train, x2) + dcov = dkx1x2_dx - dkx1X_dx @ w_inv @ K_Xx2 + return 
dcov.transpose((2, 3, 1, 0)) + + class GPyMultiOutputWrapper(IModel, IDifferentiable, ICalculateVarianceReduction, IEntropySearchModel): """ A wrapper around GPy multi-output models. From d0e25c8c786ce2a0f45d18725e15aa804af3b55c Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Wed, 17 Feb 2021 13:54:06 +0100 Subject: [PATCH 02/14] Add tests for the gradients --- .../emukit/models/test_gpy_model_wrappers.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 tests/emukit/models/test_gpy_model_wrappers.py diff --git a/tests/emukit/models/test_gpy_model_wrappers.py b/tests/emukit/models/test_gpy_model_wrappers.py new file mode 100644 index 00000000..ca31fd58 --- /dev/null +++ b/tests/emukit/models/test_gpy_model_wrappers.py @@ -0,0 +1,53 @@ +import GPy +import numpy as np +import pytest + +from emukit.model_wrappers.gpy_model_wrappers import GPyModelWrapper + + +@pytest.fixture +def test_data(gpy_model): + np.random.seed(42) + return np.random.randn(5, gpy_model.X.shape[1]) + + +@pytest.fixture +def test_data2(gpy_model): + np.random.seed(42) + return np.random.randn(4, gpy_model.X.shape[1]) + + +def test_joint_prediction_gradients(gpy_model, test_data): + epsilon = 1e-5 + mean, cov = gpy_model.predict_with_full_covariance(test_data) + # Get the gradients + mean_dx, cov_dx = gpy_model.get_joint_prediction_gradients(test_data) + + for i in range(test_data.shape[0]): # Iterate over each test point + for j in range(test_data.shape[1]): # Iterate over each dimension + # Approximate the gradient numerically + perturbed_input = test_data.copy() + perturbed_input[i, j] += epsilon + mean_perturbed, cov_perturbed = gpy_model.predict_with_full_covariance(perturbed_input) + mean_dx_numerical = (mean_perturbed - mean) / epsilon + cov_dx_numerical = (cov_perturbed - cov) / epsilon + # Check that numerical approx. 
similar to true gradient + assert pytest.approx(mean_dx_numerical, abs=1e-8, rel=1e-3) == mean_dx[:, :, i, j] + assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-3) == cov_dx[:, :, i, j] + + +def test_get_covariance_between_points_gradients(gpy_model, test_data, test_data2): + epsilon = 1e-5 + cov = gpy_model.get_covariance_between_points(test_data, test_data2) + # Get the gradients + cov_dx = gpy_model.get_covariance_between_points(test_data, test_data2) + + for i in range(test_data.shape[0]): # Iterate over each test point + for j in range(test_data.shape[1]): # Iterate over each dimension + # Approximate the gradient numerically + perturbed_input = test_data.copy() + perturbed_input[i, j] += epsilon + cov_perturbed = gpy_model.get_covariance_between_points(perturbed_input, test_data2) + cov_dx_numerical = (cov_perturbed - cov) / epsilon + # Check that numerical approx. similar to true gradient + assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-3) == cov_dx[:, :, i, j] From bda97ad4fc51852bcc37e364c3f8f09f76bf741d Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Wed, 17 Feb 2021 14:27:36 +0100 Subject: [PATCH 03/14] Fix shapes in gradient tests --- tests/emukit/models/test_gpy_model_wrappers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/emukit/models/test_gpy_model_wrappers.py b/tests/emukit/models/test_gpy_model_wrappers.py index ca31fd58..375e9f08 100644 --- a/tests/emukit/models/test_gpy_model_wrappers.py +++ b/tests/emukit/models/test_gpy_model_wrappers.py @@ -32,15 +32,15 @@ def test_joint_prediction_gradients(gpy_model, test_data): mean_dx_numerical = (mean_perturbed - mean) / epsilon cov_dx_numerical = (cov_perturbed - cov) / epsilon # Check that numerical approx. 
similar to true gradient - assert pytest.approx(mean_dx_numerical, abs=1e-8, rel=1e-3) == mean_dx[:, :, i, j] - assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-3) == cov_dx[:, :, i, j] + assert pytest.approx(mean_dx_numerical.ravel(), abs=1e-8, rel=1e-2) == mean_dx[:, i, j] + assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[:, :, i, j] def test_get_covariance_between_points_gradients(gpy_model, test_data, test_data2): epsilon = 1e-5 cov = gpy_model.get_covariance_between_points(test_data, test_data2) # Get the gradients - cov_dx = gpy_model.get_covariance_between_points(test_data, test_data2) + cov_dx = gpy_model.get_covariance_between_points_gradients(test_data, test_data2) for i in range(test_data.shape[0]): # Iterate over each test point for j in range(test_data.shape[1]): # Iterate over each dimension @@ -50,4 +50,4 @@ def test_get_covariance_between_points_gradients(gpy_model, test_data, test_data cov_perturbed = gpy_model.get_covariance_between_points(perturbed_input, test_data2) cov_dx_numerical = (cov_perturbed - cov) / epsilon # Check that numerical approx. 
similar to true gradient - assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-3) == cov_dx[:, :, i, j] + assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[:, :, i, j] From 6ed9094c86522a6f072834989f5f3ca60f41087b Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Mon, 12 Apr 2021 15:10:05 +0100 Subject: [PATCH 04/14] Rewrite the covariance gradient calculation code --- emukit/model_wrappers/gpy_model_wrappers.py | 74 +++++++++---------- .../emukit/models/test_gpy_model_wrappers.py | 4 +- 2 files changed, 36 insertions(+), 42 deletions(-) diff --git a/emukit/model_wrappers/gpy_model_wrappers.py b/emukit/model_wrappers/gpy_model_wrappers.py index 80fc2f9c..69588e17 100644 --- a/emukit/model_wrappers/gpy_model_wrappers.py +++ b/emukit/model_wrappers/gpy_model_wrappers.py @@ -61,19 +61,6 @@ def get_joint_prediction_gradients(self, X: np.ndarray) -> Tuple[np.ndarray, np. dvariance_dx = dSigma(X, self.model.X, self.model.kern, self.model.posterior.woodbury_inv) return dmean_dx, dvariance_dx - def get_covariance_between_points_gradients(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray: - """ - Computes and returns model gradients of the covariance between outputs at points X1 and X2 with respect - to X1. - - :param X1: points to compute gradients at, nd array of shape (q1, d) - :param X2: points for the covariance of which to compute the gradient, nd array of shape (q2, d) - :return: gradient of the covariance matrix of shape (q1, q2) between outputs at X1 and X2 - (return shape is (q1, q2, q1, d)). 
- """ - dcov_dx1 = dCov(X1, X2, self.model.X, self.model.kern, self.model.posterior.woodbury_inv) - return dcov_dx1 - def set_data(self, X: np.ndarray, Y: np.ndarray) -> None: """ Sets training data in model @@ -120,6 +107,40 @@ def get_covariance_between_points(self, X1: np.ndarray, X2: np.ndarray) -> np.nd """ return self.model.posterior_covariance_between_points(X1, X2) + def get_covariance_between_points_gradients(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray: + """ + Compute the derivative of the posterior covariance matrix between prediction at inputs x1 and x2 + with respect to x1. + + :param x1: Prediction inputs of shape (q1, d) + :param x2: Prediction inputs of shape (q2, d) + :param x_train: Training inputs of shape (n_train, d) + :param kern: Covariance of the GP model + :param w_inv: Woodbury inverse of the posterior fit of the GP + :return: nd array of shape (q1, q2, d) representing the gradient of the posterior covariance between x1 and x2 + with respect to x1. res[i, j, k] is the gradient of Cov(y1[i], y2[j]) with respect to x1[i, k] + """ + # Get the relevent shapes + q1, q2, input_dim, n_train = X1.shape[0], X2.shape[0], X1.shape[1], self.model.X.shape[0] + # Instatiate an array to hold gradients of prior covariance between outputs at X1 and X_train + cov_X1_Xtrain_grad = np.zeros((input_dim, q1, n_train)) + # Instatiate an array to hold gradients of prior covariance between outputs at X1 and X2 + cov_X1_X2_grad = np.zeros((input_dim, q1, q2)) + # Calculate the gradient wrt. X1 of these prior covariances. GPy API allows for doing so + # only one dimension at a time, hence need to iterate over all input dimensions + for i in range(input_dim): + # Calculate the gradient wrt. X1 of the prior covariance between X1 and X_train + cov_X1_Xtrain_grad[i, :, :] = self.model.kern.dK_dX(X1, self.model.X, i) + # Calculate the gradient wrt. 
X1 of the prior covariance between X1 and X2 + cov_X1_X2_grad[i, :, :] = self.model.kern.dK_dX(X1, X2, i) + + # Get the prior covariance between outputs at x_train and X2 + cov_Xtrain_X2 = self.model.kern.K(self.model.X, X2) + # Calculate the gradient of the posterior covariance between outputs at X1 and X2 + cov_grad = cov_X1_X2_grad - cov_X1_Xtrain_grad @ self.model.posterior.woodbury_inv @ cov_Xtrain_X2 + return cov_grad.transpose((1, 2, 0)) + + @property def X(self) -> np.ndarray: """ @@ -223,33 +244,6 @@ def dmean(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_vec: np. return dmu -def dCov(x1: np.ndarray, x2: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np.ndarray) -> np.ndarray: - """ - Compute the derivative of the posterior covariance matrix between prediction inputs x1 and x2 - (of shape (q1, q2)) with respect to x1 - - :param x1: Prediction inputs of shape (q1, d) - :param x2: Prediction inputs of shape (q2, d) - :param x_train: Training inputs of shape (n, d) - :param kern: Covariance of the GP model - :param w_inv: Woodbury inverse of the posterior fit of the GP - :return: nd array of shape (q1, q2, q1, d) representing the gradient of the posterior covariance between x1 and x2, - where res[:, :, i, j] is the gradient of the covariance between outputs at x1 and x2 with respect to x1[i, j] - """ - q1, q2, d, n = x1.shape[0], x2.shape[0], x1.shape[1], x_train.shape[0] - dkx1X_dx = np.zeros((d, q1*q1, n)) - dkx1x2_dx = np.zeros((d, q1*q1, q2)) - for i in range(d): - dkx1X_dx[i, ::q1 + 1, :] = kern.dK_dX(x1, x_train, i) - dkx1x2_dx[i, ::q1 + 1, :] = kern.dK_dX(x1, x2, i) - dkx1X_dx = dkx1X_dx.reshape((d, q1, q1, n)) - dkx1x2_dx = dkx1x2_dx.reshape((d, q1, q1, q2)) - - K_Xx2 = kern.K(x_train, x2) - dcov = dkx1x2_dx - dkx1X_dx @ w_inv @ K_Xx2 - return dcov.transpose((2, 3, 1, 0)) - - class GPyMultiOutputWrapper(IModel, IDifferentiable, ICalculateVarianceReduction, IEntropySearchModel): """ A wrapper around GPy multi-output models. 
diff --git a/tests/emukit/models/test_gpy_model_wrappers.py b/tests/emukit/models/test_gpy_model_wrappers.py index 375e9f08..cce5207f 100644 --- a/tests/emukit/models/test_gpy_model_wrappers.py +++ b/tests/emukit/models/test_gpy_model_wrappers.py @@ -48,6 +48,6 @@ def test_get_covariance_between_points_gradients(gpy_model, test_data, test_data perturbed_input = test_data.copy() perturbed_input[i, j] += epsilon cov_perturbed = gpy_model.get_covariance_between_points(perturbed_input, test_data2) - cov_dx_numerical = (cov_perturbed - cov) / epsilon + cov_dx_numerical = (cov_perturbed[i] - cov[i]) / epsilon # Check that numerical approx. similar to true gradient - assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[:, :, i, j] + assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[i, :, j] From e7b436be555b9ff1f2d55e8141470ba199295c5b Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Wed, 17 Feb 2021 13:18:00 +0100 Subject: [PATCH 05/14] Add covariance between points gradient and vectorize covariance gradient calculation --- emukit/model_wrappers/gpy_model_wrappers.py | 72 ++++++++++++++++----- 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/emukit/model_wrappers/gpy_model_wrappers.py b/emukit/model_wrappers/gpy_model_wrappers.py index 0bb9560e..80fc2f9c 100644 --- a/emukit/model_wrappers/gpy_model_wrappers.py +++ b/emukit/model_wrappers/gpy_model_wrappers.py @@ -61,6 +61,19 @@ def get_joint_prediction_gradients(self, X: np.ndarray) -> Tuple[np.ndarray, np. dvariance_dx = dSigma(X, self.model.X, self.model.kern, self.model.posterior.woodbury_inv) return dmean_dx, dvariance_dx + def get_covariance_between_points_gradients(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray: + """ + Computes and returns model gradients of the covariance between outputs at points X1 and X2 with respect + to X1. 
+ + :param X1: points to compute gradients at, nd array of shape (q1, d) + :param X2: points for the covariance of which to compute the gradient, nd array of shape (q2, d) + :return: gradient of the covariance matrix of shape (q1, q2) between outputs at X1 and X2 + (return shape is (q1, q2, q1, d)). + """ + dcov_dx1 = dCov(X1, X2, self.model.X, self.model.kern, self.model.posterior.woodbury_inv) + return dcov_dx1 + def set_data(self, X: np.ndarray, Y: np.ndarray) -> None: """ Sets training data in model @@ -171,24 +184,23 @@ def dSigma(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np :return: Gradient of the posterior covariance of shape (q, q, q, d) """ q, d, n = x_predict.shape[0], x_predict.shape[1], x_train.shape[0] - dkxX_dx = np.empty((q, n, d)) - dkxx_dx = np.empty((q, q, d)) + # Tensor for the gradients of (q, n) covariance matrix between x_predict and x_train with respect to + # x_predict (of shape (q, d)) + dkxX_dx = np.zeros((d, q*q, n)) + # Tensor for the gradients of full covariance matrix at points x_predict (of shape (q, q) with respect to + # x_predict (of shape (q, d)) + dkxx_dx = np.zeros((d, q*q, q)) for i in range(d): - dkxX_dx[:, :, i] = kern.dK_dX(x_predict, x_train, i) - dkxx_dx[:, :, i] = kern.dK_dX(x_predict, x_predict, i) + dkxX_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_train, i) + dkxx_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_predict, i) + dkxX_dx = dkxX_dx.reshape((d, q, q, n)) + dkxx_dx = dkxx_dx.reshape((d, q, q, q)) + dkxx_dx += dkxx_dx.transpose((0, 1, 3, 2)) + dkxx_dx.reshape((d, q, -1))[:, :, ::q + 1] = 0. 
+ K = kern.K(x_predict, x_train) - - dsigma = np.zeros((q, q, q, d)) - for i in range(q): - for j in range(d): - Ks = np.zeros((q, n)) - Ks[i, :] = dkxX_dx[i, :, j] - dKss_dxi = np.zeros((q, q)) - dKss_dxi[i, :] = dkxx_dx[i, :, j] - dKss_dxi[:, i] = dkxx_dx[i, :, j].T - dKss_dxi[i, i] = 0 - dsigma[:, :, i, j] = dKss_dxi - Ks @ w_inv @ K.T - K @ w_inv @ Ks.T - return dsigma + dsigma = dkxx_dx - K @ w_inv @ dkxX_dx.transpose((0, 1, 3, 2)) - dkxX_dx @ w_inv @ K.T + return dsigma.transpose((2, 3, 1, 0)) def dmean(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_vec: np.ndarray) -> np.ndarray: @@ -210,6 +222,34 @@ def dmean(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_vec: np. dmu[j, j, i] = (dkxX_dx[j, :, i][None, :] @ w_vec[:, None]).flatten() return dmu + +def dCov(x1: np.ndarray, x2: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np.ndarray) -> np.ndarray: + """ + Compute the derivative of the posterior covariance matrix between prediction inputs x1 and x2 + (of shape (q1, q2)) with respect to x1 + + :param x1: Prediction inputs of shape (q1, d) + :param x2: Prediction inputs of shape (q2, d) + :param x_train: Training inputs of shape (n, d) + :param kern: Covariance of the GP model + :param w_inv: Woodbury inverse of the posterior fit of the GP + :return: nd array of shape (q1, q2, q1, d) representing the gradient of the posterior covariance between x1 and x2, + where res[:, :, i, j] is the gradient of the covariance between outputs at x1 and x2 with respect to x1[i, j] + """ + q1, q2, d, n = x1.shape[0], x2.shape[0], x1.shape[1], x_train.shape[0] + dkx1X_dx = np.zeros((d, q1*q1, n)) + dkx1x2_dx = np.zeros((d, q1*q1, q2)) + for i in range(d): + dkx1X_dx[i, ::q1 + 1, :] = kern.dK_dX(x1, x_train, i) + dkx1x2_dx[i, ::q1 + 1, :] = kern.dK_dX(x1, x2, i) + dkx1X_dx = dkx1X_dx.reshape((d, q1, q1, n)) + dkx1x2_dx = dkx1x2_dx.reshape((d, q1, q1, q2)) + + K_Xx2 = kern.K(x_train, x2) + dcov = dkx1x2_dx - dkx1X_dx @ w_inv @ K_Xx2 + return 
dcov.transpose((2, 3, 1, 0)) + + class GPyMultiOutputWrapper(IModel, IDifferentiable, ICalculateVarianceReduction, IEntropySearchModel): """ A wrapper around GPy multi-output models. From 8e1699030cc3415c51b12db1e85c89940a2c46d1 Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Wed, 17 Feb 2021 13:54:06 +0100 Subject: [PATCH 06/14] Add tests for the gradients --- .../emukit/models/test_gpy_model_wrappers.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 tests/emukit/models/test_gpy_model_wrappers.py diff --git a/tests/emukit/models/test_gpy_model_wrappers.py b/tests/emukit/models/test_gpy_model_wrappers.py new file mode 100644 index 00000000..ca31fd58 --- /dev/null +++ b/tests/emukit/models/test_gpy_model_wrappers.py @@ -0,0 +1,53 @@ +import GPy +import numpy as np +import pytest + +from emukit.model_wrappers.gpy_model_wrappers import GPyModelWrapper + + +@pytest.fixture +def test_data(gpy_model): + np.random.seed(42) + return np.random.randn(5, gpy_model.X.shape[1]) + + +@pytest.fixture +def test_data2(gpy_model): + np.random.seed(42) + return np.random.randn(4, gpy_model.X.shape[1]) + + +def test_joint_prediction_gradients(gpy_model, test_data): + epsilon = 1e-5 + mean, cov = gpy_model.predict_with_full_covariance(test_data) + # Get the gradients + mean_dx, cov_dx = gpy_model.get_joint_prediction_gradients(test_data) + + for i in range(test_data.shape[0]): # Iterate over each test point + for j in range(test_data.shape[1]): # Iterate over each dimension + # Approximate the gradient numerically + perturbed_input = test_data.copy() + perturbed_input[i, j] += epsilon + mean_perturbed, cov_perturbed = gpy_model.predict_with_full_covariance(perturbed_input) + mean_dx_numerical = (mean_perturbed - mean) / epsilon + cov_dx_numerical = (cov_perturbed - cov) / epsilon + # Check that numerical approx. 
similar to true gradient + assert pytest.approx(mean_dx_numerical, abs=1e-8, rel=1e-3) == mean_dx[:, :, i, j] + assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-3) == cov_dx[:, :, i, j] + + +def test_get_covariance_between_points_gradients(gpy_model, test_data, test_data2): + epsilon = 1e-5 + cov = gpy_model.get_covariance_between_points(test_data, test_data2) + # Get the gradients + cov_dx = gpy_model.get_covariance_between_points(test_data, test_data2) + + for i in range(test_data.shape[0]): # Iterate over each test point + for j in range(test_data.shape[1]): # Iterate over each dimension + # Approximate the gradient numerically + perturbed_input = test_data.copy() + perturbed_input[i, j] += epsilon + cov_perturbed = gpy_model.get_covariance_between_points(perturbed_input, test_data2) + cov_dx_numerical = (cov_perturbed - cov) / epsilon + # Check that numerical approx. similar to true gradient + assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-3) == cov_dx[:, :, i, j] From 38a0691b9883884b933ee16e6d69be598a263d65 Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Wed, 17 Feb 2021 14:27:36 +0100 Subject: [PATCH 07/14] Fix shapes in gradient tests --- tests/emukit/models/test_gpy_model_wrappers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/emukit/models/test_gpy_model_wrappers.py b/tests/emukit/models/test_gpy_model_wrappers.py index ca31fd58..375e9f08 100644 --- a/tests/emukit/models/test_gpy_model_wrappers.py +++ b/tests/emukit/models/test_gpy_model_wrappers.py @@ -32,15 +32,15 @@ def test_joint_prediction_gradients(gpy_model, test_data): mean_dx_numerical = (mean_perturbed - mean) / epsilon cov_dx_numerical = (cov_perturbed - cov) / epsilon # Check that numerical approx. 
similar to true gradient - assert pytest.approx(mean_dx_numerical, abs=1e-8, rel=1e-3) == mean_dx[:, :, i, j] - assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-3) == cov_dx[:, :, i, j] + assert pytest.approx(mean_dx_numerical.ravel(), abs=1e-8, rel=1e-2) == mean_dx[:, i, j] + assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[:, :, i, j] def test_get_covariance_between_points_gradients(gpy_model, test_data, test_data2): epsilon = 1e-5 cov = gpy_model.get_covariance_between_points(test_data, test_data2) # Get the gradients - cov_dx = gpy_model.get_covariance_between_points(test_data, test_data2) + cov_dx = gpy_model.get_covariance_between_points_gradients(test_data, test_data2) for i in range(test_data.shape[0]): # Iterate over each test point for j in range(test_data.shape[1]): # Iterate over each dimension @@ -50,4 +50,4 @@ def test_get_covariance_between_points_gradients(gpy_model, test_data, test_data cov_perturbed = gpy_model.get_covariance_between_points(perturbed_input, test_data2) cov_dx_numerical = (cov_perturbed - cov) / epsilon # Check that numerical approx. 
similar to true gradient - assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-3) == cov_dx[:, :, i, j] + assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[:, :, i, j] From d6d6b0f140267519dcc33508df5c2bf75d01b628 Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Mon, 12 Apr 2021 15:10:05 +0100 Subject: [PATCH 08/14] Rewrite the covariance gradient calculation code --- emukit/model_wrappers/gpy_model_wrappers.py | 74 +++++++++---------- .../emukit/models/test_gpy_model_wrappers.py | 4 +- 2 files changed, 36 insertions(+), 42 deletions(-) diff --git a/emukit/model_wrappers/gpy_model_wrappers.py b/emukit/model_wrappers/gpy_model_wrappers.py index 80fc2f9c..69588e17 100644 --- a/emukit/model_wrappers/gpy_model_wrappers.py +++ b/emukit/model_wrappers/gpy_model_wrappers.py @@ -61,19 +61,6 @@ def get_joint_prediction_gradients(self, X: np.ndarray) -> Tuple[np.ndarray, np. dvariance_dx = dSigma(X, self.model.X, self.model.kern, self.model.posterior.woodbury_inv) return dmean_dx, dvariance_dx - def get_covariance_between_points_gradients(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray: - """ - Computes and returns model gradients of the covariance between outputs at points X1 and X2 with respect - to X1. - - :param X1: points to compute gradients at, nd array of shape (q1, d) - :param X2: points for the covariance of which to compute the gradient, nd array of shape (q2, d) - :return: gradient of the covariance matrix of shape (q1, q2) between outputs at X1 and X2 - (return shape is (q1, q2, q1, d)). 
- """ - dcov_dx1 = dCov(X1, X2, self.model.X, self.model.kern, self.model.posterior.woodbury_inv) - return dcov_dx1 - def set_data(self, X: np.ndarray, Y: np.ndarray) -> None: """ Sets training data in model @@ -120,6 +107,40 @@ def get_covariance_between_points(self, X1: np.ndarray, X2: np.ndarray) -> np.nd """ return self.model.posterior_covariance_between_points(X1, X2) + def get_covariance_between_points_gradients(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray: + """ + Compute the derivative of the posterior covariance matrix between prediction at inputs x1 and x2 + with respect to x1. + + :param x1: Prediction inputs of shape (q1, d) + :param x2: Prediction inputs of shape (q2, d) + :param x_train: Training inputs of shape (n_train, d) + :param kern: Covariance of the GP model + :param w_inv: Woodbury inverse of the posterior fit of the GP + :return: nd array of shape (q1, q2, d) representing the gradient of the posterior covariance between x1 and x2 + with respect to x1. res[i, j, k] is the gradient of Cov(y1[i], y2[j]) with respect to x1[i, k] + """ + # Get the relevent shapes + q1, q2, input_dim, n_train = X1.shape[0], X2.shape[0], X1.shape[1], self.model.X.shape[0] + # Instatiate an array to hold gradients of prior covariance between outputs at X1 and X_train + cov_X1_Xtrain_grad = np.zeros((input_dim, q1, n_train)) + # Instatiate an array to hold gradients of prior covariance between outputs at X1 and X2 + cov_X1_X2_grad = np.zeros((input_dim, q1, q2)) + # Calculate the gradient wrt. X1 of these prior covariances. GPy API allows for doing so + # only one dimension at a time, hence need to iterate over all input dimensions + for i in range(input_dim): + # Calculate the gradient wrt. X1 of the prior covariance between X1 and X_train + cov_X1_Xtrain_grad[i, :, :] = self.model.kern.dK_dX(X1, self.model.X, i) + # Calculate the gradient wrt. 
X1 of the prior covariance between X1 and X2 + cov_X1_X2_grad[i, :, :] = self.model.kern.dK_dX(X1, X2, i) + + # Get the prior covariance between outputs at x_train and X2 + cov_Xtrain_X2 = self.model.kern.K(self.model.X, X2) + # Calculate the gradient of the posterior covariance between outputs at X1 and X2 + cov_grad = cov_X1_X2_grad - cov_X1_Xtrain_grad @ self.model.posterior.woodbury_inv @ cov_Xtrain_X2 + return cov_grad.transpose((1, 2, 0)) + + @property def X(self) -> np.ndarray: """ @@ -223,33 +244,6 @@ def dmean(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_vec: np. return dmu -def dCov(x1: np.ndarray, x2: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np.ndarray) -> np.ndarray: - """ - Compute the derivative of the posterior covariance matrix between prediction inputs x1 and x2 - (of shape (q1, q2)) with respect to x1 - - :param x1: Prediction inputs of shape (q1, d) - :param x2: Prediction inputs of shape (q2, d) - :param x_train: Training inputs of shape (n, d) - :param kern: Covariance of the GP model - :param w_inv: Woodbury inverse of the posterior fit of the GP - :return: nd array of shape (q1, q2, q1, d) representing the gradient of the posterior covariance between x1 and x2, - where res[:, :, i, j] is the gradient of the covariance between outputs at x1 and x2 with respect to x1[i, j] - """ - q1, q2, d, n = x1.shape[0], x2.shape[0], x1.shape[1], x_train.shape[0] - dkx1X_dx = np.zeros((d, q1*q1, n)) - dkx1x2_dx = np.zeros((d, q1*q1, q2)) - for i in range(d): - dkx1X_dx[i, ::q1 + 1, :] = kern.dK_dX(x1, x_train, i) - dkx1x2_dx[i, ::q1 + 1, :] = kern.dK_dX(x1, x2, i) - dkx1X_dx = dkx1X_dx.reshape((d, q1, q1, n)) - dkx1x2_dx = dkx1x2_dx.reshape((d, q1, q1, q2)) - - K_Xx2 = kern.K(x_train, x2) - dcov = dkx1x2_dx - dkx1X_dx @ w_inv @ K_Xx2 - return dcov.transpose((2, 3, 1, 0)) - - class GPyMultiOutputWrapper(IModel, IDifferentiable, ICalculateVarianceReduction, IEntropySearchModel): """ A wrapper around GPy multi-output models. 
diff --git a/tests/emukit/models/test_gpy_model_wrappers.py b/tests/emukit/models/test_gpy_model_wrappers.py index 375e9f08..cce5207f 100644 --- a/tests/emukit/models/test_gpy_model_wrappers.py +++ b/tests/emukit/models/test_gpy_model_wrappers.py @@ -48,6 +48,6 @@ def test_get_covariance_between_points_gradients(gpy_model, test_data, test_data perturbed_input = test_data.copy() perturbed_input[i, j] += epsilon cov_perturbed = gpy_model.get_covariance_between_points(perturbed_input, test_data2) - cov_dx_numerical = (cov_perturbed - cov) / epsilon + cov_dx_numerical = (cov_perturbed[i] - cov[i]) / epsilon # Check that numerical approx. similar to true gradient - assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[:, :, i, j] + assert pytest.approx(cov_dx_numerical, abs=1e-8, rel=1e-2) == cov_dx[i, :, j] From bcf4416f0755b779ea7352bef32bec13afb710e1 Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Sat, 1 Jan 2022 13:26:21 +0100 Subject: [PATCH 09/14] Fix typos and remove redundant args in doc-strings --- emukit/model_wrappers/gpy_model_wrappers.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/emukit/model_wrappers/gpy_model_wrappers.py b/emukit/model_wrappers/gpy_model_wrappers.py index 6f7a14e3..dba0de69 100644 --- a/emukit/model_wrappers/gpy_model_wrappers.py +++ b/emukit/model_wrappers/gpy_model_wrappers.py @@ -123,15 +123,13 @@ def get_covariance_between_points_gradients(self, X1: np.ndarray, X2: np.ndarray Compute the derivative of the posterior covariance matrix between prediction at inputs x1 and x2 with respect to x1. 
- :param x1: Prediction inputs of shape (q1, d) - :param x2: Prediction inputs of shape (q2, d) - :param x_train: Training inputs of shape (n_train, d) - :param kern: Covariance of the GP model - :param w_inv: Woodbury inverse of the posterior fit of the GP - :return: nd array of shape (q1, q2, d) representing the gradient of the posterior covariance between x1 and x2 - with respect to x1. res[i, j, k] is the gradient of Cov(y1[i], y2[j]) with respect to x1[i, k] - """ - # Get the relevent shapes + :param X1: Prediction inputs of shape (q1, d) + :param X2: Prediction inputs of shape (q2, d) + :return: nd array of shape (q1, q2, d) representing the gradient of the posterior covariance + between x1 and x2 with respect to x1. res[i, j, k] is the gradient of Cov(y1[i], y2[j]) + with respect to x1[i, k] + """ + # Get the relevant shapes q1, q2, input_dim, n_train = X1.shape[0], X2.shape[0], X1.shape[1], self.model.X.shape[0] # Instatiate an array to hold gradients of prior covariance between outputs at X1 and X_train cov_X1_Xtrain_grad = np.zeros((input_dim, q1, n_train)) From 26960ee65bedba801af849a52ddd778bce196efa Mon Sep 17 00:00:00 2001 From: Bruno Kacper Mlodozeniec Date: Sat, 1 Jan 2022 13:26:36 +0100 Subject: [PATCH 10/14] Fix typo in emukit/model_wrappers/gpy_model_wrappers.py Co-authored-by: Andrei Paleyes --- emukit/model_wrappers/gpy_model_wrappers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/emukit/model_wrappers/gpy_model_wrappers.py b/emukit/model_wrappers/gpy_model_wrappers.py index dba0de69..be19750a 100644 --- a/emukit/model_wrappers/gpy_model_wrappers.py +++ b/emukit/model_wrappers/gpy_model_wrappers.py @@ -133,7 +133,7 @@ def get_covariance_between_points_gradients(self, X1: np.ndarray, X2: np.ndarray q1, q2, input_dim, n_train = X1.shape[0], X2.shape[0], X1.shape[1], self.model.X.shape[0] # Instatiate an array to hold gradients of prior covariance between outputs at X1 and X_train cov_X1_Xtrain_grad = np.zeros((input_dim, q1, 
n_train)) - # Instatiate an array to hold gradients of prior covariance between outputs at X1 and X2 + # Instantiate an array to hold gradients of prior covariance between outputs at X1 and X2 cov_X1_X2_grad = np.zeros((input_dim, q1, q2)) # Calculate the gradient wrt. X1 of these prior covariances. GPy API allows for doing so # only one dimension at a time, hence need to iterate over all input dimensions From 2bb36d21bf900f14dc6aff24bf3b893d7143cd9f Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Sat, 1 Jan 2022 13:39:13 +0100 Subject: [PATCH 11/14] Rename variable names to be more informative and verbose in dSigma() --- emukit/model_wrappers/gpy_model_wrappers.py | 24 +++++++++++---------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/emukit/model_wrappers/gpy_model_wrappers.py b/emukit/model_wrappers/gpy_model_wrappers.py index dba0de69..5d57727b 100644 --- a/emukit/model_wrappers/gpy_model_wrappers.py +++ b/emukit/model_wrappers/gpy_model_wrappers.py @@ -212,22 +212,24 @@ def dSigma(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np :return: Gradient of the posterior covariance of shape (q, q, q, d) """ q, d, n = x_predict.shape[0], x_predict.shape[1], x_train.shape[0] - # Tensor for the gradients of (q, n) covariance matrix between x_predict and x_train with respect to - # x_predict (of shape (q, d)) - dkxX_dx = np.zeros((d, q*q, n)) + # Tensor for the gradients of (q, n) cross-covariance matrix between x_predict and x_train with respect to + # x_predict (of shape (q, d)): + d_cross_cov_xpredict_xtrain_dx = np.zeros((d, q*q, n)) # Tensor for the gradients of full covariance matrix at points x_predict (of shape (q, q) with respect to # x_predict (of shape (q, d)) - dkxx_dx = np.zeros((d, q*q, q)) + d_cov_xpredict_dx = np.zeros((d, q*q, q)) for i in range(d): - dkxX_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_train, i) - dkxx_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_predict, i) - dkxX_dx = dkxX_dx.reshape((d, q, q, 
n)) - dkxx_dx = dkxx_dx.reshape((d, q, q, q)) - dkxx_dx += dkxx_dx.transpose((0, 1, 3, 2)) - dkxx_dx.reshape((d, q, -1))[:, :, ::q + 1] = 0. + # Fill d_cross_cov_xpredict_xtrain_dx such that entry [i, j] is the derivative of the cross-covariance + # between x_predict and x_train (of shape (q, d)) with respect to scalar x_predict[j, i] + d_cross_cov_xpredict_xtrain_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_train, i) + d_cov_xpredict_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_predict, i) + d_cross_cov_xpredict_xtrain_dx = d_cross_cov_xpredict_xtrain_dx.reshape((d, q, q, n)) + d_cov_xpredict_dx = d_cov_xpredict_dx.reshape((d, q, q, q)) + d_cov_xpredict_dx += d_cov_xpredict_dx.transpose((0, 1, 3, 2)) + d_cov_xpredict_dx.reshape((d, q, -1))[:, :, ::q + 1] = 0. K = kern.K(x_predict, x_train) - dsigma = dkxx_dx - K @ w_inv @ dkxX_dx.transpose((0, 1, 3, 2)) - dkxX_dx @ w_inv @ K.T + dsigma = d_cov_xpredict_dx - K @ w_inv @ d_cross_cov_xpredict_xtrain_dx.transpose((0, 1, 3, 2)) - d_cross_cov_xpredict_xtrain_dx @ w_inv @ K.T return dsigma.transpose((2, 3, 1, 0)) From 9d52426408e58defbd7dbc443eee09b7fd1148a3 Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Sat, 1 Jan 2022 21:01:44 +0100 Subject: [PATCH 12/14] Add further documentation to gradients of covariance calculations --- emukit/model_wrappers/gpy_model_wrappers.py | 24 +++++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/emukit/model_wrappers/gpy_model_wrappers.py b/emukit/model_wrappers/gpy_model_wrappers.py index 5d57727b..7151c249 100644 --- a/emukit/model_wrappers/gpy_model_wrappers.py +++ b/emukit/model_wrappers/gpy_model_wrappers.py @@ -209,7 +209,8 @@ def dSigma(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np :param x_train: Training inputs of shape (n, d) :param kern: Covariance of the GP model :param w_inv: Woodbury inverse of the posterior fit of the GP - :return: Gradient of the posterior covariance of shape (q, q, q, d) + :return: Gradient of the
posterior covariance of shape (q, q, q, d). Here, res[i, j, k, l] is the derivative + of the [i, j]-th entry of the posterior covariance matrix with respect to x_predict[k, l] """ q, d, n = x_predict.shape[0], x_predict.shape[1], x_train.shape[0] # Tensor for the gradients of (q, n) cross-covariance matrix between x_predict and x_train with respect to @@ -219,9 +220,12 @@ def dSigma(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np # x_predict (of shape (q, d)) d_cov_xpredict_dx = np.zeros((d, q*q, q)) for i in range(d): - # Fill d_cross_cov_xpredict_xtrain_dx such that entry [i, j] is the derivative of the cross-covariance - # between x_predict and x_train (of shape (q, d)) with respect to scalar x_predict[j, i] + # Fill d_cross_cov_xpredict_xtrain_dx such that after reshaping to (d, q, q, n), entry [i, j] is + # the derivative of the cross-covariance between x_predict and x_train (of shape (q, n)) with respect + # to scalar x_predict[j, i] d_cross_cov_xpredict_xtrain_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_train, i) + # Fill d_cov_xpredict_dx such that after reshaping to (d, q, q, q), entry [i, j] is the derivative + # of the prior covariance at x_predict (of shape (q, q)) with respect to the scalar x_predict[j, i] d_cov_xpredict_dx[i, ::q + 1, :] = kern.dK_dX(x_predict, x_predict, i) d_cross_cov_xpredict_xtrain_dx = d_cross_cov_xpredict_xtrain_dx.reshape((d, q, q, n)) d_cov_xpredict_dx = d_cov_xpredict_dx.reshape((d, q, q, q)) @@ -229,7 +233,11 @@ def dSigma(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_inv: np d_cov_xpredict_dx.reshape((d, q, -1))[:, :, ::q + 1] = 0. 
K = kern.K(x_predict, x_train) - dsigma = d_cov_xpredict_dx - K @ w_inv @ d_cross_cov_xpredict_xtrain_dx.transpose((0, 1, 3, 2)) - d_cross_cov_xpredict_xtrain_dx @ w_inv @ K.T + dsigma = ( + d_cov_xpredict_dx + - K @ w_inv @ d_cross_cov_xpredict_xtrain_dx.transpose((0, 1, 3, 2)) + - d_cross_cov_xpredict_xtrain_dx @ w_inv @ K.T + ) return dsigma.transpose((2, 3, 1, 0)) @@ -244,12 +252,14 @@ def dmean(x_predict: np.ndarray, x_train: np.ndarray, kern: GPy.kern, w_vec: np. :return: Gradient of the posterior mean of shape (q, q, d) """ q, d, n = x_predict.shape[0], x_predict.shape[1], x_train.shape[0] - dkxX_dx = np.empty((q, n, d)) + # Tensor with derivative of the (prior) cross-covariance between x_predict and x_train with respect + # to x_predict + d_cross_cov_xpredict_xtrain_dx = np.empty((q, n, d)) dmu = np.zeros((q, q, d)) for i in range(d): - dkxX_dx[:, :, i] = kern.dK_dX(x_predict, x_train, i) + d_cross_cov_xpredict_xtrain_dx[:, :, i] = kern.dK_dX(x_predict, x_train, i) for j in range(q): - dmu[j, j, i] = (dkxX_dx[j, :, i][None, :] @ w_vec[:, None]).flatten() + dmu[j, j, i] = (d_cross_cov_xpredict_xtrain_dx[j, :, i][None, :] @ w_vec[:, None]).flatten() return dmu From c8b9f83da2958ddcb5e2c8c98e69273547f5789f Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Sat, 1 Jan 2022 21:13:50 +0100 Subject: [PATCH 13/14] Add an interface for differentiable cross-covariance models --- emukit/core/interfaces/models.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/emukit/core/interfaces/models.py b/emukit/core/interfaces/models.py index 0926e699..7f813d11 100644 --- a/emukit/core/interfaces/models.py +++ b/emukit/core/interfaces/models.py @@ -72,6 +72,35 @@ def get_joint_prediction_gradients(self, X: np.ndarray) -> Tuple[np.ndarray, np. 
raise NotImplementedError +class ICrossCovarianceDifferentiable: + def get_covariance_between_points(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray: + """ + Calculate posterior covariance between two sets of points. + + :param X1: An array of shape n_points1 x n_dimensions. This is the first argument of the + posterior covariance function. + :param X2: An array of shape n_points2 x n_dimensions. This is the second argument of the + posterior covariance function. + :return: An array of shape n_points1 x n_points2 of posterior covariances between X1 and X2. + Namely, [i, j]-th entry of the returned array will represent the posterior covariance + between i-th point in X1 and j-th point in X2. + """ + raise NotImplementedError + + def get_covariance_between_points_gradients(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray: + """ + Compute the derivative of the posterior covariance matrix between prediction at inputs x1 and x2 + with respect to x1. + + :param X1: Prediction inputs of shape (q1, d) + :param X2: Prediction inputs of shape (q2, d) + :return: nd array of shape (q1, q2, d) representing the gradient of the posterior covariance + between x1 and x2 with respect to x1. 
res[i, j, k] is the gradient of Cov(y1[i], y2[j]) + with respect to x1[i, k] + """ + raise NotImplementedError + + class IPriorHyperparameters: def generate_hyperparameters_samples(self, n_samples: int, n_burnin: int, subsample_interval: int, step_size: float, leapfrog_steps: int) -> np.ndarray: From f3453428e59dd7f2f66e6398980f59cabe5f4665 Mon Sep 17 00:00:00 2001 From: Bruno Mlodozeniec Date: Sat, 1 Jan 2022 21:17:12 +0100 Subject: [PATCH 14/14] Incorporate interface into GPyModel --- emukit/core/interfaces/__init__.py | 1 + emukit/model_wrappers/gpy_model_wrappers.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/emukit/core/interfaces/__init__.py b/emukit/core/interfaces/__init__.py index 4f0537ee..699e62e7 100644 --- a/emukit/core/interfaces/__init__.py +++ b/emukit/core/interfaces/__init__.py @@ -8,4 +8,5 @@ IPriorHyperparameters, # noqa: F401 IJointlyDifferentiable, # noqa: F401 IModelWithNoise, # noqa: F401 + ICrossCovarianceDifferentiable, # noqa: F401 ) diff --git a/emukit/model_wrappers/gpy_model_wrappers.py b/emukit/model_wrappers/gpy_model_wrappers.py index 7151c249..bf33cddd 100644 --- a/emukit/model_wrappers/gpy_model_wrappers.py +++ b/emukit/model_wrappers/gpy_model_wrappers.py @@ -7,13 +7,21 @@ import numpy as np import GPy -from ..core.interfaces import IModel, IDifferentiable, IJointlyDifferentiable, IPriorHyperparameters, IModelWithNoise +from ..core.interfaces import ( + IModel, + IDifferentiable, + IJointlyDifferentiable, + IPriorHyperparameters, + IModelWithNoise, + ICrossCovarianceDifferentiable, +) from ..experimental_design.interfaces import ICalculateVarianceReduction from ..bayesian_optimization.interfaces import IEntropySearchModel class GPyModelWrapper( - IModel, IDifferentiable, IJointlyDifferentiable, ICalculateVarianceReduction, IEntropySearchModel, IPriorHyperparameters, IModelWithNoise + IModel, IDifferentiable, IJointlyDifferentiable, ICrossCovarianceDifferentiable, ICalculateVarianceReduction, + 
IEntropySearchModel, IPriorHyperparameters, IModelWithNoise, ): """ This is a thin wrapper around GPy models to allow users to plug GPy models into Emukit