In [None]:
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Understanding Bayesian Optimization

[Bayesian optimization](https://arxiv.org/abs/1012.2599) is a powerful strategy for **finding the extrema of objective functions** that are expensive to evaluate. It is particularly useful when these evaluations are costly, when one does not have access to derivatives, or when the problem is non-convex.

- **Objective function**: The expensive, black-box function whose extremum we want to find.

- **Surrogate function**: Bayesian approximation of the objective function that can be sampled efficiently.

- **Acquisition function**: Technique by which the posterior is used to select the next sample from the search space.

The **Bayesian Optimization algorithm** can be summarized as follows.

- 1. Select a sample by optimizing the Acquisition function.

- 2. Evaluate the sample with the Objective function.

- 3. Update data and, in turn, the Surrogate function.

- 4. Go to 1.

Based on URL: [Step-by-Step Guide to Bayesian Optimization: A Python-based Approach](https://medium.com/@okanyenigun/step-by-step-guide-to-bayesian-optimization-a-python-based-approach-3558985c6818)

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# Notebook-wide settings: fix the NumPy seed so the random initial sample is
# reproducible, and use a compact default figure size for every plot below.
np.random.seed(42)
matplotlib.rcParams.update({'figure.figsize': (6, 4)})

def plot_bbf(x, y, title):
    """Draw the black-box curve as a dotted black line on the current axes.

    Args:
        x: Values for the horizontal axis.
        y: Values plotted against ``x``.
        title: Title given to the axes.

    Returns nothing; callers add further artists and the legend themselves.
    """
    axes = plt.gca()
    axes.plot(x, y, color='black', linestyle='dotted', label='Black Box Function')
    axes.set_xlabel('x')
    axes.set_ylabel('y')
    axes.set_title(title)

# 1. Black box function = Objective function

In [None]:
def black_box_function(x):
    """Objective to maximise: ``sin(x) + cos(2x)``.

    Args:
        x: Scalar or NumPy array of input locations.

    Returns:
        The objective value(s), with the same shape as ``x``.
    """
    return np.sin(x) + np.cos(2*x)

# Search space: 100 evenly spaced grid points on [-2*pi, 2*pi]
x_range = np.linspace(-2*np.pi, 2*np.pi, num=100)

# Objective evaluated on the whole grid; used here only to draw the curve
black_box_output = black_box_function(x_range)

# Show what the (normally hidden) objective looks like
plot_bbf(x_range, black_box_output, title='Black Box Function Output')

# 2. Initial Sample - just a few expensive points

In [None]:
# Initial design: a handful of grid points at which the "expensive" objective
# is evaluated before any surrogate model is fitted.
num_samples = 10

# replace=False matters: np.random.choice samples WITH replacement by default,
# so the same x could be drawn twice. A duplicate wastes an evaluation and
# hands the Gaussian process two identical training rows.
sample_x = np.random.choice(x_range, size=num_samples, replace=False)

# output for each sampled x value
sample_y = black_box_function(sample_x)

# plot the true curve together with the points the optimiser gets to see
plot_bbf(x_range, black_box_function(x_range), 'Sampled Points')
plt.scatter(sample_x, sample_y, color='red', label='Samples')
plt.legend();  # trailing ';' hides the Legend object's repr in the cell output

# 3. Modeling a surrogate function by Gaussian Process with Initial Sample

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

# Surrogate model: Gaussian process regressor with an RBF kernel
# (length scale initialised to 1.0)
kernel = RBF(length_scale=1.0)
gp_model = GaussianProcessRegressor(kernel=kernel)

# The regressor is given 2-D inputs, so the 1-D x arrays are reshaped into
# single-column matrices.
X_train = sample_x.reshape(-1, 1)
X_grid = x_range.reshape(-1, 1)

# Condition the surrogate on the initial samples
gp_model.fit(X_train, sample_y)

# Predicted mean and standard deviation at every grid point
y_pred, y_std = gp_model.predict(X_grid, return_std=True)

# Overlay the true curve, the samples, the GP mean and a +/- 2 std band
plot_bbf(x_range, black_box_function(x_range), 'Black Box Function with Gaussian Process Surrogate Model')
plt.scatter(sample_x, sample_y, color='red', label='Samples')
plt.plot(x_range, y_pred, color='blue', label='Gaussian Process')
plt.fill_between(x_range, y_pred - 2*y_std, y_pred + 2*y_std, color='orange', alpha=0.2)
plt.legend()

# 3. Acquisition Function
Acquisition functions determine the next point or set of points to evaluate in the search space. 

### 3.1 Expected Improvement (EI) 
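Expected Improvement selects points that have the potential to improve upon the best-observed value, weighting the size of a possible improvement by how likely it is under the surrogate model.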

In [None]:
from scipy.stats import norm

def expected_improvement(x, gp_model, best_y):
    """Expected Improvement (EI) of candidate points over ``best_y`` (maximisation).

    With predicted mean ``mu`` and standard deviation ``sigma`` at a candidate::

        z  = (mu - best_y) / sigma
        EI = (mu - best_y) * Phi(z) + sigma * phi(z)

    where ``Phi`` / ``phi`` are the standard normal CDF / PDF.

    Args:
        x: Candidate inputs (array-like); flattened into a single feature column.
        gp_model: Fitted model exposing ``predict(X, return_std=True)``.
        best_y: Best objective value observed so far.

    Returns:
        np.ndarray with one EI value per candidate. Where the predicted
        standard deviation is exactly 0 the result is ``max(mu - best_y, 0)``
        (the sigma -> 0 limit of the formula) instead of NaN.
    """
    y_pred, y_std = gp_model.predict(np.asarray(x).reshape(-1, 1), return_std=True)
    improvement = y_pred - best_y

    # The predicted std can be exactly 0 (e.g. at already-evaluated points).
    # Dividing by it gives 0/0 = NaN plus runtime warnings, so those entries
    # are computed with a dummy divisor and overwritten below.
    has_spread = y_std > 0
    z = improvement / np.where(has_spread, y_std, 1.0)
    ei = improvement * norm.cdf(z) + y_std * norm.pdf(z)

    return np.where(has_spread, ei, np.maximum(improvement, 0.0))

# Incumbent: the sample with the largest observed objective value
best_idx = sample_y.argmax()
best_x, best_y = sample_x[best_idx], sample_y[best_idx]

# EI of every grid point relative to the incumbent
ei = expected_improvement(x_range, gp_model, best_y)

# Visualise the acquisition values over the search space
plt.plot(x_range, ei, color='green', label='Expected Improvement')
plt.title('Expected Improvement')
plt.xlabel('x')
plt.ylabel('Expected Improvement')
plt.legend()

### 3.2 Upper Confidence Bound (UCB) 

The Upper Confidence Bound trades off exploration and exploitation by balancing the mean prediction of the surrogate model and an exploration term proportional to the uncertainty. It selects points that offer a good balance between predicted high values and exploration of uncertain regions.

In [None]:
def upper_confidence_bound(x, gp_model, beta):
    """Upper Confidence Bound acquisition: predicted mean plus ``beta`` times std.

    Args:
        x: NumPy array of candidate inputs; reshaped into a single column.
        gp_model: Fitted model exposing ``predict(X, return_std=True)``.
        beta: Weight applied to the predicted standard deviation.

    Returns:
        One UCB value per candidate.
    """
    mean, std = gp_model.predict(x.reshape(-1, 1), return_std=True)
    return mean + beta * std

# Weight on the uncertainty term of UCB (mean + beta * std)
beta = 2.0

# UCB value at every grid point
ucb = upper_confidence_bound(x_range, gp_model, beta=beta)

# Visualise the acquisition values over the search space
plt.plot(x_range, ucb, color='green', label='UCB')
plt.title('UCB')
plt.xlabel('x')
plt.ylabel('UCB')
plt.legend()

### 3.3 Probability of Improvement (PI) 
Probability of Improvement estimates the probability that a point will improve upon the current best value. It considers the difference between the mean prediction and the current best value, taking into account the uncertainty in the surrogate model.

In [None]:
def probability_of_improvement(x, gp_model, best_y):
    """Probability of Improvement (PI) of candidate points over ``best_y``.

    With predicted mean ``mu`` and standard deviation ``sigma`` at a candidate,
    ``PI = Phi((mu - best_y) / sigma)`` where ``Phi`` is the standard normal CDF.

    Args:
        x: Candidate inputs (array-like); flattened into a single feature column.
        gp_model: Fitted model exposing ``predict(X, return_std=True)``.
        best_y: Best objective value observed so far.

    Returns:
        np.ndarray with one probability per candidate. Where the predicted
        standard deviation is exactly 0 the prediction is treated as certain:
        1.0 if ``mu > best_y``, otherwise 0.0 (instead of NaN for ``mu == best_y``).
    """
    y_pred, y_std = gp_model.predict(np.asarray(x).reshape(-1, 1), return_std=True)
    improvement = y_pred - best_y

    # Guard the division: a std of exactly 0 would give 0/0 = NaN plus runtime
    # warnings. Those entries use a dummy divisor and are overwritten below.
    has_spread = y_std > 0
    z = improvement / np.where(has_spread, y_std, 1.0)

    return np.where(has_spread, norm.cdf(z), (improvement > 0).astype(float))

# PI of every grid point relative to the incumbent best_y
pi = probability_of_improvement(x_range, gp_model, best_y)

# Visualise the acquisition values over the search space
plt.plot(x_range, pi, color='green', label='PI')
plt.title('PI')
plt.xlabel('x')
plt.ylabel('PI')
plt.legend()

# 4. Keep learning and improving - in every iteration

In [None]:
num_iterations = 5

# Weight on the uncertainty term of UCB; constant across iterations, so it is
# set once here rather than on every pass through the loop.
beta = 2.0

# The dense evaluation of the objective is only used for drawing the reference
# curve and does not change between iterations, so compute it once.
true_y = black_box_function(x_range)

for i in range(num_iterations):
    # Refit the surrogate on every point evaluated so far
    gp_model.fit(sample_x.reshape(-1, 1), sample_y)

    # Track the incumbent (best observation so far). UCB does not need it;
    # it is kept up to date so it can be inspected after the loop.
    best_idx = np.argmax(sample_y)
    best_x = sample_x[best_idx]
    best_y = sample_y[best_idx]

    # Acquisition values (UCB) at every grid point under the refitted model
    ucb = upper_confidence_bound(x_range, gp_model, beta)

    # Plot the black box function, the acquisition curve, previous points, and the new point
    title = f"Iteration #{i+1}"
    plot_bbf(x_range, true_y, title)
    # The blue curve is the UCB acquisition (mean + beta * std), not the
    # surrogate mean, so the legend names it accordingly.
    plt.plot(x_range, ucb, color='blue', label='UCB (acquisition)')
    plt.scatter(sample_x, sample_y, color='red', label='Previous Points')
    # The final iteration only visualises the last fit; no new point is acquired.
    if i < num_iterations - 1:
        new_x = x_range[np.argmax(ucb)]  # next point: grid location with the highest UCB
        new_y = black_box_function(new_x)
        sample_x = np.append(sample_x, new_x)
        sample_y = np.append(sample_y, new_y)
        plt.scatter(new_x, new_y, color='green', label='New Points')
    plt.legend()
    plt.show()