In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
import pandas as pd
from scipy.optimize import minimize
from capstone_library import *

# Hints
## High-dimensional Optimisation
High-dimensional black-box optimisation can be very difficult, so sticking to local solutions is not the worst idea here.

# Let's go!

Let's load the data.

In [2]:
# Initial observations shipped for function 8: inputs in X, outputs in y.
X, y = (
    np.load('initial_data/function_8/initial_inputs.npy'),
    np.load('initial_data/function_8/initial_outputs.npy'),
)

In [3]:
# Read the queries submitted so far and their observed outputs for function 8.
new_queries, new_observ = (
    get_function_data_from_file(path, 8)
    for path in ('new_data/queries.txt', 'new_data/observations.txt')
)

In [4]:
# Stack the submitted queries (one 8-dimensional point per row) below the
# initial inputs.
# NOTE: X and y are rebound in place, so running this cell a second time
# appends the same rows again; re-run the loading cells first.
new_queries = np.array(new_queries).reshape(-1, 8)
X = np.vstack((X, new_queries))

# Flatten the matching observations and append them to the initial outputs.
new_observ = np.array(new_observ).ravel()
y = np.concatenate((y, new_observ))

## Visualising the data and thinking about the problem

In [5]:
# Tabulate every observation: the eight inputs followed by the output.
df = pd.DataFrame(
    np.column_stack((X, y)),
    columns=['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'y'],
)
df.head(100)

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,y
0,0.604994,0.292215,0.908453,0.355506,0.201669,0.575338,0.310311,0.734281,7.398721
1,0.178007,0.566223,0.994862,0.210325,0.320153,0.707909,0.635384,0.107132,7.005227
2,0.009077,0.811626,0.52052,0.075687,0.265112,0.091652,0.592415,0.36732,8.459482
3,0.506028,0.65373,0.363411,0.177981,0.093728,0.197425,0.755827,0.292472,8.284008
4,0.359909,0.249076,0.495997,0.709215,0.114987,0.289207,0.557295,0.593882,8.606117
5,0.778818,0.003419,0.337983,0.519528,0.820907,0.537247,0.551347,0.660032,8.541748
6,0.908649,0.06225,0.23826,0.766604,0.132336,0.990244,0.688068,0.742496,7.327435
7,0.586371,0.880736,0.745021,0.546035,0.009649,0.748992,0.230907,0.097916,7.299872
8,0.761137,0.854672,0.382124,0.337352,0.689708,0.309853,0.63138,0.041956,7.957875
9,0.984933,0.699506,0.998885,0.180148,0.580143,0.231087,0.490827,0.313683,5.592193


In [6]:

# Rank the observations from highest to lowest output (best point first).
df = df.sort_values('y', ascending=False)
df.head(100)

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,y
51,0.19631,0.263867,0.168181,0.190897,0.579993,0.602231,0.280189,0.632524,9.914818
50,0.19631,0.263867,0.168181,0.190897,0.579993,0.602231,0.280189,0.632524,9.914818
52,0.161983,0.132279,0.12939,0.269199,0.516103,0.631162,0.310181,0.62385,9.895954
49,0.17672,0.191974,0.156996,0.196604,0.556229,0.823709,0.209507,0.677535,9.846826
40,0.210273,0.29017,0.134932,0.279054,0.43014,0.552179,0.318519,0.488237,9.838841
48,0.245435,0.0,0.085235,0.19517,0.252149,0.581288,0.334944,0.677033,9.733454
41,0.265549,0.200695,0.16959,0.358663,0.487994,0.645308,0.486508,0.854705,9.653925
46,0.062713,0.130847,0.292516,0.149428,0.465503,0.571193,0.528947,0.518116,9.639523
45,0.062713,0.130847,0.292516,0.149428,0.465503,0.571193,0.528947,0.518116,9.639523
44,0.062713,0.130847,0.292516,0.149428,0.465503,0.571193,0.528947,0.518116,9.639523


In [7]:
# Define the acquisition function to be optimized (negative UCB in this case)
def negative_acquisition(X_new, gpr, kappa):
    """Return the negated Upper Confidence Bound (UCB) at the candidate point(s).

    UCB is ``mean + kappa * std`` of the model's prediction. The sign is
    flipped so that a minimiser can be used to maximise UCB.

    Parameters
    ----------
    X_new : array-like
        One candidate as a flat vector (the form an optimiser passes in), or
        several candidates laid out row by row.
    gpr : object
        Model exposing ``predict(X, return_std=True)``. When it carries the
        training inputs as ``X_train_`` (as a fitted scikit-learn
        ``GaussianProcessRegressor`` does), their width defines the number
        of features per candidate.
    kappa : float
        Weight applied to the predictive standard deviation.

    Returns
    -------
    numpy.ndarray
        ``-(mean + kappa * std)``, one value per candidate row.
    """
    X_new = np.asarray(X_new)

    # Take the feature count from the model itself rather than from a
    # notebook-level ``X``: the function then works on a fresh kernel and stays
    # correct when the model was fitted on data other than the global array.
    train_inputs = getattr(gpr, "X_train_", None)
    if train_inputs is not None:
        n_features = np.shape(train_inputs)[1]
    else:
        # No training data attached to the model: treat the last axis of the
        # input as the feature axis (a flat vector is a single candidate).
        n_features = X_new.shape[-1]

    candidates = X_new.reshape(-1, n_features)
    mean, std = gpr.predict(candidates, return_std=True)
    return -(mean + kappa * std)  # minimising -UCB maximises UCB

def get_next_query(kappa, X, y, random_state=None):
    """Suggest the next point to evaluate by maximising UCB under a GP model.

    A ``GaussianProcessRegressor`` with default settings is fitted to
    ``(X, y)``. ``negative_acquisition`` is then minimised with L-BFGS-B from
    one random starting point inside the search box, so the result is a local
    optimum of the acquisition function.

    Parameters
    ----------
    kappa : float
        Weight on the predictive standard deviation in the UCB; forwarded to
        ``negative_acquisition``.
    X : array-like of shape (n_samples, n_dims)
        Inputs observed so far. The search dimensionality is taken from its
        second axis.
    y : array-like of shape (n_samples,)
        Outputs observed at ``X``.
    random_state : int or None, optional
        Seed for the random starting point. With ``None`` (the default) the
        start is drawn from NumPy's global random state, so an earlier
        ``np.random.seed`` call still applies.

    Returns
    -------
    numpy.ndarray of shape (n_dims,)
        The optimiser's final point (``result.x``). Convergence flags on the
        optimiser result are not inspected.
    """
    X = np.asarray(X)
    n_dims = X.shape[1]  # follow the data instead of assuming eight inputs

    # Initialize and fit the surrogate model.
    # NOTE(review): with no arguments scikit-learn documents a fixed
    # (non-optimised) kernel and normalize_y=False; worth confirming that this
    # is intended for outputs that sit well away from zero.
    gpr = GaussianProcessRegressor()
    gpr.fit(X, y)

    # Search box: every coordinate in [0, 0.999999].
    lower, upper = 0.0, 0.999999
    bounds = [(lower, upper)] * n_dims

    # Draw the start inside the box so it never violates the upper bound.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    x0 = rng.uniform(lower, upper, size=n_dims)

    # Bounded quasi-Newton minimisation of the negated acquisition function.
    result = minimize(
        negative_acquisition,
        x0=x0,
        args=(gpr, kappa),
        bounds=bounds,
        method='L-BFGS-B',
    )

    # The minimiser of -UCB is the (local) maximiser of UCB.
    return result.x

In [8]:
# Ask for the next point with kappa = 1, then print it in submission format.
next_query = get_next_query(kappa=1, X=X, y=y)
print(format_query(next_query))

0.209184-0.268544-0.120294-0.164296-0.549751-0.711747-0.342668-0.601690
