In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
import pandas as pd
from scipy.optimize import minimize
from capstone_library import *

# Hints
## Cake and Stuff
Time to get cooking! You are optimising a cake recipe with five ingredients. The output is the sum of five objectives: flavor, consistency, calories, waste and cost. Our expert taster scores each objective with negative points, so you want this sum to be as close to zero as possible!

# Let's go!

Let's load the data.

In [2]:
# Read the initial observations for function 6 from disk:
# X receives the query points, y the corresponding outputs.
X, y = (
    np.load(npy_path)
    for npy_path in (
        'initial_data/function_6/initial_inputs.npy',
        'initial_data/function_6/initial_outputs.npy',
    )
)

In [3]:
# Fetch the follow-up queries and their observations for function 6.
# Both files are read with the same helper; queries first, then observations.
new_queries, new_observ = (
    get_function_data_from_file(txt_path, 6)
    for txt_path in ('new_data/queries.txt', 'new_data/observations.txt')
)

In [4]:
# Shape the new data before touching X / y, so a failure below cannot leave
# the two arrays out of sync.
# One row per query; the column count is taken from X instead of hard-coded.
new_queries = np.asarray(new_queries).reshape(-1, X.shape[1])
new_observ = np.asarray(new_observ).reshape(-1)

# Every query needs exactly one observation; fail early with a clear message
# rather than later inside hstack / the GP fit.
if len(new_queries) != len(new_observ):
    raise ValueError(
        f"Got {len(new_queries)} new queries but {len(new_observ)} new observations"
    )

# NOTE: this cell overwrites X and y with the extended arrays, so running it
# twice appends the new data twice. Re-run the cell that loads the initial
# data before re-running this one.
# adding new_queries to X
X = np.concatenate((X, new_queries), axis=0)
# adding new_observ to Y
y = np.concatenate((y, new_observ), axis=0)

## Visualizing the data and thinking about the problem

In [5]:
# Tabulate the observations: one row per query, the five inputs followed by the output.
df = pd.DataFrame(
    np.column_stack((X, y)),  # y (1-D) becomes the last column
    columns=['x1', 'x2', 'x3', 'x4', 'x5', 'y'],
)
df.head(100)

Unnamed: 0,x1,x2,x3,x4,x5,y
0,0.728186,0.154693,0.732552,0.693997,0.056401,-0.714265
1,0.242384,0.8441,0.577809,0.679021,0.501953,-1.209955
2,0.729523,0.748106,0.679775,0.356552,0.671054,-1.6722
3,0.77062,0.114404,0.04678,0.648324,0.273549,-1.536058
4,0.618812,0.331802,0.187288,0.756238,0.328835,-0.829237
5,0.784958,0.910682,0.70812,0.959225,0.004911,-1.247049
6,0.145111,0.896685,0.896322,0.726272,0.236272,-1.233786
7,0.945069,0.288459,0.978806,0.961656,0.598016,-1.694343
8,0.12572,0.862725,0.028544,0.246605,0.751206,-2.57117
9,0.757594,0.355831,0.016523,0.434207,0.112433,-1.309116


In [6]:


# Rank the observations by output in descending order, so the largest y comes first.
df = df.sort_values('y', ascending=False)
df.head(100)

Unnamed: 0,x1,x2,x3,x4,x5,y
24,0.399365,0.394971,0.473561,0.739051,0.090497,-0.20156
25,0.399365,0.394971,0.473561,0.739051,0.090497,-0.275589
26,0.399365,0.394971,0.473561,0.739051,0.090497,-0.28405
23,0.399365,0.394971,0.473561,0.739051,0.090497,-0.295477
22,0.399365,0.394971,0.473561,0.739051,0.090497,-0.336811
28,0.360105,0.226128,0.566352,0.535602,0.170006,-0.530082
0,0.728186,0.154693,0.732552,0.693997,0.056401,-0.714265
4,0.618812,0.331802,0.187288,0.756238,0.328835,-0.829237
21,0.355092,0.0,0.861367,0.999999,0.264773,-0.906339
17,0.78288,0.536336,0.443284,0.8597,0.010326,-0.935757


In [7]:
# Define the acquisition function to be optimized (negative UCB in this case)
def negative_acquisition(X_new, gpr, kappa):
    """Return the negated Upper Confidence Bound (UCB) at the candidate point(s).

    UCB is ``mean + kappa * std`` of the model's prediction. The sign is flipped
    so that a minimiser (e.g. ``scipy.optimize.minimize``) maximises UCB.

    Parameters
    ----------
    X_new : array-like
        A single point as a flat vector, or several points as rows.
    gpr : fitted regressor
        Must provide ``predict(X, return_std=True)`` returning ``(mean, std)``.
    kappa : float
        Weight of the predictive standard deviation (exploration term).

    Returns
    -------
    numpy.ndarray
        ``-(mean + kappa * std)``, one value per candidate row.
    """
    X_new = np.asarray(X_new)

    # Take the input dimensionality from the model itself rather than from a
    # notebook-level global, so the function works for any fitted model.
    # scikit-learn estimators expose `n_features_in_` after `fit`.
    n_features = getattr(gpr, "n_features_in_", None)
    if n_features is None:
        # No dimensionality available: treat a flat vector as one point.
        X_new = np.atleast_2d(X_new)
    else:
        X_new = X_new.reshape(-1, n_features)

    mean, std = gpr.predict(X_new, return_std=True)
    ucb = mean + kappa * std
    return -ucb  # we want to maximize UCB, so minimize negative UCB

def get_next_query(kappa, X, y, n_restarts=10, random_state=None, alpha=1e-10):
    """Fit a Gaussian process to (X, y) and return the point that maximises UCB.

    The acquisition is optimised with L-BFGS-B from several random starting
    points and the best optimum found is returned. L-BFGS-B is a local
    optimiser, so a single start can end in a poor local optimum.

    Parameters
    ----------
    kappa : float
        Exploration weight forwarded to ``negative_acquisition``.
    X : array-like of shape (n_samples, n_dims)
        Points evaluated so far.
    y : array-like of shape (n_samples,)
        Observed outputs for ``X``.
    n_restarts : int, default 10
        Number of random starting points for the optimiser.
    random_state : int or None, default None
        Seed for the starting points. ``None`` draws from NumPy's global
        random state, as the single-start version did.
    alpha : float, default 1e-10
        Value added to the diagonal of the kernel matrix (scikit-learn's
        default). Consider a larger value when the same input has been
        observed with different outputs, since such rows make the kernel
        matrix close to singular.

    Returns
    -------
    numpy.ndarray of shape (n_dims,)
        The next query point, inside the search bounds.

    Raises
    ------
    ValueError
        If ``X`` is not a non-empty 2-D array or ``n_restarts`` is below 1.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2 or X.shape[0] == 0:
        raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_dims)")
    if n_restarts < 1:
        raise ValueError("n_restarts must be at least 1")
    n_dims = X.shape[1]

    # Initialize and fit the gpr
    gpr = GaussianProcessRegressor(alpha=alpha)
    gpr.fit(X, y)

    # Search box: one (low, high) pair per input dimension. The upper limit
    # stays just below 1 (presumably a requirement of the query format).
    upper = 0.999999
    bounds = [(0, upper)] * n_dims

    # Random starting points drawn inside the bounds.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    starts = rng.uniform(0, upper, size=(n_restarts, n_dims))

    # Run L-BFGS-B from every start and keep the lowest negative UCB.
    best_x, best_value = None, np.inf
    for x0 in starts:
        result = minimize(negative_acquisition, x0=x0, args=(gpr, kappa),
                          bounds=bounds, method='L-BFGS-B')
        # result.fun may be a 0-d/1-element array depending on the SciPy version.
        value = float(np.squeeze(result.fun))
        if best_x is None or value < best_value:
            best_x, best_value = result.x, value

    # The next query point is the one that maximizes the acquisition function
    return best_x

In [8]:
# Propose the next point with an exploration weight of 1, then print it
# in the formatted query string produced by format_query.
next_query = get_next_query(kappa=1, X=X, y=y)
print(format_query(next_query))

0.153811-0.402302-0.450219-0.862514-0.174621
