In [458]:
# Implementing freeze-thaw bayesian optimization with two-level Gaussian processes
# A global GP models the asymptotic mean of the learning curves of each HP-config
# Local GPs model the learning curves of each HP-config

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, Kernel, Sum, WhiteKernel
import numpy as np
import matplotlib.pyplot as plt
from ft_testfunction import global_xD, local_xD
import scipy as sc

# Hyper-Hyperparameters of Freeze-Thaw Bayesian Optimization
ALPHA,BETA = 1,0.5
NOISE = 0.1
B_OLD=10 # 10
B_NEW=3 # 3
MAX_EPOCHS=100
EPOCH_STEP=1
N_SAMPLES_MC=1000 # number of samples for Monte Carlo integration
N_FANT=5 # 5 # number of observations we fantasize for each point in the basket
N_INIT_CONFIGS=10 # number of random initializations for the optimization
N_INIT_EPOCHS=5 # number of epochs trained for initial configs
INFERRED_MEAN = 0.8 # inferred mean of the global GP
MATER_NU = 2.5 # Matern kernel parameter
EI_N_SAMPLES = 1000 #10000 # number of samples for EI optimization
PRED_EPOCH = 1 # how many epochs to predict for

# Meta-Parameters of the task
OBS_MIN = 0 # minimal loss value
OBS_MAX = 1 # maximal loss value


In [459]:
'''
Equation 3: Expected Improvement-formula
Equation 4: Entropy Search-formula
Equation 5&6: Exponential Decay-kernel for Gaussian Process
Equation 12,13,17 & 18: Posterior distribution of global Gaussian Process
Equation 14 & 19: Posterior predictive distribution of global Gaussian Process
Equation 15 & 20: Posterior predictive distribution of a local Gaussian Process with existing oberservations of this HP-config
Equation 16 & 21: Posterior predicitve distribution of a local Gaussian Process without existing oberservations of this HP-config
'''


'\nEquation 3: Expected Improvement-formula\nEquation 4: Entropy Search-formula\nEquation 5&6: Exponential Decay-kernel for Gaussian Process\nEquation 12,13,17 & 18: Posterior distribution of global Gaussian Process\nEquation 14 & 19: Posterior predictive distribution of global Gaussian Process\nEquation 15 & 20: Posterior predictive distribution of a local Gaussian Process with existing oberservations of this HP-config\nEquation 16 & 21: Posterior predicitve distribution of a local Gaussian Process without existing oberservations of this HP-config\n'

In [460]:
# Definition of exponential-decay kernel for local GPs
class ExponentialDecayNoiseKernel(Kernel):
    def __init__(self, alpha=1.0,beta=0.5,noise=0.1):
        self.beta=beta
        self.alpha=alpha
        self.noise = noise
    def __call__(self, X, Y=None,eval_gradient=False):
        if Y is None:
            Y = X
        # if eval_gradient:
        #     return (self.beta**self.alpha)/((X.flatten()[:,None]+Y.flatten()[None,:])+self.beta)**self.alpha,np.identity(X.shape[0])
        X=np.array(X)
        Y=np.array(Y)
        return ((self.beta**self.alpha)/((X[None,:]+Y[:,None])+self.beta)**self.alpha + self.noise*np.where(X[None,:]-Y[:,None] == 0, 1, 0)).T
    def diag(self, X):
        return np.diag(self(X))
    def is_stationary(self):
        return False

In [461]:
# Start by observing N_INIT_CONFIGS random configurations for N_INIT_EPOCHS epoch each
bounds={'HP1':(1.,5.),'HP2':(1.,5.)}
observed_configs_dicts={}
observed_configs_list=[]
for i in range(N_INIT_CONFIGS):
    new_config=np.empty(0)
    for key in bounds.keys():
        new_config=np.append(new_config,np.round(np.random.uniform(bounds[key][0],bounds[key][1]),2))
    # Observe the new configuration for N_INIT_EPOCHS epochs
    f_space = np.linspace(1,N_INIT_EPOCHS,N_INIT_EPOCHS)
    experimental_data=local_xD(np.array(new_config),f_space,noise=0.01)
    observed_configs_dicts['_'.join([str(config) for config in new_config])]=(f_space,experimental_data)


    observed_configs_list.append(new_config)
observed_configs_list=np.array(observed_configs_list)



print(observed_configs_dicts)
print(observed_configs_list)



{'3.51_4.47': (array([1., 2., 3., 4., 5.]), array([0.93, 0.87, 0.81, 0.75, 0.71])), '4.43_3.91': (array([1., 2., 3., 4., 5.]), array([0.95, 0.89, 0.87, 0.82, 0.77])), '1.24_3.12': (array([1., 2., 3., 4., 5.]), array([0.93, 0.9 , 0.86, 0.81, 0.77])), '3.89_4.91': (array([1., 2., 3., 4., 5.]), array([0.92, 0.87, 0.82, 0.77, 0.72])), '3.99_1.65': (array([1., 2., 3., 4., 5.]), array([0.93, 0.87, 0.83, 0.78, 0.73])), '1.84_2.32': (array([1., 2., 3., 4., 5.]), array([0.94, 0.87, 0.83, 0.79, 0.75])), '2.7_2.27': (array([1., 2., 3., 4., 5.]), array([0.95, 0.85, 0.81, 0.76, 0.68])), '2.2_3.34': (array([1., 2., 3., 4., 5.]), array([0.93, 0.86, 0.81, 0.77, 0.72])), '4.6_2.24': (array([1., 2., 3., 4., 5.]), array([0.96, 0.92, 0.87, 0.84, 0.82])), '3.02_1.94': (array([1., 2., 3., 4., 5.]), array([0.91, 0.86, 0.81, 0.76, 0.7 ]))}
[[3.51 4.47]
 [4.43 3.91]
 [1.24 3.12]
 [3.89 4.91]
 [3.99 1.65]
 [1.84 2.32]
 [2.7  2.27]
 [2.2  3.34]
 [4.6  2.24]
 [3.02 1.94]]


In [462]:
# from botorch.models import SingleTaskGP
# from gpytorch.kernels import MaternKernel, ScaleKernel
# from gpytorch.means import ConstantMean
# import torch
#MaternKernel(nu=2.5)
# print(torch.from_numpy(observed_configs_list))


# Define the kernel for the global GP
kernel_global = Matern(nu=MATER_NU)
kernel_local = ExponentialDecayNoiseKernel(alpha=ALPHA,beta=BETA,noise=NOISE)

def compute_k_x(x,new_x,kernel):
    k_x = kernel(x)
    k_x_star = kernel(x,new_x)
    k_star_star = kernel(new_x)
    return k_x,k_x_star,k_star_star

def compute_k_t_n(x,new_x,kernel):
    k_t=kernel(x)
    k_t_star=kernel(x,new_x)
    k_star_star=kernel(new_x)
    return k_t,k_t_star,k_star_star

def compute_k_x_star_star(new_x,kernel):
    return kernel(new_x)

def compute_k_t(x,x_dict):
    k_ts=[]
    for x_n in x:
        k_ts.append(kernel_local(x_dict['_'.join([str(c) for c in x_n])][0]))
    return sc.linalg.block_diag(*k_ts)

def compute_o(config_list,config_dict):
    o=[]
    for config in config_list:
        observations=config_dict['_'.join([str(c) for c in config])][1]
        o=sc.linalg.block_diag(*([o,np.ones((len(observations),1))]))
    return o[1:]

def compute_lambda(o,k_t_inv):
    return o.T@k_t_inv@o

def compute_gamma(o,k_t_inv,y,m):
    return o.T@k_t_inv@(y-o@m)

def constant_mean(x):
    return INFERRED_MEAN*np.ones(x.shape[0])

def compute_y_vector(config_list,config_dict):
    y_vec=np.empty(0)
    for config in config_list:
        observations=config_dict['_'.join([str(c) for c in config])][1]
        y_vec=np.append(y_vec,observations)
    return y_vec

def compute_c(k_x_inv,lambd):
    return np.linalg.inv(k_x_inv+lambd)

def compute_omega(k_t_n_star,k_t_n_inv):
    obs_steps=k_t_n_inv.shape[0]
    predict_steps=k_t_n_star.shape[1]
    return np.ones(predict_steps)-k_t_n_star.T@k_t_n_inv@np.ones(obs_steps)

def compute_mu(m,c,gamma):
    return (m+c@gamma)

# Equation 14/19:
def compute_mu_x_star(m,k_x_star,k_x_inv,mu,means_vec):
    return m+k_x_star.T@k_x_inv@(mu-means_vec)
def compute_sigma_x_star_star(k_x_star_star,k_x_star,k_x,lambd_inv):
    return k_x_star_star-k_x_star.T@np.linalg.inv(k_x+lambd_inv)@k_x_star

# Equation 16/21:
def compute_mu_n_star_new(mu_n,x_new):
    return mu_n*np.ones(x_new.shape[0])
def compute_sigma_n_star_new(k_t_star_star,sigma_star_star):
    return k_t_star_star+np.identity(k_t_star_star.shape[0])*sigma_star_star

# Equation 15/20:
def compute_mu_n_star_ex(k_t_n_star,k_t_n_inv,y_n,omega_n,mu_n):
    return k_t_n_star.T@k_t_n_inv@y_n+(omega_n*mu_n)
def compute_sigma_n_star_ex(k_t_n_star_star,k_t_n_star,k_t_n_inv,omega_n,c_nn):
    return k_t_n_star_star-k_t_n_star.T@k_t_n_inv@k_t_n_star+omega_n@(c_nn*omega_n.T)

def compute_entropy(mu_vector,var_vector,n_samples):
    var_mat=np.diag(var_vector)
    mc_bins=np.zeros(mu_vector.shape[0])
    for i in range(n_samples):
        global_gp_samples = np.random.multivariate_normal(mu_vector,var_mat)
        mc_bins[np.argmin(global_gp_samples)]+=1/n_samples
    return sc.stats.entropy(mc_bins)

In [463]:
new_x = np.array([[observed_configs_list[0,0],2],[3,2],[1,2]]) # Placeholer
print(observed_configs_list)
print(new_x)
k_x_n,k_x_star,k_x_star_star = compute_k_x(observed_configs_list,new_x,kernel_global)
k_x_inv = np.linalg.inv(k_x_n)
# print(f"Kx:\n{k_x}\n Kx*:\n{k_x_star}\n Kx**:\n{k_x_star_star}")

new_x_n=np.array([100]) # Placeholer
curve_n=observed_configs_dicts["_".join([str(x) for x in observed_configs_list[0]])]
print(observed_configs_list[0])
# print(curve)
# print(new_x_n)
k_t_n,k_t_n_star,k_t_n_star_star=compute_k_t_n(curve_n[0],new_x_n,kernel_local)
k_t_n_inv=np.linalg.inv(k_t_n)
# print(f"Ktn:\n{k_t_n}\n Ktn*:\n{k_t_n_star}\n Ktn**:\n{k_t_n_star_star}")

k_t=compute_k_t(observed_configs_list,observed_configs_dicts) # Placeholer
k_t_inv=np.linalg.inv(k_t)
# print(f"Kt:\n {k_t}")

o = compute_o(observed_configs_list,observed_configs_dicts)
# print(f"O:\n{o}")

lambd = compute_lambda(o,k_t_inv)
lambd_inv = np.linalg.inv(lambd)
# print(f"Lambda:\n{lambd}")

means_vec = constant_mean(observed_configs_list)
# print(f"Means:\n{means_vec}")

y_vec = compute_y_vector(observed_configs_list,observed_configs_dicts)
# print(f"Y:\n{y_vec}")

gamma = compute_gamma(o,k_t_inv,y_vec,means_vec)
# print(f"Gamma:\n{gamma}")

c = compute_c(k_x_inv,lambd)
# print(f"C:\n{c}")

mu_global = compute_mu(means_vec,c,gamma)
# print(f"Mu:\n{mu_global}")

omega_n = compute_omega(k_t_n_star,k_t_n_inv)
# print(f"Omega n:\n{omega_n}")

mu = compute_mu_x_star(constant_mean(new_x),k_x_star,k_x_inv,mu_global,means_vec)
print(f"μx*:\n{mu}")

var = compute_sigma_x_star_star(k_x_star_star,k_x_star,k_x_n,lambd_inv)
print(f"Σx**:\n{var}")

mu_n_star_ex = compute_mu_n_star_ex(k_t_n_star,k_t_n_inv,curve_n[1],omega_n,mu_global[0])
print(f"μn* (existing):\n{mu_n_star_ex}")

sigma_n_star_ex = compute_sigma_n_star_ex(k_t_n_star_star,k_t_n_star,k_t_n_inv,omega_n,c[0,0])
print(f"Σn* (existing):\n{sigma_n_star_ex}")

mu_n_star_new = compute_mu_n_star_new(mu_global[1],np.array([1]))
print(f"μn* (new):\n{mu_n_star_new}")

sigma_n_star_new = compute_sigma_n_star_new(compute_k_x_star_star(np.array([1]),kernel_local),var[1,1])
print(f"Σn* (new):\n{sigma_n_star_new}")

[[3.51 4.47]
 [4.43 3.91]
 [1.24 3.12]
 [3.89 4.91]
 [3.99 1.65]
 [1.84 2.32]
 [2.7  2.27]
 [2.2  3.34]
 [4.6  2.24]
 [3.02 1.94]]
[[3.51 2.  ]
 [3.   2.  ]
 [1.   2.  ]]
[3.51 4.47]
μx*:
[0.76909309 0.75502312 0.80738742]
Σx**:
[[0.18824243 0.05007778 0.00460365]
 [0.05007778 0.06071811 0.00229454]
 [0.00460365 0.00229454 0.62650824]]
μn* (existing):
[0.76163948]
Σn* (existing):
[[0.16823853]]
μn* (new):
[0.81442183]
Σn* (new):
[[0.36071811]]


In [464]:
def compute_EI_at_x(mu,var,best_mu):
    z=(best_mu-mu)/np.sqrt(var)
    return np.sqrt(var)*(z*sc.stats.norm.cdf(z))+sc.stats.norm.pdf(z)


In [465]:

'''
Our Bayesian optimization strategy proceeds by maintaining a basket of B = Bold + Bnew candidate
models. Bold represents some number of models that have already been trained to some degree, while Bnew
represents some number of brand new models. In practice, we set Bold = 10 and Bnew = 3. The entire
basket is chosen using models with the maximum EI at the asymptote, which is computed using Equations
19 and 3. Each round, after a new observation has been collected, the basket is re-built using possibly
different models. This step is essentially standard Bayesian optimization using EI.
'''

# Fill the basket with configs
basket_new=np.empty((0,len(bounds.keys())))
basket_old=np.empty((0,len(bounds.keys())))
basket_new_mu_var=[]
basket_old_mu_var=[]
basket_old_c=[]

# Get the best yet observed configuration
best_observation = np.min(np.concatenate([observed_configs_dicts['_'.join([str(c) for c in config])][1] for config in observed_configs_list]))
# print(f"Best observation: {best_observation}")

# Calculate EI for many configs to find the best ones

# Sample N_EI_SAMPLES new configurations
ei_configs = []
for i in range(EI_N_SAMPLES):
    while True:
        new_config=np.empty(0)
        for key in bounds.keys():
            new_config=np.append(new_config,np.round(np.random.uniform(bounds[key][0],bounds[key][1]),2))
        if not new_config in observed_configs_list:
            break
    ei_configs.append(new_config)
if len(basket_old)<B_OLD:
    ei_configs = np.concatenate([ei_configs,observed_configs_list])
ei_configs=np.array(ei_configs)
# print(ei_configs)

# Calculate the mean and variance at the asymptote for each config using equation 19
k_x_n,k_x_star,k_x_star_star = compute_k_x(observed_configs_list,ei_configs,kernel_global)
k_x_inv = np.linalg.inv(k_x_n)
k_t=compute_k_t(observed_configs_list,observed_configs_dicts)
k_t_inv=np.linalg.inv(k_t)
means_vec = constant_mean(observed_configs_list)
o=compute_o(observed_configs_list,observed_configs_dicts)
lambd = compute_lambda(o,k_t_inv)
lambd_inv = np.linalg.inv(lambd)
c=compute_c(k_x_inv,lambd)
y_vec = compute_y_vector(observed_configs_list,observed_configs_dicts)
gamma = compute_gamma(o,k_t_inv,y_vec,means_vec)
mu_global = compute_mu(means_vec,c,gamma)
mu = compute_mu_x_star(constant_mean(ei_configs),k_x_star,k_x_inv,mu_global,means_vec)
# print(f"μx*:\n{mu}")
cov = compute_sigma_x_star_star(k_x_star_star,k_x_star,k_x_n,lambd_inv)
var=np.diag(cov)
# print(f"Σx**:\n{var}")

# Calculate the EI scores for each config using equation 3
ei_scores=compute_EI_at_x(mu,var,best_observation)
sort_indices = np.argsort(ei_scores)[::-1]
ei_configs_ranked = ei_configs[sort_indices]
# print(ei_configs_ranked)


# Greedily choose the best HP-config using Equation 19 & 3 until B_OLD existing configs and B_NEW new configs are found

for n_sample,sampled_EI_config in enumerate(ei_configs_ranked):
    # Sample another config from EI and try to add it to the basket
    # If it is already in the basket, or the basket is full, skip it
    if not np.any(np.all(np.isin(observed_configs_list,sampled_EI_config),axis=1)) and (basket_new.shape[0]==0 or B_NEW>basket_new.shape[0] and not np.any(np.all(np.isin(basket_new,sampled_EI_config),axis=1))):
        # print(f"Adding new config to basket_new: {sampled_EI_config}")
        basket_new=np.vstack([basket_new,sampled_EI_config])
        basket_new_mu_var.append([mu[n_sample],var[n_sample]])
    elif np.any(np.all(np.isin(observed_configs_list,sampled_EI_config),axis=1)) and (basket_old.shape[0]==0 or B_OLD>basket_old.shape[0] and not np.any(np.all(np.isin(basket_old,sampled_EI_config),axis=1))):
        # print(f"Adding new config to basket_old: {sampled_EI_config}")
        basket_old=np.vstack([basket_old,sampled_EI_config])
        basket_old_c.append(c[np.where((observed_configs_list==sampled_EI_config).all(axis=1)),np.where((observed_configs_list==sampled_EI_config).all(axis=1))])
        basket_old_mu_var.append([mu[n_sample],var[n_sample]])

basket_new_mu_var=np.array(basket_new_mu_var)
basket_old_mu_var=np.array(basket_old_mu_var)
basket_old_c=np.array(basket_old_c)
baskets_combined=np.vstack([basket_new,basket_old])


# print(f"New basket:\n{basket_new}")
# print(f"Old basket:\n{basket_old}")
# print(f"New basket mu var:\n{basket_new_mu_var}")
# print(f"Old basket mu var:\n{basket_old_mu_var}")

In [466]:
# Compute the entropy of the basket via Monte Carlo sampling
baskets_mu_var=np.concatenate([basket_new_mu_var,basket_old_mu_var])
h_p_min=compute_entropy(baskets_mu_var[:,0],baskets_mu_var[:,1],N_SAMPLES_MC)
print(f"Entropy of P_min: {h_p_min}")
a=np.zeros(basket_new.shape[0]+basket_old.shape[0])
pred_epochs=np.linspace(1,PRED_EPOCH,PRED_EPOCH)

Entropy of P_min: 2.298286582802216


In [467]:
# For each config in the basket, N_FANT times fantasize an observation and recompute the information gain from it, collecting it in a
for k_config,chosen_config in enumerate(basket_new):
    
    # Fantasize an observation using Equation 21
    print(f"Chosen new config: {chosen_config}")
    mu_n_star_new = compute_mu_n_star_new(basket_new_mu_var[k_config][0],pred_epochs)
    # print(f"μn* (new):\n{mu_n_star_new}")
    sigma_n_star_new = compute_sigma_n_star_new(compute_k_x_star_star(pred_epochs,kernel_local),basket_new_mu_var[k_config][1])
    # print(f"Σn* (new):\n{sigma_n_star_new}")

    for f_n in range(N_FANT):
        # Fantasize an observation using the mu and sigma of the new config
        fantasized_observation = np.random.multivariate_normal(mu_n_star_new,sigma_n_star_new)
        # print(f"Fantasized observation: {fantasized_observation}")

        # Compute the global mus and sigmas now including the fantasized observation
        observations_incl_list=np.vstack([observed_configs_list,chosen_config])
        observations_incl_dicts=observed_configs_dicts.copy()
        observations_incl_dicts['_'.join([str(c) for c in chosen_config])]=(pred_epochs,fantasized_observation)
        # print(observations_incl_dicts['_'.join([str(c) for c in chosen_config])])

        k_x_n_incl,k_x_star_incl,k_x_star_star_incl = compute_k_x(observations_incl_list,baskets_combined,kernel_global)
        k_x_inv_incl = np.linalg.inv(k_x_n_incl)
        k_t_incl=compute_k_t(observations_incl_list,observations_incl_dicts)
        k_t_inv_incl=np.linalg.inv(k_t_incl)
        means_vec_incl = constant_mean(observations_incl_list)
        o=compute_o(observations_incl_list,observations_incl_dicts)
        lambd_incl = compute_lambda(o,k_t_inv_incl)
        lambd_inv_incl = np.linalg.inv(lambd_incl)
        c_incl=compute_c(k_x_inv_incl,lambd_incl)
        y_vec_incl = compute_y_vector(observations_incl_list,observations_incl_dicts)
        gamma_incl = compute_gamma(o,k_t_inv_incl,y_vec_incl,means_vec_incl)
        mu_global_incl = compute_mu(means_vec_incl,c_incl,gamma_incl)
        mu_y = compute_mu_x_star(constant_mean(baskets_combined),k_x_star_incl,k_x_inv_incl,mu_global_incl,means_vec_incl)
        # print(f"μx*:\n{mu}")
        cov_incl = compute_sigma_x_star_star(k_x_star_star_incl,k_x_star_incl,k_x_n_incl,lambd_inv_incl)
        var_y=np.diag(cov_incl)
        # print(f"Σx**:\n{var}")
        
        # Compute the new entropy of p_min_y, H(P_min_y)
        h_p_min_y=compute_entropy(mu_y,var_y,N_SAMPLES_MC)
        # print(f"Entropy of P_min: {h_p_min_y}")
        a[k_config]+=(h_p_min_y-h_p_min)/N_FANT


Chosen new config: [2.79 2.55]
Chosen new config: [2.88 2.77]
Chosen new config: [2.71 2.6 ]


In [468]:

for k_config,chosen_config in enumerate(basket_old):
    print(f"Chosen existing config: {chosen_config}")
    pred_epochs_n=pred_epochs+observed_configs_dicts['_'.join([str(c) for c in chosen_config])][0][-1]
    curve_n=observed_configs_dicts['_'.join([str(c) for c in chosen_config])]

    k_t_n,k_t_n_star,k_t_n_star_star=compute_k_t_n(curve_n[0],pred_epochs_n,kernel_local)
    k_t_n_inv=np.linalg.inv(k_t_n)
    omega_n = compute_omega(k_t_n_star,k_t_n_inv)
    mu_n_star_ex = compute_mu_n_star_ex(k_t_n_star,k_t_n_inv,curve_n[1],omega_n,basket_old_mu_var[k_config][0])
    # print(f"μn* (existing):\n{mu_n_star_ex}")
    sigma_n_star_ex = compute_sigma_n_star_ex(k_t_n_star_star,k_t_n_star,k_t_n_inv,omega_n,basket_old_c[k_config])
    # print(f"Σn* (existing):\n{sigma_n_star_ex}")

    for f_n in range(N_FANT):
        # Fantasize an observation using Equation 20
        # TODO fantasize an observation
        fantasized_observation = np.random.multivariate_normal(mu_n_star_new,sigma_n_star_new)
        # print(f"Fantasized observation: {fantasized_observation}")

        # Compute the global mus and sigmas now including the fantasized observation
        observations_incl_list=observed_configs_list
        observations_incl_dicts=observed_configs_dicts.copy()
        old_config_entry=observations_incl_dicts['_'.join([str(c) for c in chosen_config])]
        observations_incl_dicts['_'.join([str(c) for c in chosen_config])]=(np.append(old_config_entry[0],pred_epochs_n),np.append(old_config_entry[1],fantasized_observation))
        # print(observations_incl_dicts['_'.join([str(c) for c in chosen_config])])

        k_x_n_incl,k_x_star_incl,k_x_star_star_incl = compute_k_x(observations_incl_list,baskets_combined,kernel_global)
        k_x_inv_incl = np.linalg.inv(k_x_n_incl)
        k_t_incl=compute_k_t(observations_incl_list,observations_incl_dicts)
        k_t_inv_incl=np.linalg.inv(k_t_incl)
        means_vec_incl = constant_mean(observations_incl_list)
        o=compute_o(observations_incl_list,observations_incl_dicts)
        lambd_incl = compute_lambda(o,k_t_inv_incl)
        lambd_inv_incl = np.linalg.inv(lambd_incl)
        c_incl=compute_c(k_x_inv_incl,lambd_incl)
        y_vec_incl = compute_y_vector(observations_incl_list,observations_incl_dicts)
        gamma_incl = compute_gamma(o,k_t_inv_incl,y_vec_incl,means_vec_incl)
        mu_global_incl = compute_mu(means_vec_incl,c_incl,gamma_incl)
        mu_y = compute_mu_x_star(constant_mean(baskets_combined),k_x_star_incl,k_x_inv_incl,mu_global_incl,means_vec_incl)
        # print(f"μx*:\n{mu}")
        cov_incl = compute_sigma_x_star_star(k_x_star_star_incl,k_x_star_incl,k_x_n_incl,lambd_inv_incl)
        var_y=np.diag(cov_incl)
        # print(f"Σx**:\n{var}")
        
        # Compute the new entropy of p_min_y, H(P_min_y)
        h_p_min_y=compute_entropy(mu_y,var_y,N_SAMPLES_MC)
        # print(f"Entropy of P_min: {h_p_min_y}")

        a[k_config+B_NEW]+=(h_p_min_y-h_p_min)/N_FANT


# Select the config with the highest information gain
best_config=(np.concatenate([basket_new,basket_old]))[np.argmax(a)]
print(f"Best config: {best_config} ({'new' if np.argmax(a)<B_NEW else 'old'})")


Chosen existing config: [2.7  2.27]
Chosen existing config: [3.02 1.94]
Chosen existing config: [3.51 4.47]
Chosen existing config: [2.2  3.34]
Chosen existing config: [3.89 4.91]
Chosen existing config: [3.99 1.65]
Chosen existing config: [1.84 2.32]
Chosen existing config: [1.24 3.12]
Chosen existing config: [4.43 3.91]
Chosen existing config: [4.6  2.24]
Best config: [2.7  2.27] (old)
