This repository has been archived by the owner on Feb 23, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 261
/
bo.py
398 lines (332 loc) · 18.9 KB
/
bo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
# Copyright (c) 2016, the GPyOpt Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import GPyOpt
import collections
import numpy as np
import time
import csv
from ..util.general import best_value, normalize
from ..util.duplicate_manager import DuplicateManager
from ..core.errors import InvalidConfigError
from ..core.task.cost import CostModel
from ..optimization.acquisition_optimizer import ContextManager
try:
from ..plotting.plots_bo import plot_acquisition, plot_convergence
except:
pass
class BO(object):
    """
    Runner of the Bayesian optimization loop. This class wraps the optimization loop
    around the different handlers (model, space, objective, acquisition, evaluator).

    :param model: GPyOpt model class.
    :param space: GPyOpt space class.
    :param objective: GPyOpt objective class.
    :param acquisition: GPyOpt acquisition class.
    :param evaluator: GPyOpt evaluator class.
    :param X_init: 2d numpy array containing the initial inputs (one per row) of the model.
    :param Y_init: 2d numpy array containing the initial outputs (one per row) of the model.
    :param cost: GPyOpt cost class (default, None).
    :param normalize_Y: whether to normalize the outputs before performing any optimization (default, True).
    :param model_update_interval: interval of collected observations after which the model is updated (default, 1).
    :param de_duplication: GPyOpt DuplicateManager class. Avoids re-evaluating the objective at previous, pending or infeasible locations (default, False).
    """

    def __init__(self, model, space, objective, acquisition, evaluator, X_init, Y_init=None, cost = None, normalize_Y = True, model_update_interval = 1, de_duplication = False):
        self.model = model
        self.space = space
        self.objective = objective
        self.acquisition = acquisition
        self.evaluator = evaluator
        self.normalize_Y = normalize_Y
        self.model_update_interval = model_update_interval
        self.X = X_init
        self.Y = Y_init
        self.cost = CostModel(cost)
        self.normalization_type = 'stats' ## not added in the API
        self.de_duplication = de_duplication
        # Stacked history of model hyper-parameters, one row per saved iteration.
        self.model_parameters_iterations = None
        self.context = None
        self.num_acquisitions = 0
        # NOTE(review): _print_convergence reads self.initial_iter, and save_report reads
        # attributes such as self.objective_name / self.initial_design_numdata; they are
        # never set here, so subclasses are assumed to provide them — TODO confirm.

    def suggest_next_locations(self, context = None, pending_X = None, ignored_X = None):
        """
        Run a single optimization step and return the next locations to evaluate the objective.
        Number of suggested locations equals to batch_size.

        :param context: fixes specified variables to a particular context (values) for the optimization run (default, None).
        :param pending_X: matrix of input configurations that are in a pending state (i.e., do not have an evaluation yet) (default, None).
        :param ignored_X: matrix of input configurations that the user black-lists, i.e., those configurations will not be suggested again (default, None).
        """
        self.model_parameters_iterations = None
        self.num_acquisitions = 0
        self.context = context
        self._update_model(self.normalization_type)

        suggested_locations = self._compute_next_evaluations(pending_zipped_X = pending_X, ignored_zipped_X = ignored_X)
        return suggested_locations

    def run_optimization(self, max_iter = 0, max_time = np.inf, eps = 1e-8, context = None, verbosity=False, save_models_parameters= True, report_file = None, evaluations_file = None, models_file=None):
        """
        Runs Bayesian Optimization for a number 'max_iter' of iterations (after the initial exploration data).

        :param max_iter: exploration horizon, or number of acquisitions. If nothing is provided optimizes the current acquisition.
        :param max_time: maximum exploration horizon in seconds.
        :param eps: minimum distance between two consecutive x's to keep running the model.
        :param context: fixes specified variables to a particular context (values) for the optimization run (default, None).
        :param verbosity: flag to print the optimization results after each iteration (default, False).
        :param save_models_parameters: whether to record the model hyper-parameters after each iteration (GP models only).
        :param report_file: file to which the results of the optimization are saved (default, None).
        :param evaluations_file: file to which the evaluations are saved (default, None).
        :param models_file: file to which the model parameters are saved (default, None).
        :raises InvalidConfigError: if no objective function was provided at construction time.
        """
        if self.objective is None:
            raise InvalidConfigError("Cannot run the optimization loop without the objective function")

        # --- Save the options to print and save the results
        self.verbosity = verbosity
        self.save_models_parameters = save_models_parameters
        self.report_file = report_file
        self.evaluations_file = evaluations_file
        self.models_file = models_file
        self.model_parameters_iterations = None
        self.context = context

        # --- Check if we can save the model parameters in each iteration:
        # only GPModel / GPModel_MCMC expose get_model_parameters().
        if self.save_models_parameters:
            if not (isinstance(self.model, GPyOpt.models.GPModel) or isinstance(self.model, GPyOpt.models.GPModel_MCMC)):
                print('Models printout after each iteration is only available for GP and GP_MCMC models')
                self.save_models_parameters = False

        # --- Setting up stop conditions. A None horizon means "unbounded" on that axis.
        self.eps = eps
        if (max_iter is None) and (max_time is None):
            self.max_iter = 0
            self.max_time = np.inf
        elif (max_iter is None) and (max_time is not None):
            self.max_iter = np.inf
            self.max_time = max_time
        elif (max_iter is not None) and (max_time is None):
            self.max_iter = max_iter
            self.max_time = np.inf
        else:
            self.max_iter = max_iter
            self.max_time = max_time

        # --- Initial function evaluation and model fitting (only if Y was not supplied).
        if self.X is not None and self.Y is None:
            self.Y, cost_values = self.objective.evaluate(self.X)
            if self.cost.cost_type == 'evaluation_time':
                self.cost.update_cost_model(self.X, cost_values)

        # --- Initialize iterations and running time
        self.time_zero = time.time()
        self.cum_time  = 0
        self.num_acquisitions = 0
        self.suggested_sample = self.X
        self.Y_new = self.Y

        # --- Main BO loop: stops on time budget, iteration budget, numerical
        # failure of the model update, or two consecutive samples closer than eps.
        while (self.max_time > self.cum_time):
            # --- Update model
            try:
                self._update_model(self.normalization_type)
            except np.linalg.LinAlgError:
                # FIX: was np.linalg.linalg.LinAlgError — the private np.linalg.linalg
                # alias is deprecated/removed in modern NumPy; the public name is the
                # documented way to catch a failed (e.g. non-PSD) model update.
                break

            if (self.num_acquisitions >= self.max_iter
                    or (len(self.X) > 1 and self._distance_last_evaluations() <= self.eps)):
                break

            self.suggested_sample = self._compute_next_evaluations()

            # --- Augment X
            self.X = np.vstack((self.X,self.suggested_sample))

            # --- Evaluate *f* in X, augment Y and update cost function (if needed)
            self.evaluate_objective()

            # --- Update current evaluation time and function evaluations
            self.cum_time = time.time() - self.time_zero
            self.num_acquisitions += 1

            if verbosity:
                print("num acquisition: {}, time elapsed: {:.2f}s".format(
                    self.num_acquisitions, self.cum_time))

        # --- Stop messages and execution time
        self._compute_results()

        # --- Print the desired result in files
        if self.report_file is not None:
            self.save_report(self.report_file)
        if self.evaluations_file is not None:
            self.save_evaluations(self.evaluations_file)
        if self.models_file is not None:
            self.save_models(self.models_file)

    def _print_convergence(self):
        """
        Prints the reason why the optimization stopped.

        NOTE(review): relies on self.initial_iter, which is not set in this class;
        presumably a subclass initializes it — verify before calling.
        """
        if self.verbosity:
            if (self.num_acquisitions == self.max_iter) and (not self.initial_iter):
                print(' ** Maximum number of iterations reached **')
                return 1
            elif (self._distance_last_evaluations() < self.eps) and (not self.initial_iter):
                print(' ** Two equal location selected **')
                return 1
            elif (self.max_time < self.cum_time) and not (self.initial_iter):
                print(' ** Evaluation time reached **')
                return 0

            if self.initial_iter:
                print('** GPyOpt Bayesian Optimization class initialized successfully **')
                self.initial_iter = False

    def evaluate_objective(self):
        """
        Evaluates the objective at the currently suggested sample, updates the cost
        model with the observed evaluation cost and appends the new outputs to Y.
        """
        self.Y_new, cost_new = self.objective.evaluate(self.suggested_sample)
        self.cost.update_cost_model(self.suggested_sample, cost_new)
        self.Y = np.vstack((self.Y,self.Y_new))

    def _compute_results(self):
        """
        Computes the optimum and its value: best-so-far trace (Y_best), the
        minimizer found (x_opt) and its objective value (fx_opt).
        """
        self.Y_best = best_value(self.Y)
        self.x_opt = self.X[np.argmin(self.Y),:]
        self.fx_opt = np.min(self.Y)

    def _distance_last_evaluations(self):
        """
        Computes the Euclidean distance between the last two evaluated locations.
        Returns np.inf when fewer than two evaluations exist, so the eps-based
        stopping rule never triggers on the first iteration.
        """
        if self.X.shape[0] < 2:
            # less than 2 evaluations
            return np.inf
        return np.sqrt(np.sum((self.X[-1, :] - self.X[-2, :]) ** 2))

    def _compute_next_evaluations(self, pending_zipped_X=None, ignored_zipped_X=None):
        """
        Computes the location of the new evaluation (optimizes the acquisition in the standard case).

        :param pending_zipped_X: matrix of input configurations that are in a pending state (i.e., do not have an evaluation yet).
        :param ignored_zipped_X: matrix of input configurations that the user black-lists, i.e., those configurations will not be suggested again.
        :return: zipped (categorical-encoded) batch of suggested locations.
        """
        ## --- Update the context if any
        self.acquisition.optimizer.context_manager = ContextManager(self.space, self.context)

        ### --- Activate de_duplication
        if self.de_duplication:
            duplicate_manager = DuplicateManager(space=self.space, zipped_X=self.X, pending_zipped_X=pending_zipped_X, ignored_zipped_X=ignored_zipped_X)
        else:
            duplicate_manager = None

        ### We zip the value in case there are categorical variables
        return self.space.zip_inputs(self.evaluator.compute_batch(duplicate_manager=duplicate_manager, context_manager= self.acquisition.optimizer.context_manager))

    def _update_model(self, normalization_type='stats'):
        """
        Updates the model (when more than one observation is available) and saves the
        parameters (if available). Only runs every `model_update_interval` acquisitions.

        :param normalization_type: normalization applied to Y before fitting ('stats' by default).
        """
        if self.num_acquisitions % self.model_update_interval == 0:

            # input that goes into the model (is unziped in case there are categorical variables)
            X_inmodel = self.space.unzip_inputs(self.X)

            # Y_inmodel is the output that goes into the model
            if self.normalize_Y:
                Y_inmodel = normalize(self.Y, normalization_type)
            else:
                Y_inmodel = self.Y

            self.model.updateModel(X_inmodel, Y_inmodel, None, None)

        # Save parameters of the model
        self._save_model_parameter_values()

    def _save_model_parameter_values(self):
        """Appends the current model hyper-parameters to the per-iteration history."""
        if self.model_parameters_iterations is None:
            self.model_parameters_iterations = self.model.get_model_parameters()
        else:
            self.model_parameters_iterations = np.vstack((self.model_parameters_iterations,self.model.get_model_parameters()))

    def plot_acquisition(self, filename=None, label_x=None, label_y=None):
        """
        Plots the model and the acquisition function.
            if self.input_dim = 1: Plots data, mean and variance in one plot and the acquisition function in another plot
            if self.input_dim = 2: as before but it separates the mean and variance of the model in two different plots

        :param filename: name of the file where the plot is saved
        :param label_x: Graph's x-axis label, if None it is renamed to 'x' (1D) or 'X1' (2D)
        :param label_y: Graph's y-axis label, if None it is renamed to 'f(x)' (1D) or 'X2' (2D)
        """
        if self.model.model is None:
            # The model has not been fitted yet: fit a throw-away copy on the
            # current data so that something sensible can be plotted.
            from copy import deepcopy
            model_to_plot = deepcopy(self.model)
            if self.normalize_Y:
                Y = normalize(self.Y, self.normalization_type)
            else:
                Y = self.Y
            model_to_plot.updateModel(self.X, Y, self.X, Y)
        else:
            model_to_plot = self.model

        return plot_acquisition(self.acquisition.space.get_bounds(),
                                model_to_plot.model.X.shape[1],
                                model_to_plot.model,
                                model_to_plot.model.X,
                                model_to_plot.model.Y,
                                self.acquisition.acquisition_function,
                                self.suggest_next_locations(),
                                filename,
                                label_x,
                                label_y)

    def plot_convergence(self,filename=None):
        """
        Makes two plots to evaluate the convergence of the model:
            plot 1: Iterations vs. distance between consecutive selected x's
            plot 2: Iterations vs. the mean of the current model in the selected sample.

        :param filename: name of the file where the plot is saved
        """
        return plot_convergence(self.X,self.Y_best,filename)

    def get_evaluations(self):
        """Returns copies of the evaluated inputs X and outputs Y."""
        return self.X.copy(), self.Y.copy()

    def save_report(self, report_file= None):
        """
        Saves a report with the main results of the optimization.

        :param report_file: name of the file in which the results of the optimization are saved.
        """
        with open(report_file,'w') as file:
            import GPyOpt
            import time

            file.write('-----------------------------' + ' GPyOpt Report file ' + '-----------------------------------\n')
            file.write('GPyOpt Version ' + str(GPyOpt.__version__) + '\n')
            file.write('Date and time:               ' + time.strftime("%c")+'\n')
            if self.num_acquisitions==self.max_iter:
                file.write('Optimization completed:      ' +'YES, ' + str(self.X.shape[0]).strip('[]') + ' samples collected.\n')
            else:
                file.write('Optimization completed:      ' +'NO,' + str(self.X.shape[0]).strip('[]') + ' samples collected.\n')
            # Identical in both branches, so written once.
            file.write('Number initial samples:      ' + str(self.initial_design_numdata) +' \n')
            file.write('Tolerance:                   ' + str(self.eps) + '.\n')
            file.write('Optimization time:           ' + str(self.cum_time).strip('[]') +' seconds.\n')

            file.write('\n')
            file.write('--------------------------------' + ' Problem set up ' + '------------------------------------\n')
            file.write('Problem name:                ' + self.objective_name +'\n')
            file.write('Problem dimension:           ' + str(self.space.dimensionality) +'\n')
            file.write('Number continuous variables  ' + str(len(self.space.get_continuous_dims()) ) +'\n')
            file.write('Number discrete variables    ' + str(len(self.space.get_discrete_dims())) +'\n')
            file.write('Number bandits               ' + str(self.space.get_bandit().shape[0]) +'\n')
            file.write('Noiseless evaluations:       ' + str(self.exact_feval) +'\n')
            file.write('Cost used:                   ' + self.cost.cost_type +'\n')
            # NOTE(review): `self.constraints == True` only reports True when constraints
            # is literally True, not when a non-empty constraint list is present — TODO confirm intent.
            file.write('Constraints:                 ' + str(self.constraints==True) +'\n')

            file.write('\n')
            file.write('------------------------------' + ' Optimization set up ' + '---------------------------------\n')
            file.write('Normalized outputs:          ' + str(self.normalize_Y) + '\n')
            file.write('Model type:                  ' + str(self.model_type).strip('[]') + '\n')
            file.write('Model update interval:       ' + str(self.model_update_interval) + '\n')
            # FIX: the acquisition type / optimizer lines were previously written twice
            # (once unconditionally and once guarded); each is now written exactly once.
            file.write('Acquisition type:            ' + str(self.acquisition_type).strip('[]') + '\n')
            if hasattr(self, 'acquisition_optimizer') and hasattr(self.acquisition_optimizer, 'optimizer_name'):
                file.write('Acquisition optimizer:       ' + str(self.acquisition_optimizer.optimizer_name).strip('[]') + '\n')
            else:
                file.write('Acquisition optimizer:       None\n')
            file.write('Evaluator type (batch size): ' + str(self.evaluator_type).strip('[]') + ' (' + str(self.batch_size) + ')' + '\n')
            file.write('Cores used:                  ' + str(self.num_cores) + '\n')

            file.write('\n')
            file.write('---------------------------------' + ' Summary ' + '------------------------------------------\n')
            # np.min gives the scalar minimum directly (the builtin min over a 2d array
            # yielded a length-1 row that then needed the '[]' strip).
            file.write('Value at minimum:            ' + str(np.min(self.Y)).strip('[]') +'\n')
            file.write('Best found minimum location: ' + str(self.X[np.argmin(self.Y),:]).strip('[]') +'\n')

            file.write('----------------------------------------------------------------------------------------------\n')
            # FIX: removed redundant file.close(); the with-statement closes the file.

    def _write_csv(self, filename, data):
        """Writes `data` (a list of rows) to `filename` as tab-separated values."""
        with open(filename, 'w') as csv_file:
            writer = csv.writer(csv_file, delimiter='\t')
            writer.writerows(data)

    def save_evaluations(self, evaluations_file = None):
        """
        Saves evaluations at each iteration of the optimization.

        :param evaluations_file: name of the file in which the results are saved.
        """
        # One row per evaluation: iteration index, objective value(s), then inputs.
        iterations = np.array(range(1, self.Y.shape[0] + 1))[:, None]
        results = np.hstack((iterations, self.Y, self.X))
        header = ['Iteration', 'Y'] + ['var_' + str(k) for k in range(1, self.X.shape[1] + 1)]
        data = [header] + results.tolist()
        self._write_csv(evaluations_file, data)

    def save_models(self, models_file):
        """
        Saves model parameters at each iteration of the optimization.

        :param models_file: name of the file or a file buffer, in which the results are saved.
        :raises ValueError: if no iterations have been carried out yet.
        """
        if self.model_parameters_iterations is None:
            raise ValueError("No iterations have been carried out yet and hence no iterations of the BO can be saved")

        iterations = np.array(range(1,self.model_parameters_iterations.shape[0]+1))[:,None]
        results = np.hstack((iterations,self.model_parameters_iterations))
        header = ['Iteration'] + self.model.get_model_parameters_names()
        data = [header] + results.tolist()
        self._write_csv(models_file, data)