Merged
77 changes: 48 additions & 29 deletions GPflowOpt/acquisition/acquisition.py
Original file line number Diff line number Diff line change
@@ -34,16 +34,19 @@ class Acquisition(Parameterized):
In Bayesian Optimization this function is typically optimized over the optimization domain
to determine the next point for evaluation.

An object of this class holds a list of GPflow models. For single objective optimization this is typically a
single model. Subclasses implement a build_acquisition function which computes the acquisition function (usually
from the predictive distribution) using TensorFlow. Each model is automatically optimized when an acquisition object
is constructed or when set_data is called.
An object of this class holds a list of GPflow models. Subclasses implement a build_acquisition function
which computes the acquisition function (usually from the predictive distribution) using TensorFlow.
Each model is automatically optimized when an acquisition object is constructed or when set_data is called.

Acquisition functions can be combined through addition or multiplication to construct joint criteria
(for instance for constrained optimization)
Acquisition functions can be combined through addition or multiplication to construct joint criteria,
for instance for constrained optimization.
"""

def __init__(self, models=[], optimize_restarts=5):
"""
:param models: list of GPflow models representing our beliefs about the problem
:param optimize_restarts: number of optimization restarts to use when training the models
"""
Contributor Author


What is the Python way of documenting overridden subclass functions such as the constructor? I think Sphinx will actually only display the docstring of the overridden method?

Here optimize_restarts is not visible from the overriding class anyway.

On the other hand, many overridden functions and parameters (such as models here, or build_acquisition) do not require specific documentation (do we copy the docstring for each overridden method, or document only the base/abstract method?).

Member


By default all special members (__getitem__, __eq__, etc.) are excluded. I added the special-members directive but it's not doing anything. I often see constructor documentation put in place of the class documentation. If you google it, there are some ways to enable it, but be a bit careful and check the output of the Sphinx build very well. I noticed changing things in conf.py can have significant side effects.

super(Acquisition, self).__init__()
self._models = ParamList([DataScaler(m) for m in np.atleast_1d(models).tolist()])
self._default_params = list(map(lambda m: m.get_free_state(), self._models))
@@ -56,10 +59,11 @@ def _optimize_models(self):
"""
Optimizes the hyperparameters of all models that the acquisition function is based on.

It is called after initialization and set_data(), and before optimizing the acquisition function itself.
It is called automatically during initialization and each time set_data() is called.
When using the high-level :class:`..BayesianOptimizer` class calling set_data() is taken care of.

For each model the hyperparameters of the model at the time it was passed to __init__() are used as initial
point and optimized. If optimize_restarts was configured to values larger than one additional randomization
point and optimized. If optimize_restarts is set to a value larger than 1, additional randomization
steps are performed.

As a special case, if optimize_restarts is set to zero, the hyperparameters of the models are not optimized.
@@ -82,14 +86,15 @@ def _optimize_models(self):
best_idx = np.argmin([r.fun for r in runs])
model.set_state(runs[best_idx].x)
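The restart loop above (start from the stored state, add randomized restarts, keep the run with the lowest objective) can be sketched independently of GPflow. This is a minimal sketch only; neg_log_lik and optimize_with_restarts are hypothetical stand-ins, not GPflowOpt functions:

```python
import numpy as np
from scipy.optimize import minimize

def neg_log_lik(theta):
    # Hypothetical stand-in for a model's negative log marginal likelihood:
    # a smooth bowl with its minimum at theta = (1, -2).
    return (theta[0] - 1.0) ** 2 + (theta[1] + 2.0) ** 2

def optimize_with_restarts(x0, n_restarts, rng):
    # The first run starts from the state the model had at construction time;
    # further runs start from random draws (mirrors optimize_restarts > 1).
    starts = [x0] + [rng.standard_normal(x0.shape) for _ in range(n_restarts - 1)]
    runs = [minimize(neg_log_lik, s) for s in starts]
    best = min(runs, key=lambda r: r.fun)  # same idea as np.argmin([r.fun for r in runs])
    return best.x

rng = np.random.default_rng(0)
theta = optimize_with_restarts(np.zeros(2), n_restarts=5, rng=rng)
```

For a convex toy objective all restarts agree; for multimodal likelihoods the restarts are what make the argmin selection worthwhile.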

def build_acquisition(self):
def build_acquisition(self, Xcand):
raise NotImplementedError

def enable_scaling(self, domain):
"""
Enables and configures the :class:`.DataScaler` objects wrapping the GP models.

:param domain: :class:`.Domain` object, the input transform of the data scalers is configured as a transform
from domain to the unit cube with the same dimensionality.
from domain to the unit cube with the same dimensionality.
"""
n_inputs = self.data[0].shape[1]
assert (domain.size == n_inputs)
@@ -103,11 +108,11 @@ def set_data(self, X, Y):
Update the training data of the contained models. Automatically triggers a hyperparameter optimization
step by calling _optimize_all() and an update of pre-computed quantities by calling setup().

Consider Q to be the the sum of the output dimensions of the contained models, Y should have a minimum of
Let Q be the sum of the output dimensions of all contained models; Y should have a minimum of
Q columns. Only the first Q columns of Y are used, and the scalar Q is returned

:param X: input data N x D
:param Y: Responses N x M (M >= Q)
:param Y: output data N x R (R >= Q)
:return: Q (sum of output dimensions of contained models)
"""
num_outputs_sum = 0
@@ -120,23 +125,30 @@ def set_data(self, X, Y):
model.Y = Ypart

self._optimize_models()

# Only call setup for the high-level acquisition function
if self.highest_parent == self:
self.setup()
return num_outputs_sum
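The column bookkeeping described above (assign the first Q columns of Y to the models in order, ignore the rest) can be sketched as follows; split_outputs is a hypothetical helper, not part of GPflowOpt:

```python
import numpy as np

def split_outputs(Y, output_dims):
    # Assign the first sum(output_dims) columns of Y to the models, in order;
    # trailing columns (R > Q) are ignored, mirroring set_data.
    parts, start = [], 0
    for d in output_dims:
        parts.append(Y[:, start:start + d])
        start += d
    return parts, start  # start equals Q, the value set_data returns

Y = np.arange(12).reshape(3, 4)      # N = 3 points, R = 4 response columns
parts, Q = split_outputs(Y, [1, 2])  # two models with 1 and 2 outputs: Q = 3
```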

@property
def models(self):
"""
The GPflow models representing our beliefs of the optimization problem.

:return: list of GPflow models
"""
return self._models

@property
def data(self):
"""
Property for accessing the training data of the models.
The training data of the models.

Corresponds to the input data X, which is the same for every model,
and the column-wise concatenation of the Y data over all models

:return: X, Y tensors (if in tf_mode) or X, Y numpy arrays.
:return: tuple X, Y of tensors (if in tf_mode) or numpy arrays.
"""
if self._tf_mode:
return self.models[0].X, tf.concat(list(map(lambda model: model.Y, self.models)), 1)
@@ -153,32 +165,39 @@ def constraint_indices(self):
def objective_indices(self):
"""
Method returning the indices of the model outputs which are objective functions.
By default all outputs are objectives

By default all outputs are objectives.

:return: indices to the objectives, size R
"""
return np.setdiff1d(np.arange(self.data[1].shape[1]), self.constraint_indices())

def feasible_data_index(self):
"""
Returns a boolean array indicating which data points are considered feasible (according to the acquisition
function(s) ) and which not.
By default all data is considered feasible
:return: boolean ndarray, N

By default all data is considered feasible.

:return: logical indices to the feasible data points, size N
"""
return np.ones(self.data[0].shape[0], dtype=bool)

def setup(self):
"""
Method triggered after calling set_data().

Override for pre-calculation of quantities used later in
the evaluation of the acquisition function for candidate points
Pre-calculation of quantities used later in the evaluation of the acquisition function for candidate points.

Contributor Author

It is explained as an override; for what reason would you want to call setup yourself? If none, we might make it private (déjà vu, did we have that discussion before?)

Member

Hmm, this could indeed be made private as it is now called from set_data. But I don't think it is very nice to present an interface for users to implement where the common practice is to override a protected method.

Automatically triggered by :meth:`~.Acquisition.set_data`.
"""
pass

@AutoFlow((float_type, [None, None]))
def evaluate_with_gradients(self, Xcand):
"""
AutoFlow method to compute the acquisition scores for candidates, also returns the gradients.

:return: acquisition scores, size N x 1
the gradients of the acquisition scores, size N x D
"""
acq = self.build_acquisition(Xcand)
return acq, tf.gradients(acq, [Xcand], name="acquisition_gradient")[0]
@@ -187,6 +206,8 @@ def evaluate_with_gradients(self, Xcand):
def evaluate(self, Xcand):
"""
AutoFlow method to compute the acquisition scores for candidates, without returning the gradients.

:return: acquisition scores, size N x 1
"""
return self.build_acquisition(Xcand)

@@ -198,7 +219,6 @@ def __add__(self, other):
>>> a2 = GPflowOpt.acquisition.ProbabilityOfFeasibility(m2)
>>> type(a1 + a2)
<type 'GPflowOpt.acquisition.AcquisitionSum'>

"""
if isinstance(other, AcquisitionSum):
return AcquisitionSum([self] + other.operands.sorted_params)
@@ -212,7 +232,6 @@ def __mul__(self, other):
>>> a2 = GPflowOpt.acquisition.ProbabilityOfFeasibility(m2)
>>> type(a1 * a2)
<type 'GPflowOpt.acquisition.AcquisitionProduct'>

"""
if isinstance(other, AcquisitionProduct):
return AcquisitionProduct([self] + other.operands.sorted_params)
@@ -221,12 +240,11 @@ def __mul__(self, other):

class AcquisitionAggregation(Acquisition):
"""
Special acquisition implementation for aggregating multiple others, using a TensorFlow reduce operation.
Aggregates multiple acquisition functions, using a TensorFlow reduce operation.
"""

def __init__(self, operands, oper):
"""
Constructor
:param operands: list of acquisition objects
:param oper: a tf.reduce operation (e.g., tf.reduce_sum) for aggregating the returned scores of each operand.
"""
@@ -310,11 +328,12 @@ def __mul__(self, other):

class MCMCAcquistion(AcquisitionSum):
"""
Acquisition object to apply MCMC over the hyperparameters of the models. The models of the acquisition object passed
into an object of this class is optimized with MLE, and then sampled with HMC. These hyperparameter samples are then
set in copies of the acquisition.
Apply MCMC over the hyperparameters of an acquisition function (= over the hyperparameters of the contained models).

The models passed into an object of this class are optimized with MLE, and then further sampled with HMC.
These hyperparameter samples are then set in copies of the acquisition.

To compute the acquisition, the predictions of the acquisition copies are averaged.
For evaluating the underlying acquisition function, the predictions of the acquisition copies are averaged.
"""
def __init__(self, acquisition, n_slices, **kwargs):
assert isinstance(acquisition, Acquisition)
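The averaging described above can be sketched without GPflow: evaluate the acquisition once per hyperparameter sample and take the mean. mcmc_acquisition and the toy acq below are hypothetical illustrations, not the MCMCAcquistion implementation:

```python
import numpy as np

def mcmc_acquisition(acq_fn, hyper_samples, Xcand):
    # Marginalize the acquisition over hyperparameter samples by averaging
    # the per-sample scores, which is what summing the acquisition copies
    # and dividing by the number of copies computes.
    scores = np.stack([acq_fn(theta, Xcand) for theta in hyper_samples])
    return scores.mean(axis=0)

# Toy acquisition whose score scales linearly with a single hyperparameter:
acq = lambda theta, X: theta * np.sum(X ** 2, axis=1)
X = np.array([[1.0, 0.0], [0.0, 2.0]])
avg = mcmc_acquisition(acq, hyper_samples=[0.5, 1.0, 1.5], Xcand=X)
```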
7 changes: 5 additions & 2 deletions GPflowOpt/acquisition/ei.py
@@ -45,14 +45,17 @@ class ExpectedImprovement(Acquisition):
}

This acquisition function is the expectation of the improvement over the current best observation
w.r.t. the predictive distribution. The definition is closely related to the Probability of Improvement,
w.r.t. the predictive distribution. The definition is closely related to the :class:`.ProbabilityOfImprovement`,
but adds a multiplication with the improvement w.r.t the current best observation to the integral.

.. math::
\\alpha(\\mathbf x_{\\star}) = \\int \\max(f_{\\min} - f_{\\star}, 0) \\, p( f_{\\star}\\,|\\, \\mathbf x, \\mathbf y, \\mathbf x_{\\star} ) \\, d f_{\\star}
"""

def __init__(self, model):
"""
:param model: GPflow model (single output) representing our belief of the objective
"""
super(ExpectedImprovement, self).__init__(model)
assert (isinstance(model, Model))
self.fmin = DataHolder(np.zeros(1))
@@ -74,4 +77,4 @@ def build_acquisition(self, Xcand):
normal = tf.contrib.distributions.Normal(candidate_mean, tf.sqrt(candidate_var))
t1 = (self.fmin - candidate_mean) * normal.cdf(self.fmin)
t2 = candidate_var * normal.prob(self.fmin)
return tf.add(t1, t2, name=self.__class__.__name__)
return tf.add(t1, t2, name=self.__class__.__name__)
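The integral in the class docstring has the well-known closed form that the t1/t2 terms of build_acquisition compute. A minimal plain-Python sketch (not the GPflowOpt implementation, which runs in TensorFlow):

```python
import math

def normal_cdf(z):
    # Standard normal CDF via the error function.
    return 0.5 * (1.0 + math.erf(z / math.sqrt(2.0)))

def normal_pdf(z):
    # Standard normal density.
    return math.exp(-0.5 * z * z) / math.sqrt(2.0 * math.pi)

def expected_improvement(mean, var, fmin):
    # Closed form: (fmin - mean) * Phi(z) + sigma * phi(z) with
    # z = (fmin - mean) / sigma. This matches t1 + t2 above, since
    # var * N(fmin | mean, sigma^2) equals sigma * phi(z).
    sigma = math.sqrt(var)
    z = (fmin - mean) / sigma
    return (fmin - mean) * normal_cdf(z) + sigma * normal_pdf(z)

ei = expected_improvement(mean=0.0, var=1.0, fmin=0.0)  # phi(0), about 0.3989
```

Lowering fmin below the predictive mean shrinks the score; raising it grows the score, as expected for an improvement-based criterion.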
4 changes: 4 additions & 0 deletions GPflowOpt/acquisition/lcb.py
@@ -27,6 +27,10 @@ class LowerConfidenceBound(Acquisition):
"""

def __init__(self, model, sigma=2.0):
"""
:param model: GPflow model (single output) representing our belief of the objective
:param sigma: see the formula above; the higher the value, the more exploration
"""
super(LowerConfidenceBound, self).__init__(model)
self.sigma = sigma

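Assuming the usual definition referenced by the class formula (predictive mean minus sigma standard deviations), the score can be sketched in NumPy; this is an illustrative sketch, not the GPflowOpt build_acquisition:

```python
import numpy as np

def lower_confidence_bound(mean, var, sigma=2.0):
    # LCB score per candidate: mean minus sigma predictive standard deviations.
    # A larger sigma widens the bound and favours exploration of uncertain regions.
    return mean - sigma * np.sqrt(var)

scores = lower_confidence_bound(np.array([1.0, 2.0]), np.array([4.0, 0.25]), sigma=2.0)
```

With minimization, the candidate with the lowest score (here the first, driven by its large variance) is preferred.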
43 changes: 20 additions & 23 deletions GPflowOpt/acquisition/pof.py
@@ -29,33 +29,29 @@ class ProbabilityOfFeasibility(Acquisition):
Bayesian Optimization with black-box expensive constraints.

Key reference:

::

@article{parr2012infill,
title={Infill sampling criteria for surrogate-based optimization with constraint handling},
author={Parr, JM and Keane, AJ and Forrester, Alexander IJ and Holden, CME},
journal={Engineering Optimization},
volume={44},
number={10},
pages={1147--1166},
year={2012},
publisher={Taylor & Francis}
}

The acquisition function measures the probability of the latent function being smaller than 0 for a candidate point.

@article{Schonlau:1997,
title={Computer experiments and global optimization},
author={Schonlau, Matthias},
year={1997},
publisher={University of Waterloo}
}

The acquisition function measures the probability of the latent function
being smaller than a threshold for a candidate point.

.. math::
\\alpha(\\mathbf x_{\\star}) = \\int_{-\\infty}^{0} \\, p(f_{\\star}\\,|\\, \\mathbf x, \\mathbf y, \\mathbf x_{\\star} ) \\, d f_{\\star}
"""

def __init__(self, model, threshold=0.0, minimum_pof=0.5):
"""

:param model: GPflow model (single output) for computing the PoF
:param threshold: threshold value. Observed values lower than this value are considered valid
:param minimum_pof: minimum pof score required for a point to be valid. For more information, see docstring
of feasible_data_index
:param model: GPflow model (single output) representing our belief of the constraint
:param threshold: Observed values lower than the threshold are considered valid
:param minimum_pof: minimum pof score required for a point to be valid.
For more information, see docstring of feasible_data_index
"""
super(ProbabilityOfFeasibility, self).__init__(model)
self.threshold = threshold
@@ -66,18 +62,19 @@ def constraint_indices(self):

def feasible_data_index(self):
"""
Returns a boolean array indicating which points are feasible (True) and which are not (False)
Returns a boolean array indicating which points are feasible (True) and which are not (False).

Answering the question *which points are feasible?* is slightly troublesome in case noise is present.
Directly relying on the noisy data and comparing it to self.threshold does not make much sense.

Instead, we rely on the model belief. More specifically, we evaluate the PoF (score between 0 and 1).
Instead, we rely on the model belief using the PoF (a probability between 0 and 1).
As the implementation of the PoF corresponds to the cdf of the (normal) predictive distribution in
a point evaluated at the threshold, requiring a minimum pof of 0.5 implies the mean of the predictive
distribution is below the threshold, hence it is marked as feasible. A minimum pof of 0 marks all points valid.
Setting it to 1 results in all invalid.
:return: boolean ndarray, size N

:return: boolean ndarray (size N)
"""
pred = self.evaluate(self.data[0])
return pred.ravel() > self.minimum_pof
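As described above, the PoF is the cdf of the (normal) predictive distribution evaluated at the threshold, and feasibility is decided by comparing it to minimum_pof. A minimal plain-Python sketch, not the GPflowOpt implementation:

```python
import math

def probability_of_feasibility(mean, var, threshold=0.0):
    # cdf of the normal predictive distribution N(mean, var) at the threshold:
    # the probability that the latent constraint value falls below it.
    return 0.5 * (1.0 + math.erf((threshold - mean) / math.sqrt(2.0 * var)))

pof = probability_of_feasibility(mean=0.0, var=1.0, threshold=0.0)  # exactly 0.5
feasible = pof > 0.5  # minimum_pof = 0.5: feasible iff the mean is below the threshold
```

A predictive mean exactly at the threshold gives a PoF of 0.5, which is why minimum_pof = 0.5 marks points whose mean is below the threshold as feasible.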

3 changes: 3 additions & 0 deletions GPflowOpt/acquisition/poi.py
@@ -32,6 +32,9 @@ class ProbabilityOfImprovement(Acquisition):
"""

def __init__(self, model):
"""
:param model: GPflow model (single output) representing our belief of the objective
"""
super(ProbabilityOfImprovement, self).__init__(model)
self.fmin = DataHolder(np.zeros(1))
self.setup()