Move multioutput
awav committed Oct 16, 2018
1 parent 6761391 commit 99e31e6
Showing 20 changed files with 183 additions and 1,196 deletions.
10 changes: 10 additions & 0 deletions gpflow/conditionals/__init__.py
@@ -0,0 +1,10 @@

from .dispatch import conditional, sample_conditional

from . import conditionals
from . import mo_conditionals

from . import sample_conditionals
from . import mo_sample_conditionals

from .uncertain_conditionals import uncertain_conditional
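
The reorganised package keeps the generic dispatchers in `dispatch.py`, while `conditionals.py`, `mo_conditionals.py` and their sampling counterparts register concrete implementations for particular feature/kernel combinations. A minimal sketch of that registration pattern using `multipledispatch` directly; the class names and the `conditional` signature below are illustrative placeholders, not the actual GPflow types at this commit.

from multipledispatch import Dispatcher

# Placeholder stand-ins for the real GPflow types.
class InducingPoints: pass
class SeparateIndependentMof(InducingPoints): pass
class Kernel: pass
class SeparateIndependentMok(Kernel): pass

# dispatch.py would own the dispatcher objects ...
conditional = Dispatcher("conditional")

# ... and conditionals.py / mo_conditionals.py would register implementations on them.
@conditional.register(object, InducingPoints, Kernel, object)
def _conditional_single_output(Xnew, feat, kern, f, *, full_cov=False, white=False):
    ...  # base-case single-output conditional

@conditional.register(object, SeparateIndependentMof, SeparateIndependentMok, object)
def _conditional_multi_output(Xnew, feat, kern, f, *, full_cov=False, white=False):
    ...  # multi-output conditional, one independent latent GP per output

# Calling conditional(Xnew, feat, kern, f) then selects the most specific registration.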
331 changes: 32 additions & 299 deletions gpflow/conditionals/conditionals.py

Large diffs are not rendered by default.

10 changes: 0 additions & 10 deletions gpflow/dispatch.py

This file was deleted.

2 changes: 1 addition & 1 deletion gpflow/expectations/expectations.py
@@ -60,7 +60,7 @@ def quadrature_expectation(p, obj1, obj2=None, num_gauss_hermite_points=None):
if isinstance(p, tuple):
assert len(p) == 2

if p[1].shape.ndims == 2:
if p[1].shape.ndims == 2:
p = DiagonalGaussian(*p)
elif p[1].shape.ndims == 3:
p = Gaussian(*p)
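
The ndims check above routes a (mean, covariance) tuple to the right wrapper: a rank-2 covariance array holds per-point variances, a rank-3 array holds full covariance matrices. A small illustration of the two shapes it distinguishes, assuming p = (mean, cov) with mean of shape N x D; the array contents are arbitrary.

import numpy as np

N, D = 4, 2
mean = np.zeros((N, D))
diag_cov = np.ones((N, D))            # ndims == 2 -> DiagonalGaussian(mean, diag_cov)
full_cov = np.stack([np.eye(D)] * N)  # ndims == 3, N x D x D -> Gaussian(mean, full_cov)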
4 changes: 4 additions & 0 deletions gpflow/features/__init__.py
@@ -0,0 +1,4 @@
from .features import (InducingFeature, InducingPoints, InducingPointsBase,
Multiscale)
from .mo_features import (MixedKernelSharedMof, Mof, SeparateIndependentMof,
SharedIndependentMof)
66 changes: 6 additions & 60 deletions gpflow/features/features.py
@@ -1,4 +1,4 @@
# Copyright 2017 st--, Mark van der Wilk
# Copyright 2017 GPflow
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -19,9 +19,9 @@
import tensorflow as tf

from . import kernels, settings
from ..base import Module, Parameter
from ..base import Module, Parameter, positive
from ..util import create_logger, default_float
from ..dispatch import dispatch


logger = create_logger()

@@ -37,7 +37,7 @@ def __len__(self) -> int:
Returns the number of features, relevant for example to determine the
size of the variational distribution.
"""
raise NotImplementedError()
pass


class InducingPointsBase(InducingFeature):
@@ -59,16 +59,6 @@ def __len__(self):
class InducingPoints(InducingPointsBase):
pass

@dispatch(InducingPoints, kernels.Kernel)
def Kuu(feat, kern, *, jitter=0.0):
Kzz = kern(feat.Z)
Kzz += jitter * tf.eye(len(feat), dtype=default_float())
return Kzz

@dispatch(InducingPoints, kernels.Kernel, object)
def Kuf(feat, kern, Xnew):
return kern(feat.Z, Xnew)


class Multiscale(InducingPointsBase):
"""
@@ -83,13 +73,11 @@ class Multiscale(InducingPointsBase):
booktitle = {Advances in Neural Information Processing Systems 22},
year = {2009},
}
"""

def __init__(self, Z, scales):
super().__init__(Z)
self.scales = Parameter(scales,
transform=transforms.positive) # Multi-scale feature widths (std. dev. of Gaussian)
# Multi-scale feature widths (std. dev. of Gaussian)
self.scales = Parameter(scales, transform=positive())
if self.Z.shape != scales.shape:
raise ValueError("Input locations `Z` and `scales` must have the same shape.") # pragma: no cover

@@ -101,45 +89,3 @@ def _cust_square_dist(A, B, sc):
"""
return tf.reduce_sum(tf.square((tf.expand_dims(A, 1) - tf.expand_dims(B, 0)) / sc), 2)


@dispatch(Multiscale, kernels.RBF, object)
def Kuf(feat, kern, Xnew):
Xnew, _ = kern.slice(Xnew, None)
Zmu, Zlen = kern.slice(feat.Z, feat.scales)
idlengthscales = kern.lengthscales + Zlen
d = feat._cust_square_dist(Xnew, Zmu, idlengthscales)
Kuf = tf.transpose(kern.variance * tf.exp(-d / 2) *
tf.reshape(tf.reduce_prod(kern.lengthscales / idlengthscales, 1),
(1, -1)))
return Kuf

@dispatch(Multiscale, kernels.RBF)
def Kuu(feat, kern, *, jitter=0.0):
Zmu, Zlen = kern.slice(feat.Z, feat.scales)
idlengthscales2 = tf.square(kern.lengthscales + Zlen)
sc = tf.sqrt(
tf.expand_dims(idlengthscales2, 0) + tf.expand_dims(idlengthscales2, 1) - tf.square(
kern.lengthscales))
d = feat._cust_square_dist(Zmu, Zmu, sc)
Kzz = kern.variance * tf.exp(-d / 2) * tf.reduce_prod(kern.lengthscales / sc, 2)
Kzz += jitter * tf.eye(len(feat), dtype=default_float())
return Kzz


def inducingpoint_wrapper(feat, Z):
"""
Models which used to take only Z can now pass `feat` and `Z` to this method. This method will
check for consistency and return the correct feature. This allows backwards compatibility
for the methods.
"""
if feat is not None and Z is not None:
raise ValueError("Cannot pass both an InducingFeature instance and Z values") # pragma: no cover
elif feat is None and Z is None:
raise ValueError("You must pass either an InducingFeature instance or Z values") # pragma: no cover
elif Z is not None:
feat = InducingPoints(Z)
elif isinstance(feat, np.ndarray):
feat = InducingPoints(feat)
else:
assert isinstance(feat, InducingFeature) # pragma: no cover
return feat
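
For reference, the Multiscale/RBF cross-covariance computed by the `Kuf` dispatch above, transcribed into self-contained NumPy; the function and variable names are local to this sketch and not part of the GPflow API.

import numpy as np

def multiscale_rbf_Kuf(Z, scales, Xnew, lengthscales, variance):
    """Kuf[m, n] = variance * prod_d(l_d / (l_d + s_md))
                   * exp(-0.5 * sum_d (x_nd - z_md)^2 / (l_d + s_md)^2)."""
    idlengthscales = lengthscales + scales                                   # (M, D) per-point widths
    diff = (Xnew[None, :, :] - Z[:, None, :]) / idlengthscales[:, None, :]   # (M, N, D)
    d = np.sum(diff ** 2, axis=-1)                                           # (M, N) scaled square distance
    scale = np.prod(lengthscales / idlengthscales, axis=-1)                  # (M,)
    return variance * np.exp(-0.5 * d) * scale[:, None]                      # (M, N)

# Example shapes: M = 20 inducing points, N = 100 inputs, D = 2 input dimensions.
Z, Xnew = np.random.rand(20, 2), np.random.rand(100, 2)
Kuf = multiscale_rbf_Kuf(Z, scales=np.full_like(Z, 0.1), Xnew=Xnew,
                         lengthscales=np.array([0.5, 0.5]), variance=1.0)
assert Kuf.shape == (20, 100)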
4 changes: 3 additions & 1 deletion gpflow/kernels/__init__.py
@@ -1,9 +1,11 @@
from .base import Combination, Kernel, Product, Sum
from .linears import Linear, Polynomial
from .misc import ArcCosine, Coregion, Periodic
from .mo_kernels import (Mok, SeparateIndependentMok, SeparateMixedMok,
SharedIndependentMok)
from .statics import Constant, Static, White
from .stationaries import (RBF, Cosine, Exponential, Matern12, Matern32,
Matern52, RationalQuadratic, Stationary)

Bias = Constant
SquaredExponential = RBF
SquaredExponential = RBF
106 changes: 106 additions & 0 deletions gpflow/kernels/mo_kernels.py
@@ -0,0 +1,106 @@
# Copyright 2018 GPflow authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import abc
import tensorflow as tf

from .kernels import Kernel, Combination
from ..base import Parameter


class Mok(metaclass=abc.ABCMeta):
pass


class SharedIndependentMok(Kernel, Mok):
"""
- Shared: we use the same kernel for each latent GP
- Independent: Latents are uncorrelated a priori.
Note: this class is created only for testing and comparison purposes.
Use `gpflow.kernels` instead for more efficient code.
"""
def __init__(self, kern: Kernel, output_dimensionality, name=None):
super().__init__(name)
self.kern = kern
self.P = output_dimensionality

def K(self, X, X2=None, full_output_cov=True):
K = self.kern(X, X2) # N x N2
if full_output_cov:
Ks = tf.tile(K[..., None], [1, 1, self.P]) # N x N2 x P
return tf.transpose(tf.matrix_diag(Ks), [0, 2, 1, 3]) # N x P x N2 x P
else:
return tf.tile(K[None, ...], [self.P, 1, 1]) # P x N x N2

def Kdiag(self, X, full_output_cov=True):
K = self.kern(X) # N
Ks = tf.tile(K[:, None], [1, self.P]) # N x P
return tf.matrix_diag(Ks) if full_output_cov else Ks # N x P x P or N x P


class SeparateIndependentMok(Combination, Mok):
"""
- Separate: we use a different kernel for each output (latent GP)
- Independent: Latents are uncorrelated a priori.
"""
def __init__(self, kernels, name=None):
super().__init__(kernels, name)

def K(self, X, X2=None, full_output_cov=True):
if full_output_cov:
Kxxs = tf.stack([k(X, X2) for k in self.kernels], axis=2) # N x N2 x P
return tf.transpose(tf.matrix_diag(Kxxs), [0, 2, 1, 3]) # N x P x N2 x P
else:
return tf.stack([k(X, X2) for k in self.kernels], axis=0) # P x N x N2

def Kdiag(self, X, full_output_cov=False):
stacked = tf.stack([k(X) for k in self.kernels], axis=1) # N x P
return tf.matrix_diag(stacked) if full_output_cov else stacked # N x P x P or N x P


class SeparateMixedMok(Combination, Mok):
"""
Linear mixing of the latent GPs to form the output
"""

def __init__(self, kernels, W, name=None):
super().__init__(kernels, name)
self.W = Parameter(W) # P x L

def Kgg(self, X, X2):
return tf.stack([k(X, X2) for k in self.kernels], axis=0) # L x N x N2

def K(self, X, X2=None, full_output_cov=True):
Kxx = self.Kgg(X, X2) # L x N x N2
KxxW = Kxx[None, :, :, :] * self.W[:, :, None, None] # P x L x N x N2
if full_output_cov:
# return tf.einsum('lnm,kl,ql->nkmq', Kxx, self.W, self.W)
WKxxW = tf.tensordot(self.W, KxxW, [[1], [1]]) # P x P x N x N2
return tf.transpose(WKxxW, [2, 0, 3, 1]) # N x P x N2 x P
else:
# return tf.einsum('lnm,kl,kl->knm', Kxx, self.W, self.W)
return tf.reduce_sum(self.W[:, :, None, None] * KxxW, [1]) # P x N x N2

def Kdiag(self, X, full_output_cov=True):
K = tf.stack([k(X) for k in self.kernels], axis=1) # N x L
if full_output_cov:
# Can currently not use einsum due to unknown shape from `tf.stack()`
# return tf.einsum('nl,kl,ql->nkq', K, self.W, self.W)  # N x P x P
Wt = tf.transpose(self.W) # L x P
return tf.reduce_sum(K[:, :, None, None] * Wt[None, :, :, None] * Wt[None, :, None, :], axis=1) # N x P x P
else:
# return tf.einsum('nl,kl,kl->nk', K, self.W, self.W)  # N x P
return tf.matmul(K, self.W ** 2.0, transpose_b=True) # N x L * L x P -> N x P
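
A self-contained NumPy check of the `SeparateMixedMok` algebra above: the full output covariance mixes the per-latent covariances with `W` on both sides, and the `reduce_sum`/`matmul` forms used in `Kdiag` agree with their einsum equivalents. Shapes follow the comments in the diff; all names are local to this sketch.

import numpy as np

L, P, N = 3, 2, 5
W = np.random.randn(P, L)                          # mixing matrix, outputs f = W @ latents g
Kgg = np.random.rand(L, N, N)                      # per-latent covariances, L x N x N

# K(..., full_output_cov=True): N x P x N x P
KxxW = Kgg[None, :, :, :] * W[:, :, None, None]    # P x L x N x N
WKxxW = np.tensordot(W, KxxW, axes=[[1], [1]])     # P x P x N x N
K_full = np.transpose(WKxxW, [2, 0, 3, 1])         # N x P x N x P
assert np.allclose(K_full, np.einsum('lnm,kl,ql->nkmq', Kgg, W, W))

# Kdiag(..., full_output_cov=True): N x P x P, and diagonal-only: N x P
Kd = np.stack([np.diag(Kgg[l]) for l in range(L)], axis=1)      # N x L
Wt = W.T                                                        # L x P
Kdiag_full = np.sum(Kd[:, :, None, None] * Wt[None, :, :, None] * Wt[None, :, None, :], axis=1)
assert np.allclose(Kdiag_full, np.einsum('nl,kl,ql->nkq', Kd, W, W))
Kdiag = Kd @ (W ** 2).T                                         # N x P
assert np.allclose(Kdiag, np.einsum('nl,kl,kl->nk', Kd, W, W))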
1 change: 1 addition & 0 deletions gpflow/likelihoods/likelihoods.py
@@ -30,6 +30,7 @@ def inv_probit(x):
return 0.5 * (1.0 + tf.erf(x / np.sqrt(2.0))) * (1 - 2 * jitter) + jitter



class Likelihood(Module):
def __init__(self):
super().__init__()
3 changes: 1 addition & 2 deletions gpflow/models/gplvm.py
@@ -116,7 +116,6 @@ def __init__(self, X_mean, X_var, Y, kern, M, Z=None, X_prior_mean=None, X_prior
assert self.X_prior_var.shape[0] == self.num_data
assert self.X_prior_var.shape[1] == self.num_latent

@params_as_tensors
def _build_likelihood(self):
"""
Construct a tensorflow function to compute the bound on the marginal
@@ -162,7 +161,7 @@ def _build_likelihood(self):
bound -= KL
return bound

@params_as_tensors

def _build_predict(self, Xnew, full_cov=False):
"""
Compute the mean and variance of the latent function at some new points.
4 changes: 2 additions & 2 deletions gpflow/models/gpmc.py
@@ -68,7 +68,7 @@ def compile(self, session=None):

return super(GPMC, self).compile(session=session)

@params_as_tensors

def _build_likelihood(self):
"""
Construct a tf function to compute the likelihood of a general GP
@@ -84,7 +84,7 @@ def _build_likelihood(self):

return tf.reduce_sum(self.likelihood.logp(F, self.Y))

@params_as_tensors

def _build_predict(self, Xnew, full_cov=False):
"""
Xnew is a data matrix, point at which we want to predict
4 changes: 2 additions & 2 deletions gpflow/models/sgpmc.py
@@ -74,7 +74,7 @@ def __init__(self, X, Y, kern, likelihood, feat=None,
self.V = Parameter(np.zeros((len(self.feature), self.num_latent)))
self.V.prior = Gaussian(0., 1.)

@params_as_tensors

def _build_likelihood(self):
"""
This function computes the optimal density for v, q*(v), up to a constant
@@ -83,7 +83,7 @@ def _build_likelihood(self):
fmean, fvar = self._build_predict(self.X, full_cov=False)
return tf.reduce_sum(self.likelihood.variational_expectations(fmean, fvar, self.Y))

@params_as_tensors

def _build_predict(self, Xnew, full_cov=False, full_output_cov=False):
"""
Xnew is a data matrix, point at which we want to predict
10 changes: 5 additions & 5 deletions gpflow/models/sgpr.py
@@ -50,7 +50,7 @@ class SGPRUpperMixin(object):
"""

@autoflow()
@params_as_tensors

def compute_upper_bound(self):
num_data = tf.cast(tf.shape(self.Y)[0], default_float())

@@ -117,7 +117,7 @@ def __init__(self, X, Y, kern, feat=None, mean_function=None, Z=None, **kwargs):
self.feature = features.inducingpoint_wrapper(feat, Z)
self.num_data = X.shape[0]

@params_as_tensors

def _build_likelihood(self):
"""
Construct a tensorflow function to compute the bound on the marginal
@@ -155,7 +155,7 @@ def _build_likelihood(self):

return bound

@params_as_tensors

def _build_predict(self, Xnew, full_cov=False):
"""
Compute the mean and variance of the latent function at some new points
@@ -225,7 +225,7 @@ def __init__(self, X, Y, kern, feat=None, mean_function=None, Z=None, **kwargs):
self.num_data = X.shape[0]
self.num_latent = Y.shape[1]

@params_as_tensors

def _build_common_terms(self):
num_inducing = len(self.feature)
err = self.Y - self.mean_function(self.X) # size N x R
@@ -292,7 +292,7 @@ def _build_likelihood(self):

return mahalanobisTerm + logNormalizingTerm * self.num_latent

@params_as_tensors

def _build_predict(self, Xnew, full_cov=False):
"""
Compute the mean and variance of the latent function at some new points
