Merged
Changes from all commits
21 commits
d125c08
Started WIP on refactoring of BCRB support.
cgranade Feb 23, 2014
f67d746
Added constant outcomes Fisher info calc. Seems to work?
cgranade Feb 23, 2014
9d115d3
Continued work, skeleton of DifferentiableBinomialModel.
cgranade Feb 23, 2014
cf8c1ad
Added fisher_information impl for DifferentiableBinomialModel.
cgranade Feb 24, 2014
177998b
Started refactoring SMCUpdaterBCRB to use fisher_information.
cgranade Feb 24, 2014
0270381
Fixed definition of initial prior BCRB.
cgranade Feb 24, 2014
3d70705
Added initial_bim as kwarg.
cgranade Feb 24, 2014
a704325
Very minor bugfixes.
cgranade Feb 25, 2014
0431aaa
Merge remote-tracking branch 'origin/master' into feature-fisher
csferrie Feb 26, 2014
3c2de2b
Bug fixes in distributions
csferrie Feb 26, 2014
dc262c7
Merge branch 'master' into feature-fisher
cgranade Feb 27, 2014
6d7c4f1
Fixed very, very rare bug in SMCUpdater.sample.
cgranade Feb 28, 2014
a2cd5be
Small bug fix in MV normal dist.
csferrie Feb 28, 2014
fecb364
Fixed critical bug in posterior sampling.
cgranade Mar 5, 2014
204bbac
Merge branch 'feature-fisher' of github.com:csferrie/python-qinfer in…
cgranade Mar 5, 2014
43fe895
Merge branch 'master' into feature-fisher
cgranade Mar 5, 2014
de27ce9
Merge branch 'master' into feature-fisher
cgranade Mar 5, 2014
788f942
Merge branch 'master' into feature-fisher
cgranade Mar 5, 2014
c51ae61
Merge branch 'master' into feature-fisher
cgranade Mar 5, 2014
9c5ddd2
Merge branch 'master' into feature-fisher
cgranade Mar 7, 2014
10c4e44
Fixed grad_log_pdf for MV normal
csferrie Mar 13, 2014
79 changes: 77 additions & 2 deletions src/qinfer/abstract_model.py
@@ -288,6 +288,81 @@ class DifferentiableModel(Model):
__metaclass__ = abc.ABCMeta # Needed in any class that has abstract methods.

@abc.abstractmethod
def grad_log_likelihood(self, outcome, modelparams, expparams):
#TODO document
def score(self, outcome, modelparams, expparams):
r"""
Returns the score of this likelihood function, defined as:

.. math::

q(d, \vec{x}; \vec{e}) = \vec{\nabla}_{\vec{x}} \log \Pr(d | \vec{x}; \vec{e}).

The result is represented as a four-index tensor
``score[idx_modelparam, idx_outcome, idx_model, idx_experiment]``.
The left-most index may be suppressed for single-parameter models.
"""
pass
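
# A minimal sketch of a concrete score obeying the index convention
# documented above (illustration only, independent of the models in this
# package): a single-parameter, two-outcome model with Pr(1 | x) = x, for
# which d/dx log Pr(1 | x) = 1 / x and d/dx log Pr(0 | x) = -1 / (1 - x).
import numpy as np

def coin_score(outcomes, modelparams, expparams):
    x = modelparams[:, 0]                                      # (n_models,)
    per_outcome = np.where(
        outcomes[:, np.newaxis] == 1, 1.0 / x, -1.0 / (1.0 - x)
    )                                                          # (n_outcomes, n_models)
    # Add the model-parameter and experiment axes to obtain
    # score[idx_modelparam, idx_outcome, idx_model, idx_experiment].
    return np.tile(per_outcome[np.newaxis, :, :, np.newaxis],
                   (1, 1, 1, expparams.shape[0]))

# coin_score(np.array([0, 1]), np.array([[0.3], [0.7]]), np.zeros((3,))).shape
# evaluates to (1, 2, 2, 3).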

def fisher_information(self, modelparams, expparams):
"""
Returns the covariance of the score taken over possible outcomes,
known as the Fisher information.

The result is represented as the four-index tensor
``fisher[idx_modelparam_i, idx_modelparam_j, idx_model, idx_experiment]``,
which gives the Fisher information matrix for each model vector
and each experiment vector.

.. note::

The default implementation of this method calls
:meth:`~DifferentiableModel.score()` for each possible outcome,
which can be quite slow. If possible, overriding this method can
give significant speed advantages.
"""

# NOTE: this is split into two cases, one for models with a constant
# number of outcomes and one for models where the number of outcomes
# varies by experiment. The latter requires a loop over experiments,
# which is much slower.
# FIXME: completely untested!
if self.is_n_outcomes_constant:
outcomes = np.arange(self.n_outcomes(expparams))
L = self.likelihood(outcomes, modelparams, expparams)
scores = self.score(outcomes, modelparams, expparams)

assert len(scores.shape) in (3, 4)

if len(scores.shape) == 3:
scores = scores[np.newaxis, :, :, :]

# Note that E[score] = 0 by regularity assumptions, so we only
# need the expectation over the outer product.
return np.einsum("ome,iome,jome->ijme",
L, scores, scores
)
else:
# Indexing will be a major pain here, so we need to start
# by making an empty array, so that index errors will be raised
# when (not if!) we make mistakes.
fisher = np.empty((
self.n_modelparams, self.n_modelparams,
modelparams.shape[0], expparams.shape[0]
))

# Now we loop over experiments, since we cannot vectorize the
# expectation value over data.
for idx_experiment, experiment in enumerate(expparams):
experiment = experiment.reshape((1,))
n_o = self.n_outcomes(experiment)

outcomes = np.arange(n_o)
L = self.likelihood(outcomes, modelparams, experiment)
scores = self.score(outcomes, modelparams, experiment)

# Sum out the length-one experiment axis so that the right-hand
# side matches the three-index slice being assigned.
fisher[:, :, :, idx_experiment] = np.einsum("ome,iome,jome->ijm",
L, scores, scores
)

return fisher
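
# A quick self-contained check of the contraction used above (illustration
# only, assuming a coin likelihood Pr(1 | x) = x): the outcome-weighted outer
# product of scores reproduces the analytic Fisher information
# I(x) = 1 / (x (1 - x)).
import numpy as np

x = np.array([0.2, 0.5, 0.9])                          # three model vectors
L = np.stack([1 - x, x])[:, :, np.newaxis]             # likelihood L[o, m, e], one experiment
scores = np.stack([-1 / (1 - x), 1 / x])[np.newaxis, :, :, np.newaxis]   # score[i, o, m, e]
fi = np.einsum("ome,iome,jome->ijme", L, scores, scores)
np.testing.assert_allclose(fi[0, 0, :, 0], 1 / (x * (1 - x)))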


28 changes: 25 additions & 3 deletions src/qinfer/derived_models.py
@@ -41,7 +41,7 @@
from scipy.stats import binom

from qinfer.utils import binomial_pdf
from qinfer.abstract_model import Model
from qinfer.abstract_model import Model, DifferentiableModel
from qinfer._lib import enum # <- TODO: replace with flufl.enum!
from qinfer.ale import binom_est_error

@@ -218,7 +218,7 @@ def is_n_outcomes_constant(self):

@property
def modelparam_names(self):
return self._model.modelparam_names
return self.decorated_model.modelparam_names

## METHODS ##

@@ -263,7 +263,7 @@ def simulate_experiment(self, modelparams, expparams, repeat=1):
expparams['x'] if self._expparams_scalar else expparams)

dist = binom(
expparams['n_meas'].astype('int64'), # ← Really, NumPy?
expparams['n_meas'].astype('int'), # ← Really, NumPy?
pr1[0, :, :]
)
sample = (
@@ -277,6 +277,28 @@ def simulate_experiment(self, modelparams, expparams, repeat=1):
], axis=0)
return os[0,0,0] if os.size == 1 else os

class DifferentiableBinomialModel(BinomialModel, DifferentiableModel):
"""
Extends :class:`BinomialModel` to take advantage of differentiable
two-outcome models.
"""

def __init__(self, decorated_model):
if not isinstance(decorated_model, DifferentiableModel):
raise TypeError("Decorated model must also be differentiable.")
BinomialModel.__init__(self, decorated_model)

def score(self, outcomes, modelparams, expparams):
raise NotImplementedError("Not yet implemented.")

def fisher_information(self, modelparams, expparams):
# Since the FI simply adds, we can multiply the single-shot
# FI provided by the underlying model by the number of measurements
# that we perform.
two_outcome_fi = self.decorated_model.fisher_information(
modelparams, expparams
)
return two_outcome_fi * expparams['n_meas']
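
# A short numerical illustration (sketch only) of the additivity claim above:
# for a single two-outcome shot with Pr(1 | p) = p, the Fisher information is
# 1 / (p (1 - p)), and n_meas independent shots (a binomial outcome) carry
# exactly n_meas times that information.
import numpy as np
from scipy.stats import binom

def binomial_fisher(n, p):
    # I(p) = sum_k P(k) (d/dp log P(k))^2, with d/dp log P(k) = k/p - (n-k)/(1-p).
    k = np.arange(n + 1)
    score = k / p - (n - k) / (1 - p)
    return np.sum(binom.pmf(k, n, p) * score ** 2)

p, n_meas = 0.3, 40
assert np.isclose(binomial_fisher(n_meas, p), n_meas / (p * (1 - p)))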

## TESTING CODE ###############################################################

14 changes: 9 additions & 5 deletions src/qinfer/distributions.py
@@ -209,20 +209,24 @@ def grad_log_pdf(self, x):
class MultivariateNormalDistribution(Distribution):
def __init__(self, mean, cov):

self.mean = mean
# Flatten the mean first, so we have a strong guarantee about its
# shape.

self.mean = np.array(mean).flatten()
self.cov = cov
self.invcov = la.inv(cov)

@property
def n_rvs(self):
return self.mean.shape[1]
return self.mean.shape[0]

def sample(self):
def sample(self, n=1):

return np.dot(la.sqrtm(self.cov), np.random.randn(self.n_rvs)) + self.mean
return np.einsum("ij,nj->ni", la.sqrtm(self.cov), np.random.randn(n, self.n_rvs)) + self.mean

def grad_log_pdf(self, x):
return -np.dot(self.invcov,(x - self.mean[0]))

return -np.dot(self.invcov,(x - self.mean).transpose()).transpose()
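
# A quick finite-difference check (illustration only) that the gradient
# returned above, -inv(cov) . (x - mean), matches the derivative of the
# multivariate normal log-pdf as computed by scipy:
import numpy as np
from scipy.stats import multivariate_normal

mean = np.array([1.0, -2.0])
cov = np.array([[2.0, 0.3], [0.3, 1.0]])
invcov = np.linalg.inv(cov)
x = np.array([0.5, 0.5])
eps = 1e-6

analytic = -invcov.dot(x - mean)
numeric = np.array([
    (multivariate_normal.logpdf(x + eps * e, mean, cov)
     - multivariate_normal.logpdf(x - eps * e, mean, cov)) / (2 * eps)
    for e in np.eye(2)
])
assert np.allclose(analytic, numeric, atol=1e-5)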


class SlantedNormalDistribution(Distribution):
124 changes: 89 additions & 35 deletions src/qinfer/smc.py
@@ -390,10 +390,10 @@ def n_rvs(self):
def sample(self, n=1):
# TODO: cache this.
cumsum_weights = np.cumsum(self.particle_weights)
return self.particle_locations[cumsum_weights.searchsorted(
return self.particle_locations[np.minimum(cumsum_weights.searchsorted(
np.random.random((n,)),
side='right'
)]
), len(cumsum_weights) - 1)]
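
# A minimal illustration (sketch only) of the edge case that the np.minimum
# clamp above guards against: floating-point error can leave the final entry
# of cumsum(weights) slightly below 1, so a uniform draw above it would make
# searchsorted return an out-of-bounds index.
import numpy as np

w = np.array([0.5, 0.25, 0.25 - 1e-12])       # weights summing to just under 1
cw = np.cumsum(w)
u = np.array([1.0 - 1e-13])                   # a draw that lands above cw[-1]
raw = cw.searchsorted(u, side='right')        # array([3]): one past the last index
safe = np.minimum(raw, len(cw) - 1)           # array([2]): clamped to the last particle
assert safe[0] == 2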

## ESTIMATION METHODS #####################################################

@@ -946,57 +946,111 @@ class SMCUpdaterBCRB(SMCUpdater):
functionality.

Models considered by this class must be differentiable.

In addition to the arguments taken by :class:`SMCUpdater`, this class
takes the following keyword-only arguments:

:param bool adaptive: If `True`, the updater will track both the
non-adaptive and adaptive Bayes Information matrices.
:param initial_bim: If the regularity conditions are not met, then taking
the outer products of gradients over the prior will not give the correct
initial BIM. In such cases, ``initial_bim`` can be set to the correct
BIM corresponding to having done no experiments.
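
Roughly speaking, the non-adaptive quantity tracked by this updater is a
Monte Carlo estimate of the Bayesian information matrix appearing in the
van Trees (Bayesian Cramér-Rao) bound,

.. math::

    J_k = \mathbb{E}_{\pi}\left[
            \vec{\nabla}\log\pi \, (\vec{\nabla}\log\pi)^{\mathrm{T}}
        \right]
        + \sum_{\ell=1}^{k} \mathbb{E}_{\vec{x}\sim\pi}\left[
            I(\vec{x}; \vec{e}_\ell)
        \right],

where :math:`\pi` is the prior and :math:`I(\vec{x}; \vec{e}_\ell)` is the
Fisher information of the :math:`\ell`-th experiment, so that the expected
squared error of any estimator is bounded below by :math:`J_k^{-1}`.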
"""



def __init__(self, *args, **kwargs):
SMCUpdater.__init__(self, *args, **kwargs)
SMCUpdater.__init__(self, *args, **{
key: kwargs[key] for key in kwargs
if key in [
'resampler_a', 'resampler', 'resample_thresh', 'model',
'prior', 'n_particles'
]
})

if not isinstance(self.model, DifferentiableModel):
raise ValueError("Model must be differentiable.")

self.current_bim = np.sum(np.array([
outer_product(self.prior.grad_log_pdf(particle))
for particle in self.particle_locations
]), axis=0) / self.n_particles
# TODO: fix distributions to make grad_log_pdf return the right
# shape, with indices [idx_model, idx_param], so that
# prior.grad_log_pdf(modelparams)[i, k] returns the partial
# derivative with respect to the kth parameter, evaluated at the
# model parameter vector modelparams[i, :].
if 'initial_bim' not in kwargs or kwargs['initial_bim'] is None:
gradients = self.prior.grad_log_pdf(self.particle_locations)
self.current_bim = np.sum(
gradients[:, :, np.newaxis] * gradients[:, np.newaxis, :],
axis=0
) / self.n_particles
else:
self.current_bim = kwargs['initial_bim']

# Also track the adaptive BIM, if we've been asked to.
if "adaptive" in kwargs and kwargs["adaptive"]:
self._track_adaptive = True
# Both the prior- and posterior-averaged BIMs start
# from the prior.
self.adaptive_bim = self.current_bim.copy()
else:
self._track_adaptive = False

# TODO: since we are guaranteed differentiability, and since SMCUpdater is
# now a Distribution subclass representing posterior sampling, write
# a grad_log_pdf for the posterior distribution, perhaps?

def hypothetical_bim(self, expparams):
# E_{prior} E_{data | model, exp} [outer-product of grad-log-likelihood]
like_bim = np.zeros(self.current_bim.shape)

# If there is only 1 model parameter, things can be more easily vectorized
if self.model.n_modelparams == 1:
outcomes = np.arange(self.model.n_outcomes(expparams))
modelparams = self.prior.sample(self.n_particles)
weights = np.ones((self.n_particles,)) / self.n_particles
grad = self.model.grad_log_likelihood(outcomes, modelparams, expparams)**2
L = self.model.likelihood(outcomes, modelparams, expparams)

like_bim = np.sum(weights[:,np.newaxis] * np.sum(grad * L,0))

def _bim(self, modelparams, expparams, modelweights=None):
# TODO: document.
# The rough idea of this function is to take the expectation of the
# Fisher information over some distribution of model parameters,
# here represented by the samples in modelparams.

# NOTE: The signature of this function is a bit odd, but it allows
# us to represent in one function both prior samples of uniform
# weight and weighted samples from a posterior.
# Because it's a bit odd, we make it a private method and expose
# functionality via the prior_bayes_information and
# posterior_bayes_information methods.

# About shapes: the FI we will be averaging over has four indices:
# FI[i, j, m, e], i and j being matrix indices, m being a model index
# and e being an experiment index.
# We will thus want to return an array of shape BI[i, j, e], reducing
# over the model index.
fi = self.model.fisher_information(modelparams, expparams)

# We now either reweight and sum, or sum and divide, based on whether we
# have model weights to consider or not.
if modelweights is None:
# Assume uniform weights.
bim = np.sum(fi, axis=2) / modelparams.shape[0]
else:
for idx_particle in xrange(self.n_particles):
bim = np.einsum("m,ijme->ije", modelweights, fi)

return bim
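
# A small consistency sketch (illustration only): with uniform weights, the
# weighted einsum branch above reduces to the unweighted average used in the
# other branch.
import numpy as np

fi = np.random.rand(2, 2, 5, 3)                # FI[i, j, m, e]: 5 models, 3 experiments
w = np.full(5, 1.0 / 5)
assert np.allclose(np.einsum("m,ijme->ije", w, fi), np.sum(fi, axis=2) / 5)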

modelparams = self.prior.sample()

weight = 1 / self.n_particles

for outcome in np.arange(self.model.n_outcomes(expparams))[...,np.newaxis]:

grad = outer_product(self.model.grad_log_likelihood(outcome, modelparams, expparams))[0]
L = self.model.likelihood(outcome, modelparams, expparams)

like_bim += weight * grad * L

return self.current_bim + like_bim
def prior_bayes_information(self, expparams, n_samples=None):
if n_samples is None:
n_samples = self.particle_locations.shape[0]
return self._bim(self.prior.sample(n_samples), expparams)

def posterior_bayes_information(self, expparams):
return self._bim(
self.particle_locations, expparams,
modelweights=self.particle_weights
)

def update(self, outcome, expparams):
# Before we update, we need to commit the new Bayesian information
# matrix corresponding to the measurement we just made.
self.current_bim = self.hypothetical_bim(expparams)
self.current_bim += self.prior_bayes_information(expparams)[:, :, 0]

# If we're tracking the information content accessible to adaptive
# algorithms, then we must use the current posterior as the prior
# for the next step, then add that accordingly.
if self._track_adaptive:
self.adaptive_bim += self.posterior_bayes_information(expparams)[:, :, 0]

# We now can update as normal.
SMCUpdater.update(self, outcome, expparams)
8 changes: 5 additions & 3 deletions src/qinfer/test_models.py
@@ -105,13 +105,15 @@ def likelihood(self, outcomes, modelparams, expparams):
# Now we concatenate over outcomes.
return Model.pr0_to_likelihood_array(outcomes, pr0)

def grad_log_likelihood(self, outcome, modelparams, expparams):
def score(self, outcomes, modelparams, expparams):
#TODO: vectorize this

outcomes = outcomes[:, np.newaxis, np.newaxis]

arg = modelparams * expparams / 2
return (
( expparams / np.tan(arg)) ** (outcome) *
(-expparams * np.tan(arg)) ** (1-outcome)
( expparams / np.tan(arg)) ** (outcomes) *
(-expparams * np.tan(arg)) ** (1-outcomes)
)
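
# A brief numerical check of the expression above (illustration only, assuming
# the two-outcome likelihood Pr(0 | omega; t) = cos^2(omega t / 2) implied by
# the tangent and cotangent forms): the score equals the derivative of
# log Pr(d | omega; t) with respect to omega.
import numpy as np

omega, t, eps = 0.7, 2.3, 1e-6
pr0 = lambda w: np.cos(w * t / 2) ** 2

analytic = np.array([-t * np.tan(omega * t / 2), t / np.tan(omega * t / 2)])
numeric = np.array([
    (np.log(pr0(omega + eps)) - np.log(pr0(omega - eps))) / (2 * eps),        # outcome 0
    (np.log(1 - pr0(omega + eps)) - np.log(1 - pr0(omega - eps))) / (2 * eps) # outcome 1
])
assert np.allclose(analytic, numeric, atol=1e-4)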

class NoisyCoinModel(Model):