Feature/neg binomial dist (#627)
* Allow 'negative.binomial' as family

* Add eta_mu_deviance

* Apply fixes after review
dawidkopczyk committed Apr 12, 2023
1 parent 21c139b commit 894669e
Showing 8 changed files with 378 additions and 16 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.rst
@@ -7,6 +7,16 @@
Changelog
=========

2.5.0 - unreleased
------------------

**New feature**

- Added the Negative Binomial distribution, available by setting the ``family``
  parameter of :class:`~glum.GeneralizedLinearRegressor` and
  :class:`~glum.GeneralizedLinearRegressorCV` to ``'negative.binomial'``.


2.4.1 - 2023-03-14
------------------

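A minimal usage sketch of the new family (the data and ``alpha`` value are illustrative, and passing a ``NegativeBinomialDistribution`` instance as ``family`` to control ``theta`` is assumed to behave like glum's other distribution objects):

import numpy as np
from glum import GeneralizedLinearRegressor, NegativeBinomialDistribution

rng = np.random.default_rng(0)
X = rng.standard_normal((1000, 3))
mu = np.exp(X @ np.array([0.5, -0.25, 0.1]))
y = rng.negative_binomial(n=2, p=2.0 / (2.0 + mu))  # theta = 1 / n = 0.5

# String spelling added by this commit; theta defaults to 1.0.
model = GeneralizedLinearRegressor(family="negative.binomial", alpha=0.01)
model.fit(X, y)

# Assumed alternative: pass a distribution instance to set theta explicitly.
model = GeneralizedLinearRegressor(
    family=NegativeBinomialDistribution(theta=0.5), alpha=0.01
).fit(X, y)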
2 changes: 2 additions & 0 deletions src/glum/__init__.py
@@ -6,6 +6,7 @@
    GammaDistribution,
    GeneralizedHyperbolicSecant,
    InverseGaussianDistribution,
    NegativeBinomialDistribution,
    NormalDistribution,
    PoissonDistribution,
    TweedieDistribution,
@@ -28,6 +29,7 @@
"NormalDistribution",
"PoissonDistribution",
"TweedieDistribution",
"NegativeBinomialDistribution",
"IdentityLink",
"Link",
"LogitLink",
216 changes: 216 additions & 0 deletions src/glum/_distribution.py
@@ -20,6 +20,10 @@
    gamma_log_eta_mu_deviance,
    gamma_log_likelihood,
    gamma_log_rowwise_gradient_hessian,
    negative_binomial_deviance,
    negative_binomial_log_eta_mu_deviance,
    negative_binomial_log_likelihood,
    negative_binomial_log_rowwise_gradient_hessian,
    normal_deviance,
    normal_identity_eta_mu_deviance,
    normal_identity_rowwise_gradient_hessian,
@@ -1108,6 +1112,218 @@ def dispersion(self, y, mu, sample_weight=None, ddof=1, method="pearson") -> float:
)


class NegativeBinomialDistribution(ExponentialDispersionModel):
    r"""A class for the Negative Binomial distribution.

    A Negative Binomial distribution with mean :math:`\mu = \mathrm{E}(Y)` is
    uniquely defined by its mean-variance relationship
    :math:`\mathrm{var}(Y) \propto \mu + \theta \mu^2`.

    Parameters
    ----------
    theta : float, optional (default=1.0)
        The dispersion parameter from the `unit_variance`
        :math:`v(\mu) = \mu + \theta \mu^2`. For
        :math:`\theta \leq 0`, no distribution exists.

    References
    ----------
    For the log likelihood and deviance:

    * M. L. Zwilling, Negative Binomial Regression, The Mathematica Journal, 2013.
      https://www.mathematica-journal.com/2013/06/27/negative-binomial-regression/
    """

    lower_bound = 0
    upper_bound = np.inf
    include_lower_bound = True
    include_upper_bound = False

    def __init__(self, theta=1.0):
        # theta is validated by the property setter below
        self.theta = theta

    def __eq__(self, other):  # noqa D
        return isinstance(other, NegativeBinomialDistribution) and (
            self.theta == other.theta
        )

    @property
    def theta(self) -> float:
        """Return the Negative Binomial theta parameter."""
        return self._theta

    @theta.setter
    def theta(self, theta):
        if not isinstance(theta, (int, float)):
            raise TypeError(f"theta must be an int or float, input was {theta}")
        if not theta > 0:
            raise ValueError(
                f"theta must be a strictly positive number, input was {theta}"
            )

        # Prevents upcasting when working with 32-bit data
        self._theta = np.float32(theta)

    def unit_variance(self, mu: np.ndarray) -> np.ndarray:
        """Compute the unit variance of a Negative Binomial distribution
        ``v(mu) = mu + theta * mu^2``.

        Parameters
        ----------
        mu : array-like, shape (n_samples,)
            Predicted mean.

        Returns
        -------
        numpy.ndarray, shape (n_samples,)
        """
        return mu + self.theta * mu**2

    def unit_variance_derivative(self, mu: np.ndarray) -> np.ndarray:
        r"""Compute the derivative of the unit variance of a Negative Binomial
        distribution.

        Equation: :math:`v'(\mu) = 1 + 2 \theta \mu`.

        Parameters
        ----------
        mu : array-like, shape (n_samples,)
            Predicted mean.

        Returns
        -------
        numpy.ndarray, shape (n_samples,)
        """
        return 1 + 2 * self.theta * mu

    def deviance(self, y, mu, sample_weight=None) -> float:
        """Compute the deviance.

        Parameters
        ----------
        y : array-like, shape (n_samples,)
            Target values.
        mu : array-like, shape (n_samples,)
            Predicted mean.
        sample_weight : array-like, shape (n_samples,), optional (default=1)
            Sample weights.
        """
        theta = self.theta
        y, mu, sample_weight = _as_float_arrays(y, mu, sample_weight)
        sample_weight = np.ones_like(y) if sample_weight is None else sample_weight

        return negative_binomial_deviance(y, sample_weight, mu, theta=float(theta))

    def unit_deviance(self, y: np.ndarray, mu: np.ndarray) -> np.ndarray:
        """Get the deviance of each observation."""
        theta = self.theta
        r = 1.0 / theta

        return 2 * (special.xlogy(y, y / mu) - (y + r) * np.log((y + r) / (mu + r)))

    def _rowwise_gradient_hessian(
        self, link, y, sample_weight, eta, mu, gradient_rows, hessian_rows
    ):
        if isinstance(link, LogLink):
            return negative_binomial_log_rowwise_gradient_hessian(
                y, sample_weight, eta, mu, gradient_rows, hessian_rows, theta=self.theta
            )
        return super()._rowwise_gradient_hessian(
            link, y, sample_weight, eta, mu, gradient_rows, hessian_rows
        )

    def _eta_mu_deviance(
        self,
        link: Link,
        factor: float,
        cur_eta: np.ndarray,
        X_dot_d: np.ndarray,
        y: np.ndarray,
        sample_weight: np.ndarray,
        eta_out: np.ndarray,
        mu_out: np.ndarray,
    ):
        if isinstance(link, LogLink):
            return negative_binomial_log_eta_mu_deviance(
                cur_eta,
                X_dot_d,
                y,
                sample_weight,
                eta_out,
                mu_out,
                factor,
                theta=self.theta,
            )
        return super()._eta_mu_deviance(
            link, factor, cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out
        )

    def log_likelihood(self, y, mu, sample_weight=None, dispersion=1) -> float:
        r"""Compute the log likelihood.

        Parameters
        ----------
        y : array-like, shape (n_samples,)
            Target values.
        mu : array-like, shape (n_samples,)
            Predicted mean.
        sample_weight : array-like, shape (n_samples,), optional (default=1)
            Sample weights.
        dispersion : float, optional (default=1.0)
            Ignored.
        """
        theta = self.theta
        y, mu, sample_weight = _as_float_arrays(y, mu, sample_weight)
        sample_weight = np.ones_like(y) if sample_weight is None else sample_weight

        return negative_binomial_log_likelihood(y, sample_weight, mu, float(theta), 1.0)

    def dispersion(self, y, mu, sample_weight=None, ddof=1, method="pearson") -> float:
        r"""Estimate the dispersion parameter :math:`\phi`.

        Parameters
        ----------
        y : array-like, shape (n_samples,)
            Target values.
        mu : array-like, shape (n_samples,)
            Predicted mean.
        sample_weight : array-like, shape (n_samples,), optional (default=None)
            Weights or exposure to which the variance is inversely proportional.
        ddof : int, optional (default=1)
            Degrees of freedom consumed by the model for ``mu``.
        method : {'pearson', 'deviance'}, optional (default='pearson')
            Whether to base the estimate on the Pearson residuals or the deviance.

        Returns
        -------
        float
        """
        theta = self.theta  # noqa: F841
        y, mu, sample_weight = _as_float_arrays(y, mu, sample_weight)

        if method == "pearson":
            formula = "((y - mu) ** 2) / (mu + theta * mu ** 2)"
            if sample_weight is None:
                return numexpr.evaluate(formula).sum() / (len(y) - ddof)
            else:
                formula = f"sample_weight * {formula}"
                return numexpr.evaluate(formula).sum() / (sample_weight.sum() - ddof)

        return super().dispersion(
            y, mu, sample_weight=sample_weight, ddof=ddof, method=method
        )


def guess_intercept(
    y: np.ndarray,
    sample_weight: np.ndarray,
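The class can also be exercised directly; a short sketch (values illustrative) checking the closed-form identities implemented above:

import numpy as np
from scipy import special
from glum import NegativeBinomialDistribution

dist = NegativeBinomialDistribution(theta=0.5)
y = np.array([0.0, 1.0, 4.0])
mu = np.array([0.8, 1.5, 3.0])

# v(mu) = mu + theta * mu^2 and v'(mu) = 1 + 2 * theta * mu
assert np.allclose(dist.unit_variance(mu), mu + 0.5 * mu**2)
assert np.allclose(dist.unit_variance_derivative(mu), 1 + 2 * 0.5 * mu)

# unit_deviance matches the closed form with r = 1 / theta ...
r = 1.0 / dist.theta
expected = 2 * (special.xlogy(y, y / mu) - (y + r) * np.log((y + r) / (mu + r)))
assert np.allclose(dist.unit_deviance(y, mu), expected)

# ... and deviance() aggregates it (unit weights by default).
assert np.isclose(dist.deviance(y, mu), dist.unit_deviance(y, mu).sum())

# Pearson estimate of the dispersion parameter phi (ddof defaults to 1).
print(dist.dispersion(y, mu))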
78 changes: 78 additions & 0 deletions src/glum/_functions.pyx
@@ -433,3 +433,81 @@ def binomial_logit_rowwise_gradient_hessian(
    for i in prange(n, nogil=True):
        gradient_rows_out[i] = weights[i] * (y[i] - mu[i])
        hessian_rows_out[i] = weights[i] * mu[i] * (1 - mu[i])

def negative_binomial_log_eta_mu_deviance(
    const_floating1d cur_eta,
    const_floating1d X_dot_d,
    const_floating1d y,
    const_floating1d weights,
    floating[:] eta_out,
    floating[:] mu_out,
    floating factor,
    floating theta
):
    cdef int n = cur_eta.shape[0]
    cdef int i
    cdef floating deviance = 0.0
    cdef floating r = 1.0 / theta  # helper

    for i in prange(n, nogil=True):
        eta_out[i] = cur_eta[i] + factor * X_dot_d[i]
        mu_out[i] = exp(eta_out[i])
        # Half unit deviance: y * log(y / mu) - (y + r) * log((y + r) / (mu + r));
        # the terms that do not depend on mu are dropped here.
        deviance += weights[i] * (-y[i] * eta_out[i] + (y[i] + r) * log(mu_out[i] + r))
    return 2 * deviance

def negative_binomial_log_rowwise_gradient_hessian(
    const_floating1d y,
    const_floating1d weights,
    const_floating1d eta,
    const_floating1d mu,
    floating[:] gradient_rows_out,
    floating[:] hessian_rows_out,
    floating theta
):
    cdef int n = eta.shape[0]
    cdef int i
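    # With the log link, dmu/deta = mu, so the per-row score simplifies to
    # weights * (y - mu) / (1 + theta * mu) and the expected (Fisher)
    # information per row to weights * mu / (1 + theta * mu).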
    for i in prange(n, nogil=True):
        gradient_rows_out[i] = weights[i] * (y[i] - mu[i]) / (1.0 + theta * mu[i])
        hessian_rows_out[i] = weights[i] * mu[i] / (1.0 + theta * mu[i])

def negative_binomial_log_likelihood(
    const_floating1d y,
    const_floating1d weights,
    const_floating1d mu,
    floating theta,
    floating dispersion,
):
    cdef int i  # loop counter

    cdef int n = y.shape[0]  # loop length
    cdef floating ll = 0.0  # output
    cdef floating r = 1.0 / theta  # helper

    for i in prange(n, nogil=True):
        ll += weights[i] * (
            y[i] * log(theta * mu[i]) -
            (y[i] + r) * log(1 + theta * mu[i]) +
            lgamma(y[i] + r) -
            lgamma(r) -
            lgamma(y[i] + 1.0)
        )

    return ll

def negative_binomial_deviance(
    const_floating1d y,
    const_floating1d weights,
    const_floating1d mu,
    floating theta,
):
    cdef int i  # loop counter

    cdef int n = y.shape[0]  # loop length
    cdef floating D = 0.0  # output
    cdef floating r = 1.0 / theta  # helper

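    # Unit deviance: 2 * (y * log(y / mu) - (y + r) * log((y + r) / (mu + r))).
    # The y > 0 branch implements xlogy: y * log(y / mu) -> 0 as y -> 0.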
    for i in prange(n, nogil=True):
        D += - weights[i] * (y[i] + r) * log((y[i] + r) / (mu[i] + r))
        if y[i] > 0:
            D += weights[i] * y[i] * log(y[i] / mu[i])

    return 2 * D

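A cross-check sketch for the log likelihood above (not part of the commit): with n = 1/theta and p = 1/(1 + theta * mu), the summand is exactly the NB2 log-pmf, so the weighted sum can be verified against scipy.stats.nbinom:

import numpy as np
from scipy.stats import nbinom
from glum import NegativeBinomialDistribution

theta = 0.5
y = np.array([0.0, 1.0, 3.0, 7.0])
mu = np.array([0.5, 1.0, 2.5, 6.0])

ll_glum = NegativeBinomialDistribution(theta=theta).log_likelihood(y, mu)
ll_scipy = nbinom.logpmf(y, n=1.0 / theta, p=1.0 / (1.0 + theta * mu)).sum()
assert np.isclose(ll_glum, ll_scipy)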