Feature/neg binomial dist (#627)
* Allow 'negative.binomial' as family

* Add eta_mu_deviance

* Apply fixes after review
dawidkopczyk committed Apr 12, 2023
1 parent 21c139b commit 894669e
Showing 8 changed files with 378 additions and 16 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.rst
@@ -7,6 +7,16 @@
Changelog
=========

2.5.0 - unreleased
------------------

**New feature**

- Added the Negative Binomial distribution, available by setting the ``family``
  parameter of :class:`~glum.GeneralizedLinearRegressor` and
  :class:`~glum.GeneralizedLinearRegressorCV` to ``'negative.binomial'``.


2.4.1 - 2023-03-14
------------------

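A minimal usage sketch of the new family (the data and ``alpha`` value are illustrative, and passing a ``NegativeBinomialDistribution`` instance as ``family`` to control ``theta`` is assumed to behave like glum's other distribution objects):

import numpy as np
from glum import GeneralizedLinearRegressor, NegativeBinomialDistribution

rng = np.random.default_rng(0)
X = rng.standard_normal((1000, 3))
mu = np.exp(X @ np.array([0.5, -0.25, 0.1]))
y = rng.negative_binomial(n=2, p=2.0 / (2.0 + mu))  # theta = 1 / n = 0.5

# String spelling added by this commit; theta defaults to 1.0.
model = GeneralizedLinearRegressor(family="negative.binomial", alpha=0.01)
model.fit(X, y)

# Assumed alternative: pass a distribution instance to set theta explicitly.
model = GeneralizedLinearRegressor(
    family=NegativeBinomialDistribution(theta=0.5), alpha=0.01
).fit(X, y)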
2 changes: 2 additions & 0 deletions src/glum/__init__.py
@@ -6,6 +6,7 @@
    GammaDistribution,
    GeneralizedHyperbolicSecant,
    InverseGaussianDistribution,
    NegativeBinomialDistribution,
    NormalDistribution,
    PoissonDistribution,
    TweedieDistribution,
@@ -28,6 +29,7 @@
"NormalDistribution",
"PoissonDistribution",
"TweedieDistribution",
"NegativeBinomialDistribution",
"IdentityLink",
"Link",
"LogitLink",
216 changes: 216 additions & 0 deletions src/glum/_distribution.py
@@ -20,6 +20,10 @@
    gamma_log_eta_mu_deviance,
    gamma_log_likelihood,
    gamma_log_rowwise_gradient_hessian,
    negative_binomial_deviance,
    negative_binomial_log_eta_mu_deviance,
    negative_binomial_log_likelihood,
    negative_binomial_log_rowwise_gradient_hessian,
    normal_deviance,
    normal_identity_eta_mu_deviance,
    normal_identity_rowwise_gradient_hessian,
@@ -1108,6 +1112,218 @@ def dispersion(self, y, mu, sample_weight=None, ddof=1, method="pearson") -> float:
)


class NegativeBinomialDistribution(ExponentialDispersionModel):
    r"""A class for the Negative Binomial distribution.

    A Negative Binomial distribution with mean :math:`\mu = \mathrm{E}(Y)` is
    uniquely defined by its mean-variance relationship
    :math:`\mathrm{var}(Y) \propto \mu + \theta \mu^2`.

    Parameters
    ----------
    theta : float, optional (default=1.0)
        The dispersion parameter from the `unit_variance`
        :math:`v(\mu) = \mu + \theta \mu^2`. For
        :math:`\theta \leq 0`, no distribution exists.

    References
    ----------
    For the log likelihood and deviance:

    * M. L. Zwilling, Negative Binomial Regression, The Mathematica Journal, 2013.
      https://www.mathematica-journal.com/2013/06/27/negative-binomial-regression/
    """

    lower_bound = 0
    upper_bound = np.inf
    include_lower_bound = True
    include_upper_bound = False

    def __init__(self, theta=1.0):
        # theta is validated by the property setter below
        self.theta = theta

    def __eq__(self, other):  # noqa D
        return isinstance(other, NegativeBinomialDistribution) and (
            self.theta == other.theta
        )

    @property
    def theta(self) -> float:
        """Return the Negative Binomial theta parameter."""
        return self._theta

    @theta.setter
    def theta(self, theta):
        if not isinstance(theta, (int, float)):
            raise TypeError(f"theta must be an int or float, input was {theta}")
        if not theta > 0:
            raise ValueError(
                f"theta must be a strictly positive number, input was {theta}"
            )

        # Prevents upcasting when working with 32-bit data
        self._theta = np.float32(theta)

    def unit_variance(self, mu: np.ndarray) -> np.ndarray:
        """Compute the unit variance of a Negative Binomial distribution
        ``v(mu) = mu + theta * mu^2``.

        Parameters
        ----------
        mu : array-like, shape (n_samples,)
            Predicted mean.

        Returns
        -------
        numpy.ndarray, shape (n_samples,)
        """
        return mu + self.theta * mu**2

    def unit_variance_derivative(self, mu: np.ndarray) -> np.ndarray:
        r"""Compute the derivative of the unit variance of a Negative Binomial
        distribution.

        Equation: :math:`v'(\mu) = 1 + 2 \theta \mu`.

        Parameters
        ----------
        mu : array-like, shape (n_samples,)
            Predicted mean.

        Returns
        -------
        numpy.ndarray, shape (n_samples,)
        """
        return 1 + 2 * self.theta * mu

    def deviance(self, y, mu, sample_weight=None) -> float:
        """Compute the deviance.

        Parameters
        ----------
        y : array-like, shape (n_samples,)
            Target values.
        mu : array-like, shape (n_samples,)
            Predicted mean.
        sample_weight : array-like, shape (n_samples,), optional (default=1)
            Sample weights.
        """
        theta = self.theta
        y, mu, sample_weight = _as_float_arrays(y, mu, sample_weight)
        sample_weight = np.ones_like(y) if sample_weight is None else sample_weight

        return negative_binomial_deviance(y, sample_weight, mu, theta=float(theta))

    def unit_deviance(self, y: np.ndarray, mu: np.ndarray) -> np.ndarray:
        """Get the deviance of each observation."""
        theta = self.theta
        r = 1.0 / theta

        return 2 * (special.xlogy(y, y / mu) - (y + r) * np.log((y + r) / (mu + r)))

    def _rowwise_gradient_hessian(
        self, link, y, sample_weight, eta, mu, gradient_rows, hessian_rows
    ):
        if isinstance(link, LogLink):
            return negative_binomial_log_rowwise_gradient_hessian(
                y, sample_weight, eta, mu, gradient_rows, hessian_rows, theta=self.theta
            )
        return super()._rowwise_gradient_hessian(
            link, y, sample_weight, eta, mu, gradient_rows, hessian_rows
        )

    def _eta_mu_deviance(
        self,
        link: Link,
        factor: float,
        cur_eta: np.ndarray,
        X_dot_d: np.ndarray,
        y: np.ndarray,
        sample_weight: np.ndarray,
        eta_out: np.ndarray,
        mu_out: np.ndarray,
    ):
        if isinstance(link, LogLink):
            return negative_binomial_log_eta_mu_deviance(
                cur_eta,
                X_dot_d,
                y,
                sample_weight,
                eta_out,
                mu_out,
                factor,
                theta=self.theta,
            )
        return super()._eta_mu_deviance(
            link, factor, cur_eta, X_dot_d, y, sample_weight, eta_out, mu_out
        )

    def log_likelihood(self, y, mu, sample_weight=None, dispersion=1) -> float:
        r"""Compute the log likelihood.

        Parameters
        ----------
        y : array-like, shape (n_samples,)
            Target values.
        mu : array-like, shape (n_samples,)
            Predicted mean.
        sample_weight : array-like, shape (n_samples,), optional (default=1)
            Sample weights.
        dispersion : float, optional (default=1.0)
            Ignored.
        """
        theta = self.theta
        y, mu, sample_weight = _as_float_arrays(y, mu, sample_weight)
        sample_weight = np.ones_like(y) if sample_weight is None else sample_weight

        return negative_binomial_log_likelihood(y, sample_weight, mu, float(theta), 1.0)

    def dispersion(self, y, mu, sample_weight=None, ddof=1, method="pearson") -> float:
        r"""Estimate the dispersion parameter :math:`\phi`.

        Parameters
        ----------
        y : array-like, shape (n_samples,)
            Target values.
        mu : array-like, shape (n_samples,)
            Predicted mean.
        sample_weight : array-like, shape (n_samples,), optional (default=None)
            Weights or exposure to which the variance is inversely proportional.
        ddof : int, optional (default=1)
            Degrees of freedom consumed by the model for ``mu``.
        method : {'pearson', 'deviance'}, optional (default='pearson')
            Whether to base the estimate on the Pearson residuals or the deviance.

        Returns
        -------
        float
        """
        theta = self.theta  # noqa: F841
        y, mu, sample_weight = _as_float_arrays(y, mu, sample_weight)

        if method == "pearson":
            formula = "((y - mu) ** 2) / (mu + theta * mu ** 2)"
            if sample_weight is None:
                return numexpr.evaluate(formula).sum() / (len(y) - ddof)
            else:
                formula = f"sample_weight * {formula}"
                return numexpr.evaluate(formula).sum() / (sample_weight.sum() - ddof)

        return super().dispersion(
            y, mu, sample_weight=sample_weight, ddof=ddof, method=method
        )


def guess_intercept(
    y: np.ndarray,
    sample_weight: np.ndarray,
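The class can also be exercised directly; a short sketch (values illustrative) checking the closed-form identities implemented above:

import numpy as np
from scipy import special
from glum import NegativeBinomialDistribution

dist = NegativeBinomialDistribution(theta=0.5)
y = np.array([0.0, 1.0, 4.0])
mu = np.array([0.8, 1.5, 3.0])

# v(mu) = mu + theta * mu^2 and v'(mu) = 1 + 2 * theta * mu
assert np.allclose(dist.unit_variance(mu), mu + 0.5 * mu**2)
assert np.allclose(dist.unit_variance_derivative(mu), 1 + 2 * 0.5 * mu)

# unit_deviance matches the closed form with r = 1 / theta ...
r = 1.0 / dist.theta
expected = 2 * (special.xlogy(y, y / mu) - (y + r) * np.log((y + r) / (mu + r)))
assert np.allclose(dist.unit_deviance(y, mu), expected)

# ... and deviance() aggregates it (unit weights by default).
assert np.isclose(dist.deviance(y, mu), dist.unit_deviance(y, mu).sum())

# Pearson estimate of the dispersion parameter phi (ddof defaults to 1).
print(dist.dispersion(y, mu))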
78 changes: 78 additions & 0 deletions src/glum/_functions.pyx
@@ -433,3 +433,81 @@ def binomial_logit_rowwise_gradient_hessian(
    for i in prange(n, nogil=True):
        gradient_rows_out[i] = weights[i] * (y[i] - mu[i])
        hessian_rows_out[i] = weights[i] * mu[i] * (1 - mu[i])

def negative_binomial_log_eta_mu_deviance(
    const_floating1d cur_eta,
    const_floating1d X_dot_d,
    const_floating1d y,
    const_floating1d weights,
    floating[:] eta_out,
    floating[:] mu_out,
    floating factor,
    floating theta
):
    cdef int n = cur_eta.shape[0]
    cdef int i
    cdef floating deviance = 0.0
    cdef floating r = 1.0 / theta  # helper

    for i in prange(n, nogil=True):
        eta_out[i] = cur_eta[i] + factor * X_dot_d[i]
        mu_out[i] = exp(eta_out[i])
        # Half unit deviance: y * log(y / mu) - (y + r) * log((y + r) / (mu + r));
        # the terms that do not depend on mu are dropped here.
        deviance += weights[i] * (-y[i] * eta_out[i] + (y[i] + r) * log(mu_out[i] + r))
    return 2 * deviance

def negative_binomial_log_rowwise_gradient_hessian(
    const_floating1d y,
    const_floating1d weights,
    const_floating1d eta,
    const_floating1d mu,
    floating[:] gradient_rows_out,
    floating[:] hessian_rows_out,
    floating theta
):
    cdef int n = eta.shape[0]
    cdef int i
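    # With the log link, dmu/deta = mu, so the per-row score simplifies to
    # weights * (y - mu) / (1 + theta * mu) and the expected (Fisher)
    # information per row to weights * mu / (1 + theta * mu).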
    for i in prange(n, nogil=True):
        gradient_rows_out[i] = weights[i] * (y[i] - mu[i]) / (1.0 + theta * mu[i])
        hessian_rows_out[i] = weights[i] * mu[i] / (1.0 + theta * mu[i])

def negative_binomial_log_likelihood(
    const_floating1d y,
    const_floating1d weights,
    const_floating1d mu,
    floating theta,
    floating dispersion,
):
    cdef int i  # loop counter

    cdef int n = y.shape[0]  # loop length
    cdef floating ll = 0.0  # output
    cdef floating r = 1.0 / theta  # helper

    for i in prange(n, nogil=True):
        ll += weights[i] * (
            y[i] * log(theta * mu[i]) -
            (y[i] + r) * log(1 + theta * mu[i]) +
            lgamma(y[i] + r) -
            lgamma(r) -
            lgamma(y[i] + 1.0)
        )

    return ll

def negative_binomial_deviance(
    const_floating1d y,
    const_floating1d weights,
    const_floating1d mu,
    floating theta,
):
    cdef int i  # loop counter

    cdef int n = y.shape[0]  # loop length
    cdef floating D = 0.0  # output
    cdef floating r = 1.0 / theta  # helper

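    # Unit deviance: 2 * (y * log(y / mu) - (y + r) * log((y + r) / (mu + r))).
    # The y > 0 branch implements xlogy: y * log(y / mu) -> 0 as y -> 0.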
    for i in prange(n, nogil=True):
        D += - weights[i] * (y[i] + r) * log((y[i] + r) / (mu[i] + r))
        if y[i] > 0:
            D += weights[i] * y[i] * log(y[i] / mu[i])

    return 2 * D

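A cross-check sketch for the log likelihood above (not part of the commit): with n = 1/theta and p = 1/(1 + theta * mu), the summand is exactly the NB2 log-pmf, so the weighted sum can be verified against scipy.stats.nbinom:

import numpy as np
from scipy.stats import nbinom
from glum import NegativeBinomialDistribution

theta = 0.5
y = np.array([0.0, 1.0, 3.0, 7.0])
mu = np.array([0.5, 1.0, 2.5, 6.0])

ll_glum = NegativeBinomialDistribution(theta=theta).log_likelihood(y, mu)
ll_scipy = nbinom.logpmf(y, n=1.0 / theta, p=1.0 / (1.0 + theta * mu)).sum()
assert np.isclose(ll_glum, ll_scipy)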