Scoring with offsets (#491)

Quantco · Nov 29, 2021 · 007d826 · 007d826
1 parent 3241a67
commit 007d826
Show file tree

Hide file tree

Showing 3 changed files with 23 additions and 6 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -7,14 +7,20 @@
 Changelog
 =========
 
+2.0.4 - 202X-XX-XX
+------------------
+
+**Bug fix:**
+
+- The :meth:`score` method of :class:`~glum.GeneralizedLinearRegressor` and :class:`~glum.GeneralizedLinearRegressorCV` now accepts offsets.
+
 2.0.3 - 2021-11-05
 ------------------
 
 **Other:**
 
 - We are now specifying the run time dependencies in ``setup.py``, so that missing dependencies are automatically installed from PyPI when installing ``glum`` via pip.
 
-
 2.0.2 - 2021-11-03
 ------------------
 

diff --git a/src/glum/_glm.py b/src/glum/_glm.py
@@ -1512,6 +1512,7 @@ def score(
         X: ShapedArrayLike,
         y: ShapedArrayLike,
         sample_weight: Optional[ArrayLike] = None,
+        offset: Optional[ArrayLike] = None,
     ):
         """Compute :math:`D^2`, the percentage of deviance explained.
 
@@ -1536,16 +1537,18 @@ def score(
         sample_weight : array-like, shape (n_samples,), optional (default=None)
             Sample weights.
 
+        offset : array-like, shape (n_samples,), optional (default=None)
+            Offset passed to ``self.predict`` when computing the predictions.
+
         Returns
         -------
-        score : float
+        float
             D^2 of self.predict(X) w.r.t. y.
         """
         # Note, default score defined in RegressorMixin is R^2 score.
         # TODO: make D^2 a score function in module metrics (and thereby get
         #       input validation and so on)
         sample_weight = _check_weights(sample_weight, y.shape[0], y.dtype)
-        mu = self.predict(X)
+        mu = self.predict(X, offset=offset)
         family = get_family(self.family)
         dev = family.deviance(y, mu, sample_weight=sample_weight)
         y_mean = np.average(y, weights=sample_weight)

diff --git a/tests/glm/test_glm.py b/tests/glm/test_glm.py
@@ -1753,8 +1753,9 @@ def test_sparse_std_errors(regression_data):
 
 
 @pytest.mark.parametrize("as_data_frame", [False, True])
+@pytest.mark.parametrize("offset", [False, True])
 @pytest.mark.parametrize("weighted", [False, True])
-def test_score_method(as_data_frame, weighted):
+def test_score_method(as_data_frame, offset, weighted):
 
     regressor = GeneralizedLinearRegressor(
         alpha=0,
@@ -1776,7 +1777,14 @@ def test_score_method(as_data_frame, weighted):
     else:
         x = np.ones((len(y), 1))
 
-    score = regressor.fit(x, y, sample_weight=wgts).score(x, y, sample_weight=wgts)
+    if offset:
+        offset = y
+    else:
+        offset = None
+
+    score = regressor.fit(x, y, offset=offset, sample_weight=wgts).score(
+        x, y, offset=offset, sample_weight=wgts
+    )
 
     # use pytest because NumPy used to always reject comparisons against zero
-    assert pytest.approx(score, 1e-8) == 0
+    assert pytest.approx(score, 1e-8) == int(offset is not None)