Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/qrf-randomness-and-quantiles.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed three correctness bugs in `_QRFModel.predict`: (1) the random-number generator was re-initialised from `self.seed` on every `predict()` call, so repeated calls returned identical draws and multiple-imputation variance collapsed to zero — the RNG is now created once in `__init__` and advanced across calls; (2) the quantile grid `[0.091..0.909]` combined with `.astype(int)` truncation systematically biased stochastic median predictions low and truncated the tails — the implementation now rounds (rather than floors) a beta-distribution draw onto a fine symmetric grid so the empirical mean maps to the intended quantile; (3) when users passed explicit `quantiles=[q1, q2, q3]`, each quantile request drew its own per-row random index, producing crossed quantiles — `QRFResults._predict` now routes explicit quantiles through a deterministic `exact_quantile` path that guarantees row-level monotonicity.
93 changes: 75 additions & 18 deletions microimpute/models/qrf.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ def __init__(self, seed: int, logger):
self.logger = logger
self.qrf = None
self.output_column = None
# Create the RNG once at construction so that repeated predict()
# calls consume state progressively and return different draws.
self._rng = np.random.default_rng(self.seed)

def fit(self, X: pd.DataFrame, y: pd.Series, **qrf_kwargs: Any) -> None:
"""Fit the QRF model.
Expand All @@ -172,28 +175,69 @@ def predict(
X: pd.DataFrame,
mean_quantile: float = 0.5,
count_samples: int = 10,
exact_quantile: Optional[float] = None,
) -> pd.Series:
"""Predict using the fitted model with beta distribution sampling.

Note: Assumes X is already preprocessed with categorical encoding
handled by the base ImputerResults class.
"""
# Generate quantile grid
eps = 1.0 / (count_samples + 1)
quantile_grid = np.linspace(eps, 1.0 - eps, count_samples)
pred = self.qrf.predict(X, quantiles=list(quantile_grid))

# Sample from beta distribution
random_generator = np.random.default_rng(self.seed)
Args:
X: Predictor matrix (already preprocessed).
mean_quantile: Mean quantile for beta-distribution sampling. Only
used when ``exact_quantile`` is None.
count_samples: Number of samples for the legacy quantile-grid code
path (kept for backward compatibility but no longer used by the
unbiased implementation).
exact_quantile: If provided, query the underlying QRF at exactly
this quantile (no beta sampling). This guarantees monotonicity
across quantiles for a given row and is used when the caller
supplies an explicit ``quantiles`` list.
"""
# Deterministic path: user asked for a specific quantile — query the
# QRF directly so that for any row i,
# prediction(q_low) <= prediction(q_mid) <= prediction(q_high).
if exact_quantile is not None:
pred = self.qrf.predict(X, quantiles=[float(exact_quantile)])
pred = np.asarray(pred).reshape(len(X), -1)[:, 0]
return pd.Series(pred, index=X.index, name=self.output_column)

# Stochastic path: draw one continuous quantile per row from a Beta
# distribution centred at ``mean_quantile`` (Beta(a,1) with
# a = mean_quantile/(1 - mean_quantile); for mean_quantile=0.5 this
# reduces to Uniform(0,1), so Beta(1,1) gives E[q]=0.5 and an
# unbiased empirical median in the limit). The old implementation
# converted a continuous beta draw into an integer index via
# ``np.clip(beta(a,1)*10).astype(int)``, which *floored* (not
# rounded) the index and used a grid of [0.091..0.909] — this
# systematically biased the median low and truncated the tails.
a = mean_quantile / (1 - mean_quantile)
input_quantiles = random_generator.beta(a, 1, size=len(X)) * count_samples
input_quantiles = np.clip(input_quantiles.astype(int), 0, count_samples - 1)
# Use the instance RNG so repeated predict() calls on the same X
# produce independent draws (previously each call reset the RNG
# from ``self.seed``, collapsing variance to zero).
continuous_quantiles = self._rng.beta(a, 1, size=len(X))

# Bucket continuous quantiles onto a fine symmetric grid covering the
# full open interval (0, 1). Using round() (not floor) keeps the
# mapping centred on the intended quantile, so the empirical mean of
# mapped quantiles ≈ ``mean_quantile``. We avoid exact 0 and 1 because
# QRF cannot extrapolate beyond observed extremes.
grid_size = max(int(count_samples), 101)
eps = 1.0 / (grid_size + 1)
quantile_grid = np.linspace(eps, 1.0 - eps, grid_size)
# Round (not floor) onto the grid to eliminate the low-side bias.
grid_indices = np.clip(
np.rint(continuous_quantiles * (grid_size - 1)).astype(int),
0,
grid_size - 1,
)

# Extract predictions
if len(pred.shape) == 2:
predictions = pred[np.arange(len(pred)), input_quantiles]
pred = self.qrf.predict(X, quantiles=list(quantile_grid))
pred = np.asarray(pred)
if pred.ndim == 2:
predictions = pred[np.arange(len(X)), grid_indices]
else:
predictions = pred[np.arange(len(pred)), :, input_quantiles]
predictions = pred[np.arange(len(X)), :, grid_indices]

return pd.Series(predictions, index=X.index, name=self.output_column)

Expand Down Expand Up @@ -413,11 +457,24 @@ def _predict(
return_probs=False,
)
else:
# Regression for numeric targets
imputed_values = model.predict(
X_test_augmented[encoded_predictors],
mean_quantile=q,
)
# Regression for numeric targets.
# If the caller passed an explicit ``quantiles`` list
# (the user wants to inspect specific quantiles, e.g.
# for prediction intervals), we query the QRF at
# exactly ``q`` per row — NO beta sampling. This
# guarantees row-level monotonicity across quantiles.
# Otherwise, sample stochastically around ``q`` (the
# beta-mean default for imputation variance).
if quantiles:
imputed_values = model.predict(
X_test_augmented[encoded_predictors],
exact_quantile=q,
)
else:
imputed_values = model.predict(
X_test_augmented[encoded_predictors],
mean_quantile=q,
)

imputed_df[variable] = imputed_values

Expand Down
99 changes: 99 additions & 0 deletions tests/test_models/test_qrf.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,105 @@ def test_qrf_beta_distribution_sampling():
assert median[0.5]["y"].iloc[i] <= extreme_high[0.99]["y"].iloc[i]


def test_qrf_repeated_predict_calls_produce_different_draws(
    simple_data: pd.DataFrame,
) -> None:
    """Regression test for the seed-reset bug (#1): two sequential predict()
    calls on the same X must draw different stochastic quantiles."""
    training_rows = simple_data.iloc[:80]
    prediction_rows = simple_data.iloc[80:].loc[:, ["x1", "x2"]]

    fitted_model = QRF().fit(
        training_rows,
        predictors=["x1", "x2"],
        imputed_variables=["y"],
        n_estimators=50,
    )

    first_draw = fitted_model.predict(prediction_rows)
    second_draw = fitted_model.predict(prediction_rows)

    # A re-seeded RNG made the two calls bit-identical before the fix; the
    # instance RNG now advances across calls, so the draws must differ.
    assert not np.allclose(first_draw["y"].values, second_draw["y"].values), (
        "Repeated predict() calls must produce different draws"
    )


def test_qrf_stochastic_median_is_unbiased() -> None:
    """Regression test for the quantile-grid bias bug (#2): the mean of many
    stochastic median predictions must approximate the true median (q=0.5)
    query, not a systematically lower value."""
    generator = np.random.default_rng(0)
    sample_size = 400
    feature = generator.normal(size=sample_size)
    # Symmetric noise -> true median ~ 3*x.
    target = 3.0 * feature + generator.normal(size=sample_size)
    training_frame = pd.DataFrame({"x": feature, "y": target})

    fitted = QRF().fit(
        training_frame,
        predictors=["x"],
        imputed_variables=["y"],
        n_estimators=200,
        min_samples_leaf=5,
    )

    x_grid = pd.DataFrame({"x": np.linspace(-1.5, 1.5, 50)})

    # Deterministic median via exact-quantile path (should not be biased).
    exact_median = fitted.predict(x_grid, quantiles=[0.5])[0.5]["y"].values

    # Average stochastic predictions across many calls; each call uses a
    # fresh draw from Beta(1,1) = Uniform(0,1), so the mean of their
    # selected quantiles is ≈ 0.5 and the average prediction should match
    # the exact median.
    n_draws = 60
    per_call_predictions = [
        fitted.predict(x_grid)["y"].values for _ in range(n_draws)
    ]
    stochastic_means = np.mean(per_call_predictions, axis=0)

    # With n_draws averaging, the gap should be small relative to y's scale.
    scale = np.std(target)
    assert np.abs(stochastic_means - exact_median).mean() < 0.25 * scale, (
        "Stochastic median prediction is biased vs deterministic median"
    )


def test_qrf_multi_quantile_per_row_monotonicity() -> None:
    """Regression test for #3: when the user passes explicit
    ``quantiles=[q_low, q_mid, q_high]``, each row must satisfy
    q_low <= q_mid <= q_high (no per-row random quantile draw)."""
    generator = np.random.default_rng(1)
    sample_size = 200
    feature = generator.normal(size=sample_size)
    target = 2.0 * feature + generator.normal(size=sample_size)
    training_frame = pd.DataFrame({"x": feature, "y": target})

    fitted = QRF().fit(
        training_frame,
        predictors=["x"],
        imputed_variables=["y"],
        n_estimators=100,
        min_samples_leaf=5,
    )

    evaluation_frame = pd.DataFrame({"x": generator.normal(size=50)})
    quantile_preds = fitted.predict(evaluation_frame, quantiles=[0.1, 0.5, 0.9])

    lower = quantile_preds[0.1]["y"].values
    middle = quantile_preds[0.5]["y"].values
    upper = quantile_preds[0.9]["y"].values

    # All rows must have non-crossing quantiles (small tolerance for float
    # noise in the underlying tree predictions).
    assert np.all(lower <= middle + 1e-9), "q=0.1 prediction must be <= q=0.5"
    assert np.all(middle <= upper + 1e-9), "q=0.5 prediction must be <= q=0.9"


# === Categorical Variable Tests ===


Expand Down
Loading