From 202808dc69fe3714bf3541fd65f6258343abe27a Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Tue, 21 Oct 2025 15:41:46 +0200 Subject: [PATCH 01/38] first ps version --- doubleml/utils/propensity_score_processing.py | 206 ++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 doubleml/utils/propensity_score_processing.py diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py new file mode 100644 index 00000000..76214349 --- /dev/null +++ b/doubleml/utils/propensity_score_processing.py @@ -0,0 +1,206 @@ +import warnings +import numpy as np +from typing import Any, Dict, List, Optional + +from doubleml.utils._checks import _check_is_propensity, _check_trimming +from doubleml.utils._propensity_score import _trimm + + +class PropensityScoreProcessor: + """ + Processor for propensity score validation, clipping, and warnings. + + Parameters + ---------- + clipping_threshold : float, default=1e-2 + Threshold used for clipping propensity scores. + warn_extreme_values : bool, default=True + Whether to warn about extreme propensity score values. + extreme_threshold : float, default=0.05 + Threshold for extreme value warnings. + warning_proportion : float, default=0.1 + Proportion threshold for triggering extreme value warnings. 
+ + Examples + -------- + >>> processor = PropensityScoreProcessor(clipping_threshold=0.01) + >>> clipped_scores = processor.adjust(raw_scores) + """ + + _DEFAULT_CONFIG: Dict[str, Any] = { + "clipping_threshold": 1e-2, + "warn_extreme_values": True, + "extreme_threshold": 0.05, + "warning_proportion": 0.1, + } + + def __init__(self, **config: Any) -> None: + + unknown_params = set(config.keys()) - set(self._DEFAULT_CONFIG.keys()) + if unknown_params: + raise ValueError(f"Unknown parameters: {unknown_params}") + + self._config: Dict[str, Any] = {**self._DEFAULT_CONFIG, **config} + self._validate_params() + + # ------------------------------------------------------------------------- + # Configuration methods + # ------------------------------------------------------------------------- + def _validate_params(self) -> None: + """Validate configuration parameters.""" + _check_trimming("truncate", self._config["clipping_threshold"]) + + if not isinstance(self._config["warn_extreme_values"], bool): + raise TypeError("warn_extreme_values must be boolean.") + + if not (0 < self._config["extreme_threshold"] < 0.5): + raise ValueError("extreme_threshold must be between 0 and 0.5.") + + if not (0 < self._config["warning_proportion"] < 1): + raise ValueError("warning_proportion must be between 0 and 1.") + + @property + def clipping_threshold(self) -> float: + """Get the clipping threshold.""" + return self._config["clipping_threshold"] + + @property + def warn_extreme_values(self) -> bool: + """Get the warn extreme values setting.""" + return self._config["warn_extreme_values"] + + @property + def extreme_threshold(self) -> float: + """Get the extreme threshold.""" + return self._config["extreme_threshold"] + + @property + def warning_proportion(self) -> float: + """Get the warning proportion.""" + return self._config["warning_proportion"] + + @classmethod + def get_default_config(cls) -> Dict[str, Any]: + """Return the default configuration dictionary.""" + return 
cls._DEFAULT_CONFIG.copy() + + def get_config(self) -> Dict[str, Any]: + """Return a copy of the current configuration dictionary.""" + return self._config.copy() + + def update_config(self, **new_config: Any) -> None: + """ + Update configuration parameters. + + Reinitializes the instance to ensure all validation and defaults + are applied consistently. + """ + updated = {**self._config, **new_config} + self.__init__(**updated) + + # ------------------------------------------------------------------------- + # Core functionality + # ------------------------------------------------------------------------- + def adjust( + self, + propensity_scores: np.ndarray, + learner_name: str = "ml_m", + smpls: Optional[List[Any]] = None, + ) -> np.ndarray: + """ + Adjust propensity scores via validation, clipping, and warnings. + + Parameters + ---------- + propensity_scores : array-like + Raw propensity score predictions. + learner_name : str, default="ml_m" + Name of the learner for error messages. + smpls : list, optional + Sample splits for validation. + + Returns + ------- + np.ndarray + Clipped and validated propensity scores. 
+ """ + # Validation + _check_is_propensity( + propensity_scores, + learner_name, + learner_name, + smpls, + eps=1e-12, + ) + + # Warnings for extreme values + if self.warn_extreme_values: + self._warn_extreme_values(propensity_scores) + + # Clipping + clipped_scores = _trimm( + propensity_scores, + "truncate", + self.clipping_threshold, + ) + + return np.asarray(clipped_scores) + + # ------------------------------------------------------------------------- + # Private helper methods + # ------------------------------------------------------------------------- + def _warn_extreme_values(self, propensity_scores: np.ndarray) -> None: + """Emit warnings for extreme or clipped propensity scores.""" + min_prop = np.min(propensity_scores) + max_prop = np.max(propensity_scores) + + extreme_low = np.mean(propensity_scores < self.extreme_threshold) + extreme_high = np.mean(propensity_scores > (1 - self.extreme_threshold)) + + if extreme_low > self.warning_proportion: + warnings.warn( + f"Large proportion ({extreme_low:.1%}) of propensity scores " + f"below {self.extreme_threshold}. This may indicate poor overlap. " + f"Consider adjusting the model or increasing clipping_threshold " + f"(current: {self.clipping_threshold}).", + UserWarning, + ) + + if extreme_high > self.warning_proportion: + warnings.warn( + f"Large proportion ({extreme_high:.1%}) of propensity scores " + f"above {1 - self.extreme_threshold}. This may indicate poor overlap. " + f"Consider adjusting the model or increasing clipping_threshold " + f"(current: {self.clipping_threshold}).", + UserWarning, + ) + + if min_prop <= self.clipping_threshold: + warnings.warn( + f"Minimum propensity score ({min_prop:.6f}) is at or below " + f"clipping threshold ({self.clipping_threshold}). 
" + f"Some observations may be heavily clipped.", + UserWarning, + ) + + if max_prop >= (1 - self.clipping_threshold): + warnings.warn( + f"Maximum propensity score ({max_prop:.6f}) is at or above " + f"clipping threshold ({1 - self.clipping_threshold}). " + f"Some observations may be heavily clipped.", + UserWarning, + ) + + # ------------------------------------------------------------------------- + # Representations + # ------------------------------------------------------------------------- + def __repr__(self) -> str: + config_str = ", ".join( + [f"{k}={v}" for k, v in sorted(self._config.items())] + ) + return f"{self.__class__.__name__}({config_str})" + + def __eq__(self, other: object) -> bool: + if not isinstance(other, PropensityScoreProcessor): + return False + return self._config == other._config From 2ff5ecb64bb2446a33775000166c1ffced2c1cb8 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 22 Oct 2025 09:10:18 +0200 Subject: [PATCH 02/38] add exceptions for ps processor --- doubleml/utils/__init__.py | 2 + doubleml/utils/propensity_score_processing.py | 60 +++++++----- .../tests/test_ps_processor_exceptions.py | 94 +++++++++++++++++++ 3 files changed, 131 insertions(+), 25 deletions(-) create mode 100644 doubleml/utils/tests/test_ps_processor_exceptions.py diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py index 386586ce..ce1ba3c6 100644 --- a/doubleml/utils/__init__.py +++ b/doubleml/utils/__init__.py @@ -7,6 +7,7 @@ from .gain_statistics import gain_statistics from .global_learner import GlobalClassifier, GlobalRegressor from .policytree import DoubleMLPolicyTree +from .propensity_score_processing import PropensityScoreProcessor from .resampling import DoubleMLClusterResampling, DoubleMLResampling __all__ = [ @@ -19,4 +20,5 @@ "gain_statistics", "GlobalClassifier", "GlobalRegressor", + "PropensityScoreProcessor", ] diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index 
76214349..381c443e 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -1,15 +1,15 @@ import warnings -import numpy as np from typing import Any, Dict, List, Optional -from doubleml.utils._checks import _check_is_propensity, _check_trimming -from doubleml.utils._propensity_score import _trimm +import numpy as np + +from doubleml.utils._checks import _check_is_propensity class PropensityScoreProcessor: """ Processor for propensity score validation, clipping, and warnings. - + Parameters ---------- clipping_threshold : float, default=1e-2 @@ -20,7 +20,7 @@ class PropensityScoreProcessor: Threshold for extreme value warnings. warning_proportion : float, default=0.1 Proportion threshold for triggering extreme value warnings. - + Examples -------- >>> processor = PropensityScoreProcessor(clipping_threshold=0.01) @@ -35,28 +35,38 @@ class PropensityScoreProcessor: } def __init__(self, **config: Any) -> None: - + unknown_params = set(config.keys()) - set(self._DEFAULT_CONFIG.keys()) if unknown_params: raise ValueError(f"Unknown parameters: {unknown_params}") - self._config: Dict[str, Any] = {**self._DEFAULT_CONFIG, **config} - self._validate_params() + updated_config = {**self._DEFAULT_CONFIG, **config} + self._validate_config(updated_config) + self._config = updated_config # ------------------------------------------------------------------------- # Configuration methods # ------------------------------------------------------------------------- - def _validate_params(self) -> None: + def _validate_config(self, config: Dict[str, Any]) -> None: """Validate configuration parameters.""" - _check_trimming("truncate", self._config["clipping_threshold"]) - if not isinstance(self._config["warn_extreme_values"], bool): + clipping_threshold = config["clipping_threshold"] + if not isinstance(clipping_threshold, float): + raise TypeError("clipping_threshold must be of float type. 
" f"Object of type {type(clipping_threshold)} passed.") + if (clipping_threshold <= 0) or (clipping_threshold >= 0.5): + raise ValueError(f"clipping_threshold must be between 0 and 0.5. " f"{clipping_threshold} was passed.") + + if not isinstance(config["warn_extreme_values"], bool): raise TypeError("warn_extreme_values must be boolean.") - if not (0 < self._config["extreme_threshold"] < 0.5): + if not (0 < config["extreme_threshold"] < 0.5): raise ValueError("extreme_threshold must be between 0 and 0.5.") - if not (0 < self._config["warning_proportion"] < 1): + if not isinstance(config["warning_proportion"], float): + raise TypeError( + "warning_proportion must be of float type. " f"Object of type {type(config['warning_proportion'])} passed." + ) + if not (0 < config["warning_proportion"] < 1): raise ValueError("warning_proportion must be between 0 and 1.") @property @@ -92,11 +102,17 @@ def update_config(self, **new_config: Any) -> None: """ Update configuration parameters. - Reinitializes the instance to ensure all validation and defaults - are applied consistently. + Validates the new configuration before applying changes to ensure + the object remains in a consistent state. 
""" - updated = {**self._config, **new_config} - self.__init__(**updated) + + unknown_params = set(new_config.keys()) - set(self._DEFAULT_CONFIG.keys()) + if unknown_params: + raise ValueError(f"Unknown parameters: {unknown_params}") + + updated_config = {**self._config, **new_config} + self._validate_config(updated_config) + self._config = updated_config # ------------------------------------------------------------------------- # Core functionality @@ -138,11 +154,7 @@ def adjust( self._warn_extreme_values(propensity_scores) # Clipping - clipped_scores = _trimm( - propensity_scores, - "truncate", - self.clipping_threshold, - ) + clipped_scores = np.clip(propensity_scores, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold) return np.asarray(clipped_scores) @@ -195,9 +207,7 @@ def _warn_extreme_values(self, propensity_scores: np.ndarray) -> None: # Representations # ------------------------------------------------------------------------- def __repr__(self) -> str: - config_str = ", ".join( - [f"{k}={v}" for k, v in sorted(self._config.items())] - ) + config_str = ", ".join([f"{k}={v}" for k, v in sorted(self._config.items())]) return f"{self.__class__.__name__}({config_str})" def __eq__(self, other: object) -> bool: diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py b/doubleml/utils/tests/test_ps_processor_exceptions.py new file mode 100644 index 00000000..1f739b2b --- /dev/null +++ b/doubleml/utils/tests/test_ps_processor_exceptions.py @@ -0,0 +1,94 @@ +import pytest + +from doubleml.utils import PropensityScoreProcessor + +# ------------------------------------------------------------------------- +# Tests for __init__ method +# ------------------------------------------------------------------------- + + +@pytest.mark.ci +def test_init_unknown_parameter(): + """Test that unknown parameters raise ValueError during initialization.""" + with pytest.raises(ValueError, match="Unknown parameters: {'invalid_param'}"): + 
PropensityScoreProcessor(invalid_param=0.5) + + +@pytest.mark.ci +def test_init_clipping_threshold_type_error(): + """Test that non-float clipping_threshold raises TypeError.""" + with pytest.raises(TypeError, match="clipping_threshold must be of float type"): + PropensityScoreProcessor(clipping_threshold="0.01") + + +@pytest.mark.ci +def test_init_clipping_threshold_value_error(): + """Test that invalid clipping_threshold values raise ValueError.""" + with pytest.raises(ValueError, match="clipping_threshold must be between 0 and 0.5"): + PropensityScoreProcessor(clipping_threshold=0.0) # exactly 0 + + with pytest.raises(ValueError, match="clipping_threshold must be between 0 and 0.5"): + PropensityScoreProcessor(clipping_threshold=0.6) # above 0.5 + + +@pytest.mark.ci +def test_init_warn_extreme_values_type_error(): + """Test that non-bool warn_extreme_values raises TypeError.""" + with pytest.raises(TypeError, match="warn_extreme_values must be boolean"): + PropensityScoreProcessor(warn_extreme_values="True") + + +@pytest.mark.ci +def test_init_extreme_threshold_value_error(): + """Test that invalid extreme_threshold values raise ValueError.""" + with pytest.raises(ValueError, match="extreme_threshold must be between 0 and 0.5"): + PropensityScoreProcessor(extreme_threshold=0.0) # exactly 0 + + with pytest.raises(ValueError, match="extreme_threshold must be between 0 and 0.5"): + PropensityScoreProcessor(extreme_threshold=0.6) # above 0.5 + + +@pytest.mark.ci +def test_init_warning_proportion_type_error(): + """Test that invalid warning_proportion values raise TypeError.""" + with pytest.raises(TypeError, match="warning_proportion must be of float type"): + PropensityScoreProcessor(warning_proportion="0.0") + + +@pytest.mark.ci +def test_init_warning_proportion_value_error(): + """Test that invalid warning_proportion values raise ValueError.""" + with pytest.raises(ValueError, match="warning_proportion must be between 0 and 1"): + 
PropensityScoreProcessor(warning_proportion=0.0) # exactly 0 + + with pytest.raises(ValueError, match="warning_proportion must be between 0 and 1"): + PropensityScoreProcessor(warning_proportion=1.1) # above 1 + + +# ------------------------------------------------------------------------- +# Tests for update_config method +# ------------------------------------------------------------------------- + + +@pytest.mark.ci +def test_update_config_unknown_parameter(): + """Test that unknown parameters raise ValueError during config update.""" + processor = PropensityScoreProcessor() + + with pytest.raises(ValueError, match="Unknown parameters: {'invalid_param'}"): + processor.update_config(invalid_param=0.5) + + +@pytest.mark.ci +def test_update_config_preserves_state_on_failure(): + """Test that failed config updates don't change the processor state.""" + processor = PropensityScoreProcessor(clipping_threshold=0.1) + original_config = processor.get_config() + + # Try to update with invalid value + with pytest.raises(ValueError): + processor.update_config(clipping_threshold=0.6) + + # Verify state hasn't changed + assert processor.get_config() == original_config + assert processor.clipping_threshold == 0.1 From 97726940eaa9151edc9f27d5c4151169df72583f Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 22 Oct 2025 09:26:10 +0200 Subject: [PATCH 03/38] add representation tests --- .../test_ps_processor_representations.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 doubleml/utils/tests/test_ps_processor_representations.py diff --git a/doubleml/utils/tests/test_ps_processor_representations.py b/doubleml/utils/tests/test_ps_processor_representations.py new file mode 100644 index 00000000..e25e10a1 --- /dev/null +++ b/doubleml/utils/tests/test_ps_processor_representations.py @@ -0,0 +1,41 @@ +import pytest + +from doubleml.utils import PropensityScoreProcessor + + +@pytest.mark.ci +def test_repr_default_config(): + """Test __repr__ with 
default configuration.""" + processor = PropensityScoreProcessor() + expected = ( + "PropensityScoreProcessor(clipping_threshold=0.01, extreme_threshold=0.05, " + "warn_extreme_values=True, warning_proportion=0.1)" + ) + assert repr(processor) == expected + + +@pytest.mark.ci +def test_repr_custom_config(): + """Test __repr__ with custom configuration.""" + processor = PropensityScoreProcessor(clipping_threshold=0.05, warn_extreme_values=False, warning_proportion=0.2) + expected = ( + "PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=0.05, " + "warn_extreme_values=False, warning_proportion=0.2)" + ) + assert repr(processor) == expected + + +@pytest.mark.ci +def test_eq_same_config(): + """Test equality with same configuration.""" + processor1 = PropensityScoreProcessor(clipping_threshold=0.05) + processor2 = PropensityScoreProcessor(clipping_threshold=0.05) + assert processor1 == processor2 + + +@pytest.mark.ci +def test_eq_different_config(): + """Test inequality with different configuration.""" + processor1 = PropensityScoreProcessor(clipping_threshold=0.05) + processor2 = PropensityScoreProcessor(clipping_threshold=0.1) + assert processor1 != processor2 From b0f52a0b47200adba8db4796683d02c56be1b3c0 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 22 Oct 2025 09:56:24 +0200 Subject: [PATCH 04/38] clean up ps processor class to only have clipping threshold and extreme threshold --- doubleml/utils/propensity_score_processing.py | 107 ++++-------------- doubleml/utils/tests/test_ps_processor.py | 27 +++++ .../tests/test_ps_processor_exceptions.py | 24 ---- .../test_ps_processor_representations.py | 12 +- 4 files changed, 55 insertions(+), 115 deletions(-) create mode 100644 doubleml/utils/tests/test_ps_processor.py diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index 381c443e..f0eb5e19 100644 --- a/doubleml/utils/propensity_score_processing.py +++ 
b/doubleml/utils/propensity_score_processing.py @@ -1,10 +1,8 @@ import warnings -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Optional import numpy as np -from doubleml.utils._checks import _check_is_propensity - class PropensityScoreProcessor: """ @@ -23,15 +21,18 @@ class PropensityScoreProcessor: Examples -------- + >>> import numpy as np + >>> from doubleml.utils import PropensityScoreProcessor + >>> raw_scores = np.array([0.001, 0.2, 0.5, 0.8, 0.999]) >>> processor = PropensityScoreProcessor(clipping_threshold=0.01) >>> clipped_scores = processor.adjust(raw_scores) + >>> print(clipped_scores) + [0.01 0.2 0.5 0.8 0.99] """ _DEFAULT_CONFIG: Dict[str, Any] = { "clipping_threshold": 1e-2, - "warn_extreme_values": True, - "extreme_threshold": 0.05, - "warning_proportion": 0.1, + "extreme_threshold": 1e-12, } def __init__(self, **config: Any) -> None: @@ -56,39 +57,19 @@ def _validate_config(self, config: Dict[str, Any]) -> None: if (clipping_threshold <= 0) or (clipping_threshold >= 0.5): raise ValueError(f"clipping_threshold must be between 0 and 0.5. " f"{clipping_threshold} was passed.") - if not isinstance(config["warn_extreme_values"], bool): - raise TypeError("warn_extreme_values must be boolean.") - if not (0 < config["extreme_threshold"] < 0.5): raise ValueError("extreme_threshold must be between 0 and 0.5.") - if not isinstance(config["warning_proportion"], float): - raise TypeError( - "warning_proportion must be of float type. " f"Object of type {type(config['warning_proportion'])} passed." 
- ) - if not (0 < config["warning_proportion"] < 1): - raise ValueError("warning_proportion must be between 0 and 1.") - @property def clipping_threshold(self) -> float: """Get the clipping threshold.""" return self._config["clipping_threshold"] - @property - def warn_extreme_values(self) -> bool: - """Get the warn extreme values setting.""" - return self._config["warn_extreme_values"] - @property def extreme_threshold(self) -> float: """Get the extreme threshold.""" return self._config["extreme_threshold"] - @property - def warning_proportion(self) -> float: - """Get the warning proportion.""" - return self._config["warning_proportion"] - @classmethod def get_default_config(cls) -> Dict[str, Any]: """Return the default configuration dictionary.""" @@ -117,12 +98,7 @@ def update_config(self, **new_config: Any) -> None: # ------------------------------------------------------------------------- # Core functionality # ------------------------------------------------------------------------- - def adjust( - self, - propensity_scores: np.ndarray, - learner_name: str = "ml_m", - smpls: Optional[List[Any]] = None, - ) -> np.ndarray: + def adjust(self, propensity_scores: np.ndarray, learner_name: Optional[str] = None) -> np.ndarray: """ Adjust propensity scores via validation, clipping, and warnings. @@ -130,76 +106,43 @@ def adjust( ---------- propensity_scores : array-like Raw propensity score predictions. - learner_name : str, default="ml_m" - Name of the learner for error messages. - smpls : list, optional - Sample splits for validation. + learner_name : str, optional + Name of the learner providing the propensity scores, used in warnings. Returns ------- np.ndarray Clipped and validated propensity scores. 
""" - # Validation - _check_is_propensity( + self._validate_propensity_scores( propensity_scores, learner_name, - learner_name, - smpls, - eps=1e-12, ) - - # Warnings for extreme values - if self.warn_extreme_values: - self._warn_extreme_values(propensity_scores) - - # Clipping clipped_scores = np.clip(propensity_scores, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold) - return np.asarray(clipped_scores) + return clipped_scores # ------------------------------------------------------------------------- # Private helper methods # ------------------------------------------------------------------------- - def _warn_extreme_values(self, propensity_scores: np.ndarray) -> None: - """Emit warnings for extreme or clipped propensity scores.""" - min_prop = np.min(propensity_scores) - max_prop = np.max(propensity_scores) - extreme_low = np.mean(propensity_scores < self.extreme_threshold) - extreme_high = np.mean(propensity_scores > (1 - self.extreme_threshold)) + def _validate_propensity_scores( + self, + preds: np.ndarray, + learner_name: Optional[str] = None, + ) -> None: + """Validate if propensity predictions are valid.""" + learner_msg = f" from learner {learner_name}" if learner_name is not None else "" - if extreme_low > self.warning_proportion: - warnings.warn( - f"Large proportion ({extreme_low:.1%}) of propensity scores " - f"below {self.extreme_threshold}. This may indicate poor overlap. " - f"Consider adjusting the model or increasing clipping_threshold " - f"(current: {self.clipping_threshold}).", - UserWarning, - ) + if not isinstance(preds, np.ndarray): + raise TypeError(f"Propensity predictions {learner_msg} must be of type np.ndarray. " f"Type {type(preds)} found.") - if extreme_high > self.warning_proportion: - warnings.warn( - f"Large proportion ({extreme_high:.1%}) of propensity scores " - f"above {1 - self.extreme_threshold}. This may indicate poor overlap. 
" - f"Consider adjusting the model or increasing clipping_threshold " - f"(current: {self.clipping_threshold}).", - UserWarning, - ) - - if min_prop <= self.clipping_threshold: - warnings.warn( - f"Minimum propensity score ({min_prop:.6f}) is at or below " - f"clipping threshold ({self.clipping_threshold}). " - f"Some observations may be heavily clipped.", - UserWarning, - ) + if preds.ndim != 1: + raise ValueError(f"Propensity predictions {learner_msg} must be 1-dimensional. " f"Shape {preds.shape} found.") - if max_prop >= (1 - self.clipping_threshold): + if any((preds < self.extreme_threshold) | (preds > 1 - self.extreme_threshold)): warnings.warn( - f"Maximum propensity score ({max_prop:.6f}) is at or above " - f"clipping threshold ({1 - self.clipping_threshold}). " - f"Some observations may be heavily clipped.", + f"Propensity predictions {learner_msg} " f"are close to zero or one (eps={self.extreme_threshold}).", UserWarning, ) diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py new file mode 100644 index 00000000..5dc1b190 --- /dev/null +++ b/doubleml/utils/tests/test_ps_processor.py @@ -0,0 +1,27 @@ +import numpy as np +import pytest + +from doubleml.utils.propensity_score_processing import PropensityScoreProcessor + + +@pytest.mark.ci +def test_adjust_basic_clipping(): + """Test basic clipping functionality.""" + processor = PropensityScoreProcessor(clipping_threshold=0.1) + + scores = np.array([0.05, 0.2, 0.8, 0.95]) + adjusted = processor.adjust(scores) + + expected = np.array([0.1, 0.2, 0.8, 0.9]) + np.testing.assert_array_equal(adjusted, expected) + + +@pytest.mark.ci +def test_adjust_no_clipping_needed(): + """Test when no clipping is needed.""" + processor = PropensityScoreProcessor(clipping_threshold=0.01) + + scores = np.array([0.2, 0.3, 0.7, 0.8]) + adjusted = processor.adjust(scores) + + np.testing.assert_array_equal(adjusted, scores) diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py 
b/doubleml/utils/tests/test_ps_processor_exceptions.py index 1f739b2b..64859092 100644 --- a/doubleml/utils/tests/test_ps_processor_exceptions.py +++ b/doubleml/utils/tests/test_ps_processor_exceptions.py @@ -31,13 +31,6 @@ def test_init_clipping_threshold_value_error(): PropensityScoreProcessor(clipping_threshold=0.6) # above 0.5 -@pytest.mark.ci -def test_init_warn_extreme_values_type_error(): - """Test that non-bool warn_extreme_values raises TypeError.""" - with pytest.raises(TypeError, match="warn_extreme_values must be boolean"): - PropensityScoreProcessor(warn_extreme_values="True") - - @pytest.mark.ci def test_init_extreme_threshold_value_error(): """Test that invalid extreme_threshold values raise ValueError.""" @@ -48,23 +41,6 @@ def test_init_extreme_threshold_value_error(): PropensityScoreProcessor(extreme_threshold=0.6) # above 0.5 -@pytest.mark.ci -def test_init_warning_proportion_type_error(): - """Test that invalid warning_proportion values raise TypeError.""" - with pytest.raises(TypeError, match="warning_proportion must be of float type"): - PropensityScoreProcessor(warning_proportion="0.0") - - -@pytest.mark.ci -def test_init_warning_proportion_value_error(): - """Test that invalid warning_proportion values raise ValueError.""" - with pytest.raises(ValueError, match="warning_proportion must be between 0 and 1"): - PropensityScoreProcessor(warning_proportion=0.0) # exactly 0 - - with pytest.raises(ValueError, match="warning_proportion must be between 0 and 1"): - PropensityScoreProcessor(warning_proportion=1.1) # above 1 - - # ------------------------------------------------------------------------- # Tests for update_config method # ------------------------------------------------------------------------- diff --git a/doubleml/utils/tests/test_ps_processor_representations.py b/doubleml/utils/tests/test_ps_processor_representations.py index e25e10a1..215258ca 100644 --- a/doubleml/utils/tests/test_ps_processor_representations.py +++ 
b/doubleml/utils/tests/test_ps_processor_representations.py @@ -7,21 +7,15 @@ def test_repr_default_config(): """Test __repr__ with default configuration.""" processor = PropensityScoreProcessor() - expected = ( - "PropensityScoreProcessor(clipping_threshold=0.01, extreme_threshold=0.05, " - "warn_extreme_values=True, warning_proportion=0.1)" - ) + expected = "PropensityScoreProcessor(clipping_threshold=0.01, extreme_threshold=1e-12)" assert repr(processor) == expected @pytest.mark.ci def test_repr_custom_config(): """Test __repr__ with custom configuration.""" - processor = PropensityScoreProcessor(clipping_threshold=0.05, warn_extreme_values=False, warning_proportion=0.2) - expected = ( - "PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=0.05, " - "warn_extreme_values=False, warning_proportion=0.2)" - ) + processor = PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=1e-6) + expected = "PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=1e-06)" assert repr(processor) == expected From 6b746d4cf6fc325b12d588926eeb894dfd14f7cd Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 22 Oct 2025 12:43:06 +0200 Subject: [PATCH 05/38] add treatment to adjust method calls --- doubleml/utils/propensity_score_processing.py | 28 +++++-- doubleml/utils/tests/test_ps_processor.py | 6 +- .../tests/test_ps_processor_exceptions.py | 77 +++++++++++++++++++ .../test_ps_processor_representations.py | 7 ++ 4 files changed, 111 insertions(+), 7 deletions(-) diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index f0eb5e19..5c1d0e05 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -2,6 +2,7 @@ from typing import Any, Dict, Optional import numpy as np +from sklearn.utils.multiclass import type_of_target class PropensityScoreProcessor: @@ -23,10 +24,11 @@ class PropensityScoreProcessor: -------- >>> import numpy as 
np >>> from doubleml.utils import PropensityScoreProcessor - >>> raw_scores = np.array([0.001, 0.2, 0.5, 0.8, 0.999]) + >>> ps_scores = np.array([0.001, 0.2, 0.5, 0.8, 0.999]) + >>> treatment = np.array([0, 1, 1, 0, 1]) >>> processor = PropensityScoreProcessor(clipping_threshold=0.01) - >>> clipped_scores = processor.adjust(raw_scores) - >>> print(clipped_scores) + >>> adj_scores = processor.adjust(ps_scores, treatment) + >>> print(adj_scores) [0.01 0.2 0.5 0.8 0.99] """ @@ -98,14 +100,16 @@ def update_config(self, **new_config: Any) -> None: # ------------------------------------------------------------------------- # Core functionality # ------------------------------------------------------------------------- - def adjust(self, propensity_scores: np.ndarray, learner_name: Optional[str] = None) -> np.ndarray: + def adjust(self, propensity_scores: np.ndarray, treatment: np.ndarray, learner_name: Optional[str] = None) -> np.ndarray: """ Adjust propensity scores via validation, clipping, and warnings. Parameters ---------- - propensity_scores : array-like + propensity_scores : np.ndarray Raw propensity score predictions. + treatment : np.ndarray + Treatment assignments (1 for treated, 0 for control). learner_name : str, optional Name of the learner providing the propensity scores, used in warnings. @@ -118,6 +122,7 @@ def adjust(self, propensity_scores: np.ndarray, learner_name: Optional[str] = No propensity_scores, learner_name, ) + self._validate_treatment(treatment) clipped_scores = np.clip(propensity_scores, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold) return clipped_scores @@ -146,6 +151,19 @@ def _validate_propensity_scores( UserWarning, ) + def _validate_treatment(self, treatment: np.ndarray) -> None: + """Validate treatment vector.""" + if not isinstance(treatment, np.ndarray): + raise TypeError(f"Treatment assignments must be of type np.ndarray. 
" f"Type {type(treatment)} found.") + + if treatment.ndim != 1: + raise ValueError(f"Treatment assignments must be 1-dimensional. " f"Shape {treatment.shape} found.") + + binary_treat = type_of_target(treatment) == "binary" + zero_one_treat = np.all((np.power(treatment, 2) - treatment) == 0) + if not (binary_treat and zero_one_treat): + raise ValueError("Treatment vector must be binary (0 and 1).") + # ------------------------------------------------------------------------- # Representations # ------------------------------------------------------------------------- diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py index 5dc1b190..d16e266a 100644 --- a/doubleml/utils/tests/test_ps_processor.py +++ b/doubleml/utils/tests/test_ps_processor.py @@ -10,7 +10,8 @@ def test_adjust_basic_clipping(): processor = PropensityScoreProcessor(clipping_threshold=0.1) scores = np.array([0.05, 0.2, 0.8, 0.95]) - adjusted = processor.adjust(scores) + treatment = np.array([0, 1, 1, 0]) + adjusted = processor.adjust(scores, treatment) expected = np.array([0.1, 0.2, 0.8, 0.9]) np.testing.assert_array_equal(adjusted, expected) @@ -22,6 +23,7 @@ def test_adjust_no_clipping_needed(): processor = PropensityScoreProcessor(clipping_threshold=0.01) scores = np.array([0.2, 0.3, 0.7, 0.8]) - adjusted = processor.adjust(scores) + treatment = np.array([0, 1, 1, 0]) + adjusted = processor.adjust(scores, treatment) np.testing.assert_array_equal(adjusted, scores) diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py b/doubleml/utils/tests/test_ps_processor_exceptions.py index 64859092..479d1cb7 100644 --- a/doubleml/utils/tests/test_ps_processor_exceptions.py +++ b/doubleml/utils/tests/test_ps_processor_exceptions.py @@ -1,3 +1,4 @@ +import numpy as np import pytest from doubleml.utils import PropensityScoreProcessor @@ -68,3 +69,79 @@ def test_update_config_preserves_state_on_failure(): # Verify state hasn't changed assert 
processor.get_config() == original_config assert processor.clipping_threshold == 0.1 + + +@pytest.mark.ci +def test_update_config_successful_update(): + """Test successful configuration updates.""" + processor = PropensityScoreProcessor(clipping_threshold=0.1) + + processor.update_config(clipping_threshold=0.05) + assert processor.clipping_threshold == 0.05 + + +@pytest.mark.ci +def test_update_config_defaults(): + """Test updating configuration back to defaults.""" + processor = PropensityScoreProcessor(clipping_threshold=0.1) + + processor.update_config(clipping_threshold=0.01) + assert processor.clipping_threshold == 0.01 + + # Update back to default + default_config = PropensityScoreProcessor.get_default_config() + processor.update_config(**default_config) + assert processor.clipping_threshold == default_config["clipping_threshold"] + + +# ------------------------------------------------------------------------- +# Tests for propensity score validation +# ------------------------------------------------------------------------- + + +@pytest.mark.ci +def test_validate_propensity_scores_type_error_with_learner(): + """Test TypeError includes learner name.""" + processor = PropensityScoreProcessor() + with pytest.raises(TypeError, match="from learner test_learner"): + processor.adjust([0.1, 0.2], np.array([0, 1]), learner_name="test_learner") + + +@pytest.mark.ci +def test_validate_propensity_scores_dimension_error(): + """Test that non-1D propensity scores raise ValueError.""" + processor = PropensityScoreProcessor() + with pytest.raises(ValueError, match="must be 1-dimensional"): + processor.adjust(np.array([[0.1, 0.2]]), np.array([0, 1])) + + +@pytest.mark.ci +def test_validate_propensity_scores_extreme_warning(): + """Test extreme values trigger warnings.""" + processor = PropensityScoreProcessor(extreme_threshold=0.05) + with pytest.warns(UserWarning, match="close to zero or one"): + processor.adjust(np.array([0.01, 0.99]), np.array([0, 1])) + + 
+@pytest.mark.ci +def test_validate_treatment_type_error(): + """Test that non-numpy array treatment raises TypeError.""" + processor = PropensityScoreProcessor() + with pytest.raises(TypeError, match="Treatment assignments must be of type np.ndarray"): + processor.adjust(np.array([0.2, 0.8]), [0, 1]) + + +@pytest.mark.ci +def test_validate_treatment_dimension_error(): + """Test that non-1D treatment raises ValueError.""" + processor = PropensityScoreProcessor() + with pytest.raises(ValueError, match="must be 1-dimensional"): + processor.adjust(np.array([0.2, 0.8]), np.array([[0, 1]])) + + +@pytest.mark.ci +def test_validate_treatment_binary_error(): + """Test that non-binary treatment values raise ValueError.""" + processor = PropensityScoreProcessor() + with pytest.raises(ValueError, match="must be binary"): + processor.adjust(np.array([0.2, 0.8]), np.array([0, 2])) diff --git a/doubleml/utils/tests/test_ps_processor_representations.py b/doubleml/utils/tests/test_ps_processor_representations.py index 215258ca..e58d67bd 100644 --- a/doubleml/utils/tests/test_ps_processor_representations.py +++ b/doubleml/utils/tests/test_ps_processor_representations.py @@ -33,3 +33,10 @@ def test_eq_different_config(): processor1 = PropensityScoreProcessor(clipping_threshold=0.05) processor2 = PropensityScoreProcessor(clipping_threshold=0.1) assert processor1 != processor2 + + +@pytest.mark.ci +def test_eq_different_type(): + """Test inequality with different object type.""" + processor = PropensityScoreProcessor() + assert processor != "NotAPropensityScoreProcessor" From 478a04ed0f9bdddd6bde60b6bd7eab9518efc190 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 22 Oct 2025 13:49:25 +0200 Subject: [PATCH 06/38] include calibration via isotonic regression --- doubleml/utils/propensity_score_processing.py | 75 ++++++++++++++++++- .../tests/test_ps_processor_exceptions.py | 41 +++++++++- .../test_ps_processor_representations.py | 10 ++- 3 files changed, 120 insertions(+), 6 
deletions(-) diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index 5c1d0e05..35ef3043 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -2,12 +2,14 @@ from typing import Any, Dict, Optional import numpy as np +from sklearn.isotonic import IsotonicRegression +from sklearn.model_selection import cross_val_predict from sklearn.utils.multiclass import type_of_target class PropensityScoreProcessor: """ - Processor for propensity score validation, clipping, and warnings. + Processor for propensity score calibration, clipping, and validation. Parameters ---------- @@ -35,8 +37,12 @@ class PropensityScoreProcessor: _DEFAULT_CONFIG: Dict[str, Any] = { "clipping_threshold": 1e-2, "extreme_threshold": 1e-12, + "calibration_method": None, + "cv_calibration": False, } + _VALID_CALIBRATION_METHODS = {None, "isotonic"} + def __init__(self, **config: Any) -> None: unknown_params = set(config.keys()) - set(self._DEFAULT_CONFIG.keys()) @@ -62,6 +68,17 @@ def _validate_config(self, config: Dict[str, Any]) -> None: if not (0 < config["extreme_threshold"] < 0.5): raise ValueError("extreme_threshold must be between 0 and 0.5.") + calibration_method = config["calibration_method"] + if calibration_method not in self._VALID_CALIBRATION_METHODS: + raise ValueError( + f"calibration_method must be one of {self._VALID_CALIBRATION_METHODS}. " f"Got {calibration_method}." 
+ ) + + if not isinstance(config["cv_calibration"], bool): + raise TypeError("cv_calibration must be of bool type.") + if config["cv_calibration"] and config["calibration_method"] is None: + raise ValueError("cv_calibration can only be used with a calibration_method.") + @property def clipping_threshold(self) -> float: """Get the clipping threshold.""" @@ -72,6 +89,16 @@ def extreme_threshold(self) -> float: """Get the extreme threshold.""" return self._config["extreme_threshold"] + @property + def calibration_method(self) -> Optional[str]: + """Get the calibration method.""" + return self._config["calibration_method"] + + @property + def cv_calibration(self) -> bool: + """Get whether cross-validation calibration is used.""" + return self._config["cv_calibration"] + @classmethod def get_default_config(cls) -> Dict[str, Any]: """Return the default configuration dictionary.""" @@ -100,7 +127,13 @@ def update_config(self, **new_config: Any) -> None: # ------------------------------------------------------------------------- # Core functionality # ------------------------------------------------------------------------- - def adjust(self, propensity_scores: np.ndarray, treatment: np.ndarray, learner_name: Optional[str] = None) -> np.ndarray: + def adjust( + self, + propensity_scores: np.ndarray, + treatment: np.ndarray, + cv: Optional[int | list] = None, + learner_name: Optional[str] = None, + ) -> np.ndarray: """ Adjust propensity scores via validation, clipping, and warnings. @@ -110,6 +143,8 @@ def adjust(self, propensity_scores: np.ndarray, treatment: np.ndarray, learner_n Raw propensity score predictions. treatment : np.ndarray Treatment assignments (1 for treated, 0 for control). + cv : int or list, optional + Cross-validation strategy for calibration. Used only if calibration is applied. learner_name : str, optional Name of the learner providing the propensity scores, used in warnings. 
@@ -123,13 +158,47 @@ def adjust(self, propensity_scores: np.ndarray, treatment: np.ndarray, learner_n learner_name, ) self._validate_treatment(treatment) - clipped_scores = np.clip(propensity_scores, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold) + + if self.cv_calibration: + cv = cv + else: + cv = None + calibrated_ps = self._apply_calibration(propensity_scores, treatment, cv=cv) + clipped_scores = np.clip(calibrated_ps, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold) return clipped_scores # ------------------------------------------------------------------------- # Private helper methods # ------------------------------------------------------------------------- + def _apply_calibration( + self, + propensity_scores: np.ndarray, + treatment: np.ndarray, + cv: Optional[int | list] = None, + ) -> np.ndarray: + """Apply calibration method to propensity scores if specified.""" + if self.calibration_method is None: + calibrated_ps = propensity_scores + elif self.calibration_method == "isotonic": + calibration_model = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0) + + if cv is None: + calibration_model.fit(propensity_scores.reshape(-1, 1), treatment) + calibrated_ps = calibration_model.predict(propensity_scores.reshape(-1, 1)) + else: + calibrated_ps = cross_val_predict( + estimator=calibration_model, X=propensity_scores.reshape(-1, 1), y=treatment, cv=cv, method="predict" + ) + + else: + # This point should never be reached due to prior validation + raise ValueError( + f"Unsupported calibration method: {self.calibration_method}. 
" + f"Valid methods are: {self._VALID_CALIBRATION_METHODS}" + ) + + return calibrated_ps def _validate_propensity_scores( self, diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py b/doubleml/utils/tests/test_ps_processor_exceptions.py index 479d1cb7..f07cd787 100644 --- a/doubleml/utils/tests/test_ps_processor_exceptions.py +++ b/doubleml/utils/tests/test_ps_processor_exceptions.py @@ -42,6 +42,27 @@ def test_init_extreme_threshold_value_error(): PropensityScoreProcessor(extreme_threshold=0.6) # above 0.5 +@pytest.mark.ci +def test_init_calibration_method_value_error(): + """Test that invalid calibration_method raises ValueError.""" + with pytest.raises(ValueError, match="calibration_method must be one of"): + PropensityScoreProcessor(calibration_method="invalid_method") + + +@pytest.mark.ci +def test_init_cv_calibration_type_error(): + """Test that non-bool cv_calibration raises TypeError.""" + with pytest.raises(TypeError, match="cv_calibration must be of bool type."): + PropensityScoreProcessor(cv_calibration="True") + + +@pytest.mark.ci +def test_init_cv_calibration_value_error(): + """Test that cv_calibration True with None calibration_method raises ValueError.""" + with pytest.raises(ValueError, match="cv_calibration can only be used with a calibration_method."): + PropensityScoreProcessor(calibration_method=None, cv_calibration=True) + + # ------------------------------------------------------------------------- # Tests for update_config method # ------------------------------------------------------------------------- @@ -95,7 +116,7 @@ def test_update_config_defaults(): # ------------------------------------------------------------------------- -# Tests for propensity score validation +# Tests for propensity score & treatment validation # ------------------------------------------------------------------------- @@ -145,3 +166,21 @@ def test_validate_treatment_binary_error(): processor = PropensityScoreProcessor() with 
pytest.raises(ValueError, match="must be binary"): processor.adjust(np.array([0.2, 0.8]), np.array([0, 2])) + + +# ------------------------------------------------------------------------- +# Other exception tests +# ------------------------------------------------------------------------- + + +@pytest.mark.ci +def test_apply_calibration_unsupported_method_error(): + """Test that unsupported calibration method raises ValueError.""" + processor = PropensityScoreProcessor() + processor._config["calibration_method"] = "unsupported_method" + + propensity_scores = np.array([0.2, 0.8]) + treatment = np.array([0, 1]) + + with pytest.raises(ValueError, match="Unsupported calibration method: unsupported_method"): + processor._apply_calibration(propensity_scores, treatment) diff --git a/doubleml/utils/tests/test_ps_processor_representations.py b/doubleml/utils/tests/test_ps_processor_representations.py index e58d67bd..365f9ea4 100644 --- a/doubleml/utils/tests/test_ps_processor_representations.py +++ b/doubleml/utils/tests/test_ps_processor_representations.py @@ -7,7 +7,10 @@ def test_repr_default_config(): """Test __repr__ with default configuration.""" processor = PropensityScoreProcessor() - expected = "PropensityScoreProcessor(clipping_threshold=0.01, extreme_threshold=1e-12)" + expected = ( + "PropensityScoreProcessor(calibration_method=None, clipping_threshold=0.01, " + "cv_calibration=False, extreme_threshold=1e-12)" + ) assert repr(processor) == expected @@ -15,7 +18,10 @@ def test_repr_default_config(): def test_repr_custom_config(): """Test __repr__ with custom configuration.""" processor = PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=1e-6) - expected = "PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=1e-06)" + expected = ( + "PropensityScoreProcessor(calibration_method=None, clipping_threshold=0.05, " + "cv_calibration=False, extreme_threshold=1e-06)" + ) assert repr(processor) == expected From 
61a529b1e0d3cedccd12c44be4c273398d157311 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 22 Oct 2025 14:17:57 +0200 Subject: [PATCH 07/38] add cv to ps calibration --- doubleml/utils/tests/test_ps_processor.py | 73 +++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py index d16e266a..7b2c0d81 100644 --- a/doubleml/utils/tests/test_ps_processor.py +++ b/doubleml/utils/tests/test_ps_processor.py @@ -1,5 +1,9 @@ +from unittest.mock import patch + import numpy as np import pytest +from sklearn.isotonic import IsotonicRegression +from sklearn.model_selection import KFold, cross_val_predict from doubleml.utils.propensity_score_processing import PropensityScoreProcessor @@ -27,3 +31,72 @@ def test_adjust_no_clipping_needed(): adjusted = processor.adjust(scores, treatment) np.testing.assert_array_equal(adjusted, scores) + + +@pytest.mark.ci +def test_isotonic_calibration_without_cv(): + """Test isotonic calibration without cross-validation.""" + ps = np.random.uniform(0, 1, size=100) + treatment = np.random.binomial(1, 0.5, size=100) + + clipping_threshold = 0.01 + processor = PropensityScoreProcessor( + calibration_method="isotonic", + cv_calibration=False, + clipping_threshold=clipping_threshold, + ) + + isotonic_manual = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0) + isotonic_manual.fit(ps.reshape(-1, 1), treatment) + expected_ps_manual = isotonic_manual.predict(ps.reshape(-1, 1)) + expected_ps_manual = np.clip(expected_ps_manual, clipping_threshold, 1 - clipping_threshold) + + adjusted_ps = processor.adjust(ps, treatment) + np.testing.assert_array_equal(adjusted_ps, expected_ps_manual) + + +@pytest.fixture(scope="module", params=[3, "iterable", "splitter"]) +def cv(request): + return request.param + + +@pytest.mark.ci +def test_isotonic_calibration_with_cv(cv): + """Test isotonic calibration with cross-validation.""" + n_obs = 100 + ps = 
np.random.uniform(0, 1, size=n_obs) + treatment = np.random.binomial(1, 0.5, size=n_obs) + if cv == "iterable": + cv = [(train, test) for train, test in KFold(n_splits=3).split(ps)] + elif cv == "splitter": + cv = KFold(n_splits=3) + else: + cv = cv + + clipping_threshold = 0.01 + processor = PropensityScoreProcessor( + calibration_method="isotonic", cv_calibration=True, clipping_threshold=clipping_threshold + ) + + isotonic_manual = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0) + ps_cv = cross_val_predict(isotonic_manual, ps.reshape(-1, 1), treatment, cv=cv) + expected_ps_manual = np.clip(ps_cv, clipping_threshold, 1 - clipping_threshold) + + adjusted_ps = processor.adjust(ps, treatment, cv=cv) + np.testing.assert_array_equal(adjusted_ps, expected_ps_manual) + + +@pytest.mark.ci +def test_no_calibration(): + """Test that no calibration is applied when calibration_method is None.""" + processor = PropensityScoreProcessor(calibration_method=None, clipping_threshold=0.01) + + scores = np.array([0.2, 0.3, 0.7, 0.8]) + treatment = np.array([0, 1, 1, 0]) + + # Should not call any calibration methods + with patch("sklearn.isotonic.IsotonicRegression") as mock_isotonic: + adjusted = processor.adjust(scores, treatment) + mock_isotonic.assert_not_called() + + np.testing.assert_array_equal(adjusted, scores) From cd3605bc15b3baed5c1f8f8da90f76c7260a5efb Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 22 Oct 2025 15:05:43 +0200 Subject: [PATCH 08/38] remove print from rdd example in docstring as test fails --- doubleml/rdd/rdd.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py index 0d97ed0a..fffa9a0a 100644 --- a/doubleml/rdd/rdd.py +++ b/doubleml/rdd/rdd.py @@ -91,16 +91,6 @@ class RDFlex: >>> ml_g = RandomForestRegressor() >>> ml_m = RandomForestClassifier() >>> rdflex_obj = dml.rdd.RDFlex(obj_dml_data, ml_g, ml_m, fuzzy=True) - >>> print(rdflex_obj.fit()) - Method Coef. S.E. 
t-stat P>|t| 95% CI - ------------------------------------------------------------------------- - Conventional 0.950 0.225 4.230 2.333e-05 [0.510, 1.391] - Robust - - 3.653 2.589e-04 [0.431, 1.429] - Design Type: Fuzzy - Cutoff: 0 - First Stage Kernel: triangular - Final Bandwidth: [0.74746872] - """ def __init__( From dce29b4fc91e96b188666fab8a7fdd52b6c681cc Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 22 Oct 2025 17:56:56 +0200 Subject: [PATCH 09/38] change to direct arguments for propensity score processer --- doubleml/utils/propensity_score_processing.py | 162 ++++++------------ doubleml/utils/tests/test_ps_processor.py | 10 +- .../tests/test_ps_processor_exceptions.py | 77 +-------- .../test_ps_processor_representations.py | 48 ------ 4 files changed, 71 insertions(+), 226 deletions(-) delete mode 100644 doubleml/utils/tests/test_ps_processor_representations.py diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index 35ef3043..3c65d23a 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -1,5 +1,5 @@ import warnings -from typing import Any, Dict, Optional +from typing import Optional import numpy as np from sklearn.isotonic import IsotonicRegression @@ -14,120 +14,71 @@ class PropensityScoreProcessor: Parameters ---------- clipping_threshold : float, default=1e-2 - Threshold used for clipping propensity scores. - warn_extreme_values : bool, default=True - Whether to warn about extreme propensity score values. - extreme_threshold : float, default=0.05 - Threshold for extreme value warnings. - warning_proportion : float, default=0.1 - Proportion threshold for triggering extreme value warnings. + Minimum and maximum bound for propensity scores after clipping. + + extreme_threshold : float, default=1e-12 + Threshold below which propensity scores are considered extreme. + Used for generating warnings. 
+ + calibration_method : {'isotonic', None}, optional + If provided, applies the specified calibration method to + the propensity scores before clipping. + + cv_calibration : bool, default=False + Whether to use cross-validation for calibration. + Only applies if a calibration method is specified. Examples -------- >>> import numpy as np - >>> from doubleml.utils import PropensityScoreProcessor - >>> ps_scores = np.array([0.001, 0.2, 0.5, 0.8, 0.999]) + >>> ps = np.array([0.001, 0.2, 0.5, 0.8, 0.999]) >>> treatment = np.array([0, 1, 1, 0, 1]) >>> processor = PropensityScoreProcessor(clipping_threshold=0.01) - >>> adj_scores = processor.adjust(ps_scores, treatment) - >>> print(adj_scores) + >>> adjusted = processor.adjust_ps(ps, treatment) + >>> print(np.round(adjusted, 3)) [0.01 0.2 0.5 0.8 0.99] """ - _DEFAULT_CONFIG: Dict[str, Any] = { - "clipping_threshold": 1e-2, - "extreme_threshold": 1e-12, - "calibration_method": None, - "cv_calibration": False, - } - _VALID_CALIBRATION_METHODS = {None, "isotonic"} - def __init__(self, **config: Any) -> None: - - unknown_params = set(config.keys()) - set(self._DEFAULT_CONFIG.keys()) - if unknown_params: - raise ValueError(f"Unknown parameters: {unknown_params}") - - updated_config = {**self._DEFAULT_CONFIG, **config} - self._validate_config(updated_config) - self._config = updated_config - - # ------------------------------------------------------------------------- - # Configuration methods - # ------------------------------------------------------------------------- - def _validate_config(self, config: Dict[str, Any]) -> None: - """Validate configuration parameters.""" - - clipping_threshold = config["clipping_threshold"] - if not isinstance(clipping_threshold, float): - raise TypeError("clipping_threshold must be of float type. " f"Object of type {type(clipping_threshold)} passed.") - if (clipping_threshold <= 0) or (clipping_threshold >= 0.5): - raise ValueError(f"clipping_threshold must be between 0 and 0.5. 
" f"{clipping_threshold} was passed.") - - if not (0 < config["extreme_threshold"] < 0.5): - raise ValueError("extreme_threshold must be between 0 and 0.5.") - - calibration_method = config["calibration_method"] - if calibration_method not in self._VALID_CALIBRATION_METHODS: - raise ValueError( - f"calibration_method must be one of {self._VALID_CALIBRATION_METHODS}. " f"Got {calibration_method}." - ) - - if not isinstance(config["cv_calibration"], bool): - raise TypeError("cv_calibration must be of bool type.") - if config["cv_calibration"] and config["calibration_method"] is None: - raise ValueError("cv_calibration can only be used with a calibration_method.") + def __init__( + self, + clipping_threshold: float = 1e-2, + extreme_threshold: float = 1e-12, + calibration_method: Optional[str] = None, + cv_calibration: bool = False, + ): + self._clipping_threshold = clipping_threshold + self._extreme_threshold = extreme_threshold + self._calibration_method = calibration_method + self._cv_calibration = cv_calibration + + self._validate_config() @property def clipping_threshold(self) -> float: """Get the clipping threshold.""" - return self._config["clipping_threshold"] + return self._clipping_threshold @property def extreme_threshold(self) -> float: """Get the extreme threshold.""" - return self._config["extreme_threshold"] + return self._extreme_threshold @property def calibration_method(self) -> Optional[str]: """Get the calibration method.""" - return self._config["calibration_method"] + return self._calibration_method @property def cv_calibration(self) -> bool: """Get whether cross-validation calibration is used.""" - return self._config["cv_calibration"] - - @classmethod - def get_default_config(cls) -> Dict[str, Any]: - """Return the default configuration dictionary.""" - return cls._DEFAULT_CONFIG.copy() - - def get_config(self) -> Dict[str, Any]: - """Return a copy of the current configuration dictionary.""" - return self._config.copy() - - def 
update_config(self, **new_config: Any) -> None: - """ - Update configuration parameters. - - Validates the new configuration before applying changes to ensure - the object remains in a consistent state. - """ - - unknown_params = set(new_config.keys()) - set(self._DEFAULT_CONFIG.keys()) - if unknown_params: - raise ValueError(f"Unknown parameters: {unknown_params}") - - updated_config = {**self._config, **new_config} - self._validate_config(updated_config) - self._config = updated_config + return self._cv_calibration # ------------------------------------------------------------------------- # Core functionality # ------------------------------------------------------------------------- - def adjust( + def adjust_ps( self, propensity_scores: np.ndarray, treatment: np.ndarray, @@ -159,10 +110,6 @@ def adjust( ) self._validate_treatment(treatment) - if self.cv_calibration: - cv = cv - else: - cv = None calibrated_ps = self._apply_calibration(propensity_scores, treatment, cv=cv) clipped_scores = np.clip(calibrated_ps, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold) @@ -183,14 +130,13 @@ def _apply_calibration( elif self.calibration_method == "isotonic": calibration_model = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0) - if cv is None: - calibration_model.fit(propensity_scores.reshape(-1, 1), treatment) - calibrated_ps = calibration_model.predict(propensity_scores.reshape(-1, 1)) - else: + if self.cv_calibration and cv is not None: calibrated_ps = cross_val_predict( estimator=calibration_model, X=propensity_scores.reshape(-1, 1), y=treatment, cv=cv, method="predict" ) - + else: + calibration_model.fit(propensity_scores.reshape(-1, 1), treatment) + calibrated_ps = calibration_model.predict(propensity_scores.reshape(-1, 1)) else: # This point should never be reached due to prior validation raise ValueError( @@ -200,6 +146,24 @@ def _apply_calibration( return calibrated_ps + def _validate_config(self) -> None: + """Validate configuration 
parameters.""" + if not isinstance(self.clipping_threshold, float): + raise TypeError("clipping_threshold must be a float.") + if not (0 < self.clipping_threshold < 0.5): + raise ValueError("clipping_threshold must be between 0 and 0.5.") + + if not (0 < self.extreme_threshold < 0.5): + raise ValueError("extreme_threshold must be between 0 and 0.5.") + + if self.calibration_method not in self._VALID_CALIBRATION_METHODS: + raise ValueError(f"calibration_method must be one of {self._VALID_CALIBRATION_METHODS}.") + + if not isinstance(self.cv_calibration, bool): + raise TypeError("cv_calibration must be of bool type.") + if self.cv_calibration and self.calibration_method is None: + raise ValueError("cv_calibration=True requires a calibration_method.") + def _validate_propensity_scores( self, preds: np.ndarray, @@ -232,15 +196,3 @@ def _validate_treatment(self, treatment: np.ndarray) -> None: zero_one_treat = np.all((np.power(treatment, 2) - treatment) == 0) if not (binary_treat and zero_one_treat): raise ValueError("Treatment vector must be binary (0 and 1).") - - # ------------------------------------------------------------------------- - # Representations - # ------------------------------------------------------------------------- - def __repr__(self) -> str: - config_str = ", ".join([f"{k}={v}" for k, v in sorted(self._config.items())]) - return f"{self.__class__.__name__}({config_str})" - - def __eq__(self, other: object) -> bool: - if not isinstance(other, PropensityScoreProcessor): - return False - return self._config == other._config diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py index 7b2c0d81..d4f8bdcb 100644 --- a/doubleml/utils/tests/test_ps_processor.py +++ b/doubleml/utils/tests/test_ps_processor.py @@ -15,7 +15,7 @@ def test_adjust_basic_clipping(): scores = np.array([0.05, 0.2, 0.8, 0.95]) treatment = np.array([0, 1, 1, 0]) - adjusted = processor.adjust(scores, treatment) + adjusted = 
processor.adjust_ps(scores, treatment) expected = np.array([0.1, 0.2, 0.8, 0.9]) np.testing.assert_array_equal(adjusted, expected) @@ -28,7 +28,7 @@ def test_adjust_no_clipping_needed(): scores = np.array([0.2, 0.3, 0.7, 0.8]) treatment = np.array([0, 1, 1, 0]) - adjusted = processor.adjust(scores, treatment) + adjusted = processor.adjust_ps(scores, treatment) np.testing.assert_array_equal(adjusted, scores) @@ -51,7 +51,7 @@ def test_isotonic_calibration_without_cv(): expected_ps_manual = isotonic_manual.predict(ps.reshape(-1, 1)) expected_ps_manual = np.clip(expected_ps_manual, clipping_threshold, 1 - clipping_threshold) - adjusted_ps = processor.adjust(ps, treatment) + adjusted_ps = processor.adjust_ps(ps, treatment) np.testing.assert_array_equal(adjusted_ps, expected_ps_manual) @@ -82,7 +82,7 @@ def test_isotonic_calibration_with_cv(cv): ps_cv = cross_val_predict(isotonic_manual, ps.reshape(-1, 1), treatment, cv=cv) expected_ps_manual = np.clip(ps_cv, clipping_threshold, 1 - clipping_threshold) - adjusted_ps = processor.adjust(ps, treatment, cv=cv) + adjusted_ps = processor.adjust_ps(ps, treatment, cv=cv) np.testing.assert_array_equal(adjusted_ps, expected_ps_manual) @@ -96,7 +96,7 @@ def test_no_calibration(): # Should not call any calibration methods with patch("sklearn.isotonic.IsotonicRegression") as mock_isotonic: - adjusted = processor.adjust(scores, treatment) + adjusted = processor.adjust_ps(scores, treatment) mock_isotonic.assert_not_called() np.testing.assert_array_equal(adjusted, scores) diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py b/doubleml/utils/tests/test_ps_processor_exceptions.py index f07cd787..eb6d2b7b 100644 --- a/doubleml/utils/tests/test_ps_processor_exceptions.py +++ b/doubleml/utils/tests/test_ps_processor_exceptions.py @@ -8,17 +8,10 @@ # ------------------------------------------------------------------------- -@pytest.mark.ci -def test_init_unknown_parameter(): - """Test that unknown parameters raise ValueError 
during initialization.""" - with pytest.raises(ValueError, match="Unknown parameters: {'invalid_param'}"): - PropensityScoreProcessor(invalid_param=0.5) - - @pytest.mark.ci def test_init_clipping_threshold_type_error(): """Test that non-float clipping_threshold raises TypeError.""" - with pytest.raises(TypeError, match="clipping_threshold must be of float type"): + with pytest.raises(TypeError, match="clipping_threshold must be a float."): PropensityScoreProcessor(clipping_threshold="0.01") @@ -59,62 +52,10 @@ def test_init_cv_calibration_type_error(): @pytest.mark.ci def test_init_cv_calibration_value_error(): """Test that cv_calibration True with None calibration_method raises ValueError.""" - with pytest.raises(ValueError, match="cv_calibration can only be used with a calibration_method."): + with pytest.raises(ValueError, match="cv_calibration=True requires a calibration_method."): PropensityScoreProcessor(calibration_method=None, cv_calibration=True) -# ------------------------------------------------------------------------- -# Tests for update_config method -# ------------------------------------------------------------------------- - - -@pytest.mark.ci -def test_update_config_unknown_parameter(): - """Test that unknown parameters raise ValueError during config update.""" - processor = PropensityScoreProcessor() - - with pytest.raises(ValueError, match="Unknown parameters: {'invalid_param'}"): - processor.update_config(invalid_param=0.5) - - -@pytest.mark.ci -def test_update_config_preserves_state_on_failure(): - """Test that failed config updates don't change the processor state.""" - processor = PropensityScoreProcessor(clipping_threshold=0.1) - original_config = processor.get_config() - - # Try to update with invalid value - with pytest.raises(ValueError): - processor.update_config(clipping_threshold=0.6) - - # Verify state hasn't changed - assert processor.get_config() == original_config - assert processor.clipping_threshold == 0.1 - - -@pytest.mark.ci 
-def test_update_config_successful_update(): - """Test successful configuration updates.""" - processor = PropensityScoreProcessor(clipping_threshold=0.1) - - processor.update_config(clipping_threshold=0.05) - assert processor.clipping_threshold == 0.05 - - -@pytest.mark.ci -def test_update_config_defaults(): - """Test updating configuration back to defaults.""" - processor = PropensityScoreProcessor(clipping_threshold=0.1) - - processor.update_config(clipping_threshold=0.01) - assert processor.clipping_threshold == 0.01 - - # Update back to default - default_config = PropensityScoreProcessor.get_default_config() - processor.update_config(**default_config) - assert processor.clipping_threshold == default_config["clipping_threshold"] - - # ------------------------------------------------------------------------- # Tests for propensity score & treatment validation # ------------------------------------------------------------------------- @@ -125,7 +66,7 @@ def test_validate_propensity_scores_type_error_with_learner(): """Test TypeError includes learner name.""" processor = PropensityScoreProcessor() with pytest.raises(TypeError, match="from learner test_learner"): - processor.adjust([0.1, 0.2], np.array([0, 1]), learner_name="test_learner") + processor.adjust_ps([0.1, 0.2], np.array([0, 1]), learner_name="test_learner") @pytest.mark.ci @@ -133,7 +74,7 @@ def test_validate_propensity_scores_dimension_error(): """Test that non-1D propensity scores raise ValueError.""" processor = PropensityScoreProcessor() with pytest.raises(ValueError, match="must be 1-dimensional"): - processor.adjust(np.array([[0.1, 0.2]]), np.array([0, 1])) + processor.adjust_ps(np.array([[0.1, 0.2]]), np.array([0, 1])) @pytest.mark.ci @@ -141,7 +82,7 @@ def test_validate_propensity_scores_extreme_warning(): """Test extreme values trigger warnings.""" processor = PropensityScoreProcessor(extreme_threshold=0.05) with pytest.warns(UserWarning, match="close to zero or one"): - 
processor.adjust(np.array([0.01, 0.99]), np.array([0, 1])) + processor.adjust_ps(np.array([0.01, 0.99]), np.array([0, 1])) @pytest.mark.ci @@ -149,7 +90,7 @@ def test_validate_treatment_type_error(): """Test that non-numpy array treatment raises TypeError.""" processor = PropensityScoreProcessor() with pytest.raises(TypeError, match="Treatment assignments must be of type np.ndarray"): - processor.adjust(np.array([0.2, 0.8]), [0, 1]) + processor.adjust_ps(np.array([0.2, 0.8]), [0, 1]) @pytest.mark.ci @@ -157,7 +98,7 @@ def test_validate_treatment_dimension_error(): """Test that non-1D treatment raises ValueError.""" processor = PropensityScoreProcessor() with pytest.raises(ValueError, match="must be 1-dimensional"): - processor.adjust(np.array([0.2, 0.8]), np.array([[0, 1]])) + processor.adjust_ps(np.array([0.2, 0.8]), np.array([[0, 1]])) @pytest.mark.ci @@ -165,7 +106,7 @@ def test_validate_treatment_binary_error(): """Test that non-binary treatment values raise ValueError.""" processor = PropensityScoreProcessor() with pytest.raises(ValueError, match="must be binary"): - processor.adjust(np.array([0.2, 0.8]), np.array([0, 2])) + processor.adjust_ps(np.array([0.2, 0.8]), np.array([0, 2])) # ------------------------------------------------------------------------- @@ -177,7 +118,7 @@ def test_validate_treatment_binary_error(): def test_apply_calibration_unsupported_method_error(): """Test that unsupported calibration method raises ValueError.""" processor = PropensityScoreProcessor() - processor._config["calibration_method"] = "unsupported_method" + processor._calibration_method = "unsupported_method" propensity_scores = np.array([0.2, 0.8]) treatment = np.array([0, 1]) diff --git a/doubleml/utils/tests/test_ps_processor_representations.py b/doubleml/utils/tests/test_ps_processor_representations.py deleted file mode 100644 index 365f9ea4..00000000 --- a/doubleml/utils/tests/test_ps_processor_representations.py +++ /dev/null @@ -1,48 +0,0 @@ -import pytest - -from 
doubleml.utils import PropensityScoreProcessor - - -@pytest.mark.ci -def test_repr_default_config(): - """Test __repr__ with default configuration.""" - processor = PropensityScoreProcessor() - expected = ( - "PropensityScoreProcessor(calibration_method=None, clipping_threshold=0.01, " - "cv_calibration=False, extreme_threshold=1e-12)" - ) - assert repr(processor) == expected - - -@pytest.mark.ci -def test_repr_custom_config(): - """Test __repr__ with custom configuration.""" - processor = PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=1e-6) - expected = ( - "PropensityScoreProcessor(calibration_method=None, clipping_threshold=0.05, " - "cv_calibration=False, extreme_threshold=1e-06)" - ) - assert repr(processor) == expected - - -@pytest.mark.ci -def test_eq_same_config(): - """Test equality with same configuration.""" - processor1 = PropensityScoreProcessor(clipping_threshold=0.05) - processor2 = PropensityScoreProcessor(clipping_threshold=0.05) - assert processor1 == processor2 - - -@pytest.mark.ci -def test_eq_different_config(): - """Test inequality with different configuration.""" - processor1 = PropensityScoreProcessor(clipping_threshold=0.05) - processor2 = PropensityScoreProcessor(clipping_threshold=0.1) - assert processor1 != processor2 - - -@pytest.mark.ci -def test_eq_different_type(): - """Test inequality with different object type.""" - processor = PropensityScoreProcessor() - assert processor != "NotAPropensityScoreProcessor" From 08a00737b1f6af8b52c4e1353c07525ba28b2ece Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 23 Oct 2025 09:49:22 +0200 Subject: [PATCH 10/38] add data class with config for psprocessor --- doubleml/utils/__init__.py | 5 +-- doubleml/utils/propensity_score_processing.py | 26 +++++++++++++-- doubleml/utils/tests/test_ps_processor.py | 30 +++++++++++++---- .../tests/test_ps_processor_exceptions.py | 32 +++++++++---------- 4 files changed, 66 
insertions(+), 27 deletions(-) diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py index ce1ba3c6..4f6269dd 100644 --- a/doubleml/utils/__init__.py +++ b/doubleml/utils/__init__.py @@ -7,7 +7,7 @@ from .gain_statistics import gain_statistics from .global_learner import GlobalClassifier, GlobalRegressor from .policytree import DoubleMLPolicyTree -from .propensity_score_processing import PropensityScoreProcessor +from .propensity_score_processing import PSProcessor, PSProcessorConfig from .resampling import DoubleMLClusterResampling, DoubleMLResampling __all__ = [ @@ -20,5 +20,6 @@ "gain_statistics", "GlobalClassifier", "GlobalRegressor", - "PropensityScoreProcessor", + "PSProcessor", + "PSProcessorConfig", ] diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index 3c65d23a..9a42a1b3 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -1,4 +1,5 @@ import warnings +from dataclasses import dataclass from typing import Optional import numpy as np @@ -7,7 +8,15 @@ from sklearn.utils.multiclass import type_of_target -class PropensityScoreProcessor: +@dataclass +class PSProcessorConfig: + clipping_threshold: float = 1e-2 + extreme_threshold: float = 1e-12 + calibration_method: Optional[str] = None + cv_calibration: bool = False + + +class PSProcessor: """ Processor for propensity score calibration, clipping, and validation. 
@@ -55,6 +64,19 @@ def __init__( self._validate_config() + @classmethod + def from_config(cls, config: PSProcessorConfig): + return cls( + clipping_threshold=config.clipping_threshold, + extreme_threshold=config.extreme_threshold, + calibration_method=config.calibration_method, + cv_calibration=config.cv_calibration, + ) + + # ------------------------------------------------------------------------- + # Properties + # ------------------------------------------------------------------------- + @property def clipping_threshold(self) -> float: """Get the clipping threshold.""" @@ -130,7 +152,7 @@ def _apply_calibration( elif self.calibration_method == "isotonic": calibration_model = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0) - if self.cv_calibration and cv is not None: + if self.cv_calibration: calibrated_ps = cross_val_predict( estimator=calibration_model, X=propensity_scores.reshape(-1, 1), y=treatment, cv=cv, method="predict" ) diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py index d4f8bdcb..4251e0b8 100644 --- a/doubleml/utils/tests/test_ps_processor.py +++ b/doubleml/utils/tests/test_ps_processor.py @@ -5,13 +5,29 @@ from sklearn.isotonic import IsotonicRegression from sklearn.model_selection import KFold, cross_val_predict -from doubleml.utils.propensity_score_processing import PropensityScoreProcessor +from doubleml.utils.propensity_score_processing import PSProcessor, PSProcessorConfig + + +@pytest.mark.ci +def test_from_config_initialization(): + """Test initialization of PSProcessor from PSProcessorConfig.""" + config = PSProcessorConfig( + clipping_threshold=0.05, + extreme_threshold=1e-8, + calibration_method="isotonic", + cv_calibration=True, + ) + processor = PSProcessor.from_config(config) + assert processor.clipping_threshold == 0.05 + assert processor.extreme_threshold == 1e-8 + assert processor.calibration_method == "isotonic" + assert processor.cv_calibration is True 
@pytest.mark.ci def test_adjust_basic_clipping(): """Test basic clipping functionality.""" - processor = PropensityScoreProcessor(clipping_threshold=0.1) + processor = PSProcessor(clipping_threshold=0.1) scores = np.array([0.05, 0.2, 0.8, 0.95]) treatment = np.array([0, 1, 1, 0]) @@ -24,7 +40,7 @@ def test_adjust_basic_clipping(): @pytest.mark.ci def test_adjust_no_clipping_needed(): """Test when no clipping is needed.""" - processor = PropensityScoreProcessor(clipping_threshold=0.01) + processor = PSProcessor(clipping_threshold=0.01) scores = np.array([0.2, 0.3, 0.7, 0.8]) treatment = np.array([0, 1, 1, 0]) @@ -40,7 +56,7 @@ def test_isotonic_calibration_without_cv(): treatment = np.random.binomial(1, 0.5, size=100) clipping_threshold = 0.01 - processor = PropensityScoreProcessor( + processor = PSProcessor( calibration_method="isotonic", cv_calibration=False, clipping_threshold=clipping_threshold, @@ -55,7 +71,7 @@ def test_isotonic_calibration_without_cv(): np.testing.assert_array_equal(adjusted_ps, expected_ps_manual) -@pytest.fixture(scope="module", params=[3, "iterable", "splitter"]) +@pytest.fixture(scope="module", params=[None, 3, "iterable", "splitter"]) def cv(request): return request.param @@ -74,7 +90,7 @@ def test_isotonic_calibration_with_cv(cv): cv = cv clipping_threshold = 0.01 - processor = PropensityScoreProcessor( + processor = PSProcessor( calibration_method="isotonic", cv_calibration=True, clipping_threshold=clipping_threshold ) @@ -89,7 +105,7 @@ def test_isotonic_calibration_with_cv(cv): @pytest.mark.ci def test_no_calibration(): """Test that no calibration is applied when calibration_method is None.""" - processor = PropensityScoreProcessor(calibration_method=None, clipping_threshold=0.01) + processor = PSProcessor(calibration_method=None, clipping_threshold=0.01) scores = np.array([0.2, 0.3, 0.7, 0.8]) treatment = np.array([0, 1, 1, 0]) diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py 
b/doubleml/utils/tests/test_ps_processor_exceptions.py index eb6d2b7b..0c3971a7 100644 --- a/doubleml/utils/tests/test_ps_processor_exceptions.py +++ b/doubleml/utils/tests/test_ps_processor_exceptions.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from doubleml.utils import PropensityScoreProcessor +from doubleml.utils.propensity_score_processing import PSProcessor # ------------------------------------------------------------------------- # Tests for __init__ method @@ -12,48 +12,48 @@ def test_init_clipping_threshold_type_error(): """Test that non-float clipping_threshold raises TypeError.""" with pytest.raises(TypeError, match="clipping_threshold must be a float."): - PropensityScoreProcessor(clipping_threshold="0.01") + PSProcessor(clipping_threshold="0.01") @pytest.mark.ci def test_init_clipping_threshold_value_error(): """Test that invalid clipping_threshold values raise ValueError.""" with pytest.raises(ValueError, match="clipping_threshold must be between 0 and 0.5"): - PropensityScoreProcessor(clipping_threshold=0.0) # exactly 0 + PSProcessor(clipping_threshold=0.0) # exactly 0 with pytest.raises(ValueError, match="clipping_threshold must be between 0 and 0.5"): - PropensityScoreProcessor(clipping_threshold=0.6) # above 0.5 + PSProcessor(clipping_threshold=0.6) # above 0.5 @pytest.mark.ci def test_init_extreme_threshold_value_error(): """Test that invalid extreme_threshold values raise ValueError.""" with pytest.raises(ValueError, match="extreme_threshold must be between 0 and 0.5"): - PropensityScoreProcessor(extreme_threshold=0.0) # exactly 0 + PSProcessor(extreme_threshold=0.0) # exactly 0 with pytest.raises(ValueError, match="extreme_threshold must be between 0 and 0.5"): - PropensityScoreProcessor(extreme_threshold=0.6) # above 0.5 + PSProcessor(extreme_threshold=0.6) # above 0.5 @pytest.mark.ci def test_init_calibration_method_value_error(): """Test that invalid calibration_method raises ValueError.""" with pytest.raises(ValueError, 
match="calibration_method must be one of"): - PropensityScoreProcessor(calibration_method="invalid_method") + PSProcessor(calibration_method="invalid_method") @pytest.mark.ci def test_init_cv_calibration_type_error(): """Test that non-bool cv_calibration raises TypeError.""" with pytest.raises(TypeError, match="cv_calibration must be of bool type."): - PropensityScoreProcessor(cv_calibration="True") + PSProcessor(cv_calibration="True") @pytest.mark.ci def test_init_cv_calibration_value_error(): """Test that cv_calibration True with None calibration_method raises ValueError.""" with pytest.raises(ValueError, match="cv_calibration=True requires a calibration_method."): - PropensityScoreProcessor(calibration_method=None, cv_calibration=True) + PSProcessor(calibration_method=None, cv_calibration=True) # ------------------------------------------------------------------------- @@ -64,7 +64,7 @@ def test_init_cv_calibration_value_error(): @pytest.mark.ci def test_validate_propensity_scores_type_error_with_learner(): """Test TypeError includes learner name.""" - processor = PropensityScoreProcessor() + processor = PSProcessor() with pytest.raises(TypeError, match="from learner test_learner"): processor.adjust_ps([0.1, 0.2], np.array([0, 1]), learner_name="test_learner") @@ -72,7 +72,7 @@ def test_validate_propensity_scores_type_error_with_learner(): @pytest.mark.ci def test_validate_propensity_scores_dimension_error(): """Test that non-1D propensity scores raise ValueError.""" - processor = PropensityScoreProcessor() + processor = PSProcessor() with pytest.raises(ValueError, match="must be 1-dimensional"): processor.adjust_ps(np.array([[0.1, 0.2]]), np.array([0, 1])) @@ -80,7 +80,7 @@ def test_validate_propensity_scores_dimension_error(): @pytest.mark.ci def test_validate_propensity_scores_extreme_warning(): """Test extreme values trigger warnings.""" - processor = PropensityScoreProcessor(extreme_threshold=0.05) + processor = PSProcessor(extreme_threshold=0.05) with 
pytest.warns(UserWarning, match="close to zero or one"): processor.adjust_ps(np.array([0.01, 0.99]), np.array([0, 1])) @@ -88,7 +88,7 @@ def test_validate_propensity_scores_extreme_warning(): @pytest.mark.ci def test_validate_treatment_type_error(): """Test that non-numpy array treatment raises TypeError.""" - processor = PropensityScoreProcessor() + processor = PSProcessor() with pytest.raises(TypeError, match="Treatment assignments must be of type np.ndarray"): processor.adjust_ps(np.array([0.2, 0.8]), [0, 1]) @@ -96,7 +96,7 @@ def test_validate_treatment_type_error(): @pytest.mark.ci def test_validate_treatment_dimension_error(): """Test that non-1D treatment raises ValueError.""" - processor = PropensityScoreProcessor() + processor = PSProcessor() with pytest.raises(ValueError, match="must be 1-dimensional"): processor.adjust_ps(np.array([0.2, 0.8]), np.array([[0, 1]])) @@ -104,7 +104,7 @@ def test_validate_treatment_dimension_error(): @pytest.mark.ci def test_validate_treatment_binary_error(): """Test that non-binary treatment values raise ValueError.""" - processor = PropensityScoreProcessor() + processor = PSProcessor() with pytest.raises(ValueError, match="must be binary"): processor.adjust_ps(np.array([0.2, 0.8]), np.array([0, 2])) @@ -117,7 +117,7 @@ def test_validate_treatment_binary_error(): @pytest.mark.ci def test_apply_calibration_unsupported_method_error(): """Test that unsupported calibration method raises ValueError.""" - processor = PropensityScoreProcessor() + processor = PSProcessor() processor._calibration_method = "unsupported_method" propensity_scores = np.array([0.2, 0.8]) From 4c5f850dbc727dcf1d77926f655dbaaffd316563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Klaa=C3=9Fen?= <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 23 Oct 2025 15:35:59 +0200 Subject: [PATCH 11/38] add init_ps_processor function --- doubleml/utils/propensity_score_processing.py | 30 +++++++++++++++++++ doubleml/utils/tests/test_ps_processor.py | 25 
+++++++++++++++- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index 9a42a1b3..dedb0268 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -16,6 +16,36 @@ class PSProcessorConfig: cv_calibration: bool = False +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). +def init_ps_processor( + ps_processor_config: Optional[PSProcessorConfig], + trimming_rule: Optional[str], + trimming_threshold: Optional[float] +): + if trimming_rule is not None: + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. " + "Use 'ps_processor_config' with 'clipping_threshold' instead.", + DeprecationWarning, + stacklevel=3 + ) + if trimming_threshold is not None: + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. " + "Use 'ps_processor_config' with 'clipping_threshold' instead.", + DeprecationWarning, + stacklevel=3 + ) + if ps_processor_config is not None: + config = ps_processor_config + else: + config = PSProcessorConfig( + clipping_threshold=trimming_threshold if trimming_threshold is not None else 1e-2 + ) + processor = PSProcessor.from_config(config) + return config, processor + + class PSProcessor: """ Processor for propensity score calibration, clipping, and validation. 
diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py index 4251e0b8..d48a4b7d 100644 --- a/doubleml/utils/tests/test_ps_processor.py +++ b/doubleml/utils/tests/test_ps_processor.py @@ -1,11 +1,34 @@ from unittest.mock import patch +import warnings import numpy as np import pytest from sklearn.isotonic import IsotonicRegression from sklearn.model_selection import KFold, cross_val_predict -from doubleml.utils.propensity_score_processing import PSProcessor, PSProcessorConfig +from doubleml.utils.propensity_score_processing import ( + PSProcessorConfig, PSProcessor, init_ps_processor +) + + +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). +@pytest.mark.ci +def test_init_ps_processor_with_deprecated(): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + cfg, proc = init_ps_processor(None, "truncate", 0.02) + assert any("deprecated" in str(warn.message) for warn in w) + assert isinstance(cfg, PSProcessorConfig) + assert proc.clipping_threshold == 0.02 + + +@pytest.mark.ci +def test_init_ps_processor_with_config(): + config = PSProcessorConfig(clipping_threshold=0.05) + cfg, proc = init_ps_processor(config, None, None) + assert isinstance(cfg, PSProcessorConfig) + assert isinstance(proc, PSProcessor) + assert proc.clipping_threshold == 0.05 @pytest.mark.ci From f284fa464ff97f190ecd6323f37d0ffc07c0b507 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 24 Oct 2025 12:46:07 +0200 Subject: [PATCH 12/38] update irm class and tests with ps_processor --- doubleml/irm/irm.py | 67 ++++++++++++++----- doubleml/irm/tests/_utils_irm_manual.py | 8 +-- doubleml/irm/tests/test_irm.py | 23 ++++--- doubleml/irm/tests/test_irm_classifier.py | 9 +-- .../tests/test_irm_external_predictions.py | 12 +++- .../irm/tests/test_irm_weighted_scores.py | 13 +++- doubleml/irm/tests/test_irm_with_missings.py | 15 +++-- doubleml/tests/_utils.py | 8 +-- 
doubleml/utils/propensity_score_processing.py | 16 ++--- 9 files changed, 116 insertions(+), 55 deletions(-) diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index 5e2d693b..270fb4b5 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -1,4 +1,5 @@ import warnings +from typing import Optional import numpy as np import pandas as pd @@ -12,17 +13,17 @@ _check_binary_predictions, _check_finite_predictions, _check_integer, - _check_is_propensity, _check_score, - _check_trimming, _check_weights, ) from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls -from doubleml.utils._propensity_score import _propensity_score_adjustment, _trimm +from doubleml.utils._propensity_score import _propensity_score_adjustment from doubleml.utils.blp import DoubleMLBLP from doubleml.utils.policytree import DoubleMLPolicyTree +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLIRM(LinearScoreMixin, DoubleML): """Double machine learning for interactive regression models @@ -68,13 +69,16 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML): Indicates whether the inverse probability weights are normalized. Default is ``False``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. 
+ + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -131,8 +135,9 @@ def __init__( score="ATE", weights=None, normalize_ipw=False, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -167,9 +172,13 @@ def __init__( raise TypeError( "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed." ) + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold self._sensitivity_implemented = True self._external_predictions_implemented = True @@ -184,19 +193,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
@property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. " + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def weights(self): @@ -327,9 +361,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return_models=return_models, ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) - # also trimm external predictions - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], self._dml_data.d, cv=smpls) psi_a, psi_b = self._score_elements(y, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], smpls) psi_elements = {"psi_a": psi_a, "psi_b": psi_b} diff --git a/doubleml/irm/tests/_utils_irm_manual.py b/doubleml/irm/tests/_utils_irm_manual.py index f5a5bad7..d5bf3dc3 100644 --- a/doubleml/irm/tests/_utils_irm_manual.py +++ b/doubleml/irm/tests/_utils_irm_manual.py @@ -20,7 +20,7 @@ def fit_irm( g1_params=None, m_params=None, normalize_ipw=True, - trimming_threshold=1e-2, + clipping_threshold=1e-2, ): n_obs = len(y) @@ -44,7 +44,7 @@ def fit_irm( g0_params=g0_params, g1_params=g1_params, m_params=m_params, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) 
all_g_hat0.append(g_hat0) @@ -72,7 +72,7 @@ def fit_irm( def fit_nuisance_irm( - y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, trimming_threshold=1e-12 + y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, clipping_threshold=1e-12 ): ml_g0 = clone(learner_g) ml_g1 = clone(learner_g) @@ -89,7 +89,7 @@ def fit_nuisance_irm( g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1) ml_m = clone(learner_m) - m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) + m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold) p_hat_list = [] for _ in smpls: diff --git a/doubleml/irm/tests/test_irm.py b/doubleml/irm/tests/test_irm.py index 856c7f59..f3b60ea9 100644 --- a/doubleml/irm/tests/test_irm.py +++ b/doubleml/irm/tests/test_irm.py @@ -9,6 +9,7 @@ import doubleml as dml from doubleml.irm.datasets import make_irm_data +from doubleml.utils.propensity_score_processing import PSProcessorConfig from doubleml.utils.resampling import DoubleMLResampling from ...tests._utils import draw_smpls @@ -40,12 +41,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_threshold): +def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -62,6 +63,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d) obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) + ps_processor_config = PSProcessorConfig(clipping_threshold=clipping_threshold) np.random.seed(3141) dml_irm_obj = dml.DoubleMLIRM( 
obj_dml_data, @@ -71,7 +73,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t score=score, normalize_ipw=normalize_ipw, draw_sample_splitting=False, - trimming_threshold=trimming_threshold, + ps_processor_config=ps_processor_config, ) # synchronize the sample splitting @@ -88,7 +90,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) np.random.seed(3141) @@ -101,7 +103,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t score=score, normalize_ipw=normalize_ipw, draw_sample_splitting=False, - trimming_threshold=trimming_threshold, + ps_processor_config=ps_processor_config, ) # synchronize the sample splitting @@ -235,8 +237,8 @@ def test_dml_irm_cate_gate(cov_type): # First stage estimation ml_g = RandomForestRegressor(n_estimators=10) ml_m = RandomForestClassifier(n_estimators=10) - - dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_m=ml_m, ml_g=ml_g, trimming_threshold=0.05, n_folds=5) + ps_processor_config = PSProcessorConfig(clipping_threshold=0.05) + dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_m=ml_m, ml_g=ml_g, ps_processor_config=ps_processor_config, n_folds=5) dml_irm_obj.fit() # create a random basis @@ -279,7 +281,12 @@ def dml_irm_weights_fixture(n_rep): # collect data np.random.seed(42) obj_dml_data = make_irm_data(n_obs=n, dim_x=2) - kwargs = {"trimming_threshold": 0.05, "n_folds": 5, "n_rep": n_rep, "draw_sample_splitting": False} + kwargs = { + "ps_processor_config": PSProcessorConfig(clipping_threshold=0.05), + "n_folds": 5, + "n_rep": n_rep, + "draw_sample_splitting": False, + } smpls = DoubleMLResampling(n_folds=5, n_rep=n_rep, n_obs=n, stratify=obj_dml_data.d).split_samples() diff --git a/doubleml/irm/tests/test_irm_classifier.py b/doubleml/irm/tests/test_irm_classifier.py index 9389439d..afe8ca65 100644 --- 
a/doubleml/irm/tests/test_irm_classifier.py +++ b/doubleml/irm/tests/test_irm_classifier.py @@ -7,6 +7,7 @@ from sklearn.linear_model import LogisticRegression import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import draw_smpls from ._utils_irm_manual import boot_irm, fit_irm @@ -37,12 +38,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normalize_ipw, trimming_threshold): +def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normalize_ipw, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -65,7 +66,7 @@ def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normali n_folds, score=score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) # synchronize the sample splitting @@ -82,7 +83,7 @@ def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normali all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_irm_external_predictions.py b/doubleml/irm/tests/test_irm_external_predictions.py index 5d0412d5..17bf44dc 100644 --- a/doubleml/irm/tests/test_irm_external_predictions.py +++ b/doubleml/irm/tests/test_irm_external_predictions.py @@ -64,7 +64,12 @@ def doubleml_irm_fixture(irm_score, n_rep, set_ml_m_ext, set_ml_g_ext): np.random.seed(3141) dml_irm_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_irm.coef[0], "coef_ext": dml_irm_ext.coef[0]} + res_dict = { + "coef_normal": dml_irm.coef[0], + "coef_ext": 
dml_irm_ext.coef[0], + "se": dml_irm.se[0], + "se_ext": dml_irm_ext.se[0], + } return res_dict @@ -72,3 +77,8 @@ def doubleml_irm_fixture(irm_score, n_rep, set_ml_m_ext, set_ml_g_ext): @pytest.mark.ci def test_doubleml_irm_coef(doubleml_irm_fixture): assert math.isclose(doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_doubleml_irm_se(doubleml_irm_fixture): + assert math.isclose(doubleml_irm_fixture["se"], doubleml_irm_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-4) diff --git a/doubleml/irm/tests/test_irm_weighted_scores.py b/doubleml/irm/tests/test_irm_weighted_scores.py index 0592c3d3..56d841cc 100644 --- a/doubleml/irm/tests/test_irm_weighted_scores.py +++ b/doubleml/irm/tests/test_irm_weighted_scores.py @@ -6,6 +6,7 @@ import doubleml as dml from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils.propensity_score_processing import PSProcessorConfig def old_score_elements(y, d, g_hat0, g_hat1, m_hat, score, normalize_ipw): @@ -65,12 +66,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_threshold): +def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_ipw, clipping_threshold): n_folds = 2 # collect data @@ -83,7 +84,13 @@ def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_i np.random.seed(3141) dml_irm_obj = dml.DoubleMLIRM( - obj_dml_data, ml_g, ml_m, n_folds, score=score, normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold + obj_dml_data, + ml_g, + ml_m, + n_folds, + score=score, + normalize_ipw=normalize_ipw, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), ) dml_irm_obj.fit() diff --git 
a/doubleml/irm/tests/test_irm_with_missings.py b/doubleml/irm/tests/test_irm_with_missings.py index a6c30cae..838ea98a 100644 --- a/doubleml/irm/tests/test_irm_with_missings.py +++ b/doubleml/irm/tests/test_irm_with_missings.py @@ -9,6 +9,7 @@ from xgboost import XGBClassifier, XGBRegressor import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import draw_smpls from ._utils_irm_manual import boot_irm, fit_irm @@ -43,12 +44,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, score, normalize_ipw, trimming_threshold): +def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, score, normalize_ipw, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -66,7 +67,13 @@ def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, sco np.random.seed(3141) obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, force_all_x_finite="allow-nan") dml_irm_obj = dml.DoubleMLIRM( - obj_dml_data, ml_g, ml_m, n_folds, score=score, normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold + obj_dml_data, + ml_g, + ml_m, + n_folds, + score=score, + normalize_ipw=normalize_ipw, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), ) # synchronize the sample splitting dml_irm_obj.set_sample_splitting(all_smpls=all_smpls) @@ -83,7 +90,7 @@ def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, sco all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/tests/_utils.py b/doubleml/tests/_utils.py index 907d03d1..60416246 100644 --- a/doubleml/tests/_utils.py +++ 
b/doubleml/tests/_utils.py @@ -44,7 +44,7 @@ def fit_predict(y, x, ml_model, params, smpls, train_cond=None): return y_hat -def fit_predict_proba(y, x, ml_model, params, smpls, trimming_threshold=0, train_cond=None): +def fit_predict_proba(y, x, ml_model, params, smpls, clipping_threshold=0, train_cond=None): y_hat = [] for idx, (train_index, test_index) in enumerate(smpls): if params is not None: @@ -55,9 +55,9 @@ def fit_predict_proba(y, x, ml_model, params, smpls, trimming_threshold=0, train train_index_cond = np.intersect1d(train_cond, train_index) preds = ml_model.fit(x[train_index_cond], y[train_index_cond]).predict_proba(x[test_index])[:, 1] - if trimming_threshold > 0: - preds[preds < trimming_threshold] = trimming_threshold - preds[preds > 1 - trimming_threshold] = 1 - trimming_threshold + if clipping_threshold > 0: + preds[preds < clipping_threshold] = clipping_threshold + preds[preds > 1 - clipping_threshold] = 1 - clipping_threshold y_hat.append(preds) return y_hat diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index dedb0268..3bec652f 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -18,30 +18,26 @@ class PSProcessorConfig: # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). def init_ps_processor( - ps_processor_config: Optional[PSProcessorConfig], - trimming_rule: Optional[str], - trimming_threshold: Optional[float] + ps_processor_config: Optional[PSProcessorConfig], trimming_rule: Optional[str], trimming_threshold: Optional[float] ): - if trimming_rule is not None: + if trimming_rule != "truncate": warnings.warn( "'trimming_rule' is deprecated and will be removed in a future version. 
" "Use 'ps_processor_config' with 'clipping_threshold' instead.", DeprecationWarning, - stacklevel=3 + stacklevel=3, ) - if trimming_threshold is not None: + if trimming_threshold != 1e-2: warnings.warn( "'trimming_threshold' is deprecated and will be removed in a future version. " "Use 'ps_processor_config' with 'clipping_threshold' instead.", DeprecationWarning, - stacklevel=3 + stacklevel=3, ) if ps_processor_config is not None: config = ps_processor_config else: - config = PSProcessorConfig( - clipping_threshold=trimming_threshold if trimming_threshold is not None else 1e-2 - ) + config = PSProcessorConfig(clipping_threshold=trimming_threshold if trimming_threshold is not None else 1e-2) processor = PSProcessor.from_config(config) return config, processor From dac26ece4853d98eaf12fe34d02f3bad593c2e6a Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 24 Oct 2025 12:47:32 +0200 Subject: [PATCH 13/38] add test for ps_processor with irm --- doubleml/irm/tests/test_irm_ps_processor.py | 59 +++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 doubleml/irm/tests/test_irm_ps_processor.py diff --git a/doubleml/irm/tests/test_irm_ps_processor.py b/doubleml/irm/tests/test_irm_ps_processor.py new file mode 100644 index 00000000..efd5fe0c --- /dev/null +++ b/doubleml/irm/tests/test_irm_ps_processor.py @@ -0,0 +1,59 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml import DoubleMLData, DoubleMLIRM +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", 
cv_calibration=True), + ], +) +def test_irm_ml_m_predictions_ps_processor(generate_data_irm, ps_config): + x, y, d = generate_data_irm + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + dml_irm = DoubleMLIRM( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=ps_config, + n_rep=1, + ) + dml_irm.fit(store_predictions=True) + ml_m_preds = dml_irm.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +def test_irm_ml_m_predictions_ps_processor_differences(generate_data_irm): + x, y, d = generate_data_irm + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_irm = DoubleMLIRM( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=cfg, + n_rep=1, + ) + dml_irm.fit(store_predictions=True) + preds.append(dml_irm.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) From 5ec4648e23775a8cc469aaec50c4250fd5e22c5f Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 24 Oct 2025 15:52:47 +0200 Subject: [PATCH 14/38] update apo and tests for ps_processor_config --- doubleml/irm/apo.py | 64 ++++++++++++++----- 
doubleml/irm/tests/_utils_apo_manual.py | 8 +-- doubleml/irm/tests/test_apo.py | 18 ++++-- doubleml/irm/tests/test_apo_classifier.py | 9 +-- doubleml/irm/tests/test_apo_exceptions.py | 16 ----- .../irm/tests/test_apo_weighted_scores.py | 7 +- 6 files changed, 73 insertions(+), 49 deletions(-) diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index 0de311bc..be7e327e 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -1,4 +1,5 @@ import warnings +from typing import Optional import numpy as np import pandas as pd @@ -9,14 +10,13 @@ from doubleml.utils._checks import ( _check_binary_predictions, _check_finite_predictions, - _check_is_propensity, _check_score, - _check_trimming, _check_weights, ) from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls -from doubleml.utils._propensity_score import _propensity_score_adjustment, _trimm +from doubleml.utils._propensity_score import _propensity_score_adjustment from doubleml.utils.blp import DoubleMLBLP +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor class DoubleMLAPO(LinearScoreMixin, DoubleML): @@ -66,13 +66,16 @@ class DoubleMLAPO(LinearScoreMixin, DoubleML): Indicates whether the inverse probability weights are normalized. Default is ``False``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. 
+ + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -91,8 +94,9 @@ def __init__( score="APO", weights=None, normalize_ipw=False, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -131,9 +135,13 @@ def __init__( raise TypeError( "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed." ) + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold self._sensitivity_implemented = True self._external_predictions_implemented = True @@ -163,19 +171,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
@property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. " + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def weights(self): @@ -288,10 +321,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return_models=return_models, ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) # also trimm external predictions - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], self.treated, cv=smpls) psi_a, psi_b = self._score_elements(y, treated, g_hat_d_lvl0["preds"], g_hat_d_lvl1["preds"], m_hat["preds"], smpls) psi_elements = {"psi_a": psi_a, "psi_b": psi_b} diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py index 0ec84417..8abcb029 100644 --- a/doubleml/irm/tests/_utils_apo_manual.py +++ b/doubleml/irm/tests/_utils_apo_manual.py @@ -21,7 +21,7 @@ def fit_apo( g1_params=None, m_params=None, normalize_ipw=False, - trimming_threshold=1e-2, + clipping_threshold=1e-2, ): n_obs = len(y) treated = d == treatment_level @@ -46,7 +46,7 @@ def fit_apo( g0_params=g0_params, g1_params=g1_params, m_params=m_params, - trimming_threshold=trimming_threshold, + 
clipping_threshold=clipping_threshold, ) all_g_hat0.append(g_hat0) @@ -83,7 +83,7 @@ def fit_nuisance_apo( g0_params=None, g1_params=None, m_params=None, - trimming_threshold=1e-12, + clipping_threshold=1e-12, ): ml_g0 = clone(learner_g) ml_g1 = clone(learner_g) @@ -102,7 +102,7 @@ def fit_nuisance_apo( g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1) ml_m = clone(learner_m) - m_hat_list = fit_predict_proba(treated, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) + m_hat_list = fit_predict_proba(treated, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold) return g_hat0_list, g_hat1_list, m_hat_list diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py index 7558b7c1..1b41705f 100644 --- a/doubleml/irm/tests/test_apo.py +++ b/doubleml/irm/tests/test_apo.py @@ -9,6 +9,7 @@ import doubleml as dml from doubleml.irm.datasets import make_irm_data, make_irm_data_discrete_treatments +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import draw_smpls from ._utils_apo_manual import boot_apo, fit_apo, fit_sensitivity_elements_apo @@ -34,7 +35,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -44,7 +45,7 @@ def treatment_level(request): @pytest.fixture(scope="module") -def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level): +def dml_apo_fixture(learner, normalize_ipw, clipping_threshold, treatment_level): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -76,7 +77,7 @@ def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level) score="APO", normalize_ipw=normalize_ipw, draw_sample_splitting=False, - trimming_threshold=trimming_threshold, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), ) # synchronize the sample splitting @@ -94,7 +95,7 @@ 
def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level) all_smpls=all_smpls, score="APO", normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) np.random.seed(3141) @@ -108,7 +109,7 @@ def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level) score="APO", normalize_ipw=normalize_ipw, draw_sample_splitting=False, - trimming_threshold=trimming_threshold, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), ) # synchronize the sample splitting @@ -242,7 +243,12 @@ def test_dml_apo_capo_gapo(treatment_level, cov_type): ml_m = RandomForestClassifier(n_estimators=10) dml_obj = dml.DoubleMLAPO( - obj_dml_data, ml_m=ml_m, ml_g=ml_g, treatment_level=treatment_level, trimming_threshold=0.05, n_folds=5 + obj_dml_data, + ml_m=ml_m, + ml_g=ml_g, + treatment_level=treatment_level, + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, ) dml_obj.fit() diff --git a/doubleml/irm/tests/test_apo_classifier.py b/doubleml/irm/tests/test_apo_classifier.py index 042f3fe8..0b471956 100644 --- a/doubleml/irm/tests/test_apo_classifier.py +++ b/doubleml/irm/tests/test_apo_classifier.py @@ -7,6 +7,7 @@ from sklearn.linear_model import LogisticRegression import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import draw_smpls from ._utils_apo_manual import boot_apo, fit_apo @@ -32,12 +33,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw, trimming_threshold): +def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -64,7 +65,7 @@ def 
dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw, n_folds=n_folds, score=score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) # synchronize the sample splitting @@ -82,7 +83,7 @@ def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw, all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py index 5991ee5e..f428de6b 100644 --- a/doubleml/irm/tests/test_apo_exceptions.py +++ b/doubleml/irm/tests/test_apo_exceptions.py @@ -76,22 +76,6 @@ def test_apo_exception_scores(): _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, score="MAR") -@pytest.mark.ci -def test_apo_exception_trimming_rule(): - msg = "Invalid trimming_rule discard. Valid trimming_rule truncate." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule="discard") - - # check the trimming_threshold exceptions - msg = "trimming_threshold has to be a float. Object of type passed." - with pytest.raises(TypeError, match=msg): - _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule="truncate", trimming_threshold="0.1") - - msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule="truncate", trimming_threshold=0.6) - - @pytest.mark.ci def test_apo_exception_ipw_normalization(): msg = "Normalization indicator has to be boolean. Object of type passed." 
diff --git a/doubleml/irm/tests/test_apo_weighted_scores.py b/doubleml/irm/tests/test_apo_weighted_scores.py index 63687ebd..b5ba8a32 100644 --- a/doubleml/irm/tests/test_apo_weighted_scores.py +++ b/doubleml/irm/tests/test_apo_weighted_scores.py @@ -5,6 +5,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import draw_smpls @@ -39,7 +40,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -49,7 +50,7 @@ def treatment_level(request): @pytest.fixture(scope="module") -def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normalize_ipw, trimming_threshold, treatment_level): +def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normalize_ipw, clipping_threshold, treatment_level): n_folds = 2 # collect data @@ -67,7 +68,7 @@ def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normali "n_rep": n_rep, "score": score, "normalize_ipw": normalize_ipw, - "trimming_threshold": trimming_threshold, + "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold), "draw_sample_splitting": False, } From 8e57bc7303c88aa923ce366f3226058226cae527 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 24 Oct 2025 16:37:05 +0200 Subject: [PATCH 15/38] add ps_processor to apos --- doubleml/irm/apos.py | 48 +++++++++++++++---- doubleml/irm/tests/_utils_apos_manual.py | 8 ++-- doubleml/irm/tests/test_apos.py | 11 ++--- doubleml/irm/tests/test_apos_classfier.py | 11 ++--- doubleml/irm/tests/test_apos_exceptions.py | 16 ------- .../irm/tests/test_apos_weighted_scores.py | 8 ++-- doubleml/irm/tests/test_irm_vs_apos.py | 11 +++-- 7 files changed, 63 insertions(+), 50 deletions(-) diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 
5a6d41fc..23e7085e 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -1,5 +1,7 @@ import copy +import warnings from collections.abc import Iterable +from typing import Optional import numpy as np import pandas as pd @@ -11,10 +13,11 @@ from doubleml.double_ml_framework import concat from doubleml.double_ml_sampling_mixins import SampleSplittingMixin from doubleml.irm.apo import DoubleMLAPO -from doubleml.utils._checks import _check_score, _check_trimming, _check_weights +from doubleml.utils._checks import _check_score, _check_weights from doubleml.utils._descriptive import generate_summary from doubleml.utils._sensitivity import _compute_sensitivity_bias from doubleml.utils.gain_statistics import gain_statistics +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor class DoubleMLAPOS(SampleSplittingMixin): @@ -31,8 +34,9 @@ def __init__( score="APO", weights=None, normalize_ipw=False, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): self._dml_data = obj_dml_data @@ -58,10 +62,12 @@ def __init__( # initialize framework which is constructed after the fit method is called self._framework = None - # initialize and check trimming + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold if not isinstance(self.normalize_ipw, bool): raise TypeError( @@ -131,19 +137,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def weights(self): @@ -819,8 +850,7 @@ def _initialize_models(self): "n_folds": self.n_folds, "n_rep": self.n_rep, "weights": self.weights, - "trimming_rule": self.trimming_rule, - "trimming_threshold": self.trimming_threshold, + "ps_processor_config": self.ps_processor_config, "normalize_ipw": self.normalize_ipw, "draw_sample_splitting": False, } diff --git a/doubleml/irm/tests/_utils_apos_manual.py b/doubleml/irm/tests/_utils_apos_manual.py index 88fc59c2..d1eb575f 100644 --- a/doubleml/irm/tests/_utils_apos_manual.py +++ b/doubleml/irm/tests/_utils_apos_manual.py @@ -1,6 +1,8 @@ import numpy as np from sklearn.base import clone +from doubleml.utils.propensity_score_processing import PSProcessorConfig + from ...data.base_data import DoubleMLData from ...tests._utils_boot import draw_weights from ..apo import DoubleMLAPO @@ -16,9 +18,8 @@ def fit_apos( all_smpls, score, n_rep=1, - trimming_rule="truncate", normalize_ipw=False, - trimming_threshold=1e-2, + clipping_threshold=1e-2, ): n_obs = len(y) n_treatments = len(treatment_levels) @@ -39,8 +40,7 @@ def fit_apos( n_folds=n_folds, n_rep=n_rep, score=score, - trimming_rule=trimming_rule, - trimming_threshold=trimming_threshold, + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), normalize_ipw=normalize_ipw, draw_sample_splitting=False, ) diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py index 55a48ced..a3897352 100644 --- a/doubleml/irm/tests/test_apos.py +++ b/doubleml/irm/tests/test_apos.py @@ -7,6 +7,7 @@ import doubleml as dml from doubleml.irm.datasets import make_irm_data, make_irm_data_discrete_treatments +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import confint_manual from ._utils_apos_manual import 
boot_apos, fit_apos @@ -90,7 +91,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -100,7 +101,7 @@ def treatment_levels(request): @pytest.fixture(scope="module") -def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatment_levels): +def dml_apos_fixture(learner, n_rep, normalize_ipw, clipping_threshold, treatment_levels): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -124,8 +125,7 @@ def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatmen "n_rep": n_rep, "score": "APO", "normalize_ipw": normalize_ipw, - "trimming_rule": "truncate", - "trimming_threshold": trimming_threshold, + "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold), } unfitted_apos_model = dml.DoubleMLAPOS(**input_args) @@ -151,9 +151,8 @@ def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatmen all_smpls=all_smpls, n_rep=n_rep, score="APO", - trimming_rule="truncate", normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) ci = dml_obj.confint(joint=False, level=0.95) diff --git a/doubleml/irm/tests/test_apos_classfier.py b/doubleml/irm/tests/test_apos_classfier.py index f9cfc10c..a044a979 100644 --- a/doubleml/irm/tests/test_apos_classfier.py +++ b/doubleml/irm/tests/test_apos_classfier.py @@ -7,6 +7,7 @@ import doubleml as dml from doubleml.irm.datasets import make_irm_data_discrete_treatments +from doubleml.utils.propensity_score_processing import PSProcessorConfig from ...tests._utils import confint_manual from ._utils_apos_manual import boot_apos, fit_apos @@ -37,7 +38,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -47,7 +48,7 @@ def treatment_levels(request): 
@pytest.fixture(scope="module") -def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatment_levels): +def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, clipping_threshold, treatment_levels): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -71,8 +72,7 @@ def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshol "n_rep": n_rep, "score": "APO", "normalize_ipw": normalize_ipw, - "trimming_rule": "truncate", - "trimming_threshold": trimming_threshold, + "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold), } unfitted_apos_model = dml.DoubleMLAPOS(**input_args) @@ -97,9 +97,8 @@ def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshol treatment_levels=treatment_levels, all_smpls=all_smpls, score="APO", - trimming_rule="truncate", normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) ci = dml_obj.confint(joint=False, level=0.95) diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py index 93274cee..f5b741ef 100644 --- a/doubleml/irm/tests/test_apos_exceptions.py +++ b/doubleml/irm/tests/test_apos_exceptions.py @@ -59,22 +59,6 @@ def test_apos_exception_scores(): _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, score="MAR") -@pytest.mark.ci -def test_apos_exception_trimming_rule(): - msg = "Invalid trimming_rule discard. Valid trimming_rule truncate." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule="discard") - - # check the trimming_threshold exceptions - msg = "trimming_threshold has to be a float. Object of type passed." - with pytest.raises(TypeError, match=msg): - _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule="truncate", trimming_threshold="0.1") - - msg = "Invalid trimming_threshold 0.6. 
trimming_threshold has to be between 0 and 0.5." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule="truncate", trimming_threshold=0.6) - - @pytest.mark.ci def test_apos_exception_ipw_normalization(): msg = "Normalization indicator has to be boolean. Object of type passed." diff --git a/doubleml/irm/tests/test_apos_weighted_scores.py b/doubleml/irm/tests/test_apos_weighted_scores.py index 6d0a7f65..e400532d 100644 --- a/doubleml/irm/tests/test_apos_weighted_scores.py +++ b/doubleml/irm/tests/test_apos_weighted_scores.py @@ -7,6 +7,7 @@ import doubleml as dml from doubleml.irm.datasets import make_irm_data_discrete_treatments +from doubleml.utils.propensity_score_processing import PSProcessorConfig @pytest.fixture( @@ -39,7 +40,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -49,7 +50,7 @@ def treatment_levels(request): @pytest.fixture(scope="module") -def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, trimming_threshold, treatment_levels): +def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, clipping_threshold, treatment_levels): n_obs = 500 n_folds = 2 @@ -71,8 +72,7 @@ def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, trimming_t "n_rep": n_rep, "score": score, "normalize_ipw": normalize_ipw, - "trimming_threshold": trimming_threshold, - "trimming_rule": "truncate", + "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold), } np.random.seed(42) diff --git a/doubleml/irm/tests/test_irm_vs_apos.py b/doubleml/irm/tests/test_irm_vs_apos.py index a91c8c05..aab0e09e 100644 --- a/doubleml/irm/tests/test_irm_vs_apos.py +++ b/doubleml/irm/tests/test_irm_vs_apos.py @@ -8,6 +8,7 @@ import doubleml as dml from doubleml.utils._propensity_score import _propensity_score_adjustment +from 
doubleml.utils.propensity_score_processing import PSProcessorConfig @pytest.fixture( @@ -35,12 +36,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.2, 0.15]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_threshold): +def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, clipping_threshold): # collect data (x, y, d) = generate_data_irm @@ -54,7 +55,7 @@ def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimm kwargs = { "n_folds": n_folds, "n_rep": n_rep, - "trimming_threshold": trimming_threshold, + "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold), "normalize_ipw": normalize_ipw, } @@ -159,7 +160,7 @@ def test_apos_vs_irm_sensitivity(dml_irm_apos_fixture): @pytest.fixture(scope="module") -def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_threshold): +def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_ipw, clipping_threshold): # collect data (x, y, d) = generate_data_irm @@ -173,7 +174,7 @@ def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_i kwargs = { "n_folds": n_folds, "n_rep": n_rep, - "trimming_threshold": trimming_threshold, + "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold), "normalize_ipw": normalize_ipw, } From 0493024298c721ac3eaa7aa7342c9a41d0c07644 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Fri, 24 Oct 2025 16:40:20 +0200 Subject: [PATCH 16/38] add ps_processor test for apo --- doubleml/irm/tests/test_apo_ps_processor.py | 72 +++++++++++++++++++++ doubleml/irm/tests/test_irm_ps_processor.py | 2 + 2 files changed, 74 insertions(+) create mode 100644 doubleml/irm/tests/test_apo_ps_processor.py diff --git a/doubleml/irm/tests/test_apo_ps_processor.py 
b/doubleml/irm/tests/test_apo_ps_processor.py new file mode 100644 index 00000000..d70f2553 --- /dev/null +++ b/doubleml/irm/tests/test_apo_ps_processor.py @@ -0,0 +1,72 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml import DoubleMLAPO, DoubleMLData +from doubleml.irm.datasets import make_irm_data_discrete_treatments +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.fixture(scope="module") +def generate_data_apo(): + np.random.seed(3141) + data = make_irm_data_discrete_treatments(n_obs=200) + x = data["x"] + y = data["y"] + d = data["d"] + return x, y, d + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_apo_ml_m_predictions_ps_processor(generate_data_apo, ps_config): + x, y, d = generate_data_apo + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + dml_apo = DoubleMLAPO( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=ps_config, + n_rep=1, + treatment_level=0, + ) + dml_apo.fit(store_predictions=True) + ml_m_preds = dml_apo.predictions["ml_m"][:, 0, 0] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_apo_ml_m_predictions_ps_processor_differences(generate_data_apo): + x, y, d = generate_data_apo + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, 
cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_apo = DoubleMLAPO( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=cfg, + n_rep=1, + treatment_level=0, + ) + dml_apo.fit(store_predictions=True) + preds.append(dml_apo.predictions["ml_m"][:, 0, 0]) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/irm/tests/test_irm_ps_processor.py b/doubleml/irm/tests/test_irm_ps_processor.py index efd5fe0c..33ae66f4 100644 --- a/doubleml/irm/tests/test_irm_ps_processor.py +++ b/doubleml/irm/tests/test_irm_ps_processor.py @@ -6,6 +6,7 @@ from doubleml.utils.propensity_score_processing import PSProcessorConfig +@pytest.mark.ci @pytest.mark.parametrize( "ps_config", [ @@ -33,6 +34,7 @@ def test_irm_ml_m_predictions_ps_processor(generate_data_irm, ps_config): assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) +@pytest.mark.ci def test_irm_ml_m_predictions_ps_processor_differences(generate_data_irm): x, y, d = generate_data_irm dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) From 5b60bbf6cb991475a7eca4aa71f18645955015a9 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 08:00:45 +0100 Subject: [PATCH 17/38] update IIVM with psprocessor --- doubleml/irm/iivm.py | 67 ++++++++++++++++------ doubleml/irm/tests/_utils_iivm_manual.py | 8 +-- doubleml/irm/tests/test_iivm.py | 9 +-- doubleml/irm/tests/test_iivm_classifier.py | 8 +-- doubleml/irm/tests/test_iivm_subgroups.py | 9 +-- 5 files changed, 69 insertions(+), 32 deletions(-) diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py index 
4eaa1d50..7f330cfb 100644 --- a/doubleml/irm/iivm.py +++ b/doubleml/irm/iivm.py @@ -1,3 +1,6 @@ +import warnings +from typing import Optional + import numpy as np from scipy.stats import norm from sklearn.utils import check_X_y @@ -11,10 +14,10 @@ _check_finite_predictions, _check_is_propensity, _check_score, - _check_trimming, ) from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _solve_quadratic_inequality -from doubleml.utils._propensity_score import _normalize_ipw, _trimm +from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor class DoubleMLIIVM(LinearScoreMixin, DoubleML): @@ -64,13 +67,16 @@ class DoubleMLIIVM(LinearScoreMixin, DoubleML): Indicates whether the inverse probability weights are normalized. Default is ``False``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. + + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -135,8 +141,9 @@ def __init__( score="LATE", subgroups=None, normalize_ipw=False, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -172,9 +179,13 @@ def __init__( raise TypeError( "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed." ) + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold if subgroups is None: # this is the default for subgroups; via None to prevent a mutable default argument @@ -213,19 +224,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold def _initialize_ml_nuisance_params(self): valid_learner = ["ml_g0", "ml_g1", "ml_m", "ml_r0", "ml_r1"] @@ -330,9 +366,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return_models=return_models, ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) - # also trimm external predictions - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], z, cv=smpls) # nuisance r r0 = external_predictions["ml_r0"] is not None diff --git a/doubleml/irm/tests/_utils_iivm_manual.py b/doubleml/irm/tests/_utils_iivm_manual.py index b61526b2..601604ee 100644 --- a/doubleml/irm/tests/_utils_iivm_manual.py +++ b/doubleml/irm/tests/_utils_iivm_manual.py @@ -23,7 +23,7 @@ def fit_iivm( r0_params=None, r1_params=None, normalize_ipw=True, - trimming_threshold=1e-2, + clipping_threshold=1e-2, always_takers=True, never_takers=True, ): @@ -53,7 +53,7 @@ def fit_iivm( m_params=m_params, r0_params=r0_params, r1_params=r1_params, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, always_takers=always_takers, never_takers=never_takers, ) @@ -98,7 +98,7 @@ def fit_nuisance_iivm( m_params=None, r0_params=None, r1_params=None, - trimming_threshold=1e-12, + clipping_threshold=1e-12, always_takers=True, never_takers=True, ): @@ -117,7 +117,7 @@ def fit_nuisance_iivm( g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1) ml_m = clone(learner_m) - m_hat_list = fit_predict_proba(z, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) + m_hat_list = fit_predict_proba(z, x, ml_m, m_params, smpls, 
clipping_threshold=clipping_threshold) ml_r0 = clone(learner_r) if always_takers: diff --git a/doubleml/irm/tests/test_iivm.py b/doubleml/irm/tests/test_iivm.py index 169f4175..1c049e26 100644 --- a/doubleml/irm/tests/test_iivm.py +++ b/doubleml/irm/tests/test_iivm.py @@ -34,12 +34,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming_threshold): +def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 491 @@ -72,7 +72,8 @@ def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming n_folds, draw_sample_splitting=False, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), + score=score, ) # synchronize the sample splitting dml_iivm_obj.set_sample_splitting(all_smpls=all_smpls) @@ -91,7 +92,7 @@ def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_iivm_classifier.py b/doubleml/irm/tests/test_iivm_classifier.py index 983c34a7..78096031 100644 --- a/doubleml/irm/tests/test_iivm_classifier.py +++ b/doubleml/irm/tests/test_iivm_classifier.py @@ -34,12 +34,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, normalize_ipw, trimming_threshold): +def 
dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, normalize_ipw, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 491 @@ -63,7 +63,7 @@ def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, norma ml_r, n_folds, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) # synchronize the sample splitting @@ -83,7 +83,7 @@ def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, norma all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_iivm_subgroups.py b/doubleml/irm/tests/test_iivm_subgroups.py index 906ed897..8633f6c8 100644 --- a/doubleml/irm/tests/test_iivm_subgroups.py +++ b/doubleml/irm/tests/test_iivm_subgroups.py @@ -30,7 +30,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -47,7 +47,7 @@ def subgroups(request): @pytest.fixture(scope="module") -def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming_threshold, subgroups): +def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw, clipping_threshold, subgroups): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 491 @@ -73,9 +73,10 @@ def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw ml_m, ml_r, n_folds, + score=score, subgroups=subgroups, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) # synchronize the sample splitting @@ -99,7 +100,7 @@ def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw 
all_smpls, score, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, always_takers=subgroups["always_takers"], never_takers=subgroups["never_takers"], ) From a30f1ae4db597e548b5219e0905104cd323af141 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 08:28:05 +0100 Subject: [PATCH 18/38] add ps_processor to ssm --- doubleml/irm/ssm.py | 62 +++++++++++++++----- doubleml/irm/tests/_utils_ssm_manual.py | 10 ++-- doubleml/irm/tests/test_ssm.py | 28 +++++++-- doubleml/irm/tests/test_ssm_exceptions.py | 16 ------ doubleml/irm/tests/test_ssm_ps_processor.py | 63 +++++++++++++++++++++ 5 files changed, 138 insertions(+), 41 deletions(-) create mode 100644 doubleml/irm/tests/test_ssm_ps_processor.py diff --git a/doubleml/irm/ssm.py b/doubleml/irm/ssm.py index 00a49191..fdc2ab6e 100644 --- a/doubleml/irm/ssm.py +++ b/doubleml/irm/ssm.py @@ -1,5 +1,6 @@ import copy import warnings +from typing import Optional import numpy as np from sklearn.base import clone @@ -9,11 +10,12 @@ from doubleml.data.ssm_data import DoubleMLSSMData from doubleml.double_ml import DoubleML from doubleml.double_ml_score_mixins import LinearScoreMixin -from doubleml.utils._checks import _check_finite_predictions, _check_score, _check_trimming +from doubleml.utils._checks import _check_finite_predictions, _check_score from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d, _predict_zero_one_propensity -from doubleml.utils._propensity_score import _trimm +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLSSM(LinearScoreMixin, DoubleML): """Double machine learning for sample selection models @@ -50,13 +52,16 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML): Indicates whether the inverse probability weights are normalized. Default is ``False``. 
- trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. + + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -109,8 +114,9 @@ def __init__( n_rep=1, score="missing-at-random", normalize_ipw=False, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -119,9 +125,12 @@ def __init__( self._sensitivity_implemented = False self._normalize_ipw = normalize_ipw + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold self._check_data(self._dml_data) self._is_cluster_data = self._dml_data.is_cluster_data @@ -165,19 +174,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold def _initialize_ml_nuisance_params(self): valid_learner = ["ml_g_d0", "ml_g_d1", "ml_pi", "ml_m"] @@ -369,7 +403,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa pi_hat["models"] = fitted_models["ml_pi"] m_hat["models"] = fitted_models["ml_m"] - m_hat["preds"] = _trimm(m_hat["preds"], self._trimming_rule, self._trimming_threshold) + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls) # treatment indicator dtreat = d == 1 diff --git a/doubleml/irm/tests/_utils_ssm_manual.py b/doubleml/irm/tests/_utils_ssm_manual.py index f14a1f66..07014018 100644 --- a/doubleml/irm/tests/_utils_ssm_manual.py +++ b/doubleml/irm/tests/_utils_ssm_manual.py @@ -19,7 +19,7 @@ def fit_selection( all_smpls, score, trimming_rule="truncate", - trimming_threshold=1e-2, + clipping_threshold=1e-2, normalize_ipw=True, n_rep=1, g_d0_params=None, @@ -55,7 +55,7 @@ def fit_selection( smpls, score, trimming_rule=trimming_rule, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, g_d0_params=g_d0_params, g_d1_params=g_d1_params, pi_params=pi_params, @@ -108,7 +108,7 @@ def fit_nuisance_selection( smpls, score, trimming_rule="truncate", - trimming_threshold=1e-2, + clipping_threshold=1e-2, g_d0_params=None, g_d1_params=None, pi_params=None, @@ -125,7 +125,7 @@ def fit_nuisance_selection( dx = np.column_stack((d, x, z)) if score == "missing-at-random": - pi_hat_list = fit_predict_proba(s, dx, ml_pi, pi_params, smpls, trimming_threshold=trimming_threshold) + pi_hat_list = fit_predict_proba(s, dx, ml_pi, pi_params, smpls, clipping_threshold=clipping_threshold) m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls) @@ -212,7 +212,7 @@ def fit_nuisance_selection( # predict conditional outcome g_hat_d0 = ml_g_d0.predict(xpi_test) - 
m_hat = _trimm(m_hat, trimming_rule, trimming_threshold) + m_hat = _trimm(m_hat, trimming_rule, clipping_threshold) # append predictions on test sample to final list of predictions g_hat_d1_list.append(g_hat_d1) diff --git a/doubleml/irm/tests/test_ssm.py b/doubleml/irm/tests/test_ssm.py index c561d9fe..735c6471 100644 --- a/doubleml/irm/tests/test_ssm.py +++ b/doubleml/irm/tests/test_ssm.py @@ -26,14 +26,14 @@ def normalize_ipw(request): return request.param -@pytest.fixture(scope="module", params=[0.01]) -def trimming_threshold(request): +@pytest.fixture(scope="module", params=[0.01, 0.05]) +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") def dml_selection_fixture( - generate_data_selection_mar, generate_data_selection_nonignorable, learner, score, trimming_threshold, normalize_ipw + generate_data_selection_mar, generate_data_selection_nonignorable, learner, score, clipping_threshold, normalize_ipw ): n_folds = 3 @@ -55,11 +55,27 @@ def dml_selection_fixture( np.random.seed(42) if score == "missing-at-random": obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s) - dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score) + dml_sel_obj = dml.DoubleMLSSM( + obj_dml_data, + ml_g, + ml_pi, + ml_m, + n_folds=n_folds, + score=score, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), + ) else: assert score == "nonignorable" obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=z, s=s) - dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score) + dml_sel_obj = dml.DoubleMLSSM( + obj_dml_data, + ml_g, + ml_pi, + ml_m, + n_folds=n_folds, + score=score, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), + ) np.random.seed(42) dml_sel_obj.set_sample_splitting(all_smpls=all_smpls) @@ -78,7 +94,7 @@ def dml_selection_fixture( all_smpls, score, trimming_rule="truncate", - 
trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, normalize_ipw=normalize_ipw, ) diff --git a/doubleml/irm/tests/test_ssm_exceptions.py b/doubleml/irm/tests/test_ssm_exceptions.py index 6df76908..4ca9f263 100644 --- a/doubleml/irm/tests/test_ssm_exceptions.py +++ b/doubleml/irm/tests/test_ssm_exceptions.py @@ -62,22 +62,6 @@ def test_ssm_exception_scores(): _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, score=0) -@pytest.mark.ci -def test_ssm_exception_trimming_rule(): - msg = "Invalid trimming_rule discard. Valid trimming_rule truncate." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule="discard") - - # check the trimming_threshold exceptions - msg = "trimming_threshold has to be a float. Object of type passed." - with pytest.raises(TypeError, match=msg): - _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule="truncate", trimming_threshold="0.1") - - msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule="truncate", trimming_threshold=0.6) - - @pytest.mark.ci def test_ssm_exception_ipw_normalization(): msg = "Normalization indicator has to be boolean. Object of type passed." 
diff --git a/doubleml/irm/tests/test_ssm_ps_processor.py b/doubleml/irm/tests/test_ssm_ps_processor.py new file mode 100644 index 00000000..c0627699 --- /dev/null +++ b/doubleml/irm/tests/test_ssm_ps_processor.py @@ -0,0 +1,63 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_ssm_ml_m_predictions_ps_processor(generate_data_selection_mar, ps_config): + x, y, d, _, s = generate_data_selection_mar + dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s) + np.random.seed(3141) + dml_ssm = dml.DoubleMLSSM( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_pi=LogisticRegression(), + ml_m=LogisticRegression(), + ps_processor_config=ps_config, + n_rep=1, + ) + dml_ssm.fit(store_predictions=True) + ml_m_preds = dml_ssm.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_ssm_ml_m_predictions_ps_processor_differences(generate_data_selection_mar): + x, y, d, _, s = generate_data_selection_mar + dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s) + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, 
calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_ssm = dml.DoubleMLSSM( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_pi=LogisticRegression(), + ml_m=LogisticRegression(), + ps_processor_config=cfg, + n_rep=1, + ) + dml_ssm.fit(store_predictions=True) + preds.append(dml_ssm.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) From 5d1a822c9406481c94703c2615a6900f070c3f1e Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 08:28:17 +0100 Subject: [PATCH 19/38] add test for iivm ps processor with fixture --- doubleml/irm/tests/test_iivm_ps_processor.py | 68 ++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 doubleml/irm/tests/test_iivm_ps_processor.py diff --git a/doubleml/irm/tests/test_iivm_ps_processor.py b/doubleml/irm/tests/test_iivm_ps_processor.py new file mode 100644 index 00000000..e8eed01f --- /dev/null +++ b/doubleml/irm/tests/test_iivm_ps_processor.py @@ -0,0 +1,68 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml import DoubleMLData +from doubleml.irm.iivm import DoubleMLIIVM +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.fixture +def dml_data_iivm(generate_data_iivm): + data = generate_data_iivm + x_cols = data.columns[data.columns.str.startswith("X")].tolist() + dml_data = DoubleMLData(data, "y", ["d"], x_cols, "z") + return dml_data + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, 
calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_iivm_ml_m_predictions_ps_processor(dml_data_iivm, ps_config): + np.random.seed(3141) + dml_iivm = DoubleMLIIVM( + obj_dml_data=dml_data_iivm, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ml_r=LogisticRegression(), + ps_processor_config=ps_config, + n_rep=1, + ) + dml_iivm.fit(store_predictions=True) + ml_m_preds = dml_iivm.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_iivm_ml_m_predictions_ps_processor_differences(dml_data_iivm): + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_iivm = DoubleMLIIVM( + obj_dml_data=dml_data_iivm, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ml_r=LogisticRegression(), + ps_processor_config=cfg, + n_rep=1, + ) + dml_iivm.fit(store_predictions=True) + preds.append(dml_iivm.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) From 
f97be67221219a240fdbd4c34dcb9936fece4e1b Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 09:04:28 +0100 Subject: [PATCH 20/38] add ps_processor to cvar --- doubleml/irm/cvar.py | 69 +++++++++++++++----- doubleml/irm/irm.py | 2 +- doubleml/irm/tests/_utils_cvar_manual.py | 16 ++--- doubleml/irm/tests/test_cvar.py | 8 +-- doubleml/irm/tests/test_cvar_ps_processor.py | 65 ++++++++++++++++++ doubleml/irm/tests/test_cvar_tune.py | 4 +- 6 files changed, 131 insertions(+), 33 deletions(-) create mode 100644 doubleml/irm/tests/test_cvar_ps_processor.py diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py index 6d29f5e2..64e82ad8 100644 --- a/doubleml/irm/cvar.py +++ b/doubleml/irm/cvar.py @@ -1,3 +1,6 @@ +import warnings +from typing import Optional + import numpy as np from sklearn.base import clone from sklearn.model_selection import StratifiedKFold, train_test_split @@ -11,7 +14,6 @@ _check_quantile, _check_score, _check_treatment, - _check_trimming, _check_zero_one_treatment, ) from doubleml.utils._estimation import ( @@ -22,9 +24,11 @@ _predict_zero_one_propensity, _solve_ipw_score, ) -from doubleml.utils._propensity_score import _normalize_ipw, _trimm +from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLCVAR(LinearScoreMixin, DoubleML): """Double machine learning for conditional value at risk for potential outcomes @@ -66,13 +70,16 @@ class DoubleMLCVAR(LinearScoreMixin, DoubleML): Indicates whether the inverse probability weights are normalized. Default is ``True``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. 
+ Use `ps_processor_config` instead. Will be removed in a future version. + + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -107,8 +114,9 @@ def __init__( n_rep=1, score="CVaR", normalize_ipw=True, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -139,10 +147,12 @@ def __init__( if draw_sample_splitting: self.draw_sample_splitting() - # initialize and check trimming + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold _ = self._check_learner(ml_g, "ml_g", regressor=True, classifier=False) _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) @@ -172,19 +182,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold def _compute_ipw_score(self, theta, d, y, prop): score = (d == self.treatment) / prop * (y <= theta) - self.quantile @@ -254,7 +289,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa "preds" ] - m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) + m_hat_prelim = self._ps_processor.adjust_ps(m_hat_prelim, d_train_1, cv=smpls_prelim) if self._normalize_ipw: m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) @@ -304,9 +339,7 @@ def ipw_score(theta): g_hat["models"] = fitted_models["ml_g"] m_hat["models"] = fitted_models["ml_m"] - # clip propensities and normalize ipw weights - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) - + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls) # this is not done in the score to be equivalent to PQ models if self._normalize_ipw: m_hat_adj = _normalize_ipw(m_hat["preds"], d) diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index 29c90c87..e880c48e 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -362,7 +362,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], self._dml_data.d, cv=smpls) + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls) psi_a, psi_b = self._score_elements(y, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], smpls) psi_elements = {"psi_a": psi_a, "psi_b": psi_b} diff --git a/doubleml/irm/tests/_utils_cvar_manual.py b/doubleml/irm/tests/_utils_cvar_manual.py index dd6935b6..8d9f0120 100644 --- a/doubleml/irm/tests/_utils_cvar_manual.py +++ b/doubleml/irm/tests/_utils_cvar_manual.py @@ 
-18,7 +18,7 @@ def fit_cvar( treatment, normalize_ipw=True, n_rep=1, - trimming_threshold=1e-2, + clipping_threshold=1e-2, g_params=None, m_params=None, ): @@ -40,7 +40,7 @@ def fit_cvar( smpls, treatment, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, g_params=g_params, m_params=m_params, ) @@ -56,7 +56,7 @@ def fit_cvar( def fit_nuisance_cvar( - y, x, d, quantile, learner_g, learner_m, smpls, treatment, normalize_ipw, trimming_threshold, g_params, m_params + y, x, d, quantile, learner_g, learner_m, smpls, treatment, normalize_ipw, clipping_threshold, g_params, m_params ): n_folds = len(smpls) n_obs = len(y) @@ -95,7 +95,7 @@ def fit_nuisance_cvar( x_train_1 = x[train_inds_1, :] # todo change prediction method m_hat_prelim_list = fit_predict_proba( - d_train_1, x_train_1, ml_m, params=None, trimming_threshold=trimming_threshold, smpls=smpls_prelim + d_train_1, x_train_1, ml_m, params=None, clipping_threshold=clipping_threshold, smpls=smpls_prelim ) m_hat_prelim = np.full_like(y_train_1, np.nan, dtype="float64") @@ -104,8 +104,8 @@ def fit_nuisance_cvar( m_hat_prelim = _dml_cv_predict(ml_m, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)["preds"] - m_hat_prelim[m_hat_prelim < trimming_threshold] = trimming_threshold - m_hat_prelim[m_hat_prelim > 1 - trimming_threshold] = 1 - trimming_threshold + m_hat_prelim[m_hat_prelim < clipping_threshold] = clipping_threshold + m_hat_prelim[m_hat_prelim > 1 - clipping_threshold] = 1 - clipping_threshold if normalize_ipw: m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) @@ -141,8 +141,8 @@ def ipw_score(theta): ml_m.fit(x[train_inds, :], d[train_inds]) m_hat[test_inds] = ml_m.predict_proba(x[test_inds, :])[:, 1] - m_hat[m_hat < trimming_threshold] = trimming_threshold - m_hat[m_hat > 1 - trimming_threshold] = 1 - trimming_threshold + m_hat[m_hat < clipping_threshold] = clipping_threshold + m_hat[m_hat > 1 - clipping_threshold] = 1 - 
clipping_threshold if normalize_ipw: m_hat = _normalize_ipw(m_hat, d) diff --git a/doubleml/irm/tests/test_cvar.py b/doubleml/irm/tests/test_cvar.py index 0eee71c6..d6b08a1c 100644 --- a/doubleml/irm/tests/test_cvar.py +++ b/doubleml/irm/tests/test_cvar.py @@ -42,12 +42,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, trimming_threshold): +def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, clipping_threshold): n_folds = 3 # Set machine learning methods for m & g @@ -71,7 +71,7 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, norm n_folds=n_folds, n_rep=1, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) @@ -91,7 +91,7 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, norm treatment, normalize_ipw=normalize_ipw, n_rep=1, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_cvar_ps_processor.py b/doubleml/irm/tests/test_cvar_ps_processor.py new file mode 100644 index 00000000..6c78162b --- /dev/null +++ b/doubleml/irm/tests/test_cvar_ps_processor.py @@ -0,0 +1,65 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, 
cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_cvar_ml_m_predictions_ps_processor(generate_data_quantiles, ps_config): + x, y, d = generate_data_quantiles + dml_data = dml.DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + dml_cvar = dml.DoubleMLCVAR( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + treatment=1, + quantile=0.5, + ps_processor_config=ps_config, + n_rep=1, + ) + dml_cvar.fit(store_predictions=True) + ml_m_preds = dml_cvar.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_cvar_ml_m_predictions_ps_processor_differences(generate_data_quantiles): + x, y, d = generate_data_quantiles + dml_data = dml.DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_cvar = dml.DoubleMLCVAR( + obj_dml_data=dml_data, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + treatment=1, + quantile=0.5, + ps_processor_config=cfg, + n_rep=1, + ) + dml_cvar.fit(store_predictions=True) + preds.append(dml_cvar.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) 
for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/irm/tests/test_cvar_tune.py b/doubleml/irm/tests/test_cvar_tune.py index ade84769..d51e7852 100644 --- a/doubleml/irm/tests/test_cvar_tune.py +++ b/doubleml/irm/tests/test_cvar_tune.py @@ -71,7 +71,7 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner_g, le n_folds=n_folds, n_rep=1, normalize_ipw=normalize_ipw, - trimming_threshold=0.01, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=0.01), draw_sample_splitting=False, ) @@ -130,7 +130,7 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner_g, le all_smpls=all_smpls, treatment=treatment, n_rep=1, - trimming_threshold=0.01, + clipping_threshold=0.01, normalize_ipw=normalize_ipw, g_params=g_params, m_params=m_params, From c485731b599173a2b6a195ec47e091654c9dea99 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 09:16:39 +0100 Subject: [PATCH 21/38] add ps_processor to pq models --- doubleml/irm/pq.py | 71 ++++++++++++++++------ doubleml/irm/tests/_utils_pq_manual.py | 14 ++--- doubleml/irm/tests/test_pq.py | 8 +-- doubleml/irm/tests/test_pq_ps_processor.py | 65 ++++++++++++++++++++ doubleml/irm/tests/test_pq_tune.py | 4 +- 5 files changed, 131 insertions(+), 31 deletions(-) create mode 100644 doubleml/irm/tests/test_pq_ps_processor.py diff --git a/doubleml/irm/pq.py b/doubleml/irm/pq.py index baf43b7e..f3b72e2c 100644 --- a/doubleml/irm/pq.py +++ b/doubleml/irm/pq.py @@ -1,3 +1,6 @@ +import warnings +from typing import Optional + import numpy as np from sklearn.base import clone from sklearn.model_selection import StratifiedKFold, train_test_split @@ -11,7 +14,6 @@ _check_quantile, _check_score, _check_treatment, - _check_trimming, _check_zero_one_treatment, ) from doubleml.utils._estimation import ( @@ -23,9 +25,11 @@ _predict_zero_one_propensity, _solve_ipw_score, ) -from doubleml.utils._propensity_score import 
_normalize_ipw, _trimm +from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLPQ(NonLinearScoreMixin, DoubleML): """Double machine learning for potential quantiles @@ -74,13 +78,16 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML): Default is ``'None'``, which uses :py:class:`statsmodels.nonparametric.kde.KDEUnivariate` with a gaussian kernel and silverman for bandwidth determination. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. + + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -115,8 +122,9 @@ def __init__( score="PQ", normalize_ipw=True, kde=None, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -155,10 +163,12 @@ def __init__( self._external_predictions_implemented = True - # initialize and check trimming + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold _ = self._check_learner(ml_g, "ml_g", regressor=False, classifier=True) _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) @@ -195,19 +205,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def _score_element_names(self): @@ -326,7 +361,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa )["preds"] else: m_hat_prelim = m_hat["preds"][np.concatenate([test for _, test in smpls_prelim])] - m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) + m_hat_prelim = self._ps_processor.adjust_ps(m_hat_prelim, d_train_1, cv=smpls_prelim) + if self._normalize_ipw: m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) if self.treatment == 0: @@ -370,11 +406,10 @@ def ipw_score(theta): g_hat["models"] = fitted_models["ml_g"] m_hat["models"] = fitted_models["ml_m"] - # clip propensities and normalize ipw weights - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) - + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls) # this is not done in the score to save computation due to multiple score evaluations # to be able to evaluate the raw models the m_hat['preds'] are not changed + if self._normalize_ipw: m_hat_adj = _normalize_ipw(m_hat["preds"], d) else: diff --git a/doubleml/irm/tests/_utils_pq_manual.py b/doubleml/irm/tests/_utils_pq_manual.py index b5b27c7c..526854a0 100644 --- a/doubleml/irm/tests/_utils_pq_manual.py +++ b/doubleml/irm/tests/_utils_pq_manual.py @@ -18,7 +18,7 @@ def fit_pq( all_smpls, treatment, n_rep=1, - trimming_threshold=1e-2, + clipping_threshold=1e-2, normalize_ipw=True, g_params=None, m_params=None, @@ -40,7 +40,7 @@ def fit_pq( learner_m, smpls, treatment, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, normalize_ipw=normalize_ipw, g_params=g_params, m_params=m_params, @@ -57,7 +57,7 @@ def fit_pq( def fit_nuisance_pq( - y, x, d, quantile, learner_g, learner_m, smpls, treatment, 
trimming_threshold, normalize_ipw, g_params, m_params + y, x, d, quantile, learner_g, learner_m, smpls, treatment, clipping_threshold, normalize_ipw, g_params, m_params ): n_folds = len(smpls) n_obs = len(y) @@ -96,8 +96,8 @@ def fit_nuisance_pq( # todo change prediction method m_hat_prelim = _dml_cv_predict(clone(ml_m), x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)["preds"] - m_hat_prelim[m_hat_prelim < trimming_threshold] = trimming_threshold - m_hat_prelim[m_hat_prelim > 1 - trimming_threshold] = 1 - trimming_threshold + m_hat_prelim[m_hat_prelim < clipping_threshold] = clipping_threshold + m_hat_prelim[m_hat_prelim > 1 - clipping_threshold] = 1 - clipping_threshold if normalize_ipw: m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) @@ -129,8 +129,8 @@ def ipw_score(theta): ml_m.fit(x[train_inds, :], d[train_inds]) m_hat[test_inds] = ml_m.predict_proba(x[test_inds, :])[:, 1] - m_hat[m_hat < trimming_threshold] = trimming_threshold - m_hat[m_hat > 1 - trimming_threshold] = 1 - trimming_threshold + m_hat[m_hat < clipping_threshold] = clipping_threshold + m_hat[m_hat > 1 - clipping_threshold] = 1 - clipping_threshold if normalize_ipw: m_hat = _normalize_ipw(m_hat, d) diff --git a/doubleml/irm/tests/test_pq.py b/doubleml/irm/tests/test_pq.py index 62e69d53..b3505cf5 100644 --- a/doubleml/irm/tests/test_pq.py +++ b/doubleml/irm/tests/test_pq.py @@ -35,12 +35,12 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.01, 0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, trimming_threshold): +def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, clipping_threshold): n_folds = 3 # collect data @@ -59,7 +59,7 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normal quantile=quantile, n_folds=n_folds, 
n_rep=1, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), normalize_ipw=normalize_ipw, draw_sample_splitting=False, ) @@ -80,7 +80,7 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normal all_smpls, treatment, n_rep=1, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, normalize_ipw=normalize_ipw, ) diff --git a/doubleml/irm/tests/test_pq_ps_processor.py b/doubleml/irm/tests/test_pq_ps_processor.py new file mode 100644 index 00000000..c40786d7 --- /dev/null +++ b/doubleml/irm/tests/test_pq_ps_processor.py @@ -0,0 +1,65 @@ +import numpy as np +import pytest +from sklearn.linear_model import LogisticRegression + +from doubleml import DoubleMLData, DoubleMLPQ +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_pq_ml_m_predictions_ps_processor(generate_data_quantiles, ps_config): + x, y, d = generate_data_quantiles + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + dml_pq = DoubleMLPQ( + obj_dml_data=dml_data, + ml_g=LogisticRegression(), + ml_m=LogisticRegression(), + treatment=1, + quantile=0.5, + ps_processor_config=ps_config, + n_rep=1, + ) + dml_pq.fit(store_predictions=True) + ml_m_preds = dml_pq.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - 
ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_pq_ml_m_predictions_ps_processor_differences(generate_data_quantiles): + x, y, d = generate_data_quantiles + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_pq = DoubleMLPQ( + obj_dml_data=dml_data, + ml_g=LogisticRegression(), + ml_m=LogisticRegression(), + treatment=1, + quantile=0.5, + ps_processor_config=cfg, + n_rep=1, + ) + dml_pq.fit(store_predictions=True) + preds.append(dml_pq.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/irm/tests/test_pq_tune.py b/doubleml/irm/tests/test_pq_tune.py index 815c17d4..47e7bc18 100644 --- a/doubleml/irm/tests/test_pq_tune.py +++ b/doubleml/irm/tests/test_pq_tune.py @@ -73,7 +73,7 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner_g, lear n_folds=n_folds, n_rep=1, normalize_ipw=normalize_ipw, - trimming_threshold=0.01, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=0.01), draw_sample_splitting=False, ) @@ -132,7 +132,7 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner_g, lear all_smpls=all_smpls, treatment=treatment, n_rep=1, - trimming_threshold=0.01, + clipping_threshold=0.01, normalize_ipw=normalize_ipw, g_params=g_params, m_params=m_params, From 22212014837d7aa92bc5bc809ecb9ac97f1e12b0 Mon Sep 17 00:00:00 
2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 09:34:35 +0100 Subject: [PATCH 22/38] add ps_processor to LPQ models --- doubleml/irm/lpq.py | 75 +++++++++++++++------ doubleml/irm/tests/_utils_lpq_manual.py | 10 +-- doubleml/irm/tests/test_lpq.py | 12 ++-- doubleml/irm/tests/test_lpq_ps_processor.py | 65 ++++++++++++++++++ doubleml/irm/tests/test_lpq_tune.py | 4 +- 5 files changed, 134 insertions(+), 32 deletions(-) create mode 100644 doubleml/irm/tests/test_lpq_ps_processor.py diff --git a/doubleml/irm/lpq.py b/doubleml/irm/lpq.py index 962b383b..bd62794c 100644 --- a/doubleml/irm/lpq.py +++ b/doubleml/irm/lpq.py @@ -1,3 +1,6 @@ +import warnings +from typing import Optional + import numpy as np from sklearn.base import clone from sklearn.model_selection import StratifiedKFold, train_test_split @@ -7,7 +10,7 @@ from doubleml.data.base_data import DoubleMLData from doubleml.double_ml import DoubleML from doubleml.double_ml_score_mixins import NonLinearScoreMixin -from doubleml.utils._checks import _check_quantile, _check_score, _check_treatment, _check_trimming, _check_zero_one_treatment +from doubleml.utils._checks import _check_quantile, _check_score, _check_treatment, _check_zero_one_treatment from doubleml.utils._estimation import ( _cond_targets, _default_kde, @@ -17,9 +20,11 @@ _predict_zero_one_propensity, _solve_ipw_score, ) -from doubleml.utils._propensity_score import _normalize_ipw, _trimm +from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
class DoubleMLLPQ(NonLinearScoreMixin, DoubleML): """Double machine learning for local potential quantiles @@ -67,13 +72,16 @@ class DoubleMLLPQ(NonLinearScoreMixin, DoubleML): Default is ``'None'``, which uses :py:class:`statsmodels.nonparametric.kde.KDEUnivariate` with a gaussian kernel and silverman for bandwidth determination. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. + + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -108,8 +116,9 @@ def __init__( score="LPQ", normalize_ipw=True, kde=None, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -148,10 +157,12 @@ def __init__( self._external_predictions_implemented = True - # initialize and check trimming + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold _ = self._check_learner(ml_g, "ml_g", regressor=False, classifier=True) _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) @@ -200,19 +211,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def _score_element_names(self): @@ -386,7 +422,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa ml_m_z_prelim, x_train_1, z_train_1, method="predict_proba", smpls=smpls_prelim )["preds"] - m_z_hat_prelim = _trimm(m_z_hat_prelim, self.trimming_rule, self.trimming_threshold) + m_z_hat_prelim = self._ps_processor.adjust_ps(m_z_hat_prelim, z_train_1, cv=smpls_prelim) if self._normalize_ipw: m_z_hat_prelim = _normalize_ipw(m_z_hat_prelim, z_train_1) @@ -501,11 +537,12 @@ def ipw_score(theta): g_du_z0_hat["models"] = fitted_models["ml_g_du_z0"] g_du_z1_hat["models"] = fitted_models["ml_g_du_z1"] - # clip propensities - m_z_hat_adj = _trimm(m_z_hat["preds"], self.trimming_rule, self.trimming_threshold) - + # adjust propensity scores + m_z_hat["preds"] = self._ps_processor.adjust_ps(m_z_hat["preds"], z, cv=smpls) if self._normalize_ipw: - m_z_hat_adj = _normalize_ipw(m_z_hat_adj, z) + m_z_hat_adj = _normalize_ipw(m_z_hat["preds"], z) + else: + m_z_hat_adj = m_z_hat["preds"] # this could be adjusted to be compatible with dml1 # estimate final nuisance parameter diff --git a/doubleml/irm/tests/_utils_lpq_manual.py b/doubleml/irm/tests/_utils_lpq_manual.py index 376c7c46..839025fd 100644 --- a/doubleml/irm/tests/_utils_lpq_manual.py +++ b/doubleml/irm/tests/_utils_lpq_manual.py @@ -20,7 +20,7 @@ def fit_lpq( treatment, n_rep=1, trimming_rule="truncate", - trimming_threshold=1e-2, + clipping_threshold=1e-2, kde=_default_kde, normalize_ipw=True, m_z_params=None, @@ -48,7 +48,7 @@ def fit_lpq( smpls, treatment, trimming_rule=trimming_rule, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, normalize_ipw=normalize_ipw, m_z_params=m_z_params, m_d_z0_params=m_d_z0_params, @@ -80,7 +80,7 @@ def fit_nuisance_lpq( smpls, 
treatment, trimming_rule, - trimming_threshold, + clipping_threshold, normalize_ipw, m_z_params, m_d_z0_params, @@ -144,7 +144,7 @@ def fit_nuisance_lpq( "preds" ] - m_z_hat_prelim = _trimm(m_z_hat_prelim, trimming_rule, trimming_threshold) + m_z_hat_prelim = _trimm(m_z_hat_prelim, trimming_rule, clipping_threshold) if normalize_ipw: m_z_hat_prelim = _normalize_ipw(m_z_hat_prelim, z_train_1) @@ -222,7 +222,7 @@ def ipw_score(theta): m_d_z1_hat[test_inds] = ml_m_d_z1.predict_proba(x[test_inds, :])[:, 1] # clip propensities - m_z_hat = _trimm(m_z_hat, trimming_rule, trimming_threshold) + m_z_hat = _trimm(m_z_hat, trimming_rule, clipping_threshold) if normalize_ipw: m_z_hat = _normalize_ipw(m_z_hat, z) diff --git a/doubleml/irm/tests/test_lpq.py b/doubleml/irm/tests/test_lpq.py index 3e0049b8..2f90156e 100644 --- a/doubleml/irm/tests/test_lpq.py +++ b/doubleml/irm/tests/test_lpq.py @@ -41,7 +41,7 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[0.05]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -51,7 +51,7 @@ def kde(request): @pytest.fixture(scope="module") -def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw, trimming_threshold, kde): +def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw, clipping_threshold, kde): n_folds = 3 # collect data @@ -73,7 +73,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, n_folds=n_folds, n_rep=1, normalize_ipw=normalize_ipw, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) # synchronize the sample splitting @@ -94,7 +94,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw=normalize_ipw, kde=_default_kde, n_rep=1, - trimming_threshold=trimming_threshold, + 
clipping_threshold=clipping_threshold, ) else: dml_lpq_obj = dml.DoubleMLLPQ( @@ -107,7 +107,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, n_rep=1, normalize_ipw=normalize_ipw, kde=kde, - trimming_threshold=trimming_threshold, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), draw_sample_splitting=False, ) @@ -129,7 +129,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw=normalize_ipw, kde=kde, n_rep=1, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/irm/tests/test_lpq_ps_processor.py b/doubleml/irm/tests/test_lpq_ps_processor.py new file mode 100644 index 00000000..acd539ab --- /dev/null +++ b/doubleml/irm/tests/test_lpq_ps_processor.py @@ -0,0 +1,65 @@ +import numpy as np +import pytest +from sklearn.linear_model import LogisticRegression + +import doubleml as dml +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_lpq_ml_m_predictions_ps_processor(generate_data_local_quantiles, ps_config): + x, y, d, z = generate_data_local_quantiles + dml_data = dml.DoubleMLData.from_arrays(x=x, y=y, d=d, z=z) + np.random.seed(3141) + dml_lpq = dml.DoubleMLLPQ( + obj_dml_data=dml_data, + ml_g=LogisticRegression(), + ml_m=LogisticRegression(), + treatment=1, + quantile=0.5, + ps_processor_config=ps_config, + n_rep=1, + ) + dml_lpq.fit(store_predictions=True) + ml_m_preds = 
dml_lpq.predictions["ml_m_z"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_lpq_ml_m_predictions_ps_processor_differences(generate_data_local_quantiles): + x, y, d, z = generate_data_local_quantiles + dml_data = dml.DoubleMLData.from_arrays(x=x, y=y, d=d, z=z) + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_lpq = dml.DoubleMLLPQ( + obj_dml_data=dml_data, + ml_g=LogisticRegression(), + ml_m=LogisticRegression(), + treatment=1, + quantile=0.5, + ps_processor_config=cfg, + n_rep=1, + ) + dml_lpq.fit(store_predictions=True) + preds.append(dml_lpq.predictions["ml_m_z"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/irm/tests/test_lpq_tune.py b/doubleml/irm/tests/test_lpq_tune.py index c2b7d192..30c9b718 100644 --- a/doubleml/irm/tests/test_lpq_tune.py +++ b/doubleml/irm/tests/test_lpq_tune.py @@ -73,7 +73,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, n_folds=n_folds, n_rep=1, normalize_ipw=normalize_ipw, - trimming_threshold=0.01, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=0.01), draw_sample_splitting=False, ) @@ -150,7 +150,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, 
treatment, quantile, learner, all_smpls=all_smpls, treatment=treatment, n_rep=1, - trimming_threshold=0.01, + clipping_threshold=0.01, normalize_ipw=normalize_ipw, m_z_params=m_z_params, m_d_z0_params=m_d_z0_params, From d800d3f92f2f213fe96929ffacc2c09a7b174a13 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 10:05:40 +0100 Subject: [PATCH 23/38] add ps_processor to qte --- doubleml/irm/qte.py | 65 +++++++++++++++++------ doubleml/irm/tests/test_qte_exceptions.py | 19 ------- doubleml/utils/tests/test_ps_processor.py | 12 ++--- 3 files changed, 53 insertions(+), 43 deletions(-) diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py index f896b078..46c8f316 100644 --- a/doubleml/irm/qte.py +++ b/doubleml/irm/qte.py @@ -1,3 +1,6 @@ +import warnings +from typing import Optional + import numpy as np import pandas as pd from joblib import Parallel, delayed @@ -9,11 +12,13 @@ from doubleml.irm.cvar import DoubleMLCVAR from doubleml.irm.lpq import DoubleMLLPQ from doubleml.irm.pq import DoubleMLPQ -from doubleml.utils._checks import _check_score, _check_trimming, _check_zero_one_treatment +from doubleml.utils._checks import _check_score, _check_zero_one_treatment from doubleml.utils._descriptive import generate_summary from doubleml.utils._estimation import _default_kde +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLQTE(SampleSplittingMixin): """Double machine learning for quantile treatment effects @@ -56,13 +61,16 @@ class DoubleMLQTE(SampleSplittingMixin): Default is ``'None'``, which uses :py:class:`statsmodels.nonparametric.kde.KDEUnivariate` with a gaussian kernel and silverman for bandwidth determination. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. 
+ trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. + + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -98,8 +106,9 @@ def __init__( score="PQ", normalize_ipw=True, kde=None, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, ): self._dml_data = obj_dml_data @@ -130,10 +139,12 @@ def __init__( # initialize framework which is constructed after the fit method is called self._framework = None - # initialize and check trimming + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold if not isinstance(self.normalize_ipw, bool): raise TypeError( @@ -250,19 +261,44 @@ def normalize_ipw(self): """ return self._normalize_ipw + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def coef(self): @@ -530,8 +566,7 @@ def _initialize_models(self): "ml_m": self._learner["ml_m"], "n_folds": self.n_folds, "n_rep": self.n_rep, - "trimming_rule": self.trimming_rule, - "trimming_threshold": self.trimming_threshold, + "ps_processor_config": self.ps_processor_config, "normalize_ipw": self.normalize_ipw, "draw_sample_splitting": False, } diff --git a/doubleml/irm/tests/test_qte_exceptions.py b/doubleml/irm/tests/test_qte_exceptions.py index f4e95110..75f08f6b 100644 --- a/doubleml/irm/tests/test_qte_exceptions.py +++ b/doubleml/irm/tests/test_qte_exceptions.py @@ -56,25 +56,6 @@ def test_exception_score(): _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), score=2) -@pytest.mark.ci -def test_exception_trimming_rule(): - msg = "Invalid trimming_rule discard. Valid trimming_rule truncate." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="discard") - - msg = "trimming_threshold has to be a float. Object of type passed." - with pytest.raises(TypeError, match=msg): - _ = DoubleMLQTE( - dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1" - ) - - msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5." - with pytest.raises(ValueError, match=msg): - _ = DoubleMLQTE( - dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6 - ) - - @pytest.mark.ci def test_exception_quantiles(): msg = r"Quantiles have be between 0 or 1. Quantiles \[0.2 2. \] passed." 
diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py index d48a4b7d..a04354f1 100644 --- a/doubleml/utils/tests/test_ps_processor.py +++ b/doubleml/utils/tests/test_ps_processor.py @@ -1,14 +1,12 @@ -from unittest.mock import patch import warnings +from unittest.mock import patch import numpy as np import pytest from sklearn.isotonic import IsotonicRegression from sklearn.model_selection import KFold, cross_val_predict -from doubleml.utils.propensity_score_processing import ( - PSProcessorConfig, PSProcessor, init_ps_processor -) +from doubleml.utils.propensity_score_processing import PSProcessor, PSProcessorConfig, init_ps_processor # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @@ -109,13 +107,9 @@ def test_isotonic_calibration_with_cv(cv): cv = [(train, test) for train, test in KFold(n_splits=3).split(ps)] elif cv == "splitter": cv = KFold(n_splits=3) - else: - cv = cv clipping_threshold = 0.01 - processor = PSProcessor( - calibration_method="isotonic", cv_calibration=True, clipping_threshold=clipping_threshold - ) + processor = PSProcessor(calibration_method="isotonic", cv_calibration=True, clipping_threshold=clipping_threshold) isotonic_manual = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0) ps_cv = cross_val_predict(isotonic_manual, ps.reshape(-1, 1), treatment, cv=cv) From eecb6231b162cfa233eb452ffa8f1e6bd798d40a Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 10:27:12 +0100 Subject: [PATCH 24/38] update for sklearn warning --- doubleml/irm/tests/test_ssm_exceptions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doubleml/irm/tests/test_ssm_exceptions.py b/doubleml/irm/tests/test_ssm_exceptions.py index 4ca9f263..039ed921 100644 --- a/doubleml/irm/tests/test_ssm_exceptions.py +++ b/doubleml/irm/tests/test_ssm_exceptions.py @@ -7,6 +7,7 @@ from doubleml import DoubleMLSSM from doubleml.data.base_data import 
DoubleMLBaseData from doubleml.irm.datasets import make_ssm_data +from doubleml.utils.propensity_score_processing import PSProcessorConfig np.random.seed(3141) n = 100 @@ -184,7 +185,7 @@ def set_params(self): pass -class _DummyNoClassifier(_DummyNoGetParams): +class _DummyNoClassifier(_DummyNoGetParams, BaseEstimator): def get_params(self): pass @@ -291,7 +292,7 @@ def test_double_ml_exception_evaluate_learner(): ml_g=Lasso(), ml_pi=LogisticRegression(), ml_m=LogisticRegression(), - trimming_threshold=0.05, + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), n_folds=5, score="missing-at-random", ) From bd3d795442a94f479f7a88d23feffb359921fef2 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 11:51:33 +0100 Subject: [PATCH 25/38] update exception tests for trimming and remove/supress warnings --- doubleml/tests/test_exceptions.py | 234 ++++++++---------- doubleml/utils/propensity_score_processing.py | 3 +- pytest.ini | 1 + 3 files changed, 102 insertions(+), 136 deletions(-) diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index e725a562..94b5f824 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -24,6 +24,7 @@ from doubleml.did.datasets import make_did_SZ2020 from doubleml.irm.datasets import make_iivm_data, make_irm_data from doubleml.plm.datasets import make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018 +from doubleml.utils import PSProcessorConfig from ._utils import DummyDataClass @@ -378,114 +379,6 @@ def test_doubleml_exception_scores(): _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), score=2) -@pytest.mark.ci -def test_doubleml_exception_trimming_rule(): - msg = "Invalid trimming_rule discard. Valid trimming_rule truncate." 
- with pytest.raises(ValueError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLPQ(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLLPQ(dml_data_iivm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLCVAR(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule="discard") - with pytest.raises(ValueError, match=msg): - _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule="discard") - - # check the trimming_threshold exceptions - msg = "trimming_threshold has to be a float. Object of type passed." 
- with pytest.raises(TypeError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1") - with pytest.raises(TypeError, match=msg): - _ = DoubleMLIIVM( - dml_data_iivm, - Lasso(), - LogisticRegression(), - LogisticRegression(), - trimming_rule="truncate", - trimming_threshold="0.1", - ) - with pytest.raises(TypeError, match=msg): - _ = DoubleMLPQ( - dml_data_irm, - LogisticRegression(), - LogisticRegression(), - treatment=1, - trimming_rule="truncate", - trimming_threshold="0.1", - ) - with pytest.raises(TypeError, match=msg): - _ = DoubleMLLPQ( - dml_data_iivm, - LogisticRegression(), - LogisticRegression(), - treatment=1, - trimming_rule="truncate", - trimming_threshold="0.1", - ) - with pytest.raises(TypeError, match=msg): - _ = DoubleMLCVAR( - dml_data_irm, Lasso(), LogisticRegression(), treatment=1, trimming_rule="truncate", trimming_threshold="0.1" - ) - with pytest.raises(TypeError, match=msg): - _ = DoubleMLQTE( - dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1" - ) - with pytest.raises(TypeError, match=msg): - _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1") - with pytest.raises(TypeError, match=msg): - _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1") - - msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5." 
- with pytest.raises(ValueError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLIIVM( - dml_data_iivm, - Lasso(), - LogisticRegression(), - LogisticRegression(), - trimming_rule="truncate", - trimming_threshold=0.6, - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLPQ( - dml_data_irm, - LogisticRegression(), - LogisticRegression(), - treatment=1, - trimming_rule="truncate", - trimming_threshold=0.6, - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLLPQ( - dml_data_iivm, - LogisticRegression(), - LogisticRegression(), - treatment=1, - trimming_rule="truncate", - trimming_threshold=0.6, - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLCVAR( - dml_data_irm, Lasso(), LogisticRegression(), treatment=1, trimming_rule="truncate", trimming_threshold=0.6 - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLQTE( - dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6 - ) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6) - with pytest.raises(ValueError, match=msg): - _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6) - - @pytest.mark.ci def test_doubleml_exception_weights(): msg = "weights must be a numpy array or dictionary. weights of type was passed." 
@@ -961,9 +854,9 @@ def set_params(self): pass -class _DummyNoClassifier(_DummyNoGetParams): - def get_params(self): - pass +class _DummyNoClassifier(_DummyNoGetParams, BaseEstimator): + def get_params(self, deep=True): + return {} def predict_proba(self): pass @@ -1063,28 +956,25 @@ def test_doubleml_exception_learner(): # construct a classifier which is not identifiable as classifier via is_classifier by sklearn # it then predicts labels and therefore an exception will be thrown log_reg = LogisticRegressionManipulatedPredict() - # TODO(0.11) can be removed if the sklearn dependency is bumped to 1.6.0 - log_reg._estimator_type = None - msg = ( + msg_warn = ( r"Learner provided for ml_m is probably invalid: LogisticRegressionManipulatedPredict\(\) is \(probably\) " "neither a regressor nor a classifier. Method predict is used for prediction." ) - with pytest.warns(UserWarning, match=msg): + with pytest.warns(UserWarning, match=msg_warn): dml_plr_hidden_classifier = DoubleMLPLR(dml_data_irm, Lasso(), log_reg) msg = ( r"For the binary variable d, predictions obtained with the ml_m learner LogisticRegressionManipulatedPredict\(\) " "are also observed to be binary with values 0 and 1. Make sure that for classifiers probabilities and not " "labels are predicted." 
) - with pytest.raises(ValueError, match=msg): - dml_plr_hidden_classifier.fit() + with pytest.warns(UserWarning, match=msg_warn): + with pytest.raises(ValueError, match=msg): + dml_plr_hidden_classifier.fit() # construct a classifier which is not identifiable as classifier via is_classifier by sklearn # it then predicts labels and therefore an exception will be thrown # whether predict() or predict_proba() is being called can also be manipulated via the unrelated max_iter variable log_reg = LogisticRegressionManipulatedPredict() - # TODO(0.11) can be removed if the sklearn dependency is bumped to 1.6.0 - log_reg._estimator_type = None msg = ( r"Learner provided for ml_g is probably invalid: LogisticRegressionManipulatedPredict\(\) is \(probably\) " "neither a regressor nor a classifier. Method predict is used for prediction." @@ -1151,7 +1041,12 @@ def test_doubleml_sensitivity_not_yet_implemented(): @pytest.mark.ci def test_doubleml_sensitivity_inputs(): - dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) + dml_irm = DoubleMLIRM( + dml_data_irm, + Lasso(), + LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.1), + ) dml_irm.fit() # test cf_y @@ -1231,7 +1126,9 @@ def test_doubleml_sensitivity_inputs(): def test_doubleml_sensitivity_reestimation_warning(): - dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) + dml_irm = DoubleMLIRM( + dml_data_irm, Lasso(), LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1) + ) dml_irm.fit() dml_irm.sensitivity_elements["nu2"] = -1.0 * dml_irm.sensitivity_elements["nu2"] @@ -1242,7 +1139,9 @@ def test_doubleml_sensitivity_reestimation_warning(): def test_doubleml_sensitivity_summary(): - dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) + dml_irm = DoubleMLIRM( + dml_data_irm, Lasso(), LogisticRegression(), 
ps_processor_config=PSProcessorConfig(clipping_threshold=0.1) + ) msg = r"Apply sensitivity_analysis\(\) before sensitivity_summary." with pytest.raises(ValueError, match=msg): _ = dml_irm.sensitivity_summary @@ -1250,7 +1149,9 @@ def test_doubleml_sensitivity_summary(): @pytest.mark.ci def test_doubleml_sensitivity_benchmark(): - dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) + dml_irm = DoubleMLIRM( + dml_data_irm, Lasso(), LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1) + ) dml_irm.fit() # test input @@ -1272,7 +1173,12 @@ def test_doubleml_sensitivity_benchmark(): @pytest.mark.ci def test_doubleml_sensitivity_plot_input(): - dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1) + dml_irm = DoubleMLIRM( + dml_data_irm, + Lasso(), + LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.1), + ) dml_irm.fit() msg = r"Apply sensitivity_analysis\(\) to include senario in sensitivity_plot. " @@ -1411,7 +1317,9 @@ def test_doubleml_warning_blp(): @pytest.mark.ci def test_doubleml_exception_gate(): - dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5) + dml_irm_obj = DoubleMLIRM( + dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1) + ) dml_irm_obj.fit() msg = "Groups must be of DataFrame type. Groups of type was passed." 
@@ -1426,7 +1334,12 @@ def test_doubleml_exception_gate(): dml_irm_obj.gate(groups=groups) dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE" + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.1), + n_folds=5, + score="ATTE", ) dml_irm_obj.fit() groups = pd.DataFrame(np.random.choice([True, False], size=dml_data_irm.n_obs)) @@ -1435,7 +1348,13 @@ def test_doubleml_exception_gate(): dml_irm_obj.gate(groups=groups) dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2 + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.1), + n_folds=5, + score="ATE", + n_rep=2, ) dml_irm_obj.fit() @@ -1447,7 +1366,12 @@ def test_doubleml_exception_gate(): @pytest.mark.ci def test_doubleml_exception_cate(): dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE" + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATTE", ) dml_irm_obj.fit() @@ -1456,7 +1380,13 @@ def test_doubleml_exception_cate(): dml_irm_obj.cate(basis=2) dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2 + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATE", + n_rep=2, ) dml_irm_obj.fit() msg = "Only implemented for one repetition. Number of repetitions is 2." 
@@ -1504,7 +1434,12 @@ def test_doubleml_exception_plr_gate(): @pytest.mark.ci def test_double_ml_exception_evaluate_learner(): dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE" + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATTE", ) msg = r"Apply fit\(\) before evaluate_learners\(\)." @@ -1532,7 +1467,13 @@ def eval_fct(y_pred, y_true): @pytest.mark.ci def test_doubleml_exception_policytree(): - dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5) + dml_irm_obj = DoubleMLIRM( + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + ) dml_irm_obj.fit() msg = "Covariates must be of DataFrame type. Covariates of type was passed." @@ -1546,7 +1487,12 @@ def test_doubleml_exception_policytree(): dml_irm_obj.policy_tree(features=pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3))), depth=0.1) dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE" + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATTE", ) dml_irm_obj.fit() @@ -1555,7 +1501,13 @@ def test_doubleml_exception_policytree(): dml_irm_obj.policy_tree(features=2, depth=1) dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2 + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATE", + n_rep=2, ) dml_irm_obj.fit() msg = "Only implemented for one repetition. Number of repetitions is 2." 
@@ -1566,7 +1518,13 @@ def test_doubleml_exception_policytree(): @pytest.mark.ci def test_double_ml_external_predictions(): dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2 + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATE", + n_rep=2, ) msg = "external_predictions must be a dictionary. ml_m of type was passed." @@ -1574,7 +1532,13 @@ def test_double_ml_external_predictions(): dml_irm_obj.fit(external_predictions="ml_m") dml_irm_obj = DoubleMLIRM( - dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=1 + dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + ps_processor_config=PSProcessorConfig(clipping_threshold=0.05), + n_folds=5, + score="ATE", + n_rep=1, ) predictions = {"d": "test", "d_f": "test"} diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index 3bec652f..e560bd41 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -66,9 +66,10 @@ class PSProcessor: Examples -------- >>> import numpy as np + >>> from doubleml.utils import PSProcessor >>> ps = np.array([0.001, 0.2, 0.5, 0.8, 0.999]) >>> treatment = np.array([0, 1, 1, 0, 1]) - >>> processor = PropensityScoreProcessor(clipping_threshold=0.01) + >>> processor = PSProcessor(clipping_threshold=0.01) >>> adjusted = processor.adjust_ps(ps, treatment) >>> print(np.round(adjusted, 3)) [0.01 0.2 0.5 0.8 0.99] diff --git a/pytest.ini b/pytest.ini index 3582830c..f7125f42 100644 --- a/pytest.ini +++ b/pytest.ini @@ -15,3 +15,4 @@ filterwarnings = ignore:.*Sensitivity analysis not implemented for callable scores.*:UserWarning ignore:.*Subsample has not common support. 
Results are based on adjusted propensities.*:UserWarning ignore:.*Treatment probability within bandwidth left from cutoff higher than right from cutoff.\nTreatment assignment might be based on the wrong side of the cutoff.*:UserWarning + ignore:.*The estimated nu2 for d is not positive.*:UserWarning From 07e5c279c0dcd1c450c2f5c18318e360a297fcdd Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 12:32:21 +0100 Subject: [PATCH 26/38] add ps_processor to did binary rename trimming to clipping in did --- doubleml/did/did.py | 31 +++------ doubleml/did/did_binary.py | 63 ++++++++++++----- doubleml/did/tests/_utils_did_manual.py | 8 +-- doubleml/did/tests/test_did.py | 8 +-- .../did/tests/test_did_binary_ps_processor.py | 67 +++++++++++++++++++ .../did/tests/test_did_binary_vs_did_panel.py | 8 ++- .../test_did_binary_vs_did_two_period.py | 9 +-- 7 files changed, 140 insertions(+), 54 deletions(-) create mode 100644 doubleml/did/tests/test_did_binary_ps_processor.py diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 9307ae78..50270e60 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -7,9 +7,8 @@ from doubleml.data.did_data import DoubleMLDIDData from doubleml.double_ml import DoubleML from doubleml.double_ml_score_mixins import LinearScoreMixin -from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming +from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls -from doubleml.utils._propensity_score import _trimm class DoubleMLDID(LinearScoreMixin, DoubleML): @@ -50,12 +49,8 @@ class DoubleMLDID(LinearScoreMixin, DoubleML): Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020). Default is ``True``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. 
- Default is ``'truncate'``. - - trimming_threshold : float - The threshold used for trimming. + clipping_threshold : float + The threshold used for clipping. Default is ``1e-2``. draw_sample_splitting : bool @@ -89,8 +84,7 @@ def __init__( n_rep=1, score="observational", in_sample_normalization=True, - trimming_rule="truncate", - trimming_threshold=1e-2, + clipping_threshold=1e-2, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -142,9 +136,7 @@ def __init__( self._predict_method["ml_m"] = "predict_proba" self._initialize_ml_nuisance_params() - self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._clipping_threshold = clipping_threshold self._sensitivity_implemented = True self._external_predictions_implemented = True @@ -156,18 +148,11 @@ def in_sample_normalization(self): return self._in_sample_normalization @property - def trimming_rule(self): - """ - Specifies the used trimming rule. - """ - return self._trimming_rule - - @property - def trimming_threshold(self): + def clipping_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + return self._clipping_threshold def _initialize_ml_nuisance_params(self): if self.score == "observational": @@ -271,7 +256,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + m_hat["preds"] = np.clip(m_hat["preds"], self.clipping_threshold, 1 - self.clipping_threshold) # nuisance estimates of the uncond. treatment prob. 
p_hat = np.full_like(d, d.mean(), dtype="float64") diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index 6e3a95f2..3e94ff13 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -1,4 +1,5 @@ import warnings +from typing import Optional import numpy as np from sklearn.utils import check_X_y @@ -19,14 +20,13 @@ from doubleml.utils._checks import ( _check_bool, _check_finite_predictions, - _check_is_propensity, _check_score, - _check_trimming, ) from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls -from doubleml.utils._propensity_score import _trimm +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLDIDBinary(LinearScoreMixin, DoubleML): """Double machine learning for difference-in-differences models with panel data (binary setting in terms of group and time combinations). @@ -83,13 +83,16 @@ class DoubleMLDIDBinary(LinearScoreMixin, DoubleML): Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020). Default is ``True``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. + + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). 
draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -115,8 +118,9 @@ def __init__( n_rep=1, score="observational", in_sample_normalization=True, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, print_periods=False, ): @@ -232,9 +236,12 @@ def __init__( self._predict_method["ml_m"] = "predict_proba" self._initialize_ml_nuisance_params() + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold self._sensitivity_implemented = True self._external_predictions_implemented = True @@ -321,19 +328,44 @@ def in_sample_normalization(self): """ return self._in_sample_normalization + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. 
", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. " + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def n_obs_subset(self): @@ -500,8 +532,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return_models=return_models, ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls, learner_name="ml_m") # nuisance estimates of the uncond. treatment prob. 
p_hat = np.full_like(d, d.mean(), dtype="float64") diff --git a/doubleml/did/tests/_utils_did_manual.py b/doubleml/did/tests/_utils_did_manual.py index b067e44d..f0713332 100644 --- a/doubleml/did/tests/_utils_did_manual.py +++ b/doubleml/did/tests/_utils_did_manual.py @@ -18,7 +18,7 @@ def fit_did( g0_params=None, g1_params=None, m_params=None, - trimming_threshold=1e-2, + clipping_threshold=1e-2, ): n_obs = len(y) @@ -44,7 +44,7 @@ def fit_did( g0_params=g0_params, g1_params=g1_params, m_params=m_params, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) all_g_hat0.append(g_hat0_list) @@ -83,7 +83,7 @@ def fit_did( def fit_nuisance_did( - y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, trimming_threshold=1e-12 + y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, clipping_threshold=1e-12 ): ml_g0 = clone(learner_g) ml_g1 = clone(learner_g) @@ -101,7 +101,7 @@ def fit_nuisance_did( else: assert score == "observational" ml_m = clone(learner_m) - m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) + m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold) p_hat_list = [] for _ in smpls: diff --git a/doubleml/did/tests/test_did.py b/doubleml/did/tests/test_did.py index 79feb110..a476adb4 100644 --- a/doubleml/did/tests/test_did.py +++ b/doubleml/did/tests/test_did.py @@ -37,12 +37,12 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -68,7 +68,7 @@ def 
dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, score=score, in_sample_normalization=in_sample_normalization, draw_sample_splitting=False, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) # synchronize the sample splitting @@ -85,7 +85,7 @@ def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, all_smpls, score, in_sample_normalization, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/did/tests/test_did_binary_ps_processor.py b/doubleml/did/tests/test_did_binary_ps_processor.py new file mode 100644 index 00000000..226e6932 --- /dev/null +++ b/doubleml/did/tests/test_did_binary_ps_processor.py @@ -0,0 +1,67 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml.did import DoubleMLDIDBinary +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_did_binary_ml_m_predictions_ps_processor(generate_data_did_binary, ps_config): + dml_data = generate_data_did_binary + np.random.seed(3141) + dml_did = DoubleMLDIDBinary( + obj_dml_data=dml_data, + g_value=1, + t_value_pre=0, + t_value_eval=1, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=ps_config, + n_rep=1, + score="observational", + ) + dml_did.fit(store_predictions=True) + ml_m_preds = dml_did.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 
1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_did_binary_ml_m_predictions_ps_processor_differences(generate_data_did_binary): + dml_data = generate_data_did_binary + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_did = DoubleMLDIDBinary( + obj_dml_data=dml_data, + g_value=1, + t_value_pre=0, + t_value_eval=1, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=cfg, + n_rep=1, + score="observational", + ) + dml_did.fit(store_predictions=True) + preds.append(dml_did.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) diff --git a/doubleml/did/tests/test_did_binary_vs_did_panel.py b/doubleml/did/tests/test_did_binary_vs_did_panel.py index 2eddccaf..9abee475 100644 --- a/doubleml/did/tests/test_did_binary_vs_did_panel.py +++ b/doubleml/did/tests/test_did_binary_vs_did_panel.py @@ -9,6 +9,7 @@ import doubleml as dml from doubleml.did.datasets import make_did_CS2021 from doubleml.did.utils._did_utils import _get_id_positions +from doubleml.utils import PSProcessorConfig @pytest.fixture( @@ -36,7 +37,7 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -46,7 +47,7 @@ def 
time_type(request): @pytest.fixture(scope="module") -def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, clipping_threshold): n_obs = 500 dpg = 1 @@ -65,7 +66,6 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza "n_folds": 3, "score": score, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, "draw_sample_splitting": True, } @@ -74,6 +74,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza g_value=dml_panel_data.g_values[0], t_value_pre=dml_panel_data.t_values[0], t_value_eval=dml_panel_data.t_values[1], + ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold), **dml_args, ) dml_did_binary_obj.fit() @@ -82,6 +83,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza dml_data = dml.data.DoubleMLDIDData(df_wide, y_col="y_diff", d_cols="G_indicator", x_cols=["Z1", "Z2", "Z3", "Z4"]) dml_did_obj = dml.DoubleMLDID( dml_data, + clipping_threshold=clipping_threshold, **dml_args, ) diff --git a/doubleml/did/tests/test_did_binary_vs_did_two_period.py b/doubleml/did/tests/test_did_binary_vs_did_two_period.py index 74575664..25416a20 100644 --- a/doubleml/did/tests/test_did_binary_vs_did_two_period.py +++ b/doubleml/did/tests/test_did_binary_vs_did_two_period.py @@ -37,12 +37,12 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_sample_normalization, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 
499 @@ -68,7 +68,6 @@ def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_s "n_folds": n_folds, "score": score, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, "draw_sample_splitting": False, } @@ -77,11 +76,13 @@ def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_s g_value=1, t_value_pre=0, t_value_eval=1, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), **dml_args, ) dml_did_obj = dml.DoubleMLDID( obj_dml_data, + clipping_threshold=clipping_threshold, **dml_args, ) @@ -107,7 +108,7 @@ def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_s all_smpls, score, in_sample_normalization, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { From 118bc8538acf6a27eca7d1b9fad98681ed6cc094 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 13:29:25 +0100 Subject: [PATCH 27/38] add ps_processor to did_cs_binary and clipping to did_cs --- doubleml/did/did_cs.py | 39 ++++-------- doubleml/did/did_cs_binary.py | 63 ++++++++++++++----- doubleml/did/tests/_utils_did_cs_manual.py | 8 +-- doubleml/did/tests/test_did_cs.py | 8 +-- .../test_did_cs_binary_vs_did_cs_panel.py | 7 ++- ...test_did_cs_binary_vs_did_cs_two_period.py | 9 +-- 6 files changed, 76 insertions(+), 58 deletions(-) diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index 11c467b5..706cfd8e 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -7,9 +7,8 @@ from doubleml.data.did_data import DoubleMLDIDData from doubleml.double_ml import DoubleML from doubleml.double_ml_score_mixins import LinearScoreMixin -from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming +from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score from doubleml.utils._estimation import 
_dml_cv_predict, _dml_tune, _get_cond_smpls_2d -from doubleml.utils._propensity_score import _trimm class DoubleMLDIDCS(LinearScoreMixin, DoubleML): @@ -50,12 +49,8 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML): Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020). Default is ``True``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. - - trimming_threshold : float - The threshold used for trimming. + clipping_threshold : float + The threshold used for clipping. Default is ``1e-2``. draw_sample_splitting : bool @@ -87,8 +82,7 @@ def __init__( n_rep=1, score="observational", in_sample_normalization=True, - trimming_rule="truncate", - trimming_threshold=1e-2, + clipping_threshold=1e-2, draw_sample_splitting=True, ): super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) @@ -140,10 +134,7 @@ def __init__( self._predict_method["ml_m"] = "predict_proba" self._initialize_ml_nuisance_params() - self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) - + self._clipping_threshold = clipping_threshold self._sensitivity_implemented = True self._external_predictions_implemented = True @@ -155,18 +146,11 @@ def in_sample_normalization(self): return self._in_sample_normalization @property - def trimming_rule(self): + def clipping_threshold(self): """ - Specifies the used trimming rule. + Specifies the used clipping threshold. """ - return self._trimming_rule - - @property - def trimming_threshold(self): - """ - Specifies the used trimming threshold. 
- """ - return self._trimming_threshold + return self._clipping_threshold def _initialize_ml_nuisance_params(self): if self.score == "observational": @@ -312,9 +296,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa method=self._predict_method["ml_m"], return_models=return_models, ) - _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + + _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) + _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) + m_hat["preds"] = np.clip(m_hat["preds"], self.clipping_threshold, 1 - self.clipping_threshold) psi_a, psi_b = self._score_elements( y, diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index 5375011d..c547ff40 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -1,4 +1,5 @@ import warnings +from typing import Optional import numpy as np from sklearn.utils import check_X_y @@ -19,14 +20,13 @@ from doubleml.utils._checks import ( _check_bool, _check_finite_predictions, - _check_is_propensity, _check_score, - _check_trimming, ) from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d -from doubleml.utils._propensity_score import _trimm +from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLDIDCSBinary(LinearScoreMixin, DoubleML): """Double machine learning for difference-in-differences models with repeated cross sections (binary setting in terms of group and time combinations). 
@@ -83,13 +83,16 @@ class DoubleMLDIDCSBinary(LinearScoreMixin, DoubleML): Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020). Default is ``True``. - trimming_rule : str - A str (``'truncate'`` is the only choice) specifying the trimming approach. - Default is ``'truncate'``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. + + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). draw_sample_splitting : bool Indicates whether the sample splitting should be drawn during initialization of the object. @@ -115,8 +118,9 @@ def __init__( n_rep=1, score="observational", in_sample_normalization=True, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, print_periods=False, ): @@ -221,9 +225,12 @@ def __init__( self._predict_method["ml_m"] = "predict_proba" self._initialize_ml_nuisance_params() + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold self._sensitivity_implemented = True self._external_predictions_implemented = True @@ -312,19 +319,44 @@ def in_sample_normalization(self): """ return self._in_sample_normalization + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def n_obs_subset(self): @@ -480,8 +512,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) - m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls, learner_name="ml_m") psi_a, psi_b = self._score_elements( y, diff --git a/doubleml/did/tests/_utils_did_cs_manual.py b/doubleml/did/tests/_utils_did_cs_manual.py index ce6f8870..0353efef 100644 --- a/doubleml/did/tests/_utils_did_cs_manual.py +++ b/doubleml/did/tests/_utils_did_cs_manual.py @@ -21,7 +21,7 @@ def fit_did_cs( g_d1_t0_params=None, g_d1_t1_params=None, m_params=None, - trimming_threshold=1e-2, + clipping_threshold=1e-2, ): n_obs = len(y) @@ -54,7 +54,7 @@ def fit_did_cs( g_d1_t0_params=g_d1_t0_params, g_d1_t1_params=g_d1_t1_params, m_params=m_params, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) ) @@ -149,7 +149,7 @@ def fit_nuisance_did_cs( g_d1_t0_params=None, g_d1_t1_params=None, m_params=None, - trimming_threshold=1e-12, + clipping_threshold=1e-12, ): ml_g_d0_t0 = clone(learner_g) ml_g_d0_t1 = clone(learner_g) @@ -169,7 +169,7 @@ def fit_nuisance_did_cs( g_hat_d1_t1_list = fit_predict(y, x, ml_g_d1_t1, g_d1_t1_params, smpls, train_cond=train_cond_d1_t1) if score == "observational": ml_m = clone(learner_m) - m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold) + m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold) else: assert score == "experimental" m_hat_list = list() diff 
--git a/doubleml/did/tests/test_did_cs.py b/doubleml/did/tests/test_did_cs.py index bc8e2da6..bc56def2 100644 --- a/doubleml/did/tests/test_did_cs.py +++ b/doubleml/did/tests/test_did_cs.py @@ -38,12 +38,12 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normalization, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -70,7 +70,7 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza score=score, in_sample_normalization=in_sample_normalization, draw_sample_splitting=False, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) # synchronize the sample splitting @@ -88,7 +88,7 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza all_smpls, score, in_sample_normalization, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py index da7db085..eaedba49 100644 --- a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py @@ -36,7 +36,7 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -46,7 +46,7 @@ def time_type(request): @pytest.fixture(scope="module") -def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, 
clipping_threshold): n_obs = 500 dpg = 1 @@ -62,7 +62,6 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza "n_folds": 3, "score": score, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, "draw_sample_splitting": True, } @@ -71,6 +70,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza g_value=dml_panel_data.g_values[0], t_value_pre=dml_panel_data.t_values[0], t_value_eval=dml_panel_data.t_values[1], + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), **dml_args, ) dml_did_binary_obj.fit() @@ -81,6 +81,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza ) dml_did_obj = dml.DoubleMLDIDCS( dml_data, + clipping_threshold=clipping_threshold, **dml_args, ) diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py index b9e267ce..a9ba726e 100644 --- a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py +++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py @@ -38,12 +38,12 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @pytest.fixture(scope="module") -def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score, in_sample_normalization, clipping_threshold): boot_methods = ["normal"] n_folds = 2 n_rep_boot = 499 @@ -70,7 +70,6 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score "n_folds": n_folds, "score": score, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, "draw_sample_splitting": False, } @@ -79,11 +78,13 @@ def 
dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score g_value=1, t_value_pre=0, t_value_eval=1, + ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), **dml_args, ) dml_did_obj = dml.DoubleMLDIDCS( obj_dml_data, + clipping_threshold=clipping_threshold, **dml_args, ) @@ -111,7 +112,7 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score all_smpls, score, in_sample_normalization, - trimming_threshold=trimming_threshold, + clipping_threshold=clipping_threshold, ) res_dict = { From 5fbfa26534fa547bbacb20c6da53e678f0f74b11 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 13:30:01 +0100 Subject: [PATCH 28/38] update clipping terminology and enhance ps_processor usage in DID classes --- doubleml/did/did.py | 3 ++- doubleml/did/did_binary.py | 1 + doubleml/irm/irm.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 50270e60..5ca19a93 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -150,7 +150,7 @@ def in_sample_normalization(self): @property def clipping_threshold(self): """ - Specifies the used trimming threshold. + Specifies the used clipping threshold. 
""" return self._clipping_threshold @@ -254,6 +254,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa method=self._predict_method["ml_m"], return_models=return_models, ) + _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12) m_hat["preds"] = np.clip(m_hat["preds"], self.clipping_threshold, 1 - self.clipping_threshold) diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index 3e94ff13..d92ebf19 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -531,6 +531,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa method=self._predict_method["ml_m"], return_models=return_models, ) + _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls, learner_name="ml_m") diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index e880c48e..01d288bd 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -362,7 +362,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa ) _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls) - m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls) + m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls, learner_name="ml_m") psi_a, psi_b = self._score_elements(y, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], smpls) psi_elements = {"psi_a": psi_a, "psi_b": psi_b} From 474b81fe70f77a78b8171647290861fa61ad23ef Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 13:34:34 +0100 Subject: [PATCH 29/38] add ps_processor test for did_cs_binary --- .../tests/test_did_cs_binary_ps_processor.py | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 doubleml/did/tests/test_did_cs_binary_ps_processor.py 
diff --git a/doubleml/did/tests/test_did_cs_binary_ps_processor.py b/doubleml/did/tests/test_did_cs_binary_ps_processor.py new file mode 100644 index 00000000..1594ee93 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_ps_processor.py @@ -0,0 +1,67 @@ +import numpy as np +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml.did import DoubleMLDIDCSBinary +from doubleml.utils.propensity_score_processing import PSProcessorConfig + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ps_config", + [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ], +) +def test_did_cs_binary_ml_m_predictions_ps_processor(generate_data_did_binary, ps_config): + dml_data = generate_data_did_binary + np.random.seed(3141) + dml_did = DoubleMLDIDCSBinary( + obj_dml_data=dml_data, + g_value=1, + t_value_pre=0, + t_value_eval=1, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=ps_config, + n_rep=1, + score="observational", + ) + dml_did.fit(store_predictions=True) + ml_m_preds = dml_did.predictions["ml_m"][:, 0, 0] + # Just check that predictions are within [clipping_threshold, 1-clipping_threshold] + assert np.all(ml_m_preds >= ps_config.clipping_threshold) + assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold) + + +@pytest.mark.ci +def test_did_cs_binary_ml_m_predictions_ps_processor_differences(generate_data_did_binary): + dml_data = generate_data_did_binary + np.random.seed(3141) + configs = [ + PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False), + PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, 
cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False), + PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True), + ] + preds = [] + for cfg in configs: + dml_did = DoubleMLDIDCSBinary( + obj_dml_data=dml_data, + g_value=1, + t_value_pre=0, + t_value_eval=1, + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ps_processor_config=cfg, + n_rep=1, + score="observational", + ) + dml_did.fit(store_predictions=True) + preds.append(dml_did.predictions["ml_m"][:, 0, 0]) + # Check that at least two configurations yield different predictions (element-wise) + diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))] + assert any(diffs) From adfe40d91eac15f433af0bb87130281b5b0ce33c Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 14:00:22 +0100 Subject: [PATCH 30/38] add ps_processor to did_multi --- doubleml/did/did_multi.py | 63 ++++++++++++++----- .../test_did_multi_aggregation_single_gt.py | 6 +- .../did/tests/test_did_multi_exceptions.py | 16 ----- .../did/tests/test_did_multi_vs_binary.py | 6 +- .../did/tests/test_did_multi_vs_cs_binary.py | 6 +- 5 files changed, 57 insertions(+), 40 deletions(-) diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py index 66e7f837..a9e9e790 100644 --- a/doubleml/did/did_multi.py +++ b/doubleml/did/did_multi.py @@ -1,5 +1,6 @@ import copy import warnings +from typing import Optional import matplotlib.pyplot as plt import numpy as np @@ -33,11 +34,13 @@ from doubleml.did.utils._plot import add_jitter from doubleml.double_ml import DoubleML from doubleml.double_ml_framework import concat -from doubleml.utils._checks import _check_bool, _check_score, _check_trimming +from doubleml.utils._checks import _check_bool, _check_score from doubleml.utils._descriptive import generate_summary from doubleml.utils.gain_statistics import gain_statistics 
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor +# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). class DoubleMLDIDMulti: """Double machine learning for multi-period difference-in-differences models. @@ -96,13 +99,16 @@ class DoubleMLDIDMulti: A str (``'truncate'`` is the only choice) specifying the trimming approach. Default is ``'truncate'``. - trimming_threshold : float - The threshold used for trimming. - Default is ``1e-2``. + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use `ps_processor_config` instead. Will be removed in a future version. - draw_sample_splitting : bool - Indicates whether the sample splitting should be drawn during initialization. - Default is ``True``. + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use `ps_processor_config` instead. Will be removed in a future version. + + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). print_periods : bool Indicates whether to print information about the evaluated periods. @@ -165,8 +171,9 @@ def __init__( score="observational", panel=True, in_sample_normalization=True, - trimming_rule="truncate", - trimming_threshold=1e-2, + trimming_rule="truncate", # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + trimming_threshold=1e-2, # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). 
+ ps_processor_config: Optional[PSProcessorConfig] = None, draw_sample_splitting=True, print_periods=False, ): @@ -214,10 +221,12 @@ def __init__( # initialize framework which is constructed after the fit method is called self._framework = None - # initialize and check trimming + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). + self._ps_processor_config, self._ps_processor = init_ps_processor( + ps_processor_config, trimming_rule, trimming_threshold + ) self._trimming_rule = trimming_rule - self._trimming_threshold = trimming_threshold - _check_trimming(self._trimming_rule, self._trimming_threshold) + self._trimming_threshold = self._ps_processor.clipping_threshold ml_g_is_classifier = DoubleML._check_learner(ml_g, "ml_g", regressor=True, classifier=True) if self.score == "observational": @@ -378,19 +387,44 @@ def in_sample_normalization(self): """ return self._in_sample_normalization + @property + def ps_processor_config(self): + """ + Configuration for propensity score processing (clipping, calibration, etc.). + """ + return self._ps_processor_config + + @property + def ps_processor(self): + """ + Propensity score processor. + """ + return self._ps_processor + + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_rule(self): """ Specifies the used trimming rule. """ + warnings.warn( + "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2 + ) return self._trimming_rule + # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated). @property def trimming_threshold(self): """ Specifies the used trimming threshold. """ - return self._trimming_threshold + warnings.warn( + "'trimming_threshold' is deprecated and will be removed in a future version. 
" + "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._ps_processor.clipping_threshold @property def n_folds(self): @@ -1355,8 +1389,7 @@ def _initialize_models(self): "score": self.score, "n_folds": self.n_folds, "n_rep": self.n_rep, - "trimming_rule": self.trimming_rule, - "trimming_threshold": self.trimming_threshold, + "ps_processor_config": self.ps_processor_config, "in_sample_normalization": self.in_sample_normalization, "draw_sample_splitting": True, "print_periods": self._print_periods, diff --git a/doubleml/did/tests/test_did_multi_aggregation_single_gt.py b/doubleml/did/tests/test_did_multi_aggregation_single_gt.py index a6ffcd49..ede8ed74 100644 --- a/doubleml/did/tests/test_did_multi_aggregation_single_gt.py +++ b/doubleml/did/tests/test_did_multi_aggregation_single_gt.py @@ -38,7 +38,7 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -48,7 +48,7 @@ def time_type(request): @pytest.fixture(scope="module") -def dml_single_gt_aggregation(aggregation, time_type, learner, score, panel, in_sample_normalization, trimming_threshold): +def dml_single_gt_aggregation(aggregation, time_type, learner, score, panel, in_sample_normalization, clipping_threshold): n_obs = 500 dpg = 1 @@ -63,7 +63,7 @@ def dml_single_gt_aggregation(aggregation, time_type, learner, score, panel, in_ "score": score, "panel": panel, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, + "ps_processor_config": dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), "draw_sample_splitting": True, } gt_combination = [(dml_panel_data.g_values[0], dml_panel_data.t_values[0], dml_panel_data.t_values[3])] diff --git a/doubleml/did/tests/test_did_multi_exceptions.py b/doubleml/did/tests/test_did_multi_exceptions.py 
index c53d79d3..a9e432a5 100644 --- a/doubleml/did/tests/test_did_multi_exceptions.py +++ b/doubleml/did/tests/test_did_multi_exceptions.py @@ -62,22 +62,6 @@ def test_input(): invalid_arguments = {"score": "test"} _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments)) - # trimming - msg = "Invalid trimming_rule discard. Valid trimming_rule truncate." - with pytest.raises(ValueError, match=msg): - invalid_arguments = {"trimming_rule": "discard"} - _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments)) - - msg = "trimming_threshold has to be a float. Object of type passed." - with pytest.raises(TypeError, match=msg): - invalid_arguments = {"trimming_threshold": "test"} - _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments)) - - msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5." - with pytest.raises(ValueError, match=msg): - invalid_arguments = {"trimming_threshold": 0.6} - _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments)) - @pytest.mark.ci def test_exception_learners(): diff --git a/doubleml/did/tests/test_did_multi_vs_binary.py b/doubleml/did/tests/test_did_multi_vs_binary.py index 15d3fd0c..86cb2ae4 100644 --- a/doubleml/did/tests/test_did_multi_vs_binary.py +++ b/doubleml/did/tests/test_did_multi_vs_binary.py @@ -35,7 +35,7 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -45,7 +45,7 @@ def time_type(request): @pytest.fixture(scope="module") -def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_normalization, clipping_threshold): n_obs = 500 dpg = 1 boot_methods = ["normal"] @@ -61,7 +61,7 @@ def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_nor "n_folds": 3, "score": 
score, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, + "ps_processor_config": dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), "draw_sample_splitting": True, } gt_combination = [(dml_panel_data.g_values[0], dml_panel_data.t_values[0], dml_panel_data.t_values[1])] diff --git a/doubleml/did/tests/test_did_multi_vs_cs_binary.py b/doubleml/did/tests/test_did_multi_vs_cs_binary.py index 7af8d74d..a658aeee 100644 --- a/doubleml/did/tests/test_did_multi_vs_cs_binary.py +++ b/doubleml/did/tests/test_did_multi_vs_cs_binary.py @@ -35,7 +35,7 @@ def in_sample_normalization(request): @pytest.fixture(scope="module", params=[0.1]) -def trimming_threshold(request): +def clipping_threshold(request): return request.param @@ -50,7 +50,7 @@ def lambda_t(request): @pytest.fixture(scope="module") -def dml_did_binary_vs_did_multi_fixture(time_type, lambda_t, learner, score, in_sample_normalization, trimming_threshold): +def dml_did_binary_vs_did_multi_fixture(time_type, lambda_t, learner, score, in_sample_normalization, clipping_threshold): n_obs = 500 dpg = 1 boot_methods = ["normal"] @@ -66,7 +66,7 @@ def dml_did_binary_vs_did_multi_fixture(time_type, lambda_t, learner, score, in_ "n_folds": 3, "score": score, "in_sample_normalization": in_sample_normalization, - "trimming_threshold": trimming_threshold, + "ps_processor_config": dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold), "draw_sample_splitting": True, } gt_combination = [(dml_panel_data.g_values[0], dml_panel_data.t_values[0], dml_panel_data.t_values[1])] From ca07266ee83f6e5795bae99c47d88b893e9717a5 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 14:07:46 +0100 Subject: [PATCH 31/38] update defaults tests for PSProcessor and clipping_threshold --- doubleml/tests/test_model_defaults.py | 44 +++++++++++++-------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/doubleml/tests/test_model_defaults.py 
b/doubleml/tests/test_model_defaults.py index b04117eb..14650f39 100644 --- a/doubleml/tests/test_model_defaults.py +++ b/doubleml/tests/test_model_defaults.py @@ -92,8 +92,8 @@ def test_irm_defaults(): _fit_bootstrap(dml_irm) _assert_resampling_default_settings(dml_irm) assert dml_irm.score == "ATE" - assert dml_irm.trimming_rule == "truncate" - assert dml_irm.trimming_threshold == 1e-2 + assert isinstance(dml_irm.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_irm.ps_processor, dml.utils.PSProcessor) assert not dml_irm.normalize_ipw assert set(dml_irm.weights.keys()) == set(["weights"]) assert np.array_equal(dml_irm.weights["weights"], np.ones((dml_irm._dml_data.n_obs,))) @@ -106,8 +106,8 @@ def test_iivm_defaults(): _assert_resampling_default_settings(dml_iivm) assert dml_iivm.score == "LATE" assert dml_iivm.subgroups == {"always_takers": True, "never_takers": True} - assert dml_iivm.trimming_rule == "truncate" - assert dml_iivm.trimming_threshold == 1e-2 + assert isinstance(dml_iivm.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_iivm.ps_processor, dml.utils.PSProcessor) assert not dml_iivm.normalize_ipw @@ -119,8 +119,8 @@ def test_cvar_defaults(): assert dml_cvar.quantile == 0.5 assert dml_cvar.treatment == 1 assert dml_cvar.score == "CVaR" - assert dml_cvar.trimming_rule == "truncate" - assert dml_cvar.trimming_threshold == 1e-2 + assert isinstance(dml_cvar.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_cvar.ps_processor, dml.utils.PSProcessor) @pytest.mark.ci @@ -131,8 +131,8 @@ def test_pq_defaults(): assert dml_pq.quantile == 0.5 assert dml_pq.treatment == 1 assert dml_pq.score == "PQ" - assert dml_pq.trimming_rule == "truncate" - assert dml_pq.trimming_threshold == 1e-2 + assert isinstance(dml_pq.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_pq.ps_processor, dml.utils.PSProcessor) assert dml_pq.normalize_ipw @@ -144,8 +144,8 @@ def 
test_lpq_defaults(): assert dml_lpq.quantile == 0.5 assert dml_lpq.treatment == 1 assert dml_lpq.score == "LPQ" - assert dml_lpq.trimming_rule == "truncate" - assert dml_lpq.trimming_threshold == 1e-2 + assert isinstance(dml_lpq.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_lpq.ps_processor, dml.utils.PSProcessor) assert dml_lpq.normalize_ipw @@ -159,8 +159,8 @@ def test_qte_defaults(): # not fix since its a differen object added in future versions _assert_resampling_default_settings(dml_qte) assert dml_qte.quantiles == 0.5 assert dml_qte.score == "PQ" - assert dml_qte.trimming_rule == "truncate" - assert dml_qte.trimming_threshold == 1e-2 + assert isinstance(dml_qte.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_qte.ps_processor, dml.utils.PSProcessor) assert dml_qte.normalize_ipw @@ -171,8 +171,7 @@ def test_did_defaults(): _assert_resampling_default_settings(dml_did) assert dml_did.score == "observational" assert dml_did.in_sample_normalization - assert dml_did.trimming_rule == "truncate" - assert dml_did.trimming_threshold == 1e-2 + assert dml_did.clipping_threshold == 1e-2 @pytest.mark.ci @@ -182,8 +181,7 @@ def test_did_cs_defaults(): _assert_resampling_default_settings(dml_did_cs) assert dml_did.score == "observational" assert dml_did_cs.in_sample_normalization - assert dml_did_cs.trimming_rule == "truncate" - assert dml_did_cs.trimming_threshold == 1e-2 + assert dml_did_cs.clipping_threshold == 1e-2 @pytest.mark.ci @@ -192,8 +190,8 @@ def test_ssm_defaults(): _fit_bootstrap(dml_ssm) _assert_resampling_default_settings(dml_ssm) assert dml_ssm.score == "missing-at-random" - assert dml_ssm.trimming_rule == "truncate" - assert dml_ssm.trimming_threshold == 1e-2 + assert isinstance(dml_ssm.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_ssm.ps_processor, dml.utils.PSProcessor) assert not dml_ssm.normalize_ipw @@ -203,8 +201,8 @@ def test_apo_defaults(): _fit_bootstrap(dml_apo) 
_assert_resampling_default_settings(dml_apo) assert dml_apo.score == "APO" - assert dml_apo.trimming_rule == "truncate" - assert dml_apo.trimming_threshold == 1e-2 + assert isinstance(dml_apo.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_apo.ps_processor, dml.utils.PSProcessor) assert not dml_apo.normalize_ipw assert set(dml_apo.weights.keys()) == set(["weights"]) assert np.array_equal(dml_apo.weights["weights"], np.ones((dml_apo._dml_data.n_obs,))) @@ -216,10 +214,10 @@ def test_apos_defaults(): assert dml_apos.boot_method is None assert dml_apos.framework is None assert dml_apos.boot_t_stat is None - _fit_bootstrap(dml_qte) + _fit_bootstrap(dml_apos) assert dml_apos.score == "APO" - assert dml_apos.trimming_rule == "truncate" - assert dml_apos.trimming_threshold == 1e-2 + assert isinstance(dml_apos.ps_processor_config, dml.utils.PSProcessorConfig) + assert isinstance(dml_apos.ps_processor, dml.utils.PSProcessor) assert not dml_apos.normalize_ipw assert np.array_equal(dml_apos.weights, np.ones((dml_apos._dml_data.n_obs,))) From 5d13b0a6e0e441f7865aa4a82be6c0baa0a42692 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 14:21:36 +0100 Subject: [PATCH 32/38] add deprecation warnings for DoubleMLDID and DoubleMLDIDCS and DoubleMLDIDData --- doubleml/data/__init__.py | 1 + doubleml/data/did_data.py | 10 ++++++- doubleml/did/did.py | 6 ++++ doubleml/did/did_cs.py | 6 ++++ .../tests/test_did_deprecation_warnings.py | 29 +++++++++++++++++++ 5 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 doubleml/did/tests/test_did_deprecation_warnings.py diff --git a/doubleml/data/__init__.py b/doubleml/data/__init__.py index 8343c228..73fd71d3 100644 --- a/doubleml/data/__init__.py +++ b/doubleml/data/__init__.py @@ -11,6 +11,7 @@ from .ssm_data import DoubleMLSSMData +# TODO: Remove DoubleMLClusterData with version 0.12.0 class DoubleMLClusterData(DoubleMLData): """ Backwards compatibility wrapper for DoubleMLData with
cluster_cols. diff --git a/doubleml/data/did_data.py b/doubleml/data/did_data.py index 57d486a3..30cc2900 100644 --- a/doubleml/data/did_data.py +++ b/doubleml/data/did_data.py @@ -1,4 +1,5 @@ import io +import warnings import pandas as pd from sklearn.utils import assert_all_finite @@ -7,6 +8,7 @@ from doubleml.data.base_data import DoubleMLData +# TODO: Remove DoubleMLDIDData with version 0.12.0 class DoubleMLDIDData(DoubleMLData): """Double machine learning data-backend for Difference-in-Differences models. @@ -81,7 +83,13 @@ def __init__( use_other_treat_as_covariate=True, force_all_x_finite=True, force_all_d_finite=True, - ): # Initialize _t_col to None first to avoid AttributeError during parent init + ): + warnings.warn( + "DoubleMLDIDData is deprecated and will be removed with version 0.12.0." "Use DoubleMLPanelData instead.", + FutureWarning, + stacklevel=2, + ) + # Initialize _t_col to None first to avoid AttributeError during parent init self._t_col = None # Store whether x_cols was originally None to reset it later diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 5ca19a93..87eb4aaa 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -11,6 +11,7 @@ from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls +# TODO: Remove DoubleMLDID with version 0.12.0 class DoubleMLDID(LinearScoreMixin, DoubleML): """Double machine learning for difference-in-differences models with panel data (two time periods). @@ -87,6 +88,11 @@ def __init__( clipping_threshold=1e-2, draw_sample_splitting=True, ): + warnings.warn( + "DoubleMLDID is deprecated and will be removed with version 0.12.0.
" "Please use DoubleMLDIDBinary instead.", + DeprecationWarning, + stacklevel=2, + ) super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) self._check_data(self._dml_data) diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index 706cfd8e..da833fd5 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -11,6 +11,7 @@ from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d +# TODO: Remove DoubleMLDIDCS with version 0.12.0 class DoubleMLDIDCS(LinearScoreMixin, DoubleML): """Double machine learning for difference-in-difference with repeated cross-sections. @@ -85,6 +86,11 @@ def __init__( clipping_threshold=1e-2, draw_sample_splitting=True, ): + warnings.warn( + "DoubleMLDIDCS is deprecated and will be removed with version 0.12.0. " "Please use DoubleMLDIDCSBinary instead.", + DeprecationWarning, + stacklevel=2, + ) super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting) self._check_data(self._dml_data) diff --git a/doubleml/did/tests/test_did_deprecation_warnings.py b/doubleml/did/tests/test_did_deprecation_warnings.py new file mode 100644 index 00000000..75158fdb --- /dev/null +++ b/doubleml/did/tests/test_did_deprecation_warnings.py @@ -0,0 +1,29 @@ +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +from doubleml.data.did_data import DoubleMLDIDData +from doubleml.did.did import DoubleMLDID +from doubleml.did.did_cs import DoubleMLDIDCS + + +@pytest.mark.ci +def test_deprecation_DoubleMLDIDData(generate_data_did): + (x, y, d, _) = generate_data_did + with pytest.warns(FutureWarning, match="DoubleMLDIDData is deprecated"): + _ = DoubleMLDIDData.from_arrays(x, y, d) + + +@pytest.mark.ci +def test_deprecation_DoubleMLDID(generate_data_did): + (x, y, d, _) = generate_data_did + obj_dml_data = DoubleMLDIDData.from_arrays(x, y, d) + with pytest.warns(DeprecationWarning, match="DoubleMLDID is deprecated"): + _ = DoubleMLDID(obj_dml_data,
ml_g=LinearRegression(), ml_m=LogisticRegression()) + + +@pytest.mark.ci +def test_deprecation_DoubleMLDIDCS(generate_data_did_cs): + (x, y, d, t) = generate_data_did_cs + obj_dml_data = DoubleMLDIDData.from_arrays(x, y, d, t=t) + with pytest.warns(DeprecationWarning, match="DoubleMLDIDCS is deprecated"): + _ = DoubleMLDIDCS(obj_dml_data, ml_g=LinearRegression(), ml_m=LogisticRegression()) From c69af49811258896d3afed16a36fe2a68e945600 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 16:46:39 +0100 Subject: [PATCH 33/38] update type hint for cv parameter in PSProcessor to use Union[int, list] --- doubleml/utils/propensity_score_processing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index e560bd41..c5a50c88 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -1,6 +1,6 @@ import warnings from dataclasses import dataclass -from typing import Optional +from typing import Optional, Union import numpy as np from sklearn.isotonic import IsotonicRegression @@ -131,7 +131,7 @@ def adjust_ps( self, propensity_scores: np.ndarray, treatment: np.ndarray, - cv: Optional[int | list] = None, + cv: Optional[Union[int, list]] = None, learner_name: Optional[str] = None, ) -> np.ndarray: """ From f7dcc983ad71d42b5508d7f6250dfd129955e74b Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Mon, 27 Oct 2025 17:14:31 +0100 Subject: [PATCH 34/38] update type hint for cv parameter in PSProcessor to use Union[int, list] --- doubleml/utils/propensity_score_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index c5a50c88..c04c8f18 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -171,7 +171,7 @@ def 
_apply_calibration( self, propensity_scores: np.ndarray, treatment: np.ndarray, - cv: Optional[int | list] = None, + cv: Optional[Union[int, list]] = None, ) -> np.ndarray: """Apply calibration method to propensity scores if specified.""" if self.calibration_method is None: From 915ec4b24649f70ad1f0223b0ef7bfc45185dd4a Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 4 Nov 2025 11:01:52 +0100 Subject: [PATCH 35/38] correct docstring --- doubleml/utils/propensity_score_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index c04c8f18..37b42634 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -135,7 +135,7 @@ def adjust_ps( learner_name: Optional[str] = None, ) -> np.ndarray: """ - Adjust propensity scores via validation, clipping, and warnings. + Adjust propensity scores via calibration and clipping. Parameters ---------- From 8a79909c3c3e090ed6a26c9e1566797431423257 Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 5 Nov 2025 01:13:06 +0100 Subject: [PATCH 36/38] add docstring for data class --- doubleml/utils/propensity_score_processing.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index 37b42634..fdc6f6f4 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -10,6 +10,45 @@ @dataclass class PSProcessorConfig: + """ + Configuration for propensity score processing. + + This dataclass holds the configuration parameters used by PSProcessor + for propensity score calibration, clipping, and validation. + + Parameters + ---------- + clipping_threshold : float, default=1e-2 + Minimum and maximum bound for propensity scores after clipping. 
+ Must be between 0 and 0.5. + + extreme_threshold : float, default=1e-12 + Threshold below which propensity scores are considered extreme. + Propensity scores are clipped based on this value when scores are too close to 0 or 1 + to avoid numerical instability. + Must be between 0 and 0.5. + + calibration_method : {'isotonic', None}, optional + If provided, applies the specified calibration method to + the propensity scores before clipping. Currently supports: + - 'isotonic': Isotonic regression calibration + - None: No calibration applied + + cv_calibration : bool, default=False + Whether to use cross-validation for calibration. + Only applies if a calibration method is specified. + Requires calibration_method to be set. + + Examples + -------- + >>> from doubleml.utils import PSProcessorConfig, PSProcessor + >>> config = PSProcessorConfig( + ... clipping_threshold=0.05, + ... calibration_method='isotonic', + ... cv_calibration=True + ... ) + >>> processor = PSProcessor.from_config(config) + """ clipping_threshold: float = 1e-2 extreme_threshold: float = 1e-12 calibration_method: Optional[str] = None From ec55d304e870076a01539878c97b48aa90688cca Mon Sep 17 00:00:00 2001 From: SvenKlaassen Date: Wed, 5 Nov 2025 01:14:02 +0100 Subject: [PATCH 37/38] add docstring to from_config method in PSProcessor class --- doubleml/utils/propensity_score_processing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index fdc6f6f4..f57f17cd 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -132,6 +132,7 @@ def __init__( @classmethod def from_config(cls, config: PSProcessorConfig): + """Create PSProcessor from PSProcessorConfig.""" return cls( clipping_threshold=config.clipping_threshold, extreme_threshold=config.extreme_threshold, From c09660423babe7613c37062591a6c9352ad7f663 Mon Sep 17 00:00:00 2001 From: Sven Klaassen 
<47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 5 Nov 2025 01:56:55 +0100 Subject: [PATCH 38/38] add missing newline after docstring in PSProcessorConfig class --- doubleml/utils/propensity_score_processing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py index f57f17cd..2c5428c9 100644 --- a/doubleml/utils/propensity_score_processing.py +++ b/doubleml/utils/propensity_score_processing.py @@ -49,6 +49,7 @@ class PSProcessorConfig: ... ) >>> processor = PSProcessor.from_config(config) """ + clipping_threshold: float = 1e-2 extreme_threshold: float = 1e-12 calibration_method: Optional[str] = None