From 202808dc69fe3714bf3541fd65f6258343abe27a Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Tue, 21 Oct 2025 15:41:46 +0200
Subject: [PATCH 01/38] first ps version

---
 doubleml/utils/propensity_score_processing.py | 206 ++++++++++++++++++
 1 file changed, 206 insertions(+)
 create mode 100644 doubleml/utils/propensity_score_processing.py

diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
new file mode 100644
index 00000000..76214349
--- /dev/null
+++ b/doubleml/utils/propensity_score_processing.py
@@ -0,0 +1,206 @@
+import warnings
+import numpy as np
+from typing import Any, Dict, List, Optional
+
+from doubleml.utils._checks import _check_is_propensity, _check_trimming
+from doubleml.utils._propensity_score import _trimm
+
+
+class PropensityScoreProcessor:
+    """
+    Processor for propensity score validation, clipping, and warnings.
+    
+    Parameters
+    ----------
+    clipping_threshold : float, default=1e-2
+        Threshold used for clipping propensity scores.
+    warn_extreme_values : bool, default=True
+        Whether to warn about extreme propensity score values.
+    extreme_threshold : float, default=0.05
+        Threshold for extreme value warnings.
+    warning_proportion : float, default=0.1
+        Proportion threshold for triggering extreme value warnings.
+    
+    Examples
+    --------
+    >>> processor = PropensityScoreProcessor(clipping_threshold=0.01)
+    >>> clipped_scores = processor.adjust(raw_scores)
+    """
+
+    _DEFAULT_CONFIG: Dict[str, Any] = {
+        "clipping_threshold": 1e-2,
+        "warn_extreme_values": True,
+        "extreme_threshold": 0.05,
+        "warning_proportion": 0.1,
+    }
+
+    def __init__(self, **config: Any) -> None:
+        
+        unknown_params = set(config.keys()) - set(self._DEFAULT_CONFIG.keys())
+        if unknown_params:
+            raise ValueError(f"Unknown parameters: {unknown_params}")
+
+        self._config: Dict[str, Any] = {**self._DEFAULT_CONFIG, **config}
+        self._validate_params()
+
+    # -------------------------------------------------------------------------
+    # Configuration methods
+    # -------------------------------------------------------------------------
+    def _validate_params(self) -> None:
+        """Validate configuration parameters."""
+        _check_trimming("truncate", self._config["clipping_threshold"])
+
+        if not isinstance(self._config["warn_extreme_values"], bool):
+            raise TypeError("warn_extreme_values must be boolean.")
+
+        if not (0 < self._config["extreme_threshold"] < 0.5):
+            raise ValueError("extreme_threshold must be between 0 and 0.5.")
+
+        if not (0 < self._config["warning_proportion"] < 1):
+            raise ValueError("warning_proportion must be between 0 and 1.")
+
+    @property
+    def clipping_threshold(self) -> float:
+        """Get the clipping threshold."""
+        return self._config["clipping_threshold"]
+
+    @property
+    def warn_extreme_values(self) -> bool:
+        """Get the warn extreme values setting."""
+        return self._config["warn_extreme_values"]
+
+    @property
+    def extreme_threshold(self) -> float:
+        """Get the extreme threshold."""
+        return self._config["extreme_threshold"]
+
+    @property
+    def warning_proportion(self) -> float:
+        """Get the warning proportion."""
+        return self._config["warning_proportion"]
+
+    @classmethod
+    def get_default_config(cls) -> Dict[str, Any]:
+        """Return the default configuration dictionary."""
+        return cls._DEFAULT_CONFIG.copy()
+
+    def get_config(self) -> Dict[str, Any]:
+        """Return a copy of the current configuration dictionary."""
+        return self._config.copy()
+
+    def update_config(self, **new_config: Any) -> None:
+        """
+        Update configuration parameters.
+
+        Reinitializes the instance to ensure all validation and defaults
+        are applied consistently.
+        """
+        updated = {**self._config, **new_config}
+        self.__init__(**updated)
+
+    # -------------------------------------------------------------------------
+    # Core functionality
+    # -------------------------------------------------------------------------
+    def adjust(
+        self,
+        propensity_scores: np.ndarray,
+        learner_name: str = "ml_m",
+        smpls: Optional[List[Any]] = None,
+    ) -> np.ndarray:
+        """
+        Adjust propensity scores via validation, clipping, and warnings.
+
+        Parameters
+        ----------
+        propensity_scores : array-like
+            Raw propensity score predictions.
+        learner_name : str, default="ml_m"
+            Name of the learner for error messages.
+        smpls : list, optional
+            Sample splits for validation.
+
+        Returns
+        -------
+        np.ndarray
+            Clipped and validated propensity scores.
+        """
+        # Validation
+        _check_is_propensity(
+            propensity_scores,
+            learner_name,
+            learner_name,
+            smpls,
+            eps=1e-12,
+        )
+
+        # Warnings for extreme values
+        if self.warn_extreme_values:
+            self._warn_extreme_values(propensity_scores)
+
+        # Clipping
+        clipped_scores = _trimm(
+            propensity_scores,
+            "truncate",
+            self.clipping_threshold,
+        )
+
+        return np.asarray(clipped_scores)
+
+    # -------------------------------------------------------------------------
+    # Private helper methods
+    # -------------------------------------------------------------------------
+    def _warn_extreme_values(self, propensity_scores: np.ndarray) -> None:
+        """Emit warnings for extreme or clipped propensity scores."""
+        min_prop = np.min(propensity_scores)
+        max_prop = np.max(propensity_scores)
+
+        extreme_low = np.mean(propensity_scores < self.extreme_threshold)
+        extreme_high = np.mean(propensity_scores > (1 - self.extreme_threshold))
+
+        if extreme_low > self.warning_proportion:
+            warnings.warn(
+                f"Large proportion ({extreme_low:.1%}) of propensity scores "
+                f"below {self.extreme_threshold}. This may indicate poor overlap. "
+                f"Consider adjusting the model or increasing clipping_threshold "
+                f"(current: {self.clipping_threshold}).",
+                UserWarning,
+            )
+
+        if extreme_high > self.warning_proportion:
+            warnings.warn(
+                f"Large proportion ({extreme_high:.1%}) of propensity scores "
+                f"above {1 - self.extreme_threshold}. This may indicate poor overlap. "
+                f"Consider adjusting the model or increasing clipping_threshold "
+                f"(current: {self.clipping_threshold}).",
+                UserWarning,
+            )
+
+        if min_prop <= self.clipping_threshold:
+            warnings.warn(
+                f"Minimum propensity score ({min_prop:.6f}) is at or below "
+                f"clipping threshold ({self.clipping_threshold}). "
+                f"Some observations may be heavily clipped.",
+                UserWarning,
+            )
+
+        if max_prop >= (1 - self.clipping_threshold):
+            warnings.warn(
+                f"Maximum propensity score ({max_prop:.6f}) is at or above "
+                f"clipping threshold ({1 - self.clipping_threshold}). "
+                f"Some observations may be heavily clipped.",
+                UserWarning,
+            )
+
+    # -------------------------------------------------------------------------
+    # Representations
+    # -------------------------------------------------------------------------
+    def __repr__(self) -> str:
+        config_str = ", ".join(
+            [f"{k}={v}" for k, v in sorted(self._config.items())]
+        )
+        return f"{self.__class__.__name__}({config_str})"
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, PropensityScoreProcessor):
+            return False
+        return self._config == other._config

From 2ff5ecb64bb2446a33775000166c1ffced2c1cb8 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 22 Oct 2025 09:10:18 +0200
Subject: [PATCH 02/38] add exceptions for ps processor

---
 doubleml/utils/__init__.py                    |  2 +
 doubleml/utils/propensity_score_processing.py | 60 +++++++-----
 .../tests/test_ps_processor_exceptions.py     | 94 +++++++++++++++++++
 3 files changed, 131 insertions(+), 25 deletions(-)
 create mode 100644 doubleml/utils/tests/test_ps_processor_exceptions.py

diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py
index 386586ce..ce1ba3c6 100644
--- a/doubleml/utils/__init__.py
+++ b/doubleml/utils/__init__.py
@@ -7,6 +7,7 @@
 from .gain_statistics import gain_statistics
 from .global_learner import GlobalClassifier, GlobalRegressor
 from .policytree import DoubleMLPolicyTree
+from .propensity_score_processing import PropensityScoreProcessor
 from .resampling import DoubleMLClusterResampling, DoubleMLResampling
 
 __all__ = [
@@ -19,4 +20,5 @@
     "gain_statistics",
     "GlobalClassifier",
     "GlobalRegressor",
+    "PropensityScoreProcessor",
 ]
diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index 76214349..381c443e 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -1,15 +1,15 @@
 import warnings
-import numpy as np
 from typing import Any, Dict, List, Optional
 
-from doubleml.utils._checks import _check_is_propensity, _check_trimming
-from doubleml.utils._propensity_score import _trimm
+import numpy as np
+
+from doubleml.utils._checks import _check_is_propensity
 
 
 class PropensityScoreProcessor:
     """
     Processor for propensity score validation, clipping, and warnings.
-    
+
     Parameters
     ----------
     clipping_threshold : float, default=1e-2
@@ -20,7 +20,7 @@ class PropensityScoreProcessor:
         Threshold for extreme value warnings.
     warning_proportion : float, default=0.1
         Proportion threshold for triggering extreme value warnings.
-    
+
     Examples
     --------
     >>> processor = PropensityScoreProcessor(clipping_threshold=0.01)
@@ -35,28 +35,38 @@ class PropensityScoreProcessor:
     }
 
     def __init__(self, **config: Any) -> None:
-        
+
         unknown_params = set(config.keys()) - set(self._DEFAULT_CONFIG.keys())
         if unknown_params:
             raise ValueError(f"Unknown parameters: {unknown_params}")
 
-        self._config: Dict[str, Any] = {**self._DEFAULT_CONFIG, **config}
-        self._validate_params()
+        updated_config = {**self._DEFAULT_CONFIG, **config}
+        self._validate_config(updated_config)
+        self._config = updated_config
 
     # -------------------------------------------------------------------------
     # Configuration methods
     # -------------------------------------------------------------------------
-    def _validate_params(self) -> None:
+    def _validate_config(self, config: Dict[str, Any]) -> None:
         """Validate configuration parameters."""
-        _check_trimming("truncate", self._config["clipping_threshold"])
 
-        if not isinstance(self._config["warn_extreme_values"], bool):
+        clipping_threshold = config["clipping_threshold"]
+        if not isinstance(clipping_threshold, float):
+            raise TypeError("clipping_threshold must be of float type. " f"Object of type {type(clipping_threshold)} passed.")
+        if (clipping_threshold <= 0) or (clipping_threshold >= 0.5):
+            raise ValueError(f"clipping_threshold must be between 0 and 0.5. " f"{clipping_threshold} was passed.")
+
+        if not isinstance(config["warn_extreme_values"], bool):
             raise TypeError("warn_extreme_values must be boolean.")
 
-        if not (0 < self._config["extreme_threshold"] < 0.5):
+        if not (0 < config["extreme_threshold"] < 0.5):
             raise ValueError("extreme_threshold must be between 0 and 0.5.")
 
-        if not (0 < self._config["warning_proportion"] < 1):
+        if not isinstance(config["warning_proportion"], float):
+            raise TypeError(
+                "warning_proportion must be of float type. " f"Object of type {type(config['warning_proportion'])} passed."
+            )
+        if not (0 < config["warning_proportion"] < 1):
             raise ValueError("warning_proportion must be between 0 and 1.")
 
     @property
@@ -92,11 +102,17 @@ def update_config(self, **new_config: Any) -> None:
         """
         Update configuration parameters.
 
-        Reinitializes the instance to ensure all validation and defaults
-        are applied consistently.
+        Validates the new configuration before applying changes to ensure
+        the object remains in a consistent state.
         """
-        updated = {**self._config, **new_config}
-        self.__init__(**updated)
+
+        unknown_params = set(new_config.keys()) - set(self._DEFAULT_CONFIG.keys())
+        if unknown_params:
+            raise ValueError(f"Unknown parameters: {unknown_params}")
+
+        updated_config = {**self._config, **new_config}
+        self._validate_config(updated_config)
+        self._config = updated_config
 
     # -------------------------------------------------------------------------
     # Core functionality
@@ -138,11 +154,7 @@ def adjust(
             self._warn_extreme_values(propensity_scores)
 
         # Clipping
-        clipped_scores = _trimm(
-            propensity_scores,
-            "truncate",
-            self.clipping_threshold,
-        )
+        clipped_scores = np.clip(propensity_scores, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold)
 
         return np.asarray(clipped_scores)
 
@@ -195,9 +207,7 @@ def _warn_extreme_values(self, propensity_scores: np.ndarray) -> None:
     # Representations
     # -------------------------------------------------------------------------
     def __repr__(self) -> str:
-        config_str = ", ".join(
-            [f"{k}={v}" for k, v in sorted(self._config.items())]
-        )
+        config_str = ", ".join([f"{k}={v}" for k, v in sorted(self._config.items())])
         return f"{self.__class__.__name__}({config_str})"
 
     def __eq__(self, other: object) -> bool:
diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py b/doubleml/utils/tests/test_ps_processor_exceptions.py
new file mode 100644
index 00000000..1f739b2b
--- /dev/null
+++ b/doubleml/utils/tests/test_ps_processor_exceptions.py
@@ -0,0 +1,94 @@
+import pytest
+
+from doubleml.utils import PropensityScoreProcessor
+
+# -------------------------------------------------------------------------
+# Tests for __init__ method
+# -------------------------------------------------------------------------
+
+
+@pytest.mark.ci
+def test_init_unknown_parameter():
+    """Test that unknown parameters raise ValueError during initialization."""
+    with pytest.raises(ValueError, match="Unknown parameters: {'invalid_param'}"):
+        PropensityScoreProcessor(invalid_param=0.5)
+
+
+@pytest.mark.ci
+def test_init_clipping_threshold_type_error():
+    """Test that non-float clipping_threshold raises TypeError."""
+    with pytest.raises(TypeError, match="clipping_threshold must be of float type"):
+        PropensityScoreProcessor(clipping_threshold="0.01")
+
+
+@pytest.mark.ci
+def test_init_clipping_threshold_value_error():
+    """Test that invalid clipping_threshold values raise ValueError."""
+    with pytest.raises(ValueError, match="clipping_threshold must be between 0 and 0.5"):
+        PropensityScoreProcessor(clipping_threshold=0.0)  # exactly 0
+
+    with pytest.raises(ValueError, match="clipping_threshold must be between 0 and 0.5"):
+        PropensityScoreProcessor(clipping_threshold=0.6)  # above 0.5
+
+
+@pytest.mark.ci
+def test_init_warn_extreme_values_type_error():
+    """Test that non-bool warn_extreme_values raises TypeError."""
+    with pytest.raises(TypeError, match="warn_extreme_values must be boolean"):
+        PropensityScoreProcessor(warn_extreme_values="True")
+
+
+@pytest.mark.ci
+def test_init_extreme_threshold_value_error():
+    """Test that invalid extreme_threshold values raise ValueError."""
+    with pytest.raises(ValueError, match="extreme_threshold must be between 0 and 0.5"):
+        PropensityScoreProcessor(extreme_threshold=0.0)  # exactly 0
+
+    with pytest.raises(ValueError, match="extreme_threshold must be between 0 and 0.5"):
+        PropensityScoreProcessor(extreme_threshold=0.6)  # above 0.5
+
+
+@pytest.mark.ci
+def test_init_warning_proportion_type_error():
+    """Test that invalid warning_proportion values raise TypeError."""
+    with pytest.raises(TypeError, match="warning_proportion must be of float type"):
+        PropensityScoreProcessor(warning_proportion="0.0")
+
+
+@pytest.mark.ci
+def test_init_warning_proportion_value_error():
+    """Test that invalid warning_proportion values raise ValueError."""
+    with pytest.raises(ValueError, match="warning_proportion must be between 0 and 1"):
+        PropensityScoreProcessor(warning_proportion=0.0)  # exactly 0
+
+    with pytest.raises(ValueError, match="warning_proportion must be between 0 and 1"):
+        PropensityScoreProcessor(warning_proportion=1.1)  # above 1
+
+
+# -------------------------------------------------------------------------
+# Tests for update_config method
+# -------------------------------------------------------------------------
+
+
+@pytest.mark.ci
+def test_update_config_unknown_parameter():
+    """Test that unknown parameters raise ValueError during config update."""
+    processor = PropensityScoreProcessor()
+
+    with pytest.raises(ValueError, match="Unknown parameters: {'invalid_param'}"):
+        processor.update_config(invalid_param=0.5)
+
+
+@pytest.mark.ci
+def test_update_config_preserves_state_on_failure():
+    """Test that failed config updates don't change the processor state."""
+    processor = PropensityScoreProcessor(clipping_threshold=0.1)
+    original_config = processor.get_config()
+
+    # Try to update with invalid value
+    with pytest.raises(ValueError):
+        processor.update_config(clipping_threshold=0.6)
+
+    # Verify state hasn't changed
+    assert processor.get_config() == original_config
+    assert processor.clipping_threshold == 0.1

From 97726940eaa9151edc9f27d5c4151169df72583f Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 22 Oct 2025 09:26:10 +0200
Subject: [PATCH 03/38] add representation tests

---
 .../test_ps_processor_representations.py      | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 doubleml/utils/tests/test_ps_processor_representations.py

diff --git a/doubleml/utils/tests/test_ps_processor_representations.py b/doubleml/utils/tests/test_ps_processor_representations.py
new file mode 100644
index 00000000..e25e10a1
--- /dev/null
+++ b/doubleml/utils/tests/test_ps_processor_representations.py
@@ -0,0 +1,41 @@
+import pytest
+
+from doubleml.utils import PropensityScoreProcessor
+
+
+@pytest.mark.ci
+def test_repr_default_config():
+    """Test __repr__ with default configuration."""
+    processor = PropensityScoreProcessor()
+    expected = (
+        "PropensityScoreProcessor(clipping_threshold=0.01, extreme_threshold=0.05, "
+        "warn_extreme_values=True, warning_proportion=0.1)"
+    )
+    assert repr(processor) == expected
+
+
+@pytest.mark.ci
+def test_repr_custom_config():
+    """Test __repr__ with custom configuration."""
+    processor = PropensityScoreProcessor(clipping_threshold=0.05, warn_extreme_values=False, warning_proportion=0.2)
+    expected = (
+        "PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=0.05, "
+        "warn_extreme_values=False, warning_proportion=0.2)"
+    )
+    assert repr(processor) == expected
+
+
+@pytest.mark.ci
+def test_eq_same_config():
+    """Test equality with same configuration."""
+    processor1 = PropensityScoreProcessor(clipping_threshold=0.05)
+    processor2 = PropensityScoreProcessor(clipping_threshold=0.05)
+    assert processor1 == processor2
+
+
+@pytest.mark.ci
+def test_eq_different_config():
+    """Test inequality with different configuration."""
+    processor1 = PropensityScoreProcessor(clipping_threshold=0.05)
+    processor2 = PropensityScoreProcessor(clipping_threshold=0.1)
+    assert processor1 != processor2

From b0f52a0b47200adba8db4796683d02c56be1b3c0 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 22 Oct 2025 09:56:24 +0200
Subject: [PATCH 04/38] clean up ps processor class to only have clipping
 threshold and extreme threshold

---
 doubleml/utils/propensity_score_processing.py | 107 ++++--------------
 doubleml/utils/tests/test_ps_processor.py     |  27 +++++
 .../tests/test_ps_processor_exceptions.py     |  24 ----
 .../test_ps_processor_representations.py      |  12 +-
 4 files changed, 55 insertions(+), 115 deletions(-)
 create mode 100644 doubleml/utils/tests/test_ps_processor.py

diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index 381c443e..f0eb5e19 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -1,10 +1,8 @@
 import warnings
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, Optional
 
 import numpy as np
 
-from doubleml.utils._checks import _check_is_propensity
-
 
 class PropensityScoreProcessor:
     """
@@ -23,15 +21,18 @@ class PropensityScoreProcessor:
 
     Examples
     --------
+    >>> import numpy as np
+    >>> from doubleml.utils import PropensityScoreProcessor
+    >>> raw_scores = np.array([0.001, 0.2, 0.5, 0.8, 0.999])
     >>> processor = PropensityScoreProcessor(clipping_threshold=0.01)
     >>> clipped_scores = processor.adjust(raw_scores)
+    >>> print(clipped_scores)
+    [0.01 0.2  0.5  0.8  0.99]
     """
 
     _DEFAULT_CONFIG: Dict[str, Any] = {
         "clipping_threshold": 1e-2,
-        "warn_extreme_values": True,
-        "extreme_threshold": 0.05,
-        "warning_proportion": 0.1,
+        "extreme_threshold": 1e-12,
     }
 
     def __init__(self, **config: Any) -> None:
@@ -56,39 +57,19 @@ def _validate_config(self, config: Dict[str, Any]) -> None:
         if (clipping_threshold <= 0) or (clipping_threshold >= 0.5):
             raise ValueError(f"clipping_threshold must be between 0 and 0.5. " f"{clipping_threshold} was passed.")
 
-        if not isinstance(config["warn_extreme_values"], bool):
-            raise TypeError("warn_extreme_values must be boolean.")
-
         if not (0 < config["extreme_threshold"] < 0.5):
             raise ValueError("extreme_threshold must be between 0 and 0.5.")
 
-        if not isinstance(config["warning_proportion"], float):
-            raise TypeError(
-                "warning_proportion must be of float type. " f"Object of type {type(config['warning_proportion'])} passed."
-            )
-        if not (0 < config["warning_proportion"] < 1):
-            raise ValueError("warning_proportion must be between 0 and 1.")
-
     @property
     def clipping_threshold(self) -> float:
         """Get the clipping threshold."""
         return self._config["clipping_threshold"]
 
-    @property
-    def warn_extreme_values(self) -> bool:
-        """Get the warn extreme values setting."""
-        return self._config["warn_extreme_values"]
-
     @property
     def extreme_threshold(self) -> float:
         """Get the extreme threshold."""
         return self._config["extreme_threshold"]
 
-    @property
-    def warning_proportion(self) -> float:
-        """Get the warning proportion."""
-        return self._config["warning_proportion"]
-
     @classmethod
     def get_default_config(cls) -> Dict[str, Any]:
         """Return the default configuration dictionary."""
@@ -117,12 +98,7 @@ def update_config(self, **new_config: Any) -> None:
     # -------------------------------------------------------------------------
     # Core functionality
     # -------------------------------------------------------------------------
-    def adjust(
-        self,
-        propensity_scores: np.ndarray,
-        learner_name: str = "ml_m",
-        smpls: Optional[List[Any]] = None,
-    ) -> np.ndarray:
+    def adjust(self, propensity_scores: np.ndarray, learner_name: Optional[str] = None) -> np.ndarray:
         """
         Adjust propensity scores via validation, clipping, and warnings.
 
@@ -130,76 +106,43 @@ def adjust(
         ----------
         propensity_scores : array-like
             Raw propensity score predictions.
-        learner_name : str, default="ml_m"
-            Name of the learner for error messages.
-        smpls : list, optional
-            Sample splits for validation.
+        learner_name : str, optional
+            Name of the learner providing the propensity scores, used in warnings.
 
         Returns
         -------
         np.ndarray
             Clipped and validated propensity scores.
         """
-        # Validation
-        _check_is_propensity(
+        self._validate_propensity_scores(
             propensity_scores,
             learner_name,
-            learner_name,
-            smpls,
-            eps=1e-12,
         )
-
-        # Warnings for extreme values
-        if self.warn_extreme_values:
-            self._warn_extreme_values(propensity_scores)
-
-        # Clipping
         clipped_scores = np.clip(propensity_scores, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold)
 
-        return np.asarray(clipped_scores)
+        return clipped_scores
 
     # -------------------------------------------------------------------------
     # Private helper methods
     # -------------------------------------------------------------------------
-    def _warn_extreme_values(self, propensity_scores: np.ndarray) -> None:
-        """Emit warnings for extreme or clipped propensity scores."""
-        min_prop = np.min(propensity_scores)
-        max_prop = np.max(propensity_scores)
 
-        extreme_low = np.mean(propensity_scores < self.extreme_threshold)
-        extreme_high = np.mean(propensity_scores > (1 - self.extreme_threshold))
+    def _validate_propensity_scores(
+        self,
+        preds: np.ndarray,
+        learner_name: Optional[str] = None,
+    ) -> None:
+        """Validate if propensity predictions are valid."""
+        learner_msg = f" from learner {learner_name}" if learner_name is not None else ""
 
-        if extreme_low > self.warning_proportion:
-            warnings.warn(
-                f"Large proportion ({extreme_low:.1%}) of propensity scores "
-                f"below {self.extreme_threshold}. This may indicate poor overlap. "
-                f"Consider adjusting the model or increasing clipping_threshold "
-                f"(current: {self.clipping_threshold}).",
-                UserWarning,
-            )
+        if not isinstance(preds, np.ndarray):
+            raise TypeError(f"Propensity predictions {learner_msg} must be of type np.ndarray. " f"Type {type(preds)} found.")
 
-        if extreme_high > self.warning_proportion:
-            warnings.warn(
-                f"Large proportion ({extreme_high:.1%}) of propensity scores "
-                f"above {1 - self.extreme_threshold}. This may indicate poor overlap. "
-                f"Consider adjusting the model or increasing clipping_threshold "
-                f"(current: {self.clipping_threshold}).",
-                UserWarning,
-            )
-
-        if min_prop <= self.clipping_threshold:
-            warnings.warn(
-                f"Minimum propensity score ({min_prop:.6f}) is at or below "
-                f"clipping threshold ({self.clipping_threshold}). "
-                f"Some observations may be heavily clipped.",
-                UserWarning,
-            )
+        if preds.ndim != 1:
+            raise ValueError(f"Propensity predictions {learner_msg} must be 1-dimensional. " f"Shape {preds.shape} found.")
 
-        if max_prop >= (1 - self.clipping_threshold):
+        if any((preds < self.extreme_threshold) | (preds > 1 - self.extreme_threshold)):
             warnings.warn(
-                f"Maximum propensity score ({max_prop:.6f}) is at or above "
-                f"clipping threshold ({1 - self.clipping_threshold}). "
-                f"Some observations may be heavily clipped.",
+                f"Propensity predictions {learner_msg} " f"are close to zero or one (eps={self.extreme_threshold}).",
                 UserWarning,
             )
 
diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py
new file mode 100644
index 00000000..5dc1b190
--- /dev/null
+++ b/doubleml/utils/tests/test_ps_processor.py
@@ -0,0 +1,27 @@
+import numpy as np
+import pytest
+
+from doubleml.utils.propensity_score_processing import PropensityScoreProcessor
+
+
+@pytest.mark.ci
+def test_adjust_basic_clipping():
+    """Test basic clipping functionality."""
+    processor = PropensityScoreProcessor(clipping_threshold=0.1)
+
+    scores = np.array([0.05, 0.2, 0.8, 0.95])
+    adjusted = processor.adjust(scores)
+
+    expected = np.array([0.1, 0.2, 0.8, 0.9])
+    np.testing.assert_array_equal(adjusted, expected)
+
+
+@pytest.mark.ci
+def test_adjust_no_clipping_needed():
+    """Test when no clipping is needed."""
+    processor = PropensityScoreProcessor(clipping_threshold=0.01)
+
+    scores = np.array([0.2, 0.3, 0.7, 0.8])
+    adjusted = processor.adjust(scores)
+
+    np.testing.assert_array_equal(adjusted, scores)
diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py b/doubleml/utils/tests/test_ps_processor_exceptions.py
index 1f739b2b..64859092 100644
--- a/doubleml/utils/tests/test_ps_processor_exceptions.py
+++ b/doubleml/utils/tests/test_ps_processor_exceptions.py
@@ -31,13 +31,6 @@ def test_init_clipping_threshold_value_error():
         PropensityScoreProcessor(clipping_threshold=0.6)  # above 0.5
 
 
-@pytest.mark.ci
-def test_init_warn_extreme_values_type_error():
-    """Test that non-bool warn_extreme_values raises TypeError."""
-    with pytest.raises(TypeError, match="warn_extreme_values must be boolean"):
-        PropensityScoreProcessor(warn_extreme_values="True")
-
-
 @pytest.mark.ci
 def test_init_extreme_threshold_value_error():
     """Test that invalid extreme_threshold values raise ValueError."""
@@ -48,23 +41,6 @@ def test_init_extreme_threshold_value_error():
         PropensityScoreProcessor(extreme_threshold=0.6)  # above 0.5
 
 
-@pytest.mark.ci
-def test_init_warning_proportion_type_error():
-    """Test that invalid warning_proportion values raise TypeError."""
-    with pytest.raises(TypeError, match="warning_proportion must be of float type"):
-        PropensityScoreProcessor(warning_proportion="0.0")
-
-
-@pytest.mark.ci
-def test_init_warning_proportion_value_error():
-    """Test that invalid warning_proportion values raise ValueError."""
-    with pytest.raises(ValueError, match="warning_proportion must be between 0 and 1"):
-        PropensityScoreProcessor(warning_proportion=0.0)  # exactly 0
-
-    with pytest.raises(ValueError, match="warning_proportion must be between 0 and 1"):
-        PropensityScoreProcessor(warning_proportion=1.1)  # above 1
-
-
 # -------------------------------------------------------------------------
 # Tests for update_config method
 # -------------------------------------------------------------------------
diff --git a/doubleml/utils/tests/test_ps_processor_representations.py b/doubleml/utils/tests/test_ps_processor_representations.py
index e25e10a1..215258ca 100644
--- a/doubleml/utils/tests/test_ps_processor_representations.py
+++ b/doubleml/utils/tests/test_ps_processor_representations.py
@@ -7,21 +7,15 @@
 def test_repr_default_config():
     """Test __repr__ with default configuration."""
     processor = PropensityScoreProcessor()
-    expected = (
-        "PropensityScoreProcessor(clipping_threshold=0.01, extreme_threshold=0.05, "
-        "warn_extreme_values=True, warning_proportion=0.1)"
-    )
+    expected = "PropensityScoreProcessor(clipping_threshold=0.01, extreme_threshold=1e-12)"
     assert repr(processor) == expected
 
 
 @pytest.mark.ci
 def test_repr_custom_config():
     """Test __repr__ with custom configuration."""
-    processor = PropensityScoreProcessor(clipping_threshold=0.05, warn_extreme_values=False, warning_proportion=0.2)
-    expected = (
-        "PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=0.05, "
-        "warn_extreme_values=False, warning_proportion=0.2)"
-    )
+    processor = PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=1e-6)
+    expected = "PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=1e-06)"
     assert repr(processor) == expected
 
 

From 6b746d4cf6fc325b12d588926eeb894dfd14f7cd Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 22 Oct 2025 12:43:06 +0200
Subject: [PATCH 05/38] add treatment to adjust method calls

---
 doubleml/utils/propensity_score_processing.py | 28 +++++--
 doubleml/utils/tests/test_ps_processor.py     |  6 +-
 .../tests/test_ps_processor_exceptions.py     | 77 +++++++++++++++++++
 .../test_ps_processor_representations.py      |  7 ++
 4 files changed, 111 insertions(+), 7 deletions(-)

diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index f0eb5e19..5c1d0e05 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -2,6 +2,7 @@
 from typing import Any, Dict, Optional
 
 import numpy as np
+from sklearn.utils.multiclass import type_of_target
 
 
 class PropensityScoreProcessor:
@@ -23,10 +24,11 @@ class PropensityScoreProcessor:
     --------
     >>> import numpy as np
     >>> from doubleml.utils import PropensityScoreProcessor
-    >>> raw_scores = np.array([0.001, 0.2, 0.5, 0.8, 0.999])
+    >>> ps_scores = np.array([0.001, 0.2, 0.5, 0.8, 0.999])
+    >>> treatment = np.array([0, 1, 1, 0, 1])
     >>> processor = PropensityScoreProcessor(clipping_threshold=0.01)
-    >>> clipped_scores = processor.adjust(raw_scores)
-    >>> print(clipped_scores)
+    >>> adj_scores = processor.adjust(ps_scores, treatment)
+    >>> print(adj_scores)
     [0.01 0.2  0.5  0.8  0.99]
     """
 
@@ -98,14 +100,16 @@ def update_config(self, **new_config: Any) -> None:
     # -------------------------------------------------------------------------
     # Core functionality
     # -------------------------------------------------------------------------
-    def adjust(self, propensity_scores: np.ndarray, learner_name: Optional[str] = None) -> np.ndarray:
+    def adjust(self, propensity_scores: np.ndarray, treatment: np.ndarray, learner_name: Optional[str] = None) -> np.ndarray:
         """
         Adjust propensity scores via validation, clipping, and warnings.
 
         Parameters
         ----------
-        propensity_scores : array-like
+        propensity_scores : np.ndarray
             Raw propensity score predictions.
+        treatment : np.ndarray
+            Treatment assignments (1 for treated, 0 for control).
         learner_name : str, optional
             Name of the learner providing the propensity scores, used in warnings.
 
@@ -118,6 +122,7 @@ def adjust(self, propensity_scores: np.ndarray, learner_name: Optional[str] = No
             propensity_scores,
             learner_name,
         )
+        self._validate_treatment(treatment)
         clipped_scores = np.clip(propensity_scores, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold)
 
         return clipped_scores
@@ -146,6 +151,19 @@ def _validate_propensity_scores(
                 UserWarning,
             )
 
+    def _validate_treatment(self, treatment: np.ndarray) -> None:
+        """Validate treatment vector."""
+        if not isinstance(treatment, np.ndarray):
+            raise TypeError(f"Treatment assignments must be of type np.ndarray. " f"Type {type(treatment)} found.")
+
+        if treatment.ndim != 1:
+            raise ValueError(f"Treatment assignments must be 1-dimensional. " f"Shape {treatment.shape} found.")
+
+        binary_treat = type_of_target(treatment) == "binary"
+        zero_one_treat = np.all((np.power(treatment, 2) - treatment) == 0)
+        if not (binary_treat and zero_one_treat):
+            raise ValueError("Treatment vector must be binary (0 and 1).")
+
     # -------------------------------------------------------------------------
     # Representations
     # -------------------------------------------------------------------------
diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py
index 5dc1b190..d16e266a 100644
--- a/doubleml/utils/tests/test_ps_processor.py
+++ b/doubleml/utils/tests/test_ps_processor.py
@@ -10,7 +10,8 @@ def test_adjust_basic_clipping():
     processor = PropensityScoreProcessor(clipping_threshold=0.1)
 
     scores = np.array([0.05, 0.2, 0.8, 0.95])
-    adjusted = processor.adjust(scores)
+    treatment = np.array([0, 1, 1, 0])
+    adjusted = processor.adjust(scores, treatment)
 
     expected = np.array([0.1, 0.2, 0.8, 0.9])
     np.testing.assert_array_equal(adjusted, expected)
@@ -22,6 +23,7 @@ def test_adjust_no_clipping_needed():
     processor = PropensityScoreProcessor(clipping_threshold=0.01)
 
     scores = np.array([0.2, 0.3, 0.7, 0.8])
-    adjusted = processor.adjust(scores)
+    treatment = np.array([0, 1, 1, 0])
+    adjusted = processor.adjust(scores, treatment)
 
     np.testing.assert_array_equal(adjusted, scores)
diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py b/doubleml/utils/tests/test_ps_processor_exceptions.py
index 64859092..479d1cb7 100644
--- a/doubleml/utils/tests/test_ps_processor_exceptions.py
+++ b/doubleml/utils/tests/test_ps_processor_exceptions.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pytest
 
 from doubleml.utils import PropensityScoreProcessor
@@ -68,3 +69,79 @@ def test_update_config_preserves_state_on_failure():
     # Verify state hasn't changed
     assert processor.get_config() == original_config
     assert processor.clipping_threshold == 0.1
+
+
+@pytest.mark.ci
+def test_update_config_successful_update():
+    """Test successful configuration updates."""
+    processor = PropensityScoreProcessor(clipping_threshold=0.1)
+
+    processor.update_config(clipping_threshold=0.05)
+    assert processor.clipping_threshold == 0.05
+
+
+@pytest.mark.ci
+def test_update_config_defaults():
+    """Test updating configuration back to defaults."""
+    processor = PropensityScoreProcessor(clipping_threshold=0.1)
+
+    processor.update_config(clipping_threshold=0.01)
+    assert processor.clipping_threshold == 0.01
+
+    # Update back to default
+    default_config = PropensityScoreProcessor.get_default_config()
+    processor.update_config(**default_config)
+    assert processor.clipping_threshold == default_config["clipping_threshold"]
+
+
+# -------------------------------------------------------------------------
+# Tests for propensity score validation
+# -------------------------------------------------------------------------
+
+
+@pytest.mark.ci
+def test_validate_propensity_scores_type_error_with_learner():
+    """Test TypeError includes learner name."""
+    processor = PropensityScoreProcessor()
+    with pytest.raises(TypeError, match="from learner test_learner"):
+        processor.adjust([0.1, 0.2], np.array([0, 1]), learner_name="test_learner")
+
+
+@pytest.mark.ci
+def test_validate_propensity_scores_dimension_error():
+    """Test that non-1D propensity scores raise ValueError."""
+    processor = PropensityScoreProcessor()
+    with pytest.raises(ValueError, match="must be 1-dimensional"):
+        processor.adjust(np.array([[0.1, 0.2]]), np.array([0, 1]))
+
+
+@pytest.mark.ci
+def test_validate_propensity_scores_extreme_warning():
+    """Test extreme values trigger warnings."""
+    processor = PropensityScoreProcessor(extreme_threshold=0.05)
+    with pytest.warns(UserWarning, match="close to zero or one"):
+        processor.adjust(np.array([0.01, 0.99]), np.array([0, 1]))
+
+
+@pytest.mark.ci
+def test_validate_treatment_type_error():
+    """Test that non-numpy array treatment raises TypeError."""
+    processor = PropensityScoreProcessor()
+    with pytest.raises(TypeError, match="Treatment assignments must be of type np.ndarray"):
+        processor.adjust(np.array([0.2, 0.8]), [0, 1])
+
+
+@pytest.mark.ci
+def test_validate_treatment_dimension_error():
+    """Test that non-1D treatment raises ValueError."""
+    processor = PropensityScoreProcessor()
+    with pytest.raises(ValueError, match="must be 1-dimensional"):
+        processor.adjust(np.array([0.2, 0.8]), np.array([[0, 1]]))
+
+
+@pytest.mark.ci
+def test_validate_treatment_binary_error():
+    """Test that non-binary treatment values raise ValueError."""
+    processor = PropensityScoreProcessor()
+    with pytest.raises(ValueError, match="must be binary"):
+        processor.adjust(np.array([0.2, 0.8]), np.array([0, 2]))
diff --git a/doubleml/utils/tests/test_ps_processor_representations.py b/doubleml/utils/tests/test_ps_processor_representations.py
index 215258ca..e58d67bd 100644
--- a/doubleml/utils/tests/test_ps_processor_representations.py
+++ b/doubleml/utils/tests/test_ps_processor_representations.py
@@ -33,3 +33,10 @@ def test_eq_different_config():
     processor1 = PropensityScoreProcessor(clipping_threshold=0.05)
     processor2 = PropensityScoreProcessor(clipping_threshold=0.1)
     assert processor1 != processor2
+
+
+@pytest.mark.ci
+def test_eq_different_type():
+    """Test inequality with different object type."""
+    processor = PropensityScoreProcessor()
+    assert processor != "NotAPropensityScoreProcessor"

From 478a04ed0f9bdddd6bde60b6bd7eab9518efc190 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 22 Oct 2025 13:49:25 +0200
Subject: [PATCH 06/38] include calibration via isotonic regression

---
 doubleml/utils/propensity_score_processing.py | 75 ++++++++++++++++++-
 .../tests/test_ps_processor_exceptions.py     | 41 +++++++++-
 .../test_ps_processor_representations.py      | 10 ++-
 3 files changed, 120 insertions(+), 6 deletions(-)

diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index 5c1d0e05..35ef3043 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -2,12 +2,14 @@
 from typing import Any, Dict, Optional
 
 import numpy as np
+from sklearn.isotonic import IsotonicRegression
+from sklearn.model_selection import cross_val_predict
 from sklearn.utils.multiclass import type_of_target
 
 
 class PropensityScoreProcessor:
     """
-    Processor for propensity score validation, clipping, and warnings.
+    Processor for propensity score calibration, clipping, and validation.
 
     Parameters
     ----------
@@ -35,8 +37,12 @@ class PropensityScoreProcessor:
     _DEFAULT_CONFIG: Dict[str, Any] = {
         "clipping_threshold": 1e-2,
         "extreme_threshold": 1e-12,
+        "calibration_method": None,
+        "cv_calibration": False,
     }
 
+    _VALID_CALIBRATION_METHODS = {None, "isotonic"}
+
     def __init__(self, **config: Any) -> None:
 
         unknown_params = set(config.keys()) - set(self._DEFAULT_CONFIG.keys())
@@ -62,6 +68,17 @@ def _validate_config(self, config: Dict[str, Any]) -> None:
         if not (0 < config["extreme_threshold"] < 0.5):
             raise ValueError("extreme_threshold must be between 0 and 0.5.")
 
+        calibration_method = config["calibration_method"]
+        if calibration_method not in self._VALID_CALIBRATION_METHODS:
+            raise ValueError(
+                f"calibration_method must be one of {self._VALID_CALIBRATION_METHODS}. " f"Got {calibration_method}."
+            )
+
+        if not isinstance(config["cv_calibration"], bool):
+            raise TypeError("cv_calibration must be of bool type.")
+        if config["cv_calibration"] and config["calibration_method"] is None:
+            raise ValueError("cv_calibration can only be used with a calibration_method.")
+
     @property
     def clipping_threshold(self) -> float:
         """Get the clipping threshold."""
@@ -72,6 +89,16 @@ def extreme_threshold(self) -> float:
         """Get the extreme threshold."""
         return self._config["extreme_threshold"]
 
+    @property
+    def calibration_method(self) -> Optional[str]:
+        """Get the calibration method."""
+        return self._config["calibration_method"]
+
+    @property
+    def cv_calibration(self) -> bool:
+        """Get whether cross-validation calibration is used."""
+        return self._config["cv_calibration"]
+
     @classmethod
     def get_default_config(cls) -> Dict[str, Any]:
         """Return the default configuration dictionary."""
@@ -100,7 +127,13 @@ def update_config(self, **new_config: Any) -> None:
     # -------------------------------------------------------------------------
     # Core functionality
     # -------------------------------------------------------------------------
-    def adjust(self, propensity_scores: np.ndarray, treatment: np.ndarray, learner_name: Optional[str] = None) -> np.ndarray:
+    def adjust(
+        self,
+        propensity_scores: np.ndarray,
+        treatment: np.ndarray,
+        cv: Optional[int | list] = None,
+        learner_name: Optional[str] = None,
+    ) -> np.ndarray:
         """
         Adjust propensity scores via validation, clipping, and warnings.
 
@@ -110,6 +143,8 @@ def adjust(self, propensity_scores: np.ndarray, treatment: np.ndarray, learner_n
             Raw propensity score predictions.
         treatment : np.ndarray
             Treatment assignments (1 for treated, 0 for control).
+        cv : int or list, optional
+            Cross-validation strategy for calibration. Used only if calibration is applied.
         learner_name : str, optional
             Name of the learner providing the propensity scores, used in warnings.
 
@@ -123,13 +158,47 @@ def adjust(self, propensity_scores: np.ndarray, treatment: np.ndarray, learner_n
             learner_name,
         )
         self._validate_treatment(treatment)
-        clipped_scores = np.clip(propensity_scores, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold)
+
+        if self.cv_calibration:
+            cv = cv
+        else:
+            cv = None
+        calibrated_ps = self._apply_calibration(propensity_scores, treatment, cv=cv)
+        clipped_scores = np.clip(calibrated_ps, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold)
 
         return clipped_scores
 
     # -------------------------------------------------------------------------
     # Private helper methods
     # -------------------------------------------------------------------------
+    def _apply_calibration(
+        self,
+        propensity_scores: np.ndarray,
+        treatment: np.ndarray,
+        cv: Optional[int | list] = None,
+    ) -> np.ndarray:
+        """Apply calibration method to propensity scores if specified."""
+        if self.calibration_method is None:
+            calibrated_ps = propensity_scores
+        elif self.calibration_method == "isotonic":
+            calibration_model = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0)
+
+            if cv is None:
+                calibration_model.fit(propensity_scores.reshape(-1, 1), treatment)
+                calibrated_ps = calibration_model.predict(propensity_scores.reshape(-1, 1))
+            else:
+                calibrated_ps = cross_val_predict(
+                    estimator=calibration_model, X=propensity_scores.reshape(-1, 1), y=treatment, cv=cv, method="predict"
+                )
+
+        else:
+            # This point should never be reached due to prior validation
+            raise ValueError(
+                f"Unsupported calibration method: {self.calibration_method}. "
+                f"Valid methods are: {self._VALID_CALIBRATION_METHODS}"
+            )
+
+        return calibrated_ps
 
     def _validate_propensity_scores(
         self,
diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py b/doubleml/utils/tests/test_ps_processor_exceptions.py
index 479d1cb7..f07cd787 100644
--- a/doubleml/utils/tests/test_ps_processor_exceptions.py
+++ b/doubleml/utils/tests/test_ps_processor_exceptions.py
@@ -42,6 +42,27 @@ def test_init_extreme_threshold_value_error():
         PropensityScoreProcessor(extreme_threshold=0.6)  # above 0.5
 
 
+@pytest.mark.ci
+def test_init_calibration_method_value_error():
+    """Test that invalid calibration_method raises ValueError."""
+    with pytest.raises(ValueError, match="calibration_method must be one of"):
+        PropensityScoreProcessor(calibration_method="invalid_method")
+
+
+@pytest.mark.ci
+def test_init_cv_calibration_type_error():
+    """Test that non-bool cv_calibration raises TypeError."""
+    with pytest.raises(TypeError, match="cv_calibration must be of bool type."):
+        PropensityScoreProcessor(cv_calibration="True")
+
+
+@pytest.mark.ci
+def test_init_cv_calibration_value_error():
+    """Test that cv_calibration True with None calibration_method raises ValueError."""
+    with pytest.raises(ValueError, match="cv_calibration can only be used with a calibration_method."):
+        PropensityScoreProcessor(calibration_method=None, cv_calibration=True)
+
+
 # -------------------------------------------------------------------------
 # Tests for update_config method
 # -------------------------------------------------------------------------
@@ -95,7 +116,7 @@ def test_update_config_defaults():
 
 
 # -------------------------------------------------------------------------
-# Tests for propensity score validation
+# Tests for propensity score & treatment validation
 # -------------------------------------------------------------------------
 
 
@@ -145,3 +166,21 @@ def test_validate_treatment_binary_error():
     processor = PropensityScoreProcessor()
     with pytest.raises(ValueError, match="must be binary"):
         processor.adjust(np.array([0.2, 0.8]), np.array([0, 2]))
+
+
+# -------------------------------------------------------------------------
+# Other exception tests
+# -------------------------------------------------------------------------
+
+
+@pytest.mark.ci
+def test_apply_calibration_unsupported_method_error():
+    """Test that unsupported calibration method raises ValueError."""
+    processor = PropensityScoreProcessor()
+    processor._config["calibration_method"] = "unsupported_method"
+
+    propensity_scores = np.array([0.2, 0.8])
+    treatment = np.array([0, 1])
+
+    with pytest.raises(ValueError, match="Unsupported calibration method: unsupported_method"):
+        processor._apply_calibration(propensity_scores, treatment)
diff --git a/doubleml/utils/tests/test_ps_processor_representations.py b/doubleml/utils/tests/test_ps_processor_representations.py
index e58d67bd..365f9ea4 100644
--- a/doubleml/utils/tests/test_ps_processor_representations.py
+++ b/doubleml/utils/tests/test_ps_processor_representations.py
@@ -7,7 +7,10 @@
 def test_repr_default_config():
     """Test __repr__ with default configuration."""
     processor = PropensityScoreProcessor()
-    expected = "PropensityScoreProcessor(clipping_threshold=0.01, extreme_threshold=1e-12)"
+    expected = (
+        "PropensityScoreProcessor(calibration_method=None, clipping_threshold=0.01, "
+        "cv_calibration=False, extreme_threshold=1e-12)"
+    )
     assert repr(processor) == expected
 
 
@@ -15,7 +18,10 @@ def test_repr_default_config():
 def test_repr_custom_config():
     """Test __repr__ with custom configuration."""
     processor = PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=1e-6)
-    expected = "PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=1e-06)"
+    expected = (
+        "PropensityScoreProcessor(calibration_method=None, clipping_threshold=0.05, "
+        "cv_calibration=False, extreme_threshold=1e-06)"
+    )
     assert repr(processor) == expected
 
 

From 61a529b1e0d3cedccd12c44be4c273398d157311 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 22 Oct 2025 14:17:57 +0200
Subject: [PATCH 07/38] add cv to ps calibration

---
 doubleml/utils/tests/test_ps_processor.py | 73 +++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py
index d16e266a..7b2c0d81 100644
--- a/doubleml/utils/tests/test_ps_processor.py
+++ b/doubleml/utils/tests/test_ps_processor.py
@@ -1,5 +1,9 @@
+from unittest.mock import patch
+
 import numpy as np
 import pytest
+from sklearn.isotonic import IsotonicRegression
+from sklearn.model_selection import KFold, cross_val_predict
 
 from doubleml.utils.propensity_score_processing import PropensityScoreProcessor
 
@@ -27,3 +31,72 @@ def test_adjust_no_clipping_needed():
     adjusted = processor.adjust(scores, treatment)
 
     np.testing.assert_array_equal(adjusted, scores)
+
+
+@pytest.mark.ci
+def test_isotonic_calibration_without_cv():
+    """Test isotonic calibration without cross-validation."""
+    ps = np.random.uniform(0, 1, size=100)
+    treatment = np.random.binomial(1, 0.5, size=100)
+
+    clipping_threshold = 0.01
+    processor = PropensityScoreProcessor(
+        calibration_method="isotonic",
+        cv_calibration=False,
+        clipping_threshold=clipping_threshold,
+    )
+
+    isotonic_manual = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0)
+    isotonic_manual.fit(ps.reshape(-1, 1), treatment)
+    expected_ps_manual = isotonic_manual.predict(ps.reshape(-1, 1))
+    expected_ps_manual = np.clip(expected_ps_manual, clipping_threshold, 1 - clipping_threshold)
+
+    adjusted_ps = processor.adjust(ps, treatment)
+    np.testing.assert_array_equal(adjusted_ps, expected_ps_manual)
+
+
+@pytest.fixture(scope="module", params=[3, "iterable", "splitter"])
+def cv(request):
+    return request.param
+
+
+@pytest.mark.ci
+def test_isotonic_calibration_with_cv(cv):
+    """Test isotonic calibration with cross-validation."""
+    n_obs = 100
+    ps = np.random.uniform(0, 1, size=n_obs)
+    treatment = np.random.binomial(1, 0.5, size=n_obs)
+    if cv == "iterable":
+        cv = [(train, test) for train, test in KFold(n_splits=3).split(ps)]
+    elif cv == "splitter":
+        cv = KFold(n_splits=3)
+    else:
+        cv = cv
+
+    clipping_threshold = 0.01
+    processor = PropensityScoreProcessor(
+        calibration_method="isotonic", cv_calibration=True, clipping_threshold=clipping_threshold
+    )
+
+    isotonic_manual = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0)
+    ps_cv = cross_val_predict(isotonic_manual, ps.reshape(-1, 1), treatment, cv=cv)
+    expected_ps_manual = np.clip(ps_cv, clipping_threshold, 1 - clipping_threshold)
+
+    adjusted_ps = processor.adjust(ps, treatment, cv=cv)
+    np.testing.assert_array_equal(adjusted_ps, expected_ps_manual)
+
+
+@pytest.mark.ci
+def test_no_calibration():
+    """Test that no calibration is applied when calibration_method is None."""
+    processor = PropensityScoreProcessor(calibration_method=None, clipping_threshold=0.01)
+
+    scores = np.array([0.2, 0.3, 0.7, 0.8])
+    treatment = np.array([0, 1, 1, 0])
+
+    # Should not call any calibration methods
+    with patch("sklearn.isotonic.IsotonicRegression") as mock_isotonic:
+        adjusted = processor.adjust(scores, treatment)
+        mock_isotonic.assert_not_called()
+
+    np.testing.assert_array_equal(adjusted, scores)

From cd3605bc15b3baed5c1f8f8da90f76c7260a5efb Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 22 Oct 2025 15:05:43 +0200
Subject: [PATCH 08/38] remove print from rdd example in docstring as test
 fails

---
 doubleml/rdd/rdd.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py
index 0d97ed0a..fffa9a0a 100644
--- a/doubleml/rdd/rdd.py
+++ b/doubleml/rdd/rdd.py
@@ -91,16 +91,6 @@ class RDFlex:
     >>> ml_g = RandomForestRegressor()
     >>> ml_m = RandomForestClassifier()
     >>> rdflex_obj = dml.rdd.RDFlex(obj_dml_data, ml_g, ml_m, fuzzy=True)
-    >>> print(rdflex_obj.fit())
-    Method             Coef.     S.E.     t-stat       P>|t|           95% CI
-    -------------------------------------------------------------------------
-    Conventional      0.950     0.225     4.230    2.333e-05  [0.510, 1.391]
-    Robust                 -        -     3.653    2.589e-04  [0.431, 1.429]
-    Design Type:        Fuzzy
-    Cutoff:             0
-    First Stage Kernel: triangular
-    Final Bandwidth:    [0.74746872]
-
     """
 
     def __init__(

From dce29b4fc91e96b188666fab8a7fdd52b6c681cc Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 22 Oct 2025 17:56:56 +0200
Subject: [PATCH 09/38] change to direct arguments for propensity score
 processor

---
 doubleml/utils/propensity_score_processing.py | 162 ++++++------------
 doubleml/utils/tests/test_ps_processor.py     |  10 +-
 .../tests/test_ps_processor_exceptions.py     |  77 +--------
 .../test_ps_processor_representations.py      |  48 ------
 4 files changed, 71 insertions(+), 226 deletions(-)
 delete mode 100644 doubleml/utils/tests/test_ps_processor_representations.py

diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index 35ef3043..3c65d23a 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -1,5 +1,5 @@
 import warnings
-from typing import Any, Dict, Optional
+from typing import Optional
 
 import numpy as np
 from sklearn.isotonic import IsotonicRegression
@@ -14,120 +14,71 @@ class PropensityScoreProcessor:
     Parameters
     ----------
     clipping_threshold : float, default=1e-2
-        Threshold used for clipping propensity scores.
-    warn_extreme_values : bool, default=True
-        Whether to warn about extreme propensity score values.
-    extreme_threshold : float, default=0.05
-        Threshold for extreme value warnings.
-    warning_proportion : float, default=0.1
-        Proportion threshold for triggering extreme value warnings.
+        Minimum and maximum bound for propensity scores after clipping.
+
+    extreme_threshold : float, default=1e-12
+        Threshold below which propensity scores are considered extreme.
+        Used for generating warnings.
+
+    calibration_method : {'isotonic', None}, optional
+        If provided, applies the specified calibration method to
+        the propensity scores before clipping.
+
+    cv_calibration : bool, default=False
+        Whether to use cross-validation for calibration.
+        Only applies if a calibration method is specified.
 
     Examples
     --------
     >>> import numpy as np
-    >>> from doubleml.utils import PropensityScoreProcessor
-    >>> ps_scores = np.array([0.001, 0.2, 0.5, 0.8, 0.999])
+    >>> ps = np.array([0.001, 0.2, 0.5, 0.8, 0.999])
     >>> treatment = np.array([0, 1, 1, 0, 1])
     >>> processor = PropensityScoreProcessor(clipping_threshold=0.01)
-    >>> adj_scores = processor.adjust(ps_scores, treatment)
-    >>> print(adj_scores)
+    >>> adjusted = processor.adjust_ps(ps, treatment)
+    >>> print(np.round(adjusted, 3))
     [0.01 0.2  0.5  0.8  0.99]
     """
 
-    _DEFAULT_CONFIG: Dict[str, Any] = {
-        "clipping_threshold": 1e-2,
-        "extreme_threshold": 1e-12,
-        "calibration_method": None,
-        "cv_calibration": False,
-    }
-
     _VALID_CALIBRATION_METHODS = {None, "isotonic"}
 
-    def __init__(self, **config: Any) -> None:
-
-        unknown_params = set(config.keys()) - set(self._DEFAULT_CONFIG.keys())
-        if unknown_params:
-            raise ValueError(f"Unknown parameters: {unknown_params}")
-
-        updated_config = {**self._DEFAULT_CONFIG, **config}
-        self._validate_config(updated_config)
-        self._config = updated_config
-
-    # -------------------------------------------------------------------------
-    # Configuration methods
-    # -------------------------------------------------------------------------
-    def _validate_config(self, config: Dict[str, Any]) -> None:
-        """Validate configuration parameters."""
-
-        clipping_threshold = config["clipping_threshold"]
-        if not isinstance(clipping_threshold, float):
-            raise TypeError("clipping_threshold must be of float type. " f"Object of type {type(clipping_threshold)} passed.")
-        if (clipping_threshold <= 0) or (clipping_threshold >= 0.5):
-            raise ValueError(f"clipping_threshold must be between 0 and 0.5. " f"{clipping_threshold} was passed.")
-
-        if not (0 < config["extreme_threshold"] < 0.5):
-            raise ValueError("extreme_threshold must be between 0 and 0.5.")
-
-        calibration_method = config["calibration_method"]
-        if calibration_method not in self._VALID_CALIBRATION_METHODS:
-            raise ValueError(
-                f"calibration_method must be one of {self._VALID_CALIBRATION_METHODS}. " f"Got {calibration_method}."
-            )
-
-        if not isinstance(config["cv_calibration"], bool):
-            raise TypeError("cv_calibration must be of bool type.")
-        if config["cv_calibration"] and config["calibration_method"] is None:
-            raise ValueError("cv_calibration can only be used with a calibration_method.")
+    def __init__(
+        self,
+        clipping_threshold: float = 1e-2,
+        extreme_threshold: float = 1e-12,
+        calibration_method: Optional[str] = None,
+        cv_calibration: bool = False,
+    ):
+        self._clipping_threshold = clipping_threshold
+        self._extreme_threshold = extreme_threshold
+        self._calibration_method = calibration_method
+        self._cv_calibration = cv_calibration
+
+        self._validate_config()
 
     @property
     def clipping_threshold(self) -> float:
         """Get the clipping threshold."""
-        return self._config["clipping_threshold"]
+        return self._clipping_threshold
 
     @property
     def extreme_threshold(self) -> float:
         """Get the extreme threshold."""
-        return self._config["extreme_threshold"]
+        return self._extreme_threshold
 
     @property
     def calibration_method(self) -> Optional[str]:
         """Get the calibration method."""
-        return self._config["calibration_method"]
+        return self._calibration_method
 
     @property
     def cv_calibration(self) -> bool:
         """Get whether cross-validation calibration is used."""
-        return self._config["cv_calibration"]
-
-    @classmethod
-    def get_default_config(cls) -> Dict[str, Any]:
-        """Return the default configuration dictionary."""
-        return cls._DEFAULT_CONFIG.copy()
-
-    def get_config(self) -> Dict[str, Any]:
-        """Return a copy of the current configuration dictionary."""
-        return self._config.copy()
-
-    def update_config(self, **new_config: Any) -> None:
-        """
-        Update configuration parameters.
-
-        Validates the new configuration before applying changes to ensure
-        the object remains in a consistent state.
-        """
-
-        unknown_params = set(new_config.keys()) - set(self._DEFAULT_CONFIG.keys())
-        if unknown_params:
-            raise ValueError(f"Unknown parameters: {unknown_params}")
-
-        updated_config = {**self._config, **new_config}
-        self._validate_config(updated_config)
-        self._config = updated_config
+        return self._cv_calibration
 
     # -------------------------------------------------------------------------
     # Core functionality
     # -------------------------------------------------------------------------
-    def adjust(
+    def adjust_ps(
         self,
         propensity_scores: np.ndarray,
         treatment: np.ndarray,
@@ -159,10 +110,6 @@ def adjust(
         )
         self._validate_treatment(treatment)
 
-        if self.cv_calibration:
-            cv = cv
-        else:
-            cv = None
         calibrated_ps = self._apply_calibration(propensity_scores, treatment, cv=cv)
         clipped_scores = np.clip(calibrated_ps, a_min=self.clipping_threshold, a_max=1 - self.clipping_threshold)
 
@@ -183,14 +130,13 @@ def _apply_calibration(
         elif self.calibration_method == "isotonic":
             calibration_model = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0)
 
-            if cv is None:
-                calibration_model.fit(propensity_scores.reshape(-1, 1), treatment)
-                calibrated_ps = calibration_model.predict(propensity_scores.reshape(-1, 1))
-            else:
+            if self.cv_calibration and cv is not None:
                 calibrated_ps = cross_val_predict(
                     estimator=calibration_model, X=propensity_scores.reshape(-1, 1), y=treatment, cv=cv, method="predict"
                 )
-
+            else:
+                calibration_model.fit(propensity_scores.reshape(-1, 1), treatment)
+                calibrated_ps = calibration_model.predict(propensity_scores.reshape(-1, 1))
         else:
             # This point should never be reached due to prior validation
             raise ValueError(
@@ -200,6 +146,24 @@ def _apply_calibration(
 
         return calibrated_ps
 
+    def _validate_config(self) -> None:
+        """Validate configuration parameters."""
+        if not isinstance(self.clipping_threshold, float):
+            raise TypeError("clipping_threshold must be a float.")
+        if not (0 < self.clipping_threshold < 0.5):
+            raise ValueError("clipping_threshold must be between 0 and 0.5.")
+
+        if not (0 < self.extreme_threshold < 0.5):
+            raise ValueError("extreme_threshold must be between 0 and 0.5.")
+
+        if self.calibration_method not in self._VALID_CALIBRATION_METHODS:
+            raise ValueError(f"calibration_method must be one of {self._VALID_CALIBRATION_METHODS}.")
+
+        if not isinstance(self.cv_calibration, bool):
+            raise TypeError("cv_calibration must be of bool type.")
+        if self.cv_calibration and self.calibration_method is None:
+            raise ValueError("cv_calibration=True requires a calibration_method.")
+
     def _validate_propensity_scores(
         self,
         preds: np.ndarray,
@@ -232,15 +196,3 @@ def _validate_treatment(self, treatment: np.ndarray) -> None:
         zero_one_treat = np.all((np.power(treatment, 2) - treatment) == 0)
         if not (binary_treat and zero_one_treat):
             raise ValueError("Treatment vector must be binary (0 and 1).")
-
-    # -------------------------------------------------------------------------
-    # Representations
-    # -------------------------------------------------------------------------
-    def __repr__(self) -> str:
-        config_str = ", ".join([f"{k}={v}" for k, v in sorted(self._config.items())])
-        return f"{self.__class__.__name__}({config_str})"
-
-    def __eq__(self, other: object) -> bool:
-        if not isinstance(other, PropensityScoreProcessor):
-            return False
-        return self._config == other._config
diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py
index 7b2c0d81..d4f8bdcb 100644
--- a/doubleml/utils/tests/test_ps_processor.py
+++ b/doubleml/utils/tests/test_ps_processor.py
@@ -15,7 +15,7 @@ def test_adjust_basic_clipping():
 
     scores = np.array([0.05, 0.2, 0.8, 0.95])
     treatment = np.array([0, 1, 1, 0])
-    adjusted = processor.adjust(scores, treatment)
+    adjusted = processor.adjust_ps(scores, treatment)
 
     expected = np.array([0.1, 0.2, 0.8, 0.9])
     np.testing.assert_array_equal(adjusted, expected)
@@ -28,7 +28,7 @@ def test_adjust_no_clipping_needed():
 
     scores = np.array([0.2, 0.3, 0.7, 0.8])
     treatment = np.array([0, 1, 1, 0])
-    adjusted = processor.adjust(scores, treatment)
+    adjusted = processor.adjust_ps(scores, treatment)
 
     np.testing.assert_array_equal(adjusted, scores)
 
@@ -51,7 +51,7 @@ def test_isotonic_calibration_without_cv():
     expected_ps_manual = isotonic_manual.predict(ps.reshape(-1, 1))
     expected_ps_manual = np.clip(expected_ps_manual, clipping_threshold, 1 - clipping_threshold)
 
-    adjusted_ps = processor.adjust(ps, treatment)
+    adjusted_ps = processor.adjust_ps(ps, treatment)
     np.testing.assert_array_equal(adjusted_ps, expected_ps_manual)
 
 
@@ -82,7 +82,7 @@ def test_isotonic_calibration_with_cv(cv):
     ps_cv = cross_val_predict(isotonic_manual, ps.reshape(-1, 1), treatment, cv=cv)
     expected_ps_manual = np.clip(ps_cv, clipping_threshold, 1 - clipping_threshold)
 
-    adjusted_ps = processor.adjust(ps, treatment, cv=cv)
+    adjusted_ps = processor.adjust_ps(ps, treatment, cv=cv)
     np.testing.assert_array_equal(adjusted_ps, expected_ps_manual)
 
 
@@ -96,7 +96,7 @@ def test_no_calibration():
 
     # Should not call any calibration methods
     with patch("sklearn.isotonic.IsotonicRegression") as mock_isotonic:
-        adjusted = processor.adjust(scores, treatment)
+        adjusted = processor.adjust_ps(scores, treatment)
         mock_isotonic.assert_not_called()
 
     np.testing.assert_array_equal(adjusted, scores)
diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py b/doubleml/utils/tests/test_ps_processor_exceptions.py
index f07cd787..eb6d2b7b 100644
--- a/doubleml/utils/tests/test_ps_processor_exceptions.py
+++ b/doubleml/utils/tests/test_ps_processor_exceptions.py
@@ -8,17 +8,10 @@
 # -------------------------------------------------------------------------
 
 
-@pytest.mark.ci
-def test_init_unknown_parameter():
-    """Test that unknown parameters raise ValueError during initialization."""
-    with pytest.raises(ValueError, match="Unknown parameters: {'invalid_param'}"):
-        PropensityScoreProcessor(invalid_param=0.5)
-
-
 @pytest.mark.ci
 def test_init_clipping_threshold_type_error():
     """Test that non-float clipping_threshold raises TypeError."""
-    with pytest.raises(TypeError, match="clipping_threshold must be of float type"):
+    with pytest.raises(TypeError, match="clipping_threshold must be a float."):
         PropensityScoreProcessor(clipping_threshold="0.01")
 
 
@@ -59,62 +52,10 @@ def test_init_cv_calibration_type_error():
 @pytest.mark.ci
 def test_init_cv_calibration_value_error():
     """Test that cv_calibration True with None calibration_method raises ValueError."""
-    with pytest.raises(ValueError, match="cv_calibration can only be used with a calibration_method."):
+    with pytest.raises(ValueError, match="cv_calibration=True requires a calibration_method."):
         PropensityScoreProcessor(calibration_method=None, cv_calibration=True)
 
 
-# -------------------------------------------------------------------------
-# Tests for update_config method
-# -------------------------------------------------------------------------
-
-
-@pytest.mark.ci
-def test_update_config_unknown_parameter():
-    """Test that unknown parameters raise ValueError during config update."""
-    processor = PropensityScoreProcessor()
-
-    with pytest.raises(ValueError, match="Unknown parameters: {'invalid_param'}"):
-        processor.update_config(invalid_param=0.5)
-
-
-@pytest.mark.ci
-def test_update_config_preserves_state_on_failure():
-    """Test that failed config updates don't change the processor state."""
-    processor = PropensityScoreProcessor(clipping_threshold=0.1)
-    original_config = processor.get_config()
-
-    # Try to update with invalid value
-    with pytest.raises(ValueError):
-        processor.update_config(clipping_threshold=0.6)
-
-    # Verify state hasn't changed
-    assert processor.get_config() == original_config
-    assert processor.clipping_threshold == 0.1
-
-
-@pytest.mark.ci
-def test_update_config_successful_update():
-    """Test successful configuration updates."""
-    processor = PropensityScoreProcessor(clipping_threshold=0.1)
-
-    processor.update_config(clipping_threshold=0.05)
-    assert processor.clipping_threshold == 0.05
-
-
-@pytest.mark.ci
-def test_update_config_defaults():
-    """Test updating configuration back to defaults."""
-    processor = PropensityScoreProcessor(clipping_threshold=0.1)
-
-    processor.update_config(clipping_threshold=0.01)
-    assert processor.clipping_threshold == 0.01
-
-    # Update back to default
-    default_config = PropensityScoreProcessor.get_default_config()
-    processor.update_config(**default_config)
-    assert processor.clipping_threshold == default_config["clipping_threshold"]
-
-
 # -------------------------------------------------------------------------
 # Tests for propensity score & treatment validation
 # -------------------------------------------------------------------------
@@ -125,7 +66,7 @@ def test_validate_propensity_scores_type_error_with_learner():
     """Test TypeError includes learner name."""
     processor = PropensityScoreProcessor()
     with pytest.raises(TypeError, match="from learner test_learner"):
-        processor.adjust([0.1, 0.2], np.array([0, 1]), learner_name="test_learner")
+        processor.adjust_ps([0.1, 0.2], np.array([0, 1]), learner_name="test_learner")
 
 
 @pytest.mark.ci
@@ -133,7 +74,7 @@ def test_validate_propensity_scores_dimension_error():
     """Test that non-1D propensity scores raise ValueError."""
     processor = PropensityScoreProcessor()
     with pytest.raises(ValueError, match="must be 1-dimensional"):
-        processor.adjust(np.array([[0.1, 0.2]]), np.array([0, 1]))
+        processor.adjust_ps(np.array([[0.1, 0.2]]), np.array([0, 1]))
 
 
 @pytest.mark.ci
@@ -141,7 +82,7 @@ def test_validate_propensity_scores_extreme_warning():
     """Test extreme values trigger warnings."""
     processor = PropensityScoreProcessor(extreme_threshold=0.05)
     with pytest.warns(UserWarning, match="close to zero or one"):
-        processor.adjust(np.array([0.01, 0.99]), np.array([0, 1]))
+        processor.adjust_ps(np.array([0.01, 0.99]), np.array([0, 1]))
 
 
 @pytest.mark.ci
@@ -149,7 +90,7 @@ def test_validate_treatment_type_error():
     """Test that non-numpy array treatment raises TypeError."""
     processor = PropensityScoreProcessor()
     with pytest.raises(TypeError, match="Treatment assignments must be of type np.ndarray"):
-        processor.adjust(np.array([0.2, 0.8]), [0, 1])
+        processor.adjust_ps(np.array([0.2, 0.8]), [0, 1])
 
 
 @pytest.mark.ci
@@ -157,7 +98,7 @@ def test_validate_treatment_dimension_error():
     """Test that non-1D treatment raises ValueError."""
     processor = PropensityScoreProcessor()
     with pytest.raises(ValueError, match="must be 1-dimensional"):
-        processor.adjust(np.array([0.2, 0.8]), np.array([[0, 1]]))
+        processor.adjust_ps(np.array([0.2, 0.8]), np.array([[0, 1]]))
 
 
 @pytest.mark.ci
@@ -165,7 +106,7 @@ def test_validate_treatment_binary_error():
     """Test that non-binary treatment values raise ValueError."""
     processor = PropensityScoreProcessor()
     with pytest.raises(ValueError, match="must be binary"):
-        processor.adjust(np.array([0.2, 0.8]), np.array([0, 2]))
+        processor.adjust_ps(np.array([0.2, 0.8]), np.array([0, 2]))
 
 
 # -------------------------------------------------------------------------
@@ -177,7 +118,7 @@ def test_validate_treatment_binary_error():
 def test_apply_calibration_unsupported_method_error():
     """Test that unsupported calibration method raises ValueError."""
     processor = PropensityScoreProcessor()
-    processor._config["calibration_method"] = "unsupported_method"
+    processor._calibration_method = "unsupported_method"
 
     propensity_scores = np.array([0.2, 0.8])
     treatment = np.array([0, 1])
diff --git a/doubleml/utils/tests/test_ps_processor_representations.py b/doubleml/utils/tests/test_ps_processor_representations.py
deleted file mode 100644
index 365f9ea4..00000000
--- a/doubleml/utils/tests/test_ps_processor_representations.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import pytest
-
-from doubleml.utils import PropensityScoreProcessor
-
-
-@pytest.mark.ci
-def test_repr_default_config():
-    """Test __repr__ with default configuration."""
-    processor = PropensityScoreProcessor()
-    expected = (
-        "PropensityScoreProcessor(calibration_method=None, clipping_threshold=0.01, "
-        "cv_calibration=False, extreme_threshold=1e-12)"
-    )
-    assert repr(processor) == expected
-
-
-@pytest.mark.ci
-def test_repr_custom_config():
-    """Test __repr__ with custom configuration."""
-    processor = PropensityScoreProcessor(clipping_threshold=0.05, extreme_threshold=1e-6)
-    expected = (
-        "PropensityScoreProcessor(calibration_method=None, clipping_threshold=0.05, "
-        "cv_calibration=False, extreme_threshold=1e-06)"
-    )
-    assert repr(processor) == expected
-
-
-@pytest.mark.ci
-def test_eq_same_config():
-    """Test equality with same configuration."""
-    processor1 = PropensityScoreProcessor(clipping_threshold=0.05)
-    processor2 = PropensityScoreProcessor(clipping_threshold=0.05)
-    assert processor1 == processor2
-
-
-@pytest.mark.ci
-def test_eq_different_config():
-    """Test inequality with different configuration."""
-    processor1 = PropensityScoreProcessor(clipping_threshold=0.05)
-    processor2 = PropensityScoreProcessor(clipping_threshold=0.1)
-    assert processor1 != processor2
-
-
-@pytest.mark.ci
-def test_eq_different_type():
-    """Test inequality with different object type."""
-    processor = PropensityScoreProcessor()
-    assert processor != "NotAPropensityScoreProcessor"

From 08a00737b1f6af8b52c4e1353c07525ba28b2ece Mon Sep 17 00:00:00 2001
From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Thu, 23 Oct 2025 09:49:22 +0200
Subject: [PATCH 10/38] add data class with config for psprocessor

---
 doubleml/utils/__init__.py                    |  5 +--
 doubleml/utils/propensity_score_processing.py | 26 +++++++++++++--
 doubleml/utils/tests/test_ps_processor.py     | 30 +++++++++++++----
 .../tests/test_ps_processor_exceptions.py     | 32 +++++++++----------
 4 files changed, 66 insertions(+), 27 deletions(-)

diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py
index ce1ba3c6..4f6269dd 100644
--- a/doubleml/utils/__init__.py
+++ b/doubleml/utils/__init__.py
@@ -7,7 +7,7 @@
 from .gain_statistics import gain_statistics
 from .global_learner import GlobalClassifier, GlobalRegressor
 from .policytree import DoubleMLPolicyTree
-from .propensity_score_processing import PropensityScoreProcessor
+from .propensity_score_processing import PSProcessor, PSProcessorConfig
 from .resampling import DoubleMLClusterResampling, DoubleMLResampling
 
 __all__ = [
@@ -20,5 +20,6 @@
     "gain_statistics",
     "GlobalClassifier",
     "GlobalRegressor",
-    "PropensityScoreProcessor",
+    "PSProcessor",
+    "PSProcessorConfig",
 ]
diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index 3c65d23a..9a42a1b3 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -1,4 +1,5 @@
 import warnings
+from dataclasses import dataclass
 from typing import Optional
 
 import numpy as np
@@ -7,7 +8,15 @@
 from sklearn.utils.multiclass import type_of_target
 
 
-class PropensityScoreProcessor:
+@dataclass
+class PSProcessorConfig:
+    clipping_threshold: float = 1e-2
+    extreme_threshold: float = 1e-12
+    calibration_method: Optional[str] = None
+    cv_calibration: bool = False
+
+
+class PSProcessor:
     """
     Processor for propensity score calibration, clipping, and validation.
 
@@ -55,6 +64,19 @@ def __init__(
 
         self._validate_config()
 
+    @classmethod
+    def from_config(cls, config: PSProcessorConfig):
+        return cls(
+            clipping_threshold=config.clipping_threshold,
+            extreme_threshold=config.extreme_threshold,
+            calibration_method=config.calibration_method,
+            cv_calibration=config.cv_calibration,
+        )
+
+    # -------------------------------------------------------------------------
+    # Properties
+    # -------------------------------------------------------------------------
+
     @property
     def clipping_threshold(self) -> float:
         """Get the clipping threshold."""
@@ -130,7 +152,7 @@ def _apply_calibration(
         elif self.calibration_method == "isotonic":
             calibration_model = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0)
 
-            if self.cv_calibration and cv is not None:
+            if self.cv_calibration:
                 calibrated_ps = cross_val_predict(
                     estimator=calibration_model, X=propensity_scores.reshape(-1, 1), y=treatment, cv=cv, method="predict"
                 )
diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py
index d4f8bdcb..4251e0b8 100644
--- a/doubleml/utils/tests/test_ps_processor.py
+++ b/doubleml/utils/tests/test_ps_processor.py
@@ -5,13 +5,29 @@
 from sklearn.isotonic import IsotonicRegression
 from sklearn.model_selection import KFold, cross_val_predict
 
-from doubleml.utils.propensity_score_processing import PropensityScoreProcessor
+from doubleml.utils.propensity_score_processing import PSProcessor, PSProcessorConfig
+
+
+@pytest.mark.ci
+def test_from_config_initialization():
+    """Test initialization of PSProcessor from PSProcessorConfig."""
+    config = PSProcessorConfig(
+        clipping_threshold=0.05,
+        extreme_threshold=1e-8,
+        calibration_method="isotonic",
+        cv_calibration=True,
+    )
+    processor = PSProcessor.from_config(config)
+    assert processor.clipping_threshold == 0.05
+    assert processor.extreme_threshold == 1e-8
+    assert processor.calibration_method == "isotonic"
+    assert processor.cv_calibration is True
 
 
 @pytest.mark.ci
 def test_adjust_basic_clipping():
     """Test basic clipping functionality."""
-    processor = PropensityScoreProcessor(clipping_threshold=0.1)
+    processor = PSProcessor(clipping_threshold=0.1)
 
     scores = np.array([0.05, 0.2, 0.8, 0.95])
     treatment = np.array([0, 1, 1, 0])
@@ -24,7 +40,7 @@ def test_adjust_basic_clipping():
 @pytest.mark.ci
 def test_adjust_no_clipping_needed():
     """Test when no clipping is needed."""
-    processor = PropensityScoreProcessor(clipping_threshold=0.01)
+    processor = PSProcessor(clipping_threshold=0.01)
 
     scores = np.array([0.2, 0.3, 0.7, 0.8])
     treatment = np.array([0, 1, 1, 0])
@@ -40,7 +56,7 @@ def test_isotonic_calibration_without_cv():
     treatment = np.random.binomial(1, 0.5, size=100)
 
     clipping_threshold = 0.01
-    processor = PropensityScoreProcessor(
+    processor = PSProcessor(
         calibration_method="isotonic",
         cv_calibration=False,
         clipping_threshold=clipping_threshold,
@@ -55,7 +71,7 @@ def test_isotonic_calibration_without_cv():
     np.testing.assert_array_equal(adjusted_ps, expected_ps_manual)
 
 
-@pytest.fixture(scope="module", params=[3, "iterable", "splitter"])
+@pytest.fixture(scope="module", params=[None, 3, "iterable", "splitter"])
 def cv(request):
     return request.param
 
@@ -74,7 +90,7 @@ def test_isotonic_calibration_with_cv(cv):
         cv = cv
 
     clipping_threshold = 0.01
-    processor = PropensityScoreProcessor(
+    processor = PSProcessor(
         calibration_method="isotonic", cv_calibration=True, clipping_threshold=clipping_threshold
     )
 
@@ -89,7 +105,7 @@ def test_isotonic_calibration_with_cv(cv):
 @pytest.mark.ci
 def test_no_calibration():
     """Test that no calibration is applied when calibration_method is None."""
-    processor = PropensityScoreProcessor(calibration_method=None, clipping_threshold=0.01)
+    processor = PSProcessor(calibration_method=None, clipping_threshold=0.01)
 
     scores = np.array([0.2, 0.3, 0.7, 0.8])
     treatment = np.array([0, 1, 1, 0])
diff --git a/doubleml/utils/tests/test_ps_processor_exceptions.py b/doubleml/utils/tests/test_ps_processor_exceptions.py
index eb6d2b7b..0c3971a7 100644
--- a/doubleml/utils/tests/test_ps_processor_exceptions.py
+++ b/doubleml/utils/tests/test_ps_processor_exceptions.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-from doubleml.utils import PropensityScoreProcessor
+from doubleml.utils.propensity_score_processing import PSProcessor
 
 # -------------------------------------------------------------------------
 # Tests for __init__ method
@@ -12,48 +12,48 @@
 def test_init_clipping_threshold_type_error():
     """Test that non-float clipping_threshold raises TypeError."""
     with pytest.raises(TypeError, match="clipping_threshold must be a float."):
-        PropensityScoreProcessor(clipping_threshold="0.01")
+        PSProcessor(clipping_threshold="0.01")
 
 
 @pytest.mark.ci
 def test_init_clipping_threshold_value_error():
     """Test that invalid clipping_threshold values raise ValueError."""
     with pytest.raises(ValueError, match="clipping_threshold must be between 0 and 0.5"):
-        PropensityScoreProcessor(clipping_threshold=0.0)  # exactly 0
+        PSProcessor(clipping_threshold=0.0)  # exactly 0
 
     with pytest.raises(ValueError, match="clipping_threshold must be between 0 and 0.5"):
-        PropensityScoreProcessor(clipping_threshold=0.6)  # above 0.5
+        PSProcessor(clipping_threshold=0.6)  # above 0.5
 
 
 @pytest.mark.ci
 def test_init_extreme_threshold_value_error():
     """Test that invalid extreme_threshold values raise ValueError."""
     with pytest.raises(ValueError, match="extreme_threshold must be between 0 and 0.5"):
-        PropensityScoreProcessor(extreme_threshold=0.0)  # exactly 0
+        PSProcessor(extreme_threshold=0.0)  # exactly 0
 
     with pytest.raises(ValueError, match="extreme_threshold must be between 0 and 0.5"):
-        PropensityScoreProcessor(extreme_threshold=0.6)  # above 0.5
+        PSProcessor(extreme_threshold=0.6)  # above 0.5
 
 
 @pytest.mark.ci
 def test_init_calibration_method_value_error():
     """Test that invalid calibration_method raises ValueError."""
     with pytest.raises(ValueError, match="calibration_method must be one of"):
-        PropensityScoreProcessor(calibration_method="invalid_method")
+        PSProcessor(calibration_method="invalid_method")
 
 
 @pytest.mark.ci
 def test_init_cv_calibration_type_error():
     """Test that non-bool cv_calibration raises TypeError."""
     with pytest.raises(TypeError, match="cv_calibration must be of bool type."):
-        PropensityScoreProcessor(cv_calibration="True")
+        PSProcessor(cv_calibration="True")
 
 
 @pytest.mark.ci
 def test_init_cv_calibration_value_error():
     """Test that cv_calibration True with None calibration_method raises ValueError."""
     with pytest.raises(ValueError, match="cv_calibration=True requires a calibration_method."):
-        PropensityScoreProcessor(calibration_method=None, cv_calibration=True)
+        PSProcessor(calibration_method=None, cv_calibration=True)
 
 
 # -------------------------------------------------------------------------
@@ -64,7 +64,7 @@ def test_init_cv_calibration_value_error():
 @pytest.mark.ci
 def test_validate_propensity_scores_type_error_with_learner():
     """Test TypeError includes learner name."""
-    processor = PropensityScoreProcessor()
+    processor = PSProcessor()
     with pytest.raises(TypeError, match="from learner test_learner"):
         processor.adjust_ps([0.1, 0.2], np.array([0, 1]), learner_name="test_learner")
 
@@ -72,7 +72,7 @@ def test_validate_propensity_scores_type_error_with_learner():
 @pytest.mark.ci
 def test_validate_propensity_scores_dimension_error():
     """Test that non-1D propensity scores raise ValueError."""
-    processor = PropensityScoreProcessor()
+    processor = PSProcessor()
     with pytest.raises(ValueError, match="must be 1-dimensional"):
         processor.adjust_ps(np.array([[0.1, 0.2]]), np.array([0, 1]))
 
@@ -80,7 +80,7 @@ def test_validate_propensity_scores_dimension_error():
 @pytest.mark.ci
 def test_validate_propensity_scores_extreme_warning():
     """Test extreme values trigger warnings."""
-    processor = PropensityScoreProcessor(extreme_threshold=0.05)
+    processor = PSProcessor(extreme_threshold=0.05)
     with pytest.warns(UserWarning, match="close to zero or one"):
         processor.adjust_ps(np.array([0.01, 0.99]), np.array([0, 1]))
 
@@ -88,7 +88,7 @@ def test_validate_propensity_scores_extreme_warning():
 @pytest.mark.ci
 def test_validate_treatment_type_error():
     """Test that non-numpy array treatment raises TypeError."""
-    processor = PropensityScoreProcessor()
+    processor = PSProcessor()
     with pytest.raises(TypeError, match="Treatment assignments must be of type np.ndarray"):
         processor.adjust_ps(np.array([0.2, 0.8]), [0, 1])
 
@@ -96,7 +96,7 @@ def test_validate_treatment_type_error():
 @pytest.mark.ci
 def test_validate_treatment_dimension_error():
     """Test that non-1D treatment raises ValueError."""
-    processor = PropensityScoreProcessor()
+    processor = PSProcessor()
     with pytest.raises(ValueError, match="must be 1-dimensional"):
         processor.adjust_ps(np.array([0.2, 0.8]), np.array([[0, 1]]))
 
@@ -104,7 +104,7 @@ def test_validate_treatment_dimension_error():
 @pytest.mark.ci
 def test_validate_treatment_binary_error():
     """Test that non-binary treatment values raise ValueError."""
-    processor = PropensityScoreProcessor()
+    processor = PSProcessor()
     with pytest.raises(ValueError, match="must be binary"):
         processor.adjust_ps(np.array([0.2, 0.8]), np.array([0, 2]))
 
@@ -117,7 +117,7 @@ def test_validate_treatment_binary_error():
 @pytest.mark.ci
 def test_apply_calibration_unsupported_method_error():
     """Test that unsupported calibration method raises ValueError."""
-    processor = PropensityScoreProcessor()
+    processor = PSProcessor()
     processor._calibration_method = "unsupported_method"
 
     propensity_scores = np.array([0.2, 0.8])

From 4c5f850dbc727dcf1d77926f655dbaaffd316563 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Klaa=C3=9Fen?=
 <47529404+SvenKlaassen@users.noreply.github.com>
Date: Thu, 23 Oct 2025 15:35:59 +0200
Subject: [PATCH 11/38] add init_ps_processor function

---
 doubleml/utils/propensity_score_processing.py | 30 +++++++++++++++++++
 doubleml/utils/tests/test_ps_processor.py     | 25 +++++++++++++++-
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index 9a42a1b3..dedb0268 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -16,6 +16,36 @@ class PSProcessorConfig:
     cv_calibration: bool = False
 
 
+# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+def init_ps_processor(
+    ps_processor_config: Optional[PSProcessorConfig],
+    trimming_rule: Optional[str],
+    trimming_threshold: Optional[float]
+):
+    if trimming_rule is not None:
+        warnings.warn(
+            "'trimming_rule' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config' with 'clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=3
+        )
+    if trimming_threshold is not None:
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config' with 'clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=3
+        )
+    if ps_processor_config is not None:
+        config = ps_processor_config
+    else:
+        config = PSProcessorConfig(
+            clipping_threshold=trimming_threshold if trimming_threshold is not None else 1e-2
+        )
+    processor = PSProcessor.from_config(config)
+    return config, processor
+
+
 class PSProcessor:
     """
     Processor for propensity score calibration, clipping, and validation.
diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py
index 4251e0b8..d48a4b7d 100644
--- a/doubleml/utils/tests/test_ps_processor.py
+++ b/doubleml/utils/tests/test_ps_processor.py
@@ -1,11 +1,34 @@
 from unittest.mock import patch
+import warnings
 
 import numpy as np
 import pytest
 from sklearn.isotonic import IsotonicRegression
 from sklearn.model_selection import KFold, cross_val_predict
 
-from doubleml.utils.propensity_score_processing import PSProcessor, PSProcessorConfig
+from doubleml.utils.propensity_score_processing import (
+    PSProcessorConfig, PSProcessor, init_ps_processor
+)
+
+
+# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+@pytest.mark.ci
+def test_init_ps_processor_with_deprecated():
+    with warnings.catch_warnings(record=True) as w:
+        warnings.simplefilter("always")
+        cfg, proc = init_ps_processor(None, "truncate", 0.02)
+        assert any("deprecated" in str(warn.message) for warn in w)
+        assert isinstance(cfg, PSProcessorConfig)
+        assert proc.clipping_threshold == 0.02
+
+
+@pytest.mark.ci
+def test_init_ps_processor_with_config():
+    config = PSProcessorConfig(clipping_threshold=0.05)
+    cfg, proc = init_ps_processor(config, None, None)
+    assert isinstance(cfg, PSProcessorConfig)
+    assert isinstance(proc, PSProcessor)
+    assert proc.clipping_threshold == 0.05
 
 
 @pytest.mark.ci

From f284fa464ff97f190ecd6323f37d0ffc07c0b507 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 24 Oct 2025 12:46:07 +0200
Subject: [PATCH 12/38] update irm class and tests with ps_processor

---
 doubleml/irm/irm.py                           | 67 ++++++++++++++-----
 doubleml/irm/tests/_utils_irm_manual.py       |  8 +--
 doubleml/irm/tests/test_irm.py                | 23 ++++---
 doubleml/irm/tests/test_irm_classifier.py     |  9 +--
 .../tests/test_irm_external_predictions.py    | 12 +++-
 .../irm/tests/test_irm_weighted_scores.py     | 13 +++-
 doubleml/irm/tests/test_irm_with_missings.py  | 15 +++--
 doubleml/tests/_utils.py                      |  8 +--
 doubleml/utils/propensity_score_processing.py | 16 ++---
 9 files changed, 116 insertions(+), 55 deletions(-)

diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
index 5e2d693b..270fb4b5 100644
--- a/doubleml/irm/irm.py
+++ b/doubleml/irm/irm.py
@@ -1,4 +1,5 @@
 import warnings
+from typing import Optional
 
 import numpy as np
 import pandas as pd
@@ -12,17 +13,17 @@
     _check_binary_predictions,
     _check_finite_predictions,
     _check_integer,
-    _check_is_propensity,
     _check_score,
-    _check_trimming,
     _check_weights,
 )
 from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls
-from doubleml.utils._propensity_score import _propensity_score_adjustment, _trimm
+from doubleml.utils._propensity_score import _propensity_score_adjustment
 from doubleml.utils.blp import DoubleMLBLP
 from doubleml.utils.policytree import DoubleMLPolicyTree
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor
 
 
+# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
 class DoubleMLIRM(LinearScoreMixin, DoubleML):
     """Double machine learning for interactive regression models
 
@@ -68,13 +69,16 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML):
         Indicates whether the inverse probability weights are normalized.
         Default is ``False``.
 
-    trimming_rule : str
-        A str (``'truncate'`` is the only choice) specifying the trimming approach.
-        Default is ``'truncate'``.
+    trimming_rule : str, optional, deprecated
+        (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach.
+        Use `ps_processor_config` instead. Will be removed in a future version.
 
-    trimming_threshold : float
-        The threshold used for trimming.
-        Default is ``1e-2``.
+    trimming_threshold : float, optional, deprecated
+        (DEPRECATED) The threshold used for trimming.
+        Use `ps_processor_config` instead. Will be removed in a future version.
+
+    ps_processor_config : PSProcessorConfig, optional
+        Configuration for propensity score processing (clipping, calibration, etc.). Default is ``None``.
 
     draw_sample_splitting : bool
         Indicates whether the sample splitting should be drawn during initialization of the object.
@@ -131,8 +135,9 @@ def __init__(
         score="ATE",
         weights=None,
         normalize_ipw=False,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        trimming_rule="truncate",  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        trimming_threshold=1e-2,  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        ps_processor_config: Optional[PSProcessorConfig] = None,
         draw_sample_splitting=True,
     ):
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
@@ -167,9 +172,13 @@ def __init__(
             raise TypeError(
                 "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed."
             )
+
+        # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        self._ps_processor_config, self._ps_processor = init_ps_processor(
+            ps_processor_config, trimming_rule, trimming_threshold
+        )
         self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._trimming_threshold = self._ps_processor.clipping_threshold
 
         self._sensitivity_implemented = True
         self._external_predictions_implemented = True
@@ -184,19 +193,44 @@ def normalize_ipw(self):
         """
         return self._normalize_ipw
 
+    @property
+    def ps_processor_config(self):
+        """
+        Configuration for propensity score processing (clipping, calibration, etc.).
+        """
+        return self._ps_processor_config
+
+    @property
+    def ps_processor(self):
+        """
+        Propensity score processor.
+        """
+        return self._ps_processor
+
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_rule(self):
         """
         Specifies the used trimming rule.
         """
+        warnings.warn(
+            "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2
+        )
         return self._trimming_rule
 
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_threshold(self):
         """
         Specifies the used trimming threshold.
         """
-        return self._trimming_threshold
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._ps_processor.clipping_threshold
 
     @property
     def weights(self):
@@ -327,9 +361,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                 return_models=return_models,
             )
             _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
-            _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
-        # also trimm external predictions
-        m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
+
+        m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], self._dml_data.d, cv=smpls)
 
         psi_a, psi_b = self._score_elements(y, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], smpls)
         psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
diff --git a/doubleml/irm/tests/_utils_irm_manual.py b/doubleml/irm/tests/_utils_irm_manual.py
index f5a5bad7..d5bf3dc3 100644
--- a/doubleml/irm/tests/_utils_irm_manual.py
+++ b/doubleml/irm/tests/_utils_irm_manual.py
@@ -20,7 +20,7 @@ def fit_irm(
     g1_params=None,
     m_params=None,
     normalize_ipw=True,
-    trimming_threshold=1e-2,
+    clipping_threshold=1e-2,
 ):
     n_obs = len(y)
 
@@ -44,7 +44,7 @@ def fit_irm(
             g0_params=g0_params,
             g1_params=g1_params,
             m_params=m_params,
-            trimming_threshold=trimming_threshold,
+            clipping_threshold=clipping_threshold,
         )
 
         all_g_hat0.append(g_hat0)
@@ -72,7 +72,7 @@ def fit_irm(
 
 
 def fit_nuisance_irm(
-    y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, trimming_threshold=1e-12
+    y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, clipping_threshold=1e-12
 ):
     ml_g0 = clone(learner_g)
     ml_g1 = clone(learner_g)
@@ -89,7 +89,7 @@ def fit_nuisance_irm(
         g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1)
 
     ml_m = clone(learner_m)
-    m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold)
+    m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold)
 
     p_hat_list = []
     for _ in smpls:
diff --git a/doubleml/irm/tests/test_irm.py b/doubleml/irm/tests/test_irm.py
index 856c7f59..f3b60ea9 100644
--- a/doubleml/irm/tests/test_irm.py
+++ b/doubleml/irm/tests/test_irm.py
@@ -9,6 +9,7 @@
 
 import doubleml as dml
 from doubleml.irm.datasets import make_irm_data
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
 from doubleml.utils.resampling import DoubleMLResampling
 
 from ...tests._utils import draw_smpls
@@ -40,12 +41,12 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.2, 0.15])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_threshold):
+def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, clipping_threshold):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 499
@@ -62,6 +63,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t
     all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d)
     obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d)
 
+    ps_processor_config = PSProcessorConfig(clipping_threshold=clipping_threshold)
     np.random.seed(3141)
     dml_irm_obj = dml.DoubleMLIRM(
         obj_dml_data,
@@ -71,7 +73,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t
         score=score,
         normalize_ipw=normalize_ipw,
         draw_sample_splitting=False,
-        trimming_threshold=trimming_threshold,
+        ps_processor_config=ps_processor_config,
     )
 
     # synchronize the sample splitting
@@ -88,7 +90,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t
         all_smpls,
         score,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     np.random.seed(3141)
@@ -101,7 +103,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_t
         score=score,
         normalize_ipw=normalize_ipw,
         draw_sample_splitting=False,
-        trimming_threshold=trimming_threshold,
+        ps_processor_config=ps_processor_config,
     )
 
     # synchronize the sample splitting
@@ -235,8 +237,8 @@ def test_dml_irm_cate_gate(cov_type):
     # First stage estimation
     ml_g = RandomForestRegressor(n_estimators=10)
     ml_m = RandomForestClassifier(n_estimators=10)
-
-    dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_m=ml_m, ml_g=ml_g, trimming_threshold=0.05, n_folds=5)
+    ps_processor_config = PSProcessorConfig(clipping_threshold=0.05)
+    dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_m=ml_m, ml_g=ml_g, ps_processor_config=ps_processor_config, n_folds=5)
 
     dml_irm_obj.fit()
     # create a random basis
@@ -279,7 +281,12 @@ def dml_irm_weights_fixture(n_rep):
     # collect data
     np.random.seed(42)
     obj_dml_data = make_irm_data(n_obs=n, dim_x=2)
-    kwargs = {"trimming_threshold": 0.05, "n_folds": 5, "n_rep": n_rep, "draw_sample_splitting": False}
+    kwargs = {
+        "ps_processor_config": PSProcessorConfig(clipping_threshold=0.05),
+        "n_folds": 5,
+        "n_rep": n_rep,
+        "draw_sample_splitting": False,
+    }
 
     smpls = DoubleMLResampling(n_folds=5, n_rep=n_rep, n_obs=n, stratify=obj_dml_data.d).split_samples()
 
diff --git a/doubleml/irm/tests/test_irm_classifier.py b/doubleml/irm/tests/test_irm_classifier.py
index 9389439d..afe8ca65 100644
--- a/doubleml/irm/tests/test_irm_classifier.py
+++ b/doubleml/irm/tests/test_irm_classifier.py
@@ -7,6 +7,7 @@
 from sklearn.linear_model import LogisticRegression
 
 import doubleml as dml
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
 
 from ...tests._utils import draw_smpls
 from ._utils_irm_manual import boot_irm, fit_irm
@@ -37,12 +38,12 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.01, 0.05])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normalize_ipw, trimming_threshold):
+def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normalize_ipw, clipping_threshold):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 499
@@ -65,7 +66,7 @@ def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normali
         n_folds,
         score=score,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold),
         draw_sample_splitting=False,
     )
     # synchronize the sample splitting
@@ -82,7 +83,7 @@ def dml_irm_classifier_fixture(generate_data_irm_binary, learner, score, normali
         all_smpls,
         score,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     res_dict = {
diff --git a/doubleml/irm/tests/test_irm_external_predictions.py b/doubleml/irm/tests/test_irm_external_predictions.py
index 5d0412d5..17bf44dc 100644
--- a/doubleml/irm/tests/test_irm_external_predictions.py
+++ b/doubleml/irm/tests/test_irm_external_predictions.py
@@ -64,7 +64,12 @@ def doubleml_irm_fixture(irm_score, n_rep, set_ml_m_ext, set_ml_g_ext):
     np.random.seed(3141)
     dml_irm_ext.fit(external_predictions=ext_predictions)
 
-    res_dict = {"coef_normal": dml_irm.coef[0], "coef_ext": dml_irm_ext.coef[0]}
+    res_dict = {
+        "coef_normal": dml_irm.coef[0],
+        "coef_ext": dml_irm_ext.coef[0],
+        "se": dml_irm.se[0],
+        "se_ext": dml_irm_ext.se[0],
+    }
 
     return res_dict
 
@@ -72,3 +77,8 @@ def doubleml_irm_fixture(irm_score, n_rep, set_ml_m_ext, set_ml_g_ext):
 @pytest.mark.ci
 def test_doubleml_irm_coef(doubleml_irm_fixture):
     assert math.isclose(doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4)
+
+
+@pytest.mark.ci
+def test_doubleml_irm_se(doubleml_irm_fixture):
+    assert math.isclose(doubleml_irm_fixture["se"], doubleml_irm_fixture["se_ext"], rel_tol=1e-9, abs_tol=1e-4)
diff --git a/doubleml/irm/tests/test_irm_weighted_scores.py b/doubleml/irm/tests/test_irm_weighted_scores.py
index 0592c3d3..56d841cc 100644
--- a/doubleml/irm/tests/test_irm_weighted_scores.py
+++ b/doubleml/irm/tests/test_irm_weighted_scores.py
@@ -6,6 +6,7 @@
 
 import doubleml as dml
 from doubleml.utils._propensity_score import _normalize_ipw
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
 
 
 def old_score_elements(y, d, g_hat0, g_hat1, m_hat, score, normalize_ipw):
@@ -65,12 +66,12 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.2, 0.15])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_threshold):
+def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_ipw, clipping_threshold):
     n_folds = 2
 
     # collect data
@@ -83,7 +84,13 @@ def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_i
 
     np.random.seed(3141)
     dml_irm_obj = dml.DoubleMLIRM(
-        obj_dml_data, ml_g, ml_m, n_folds, score=score, normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold
+        obj_dml_data,
+        ml_g,
+        ml_m,
+        n_folds,
+        score=score,
+        normalize_ipw=normalize_ipw,
+        ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold),
     )
     dml_irm_obj.fit()
 
diff --git a/doubleml/irm/tests/test_irm_with_missings.py b/doubleml/irm/tests/test_irm_with_missings.py
index a6c30cae..838ea98a 100644
--- a/doubleml/irm/tests/test_irm_with_missings.py
+++ b/doubleml/irm/tests/test_irm_with_missings.py
@@ -9,6 +9,7 @@
 from xgboost import XGBClassifier, XGBRegressor
 
 import doubleml as dml
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
 
 from ...tests._utils import draw_smpls
 from ._utils_irm_manual import boot_irm, fit_irm
@@ -43,12 +44,12 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.01, 0.05])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, score, normalize_ipw, trimming_threshold):
+def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, score, normalize_ipw, clipping_threshold):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 499
@@ -66,7 +67,13 @@ def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, sco
     np.random.seed(3141)
     obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, force_all_x_finite="allow-nan")
     dml_irm_obj = dml.DoubleMLIRM(
-        obj_dml_data, ml_g, ml_m, n_folds, score=score, normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold
+        obj_dml_data,
+        ml_g,
+        ml_m,
+        n_folds,
+        score=score,
+        normalize_ipw=normalize_ipw,
+        ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold),
     )
     # synchronize the sample splitting
     dml_irm_obj.set_sample_splitting(all_smpls=all_smpls)
@@ -83,7 +90,7 @@ def dml_irm_w_missing_fixture(generate_data_irm_w_missings, learner_xgboost, sco
         all_smpls,
         score,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     res_dict = {
diff --git a/doubleml/tests/_utils.py b/doubleml/tests/_utils.py
index 907d03d1..60416246 100644
--- a/doubleml/tests/_utils.py
+++ b/doubleml/tests/_utils.py
@@ -44,7 +44,7 @@ def fit_predict(y, x, ml_model, params, smpls, train_cond=None):
     return y_hat
 
 
-def fit_predict_proba(y, x, ml_model, params, smpls, trimming_threshold=0, train_cond=None):
+def fit_predict_proba(y, x, ml_model, params, smpls, clipping_threshold=0, train_cond=None):
     y_hat = []
     for idx, (train_index, test_index) in enumerate(smpls):
         if params is not None:
@@ -55,9 +55,9 @@ def fit_predict_proba(y, x, ml_model, params, smpls, trimming_threshold=0, train
             train_index_cond = np.intersect1d(train_cond, train_index)
             preds = ml_model.fit(x[train_index_cond], y[train_index_cond]).predict_proba(x[test_index])[:, 1]
 
-        if trimming_threshold > 0:
-            preds[preds < trimming_threshold] = trimming_threshold
-            preds[preds > 1 - trimming_threshold] = 1 - trimming_threshold
+        if clipping_threshold > 0:
+            preds[preds < clipping_threshold] = clipping_threshold
+            preds[preds > 1 - clipping_threshold] = 1 - clipping_threshold
         y_hat.append(preds)
 
     return y_hat
diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index dedb0268..3bec652f 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -18,30 +18,26 @@ class PSProcessorConfig:
 
 # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
 def init_ps_processor(
-    ps_processor_config: Optional[PSProcessorConfig],
-    trimming_rule: Optional[str],
-    trimming_threshold: Optional[float]
+    ps_processor_config: Optional[PSProcessorConfig], trimming_rule: Optional[str], trimming_threshold: Optional[float]
 ):
-    if trimming_rule is not None:
+    if trimming_rule != "truncate":
         warnings.warn(
             "'trimming_rule' is deprecated and will be removed in a future version. "
             "Use 'ps_processor_config' with 'clipping_threshold' instead.",
             DeprecationWarning,
-            stacklevel=3
+            stacklevel=3,
         )
-    if trimming_threshold is not None:
+    if trimming_threshold != 1e-2:
         warnings.warn(
             "'trimming_threshold' is deprecated and will be removed in a future version. "
             "Use 'ps_processor_config' with 'clipping_threshold' instead.",
             DeprecationWarning,
-            stacklevel=3
+            stacklevel=3,
         )
     if ps_processor_config is not None:
         config = ps_processor_config
     else:
-        config = PSProcessorConfig(
-            clipping_threshold=trimming_threshold if trimming_threshold is not None else 1e-2
-        )
+        config = PSProcessorConfig(clipping_threshold=trimming_threshold if trimming_threshold is not None else 1e-2)
     processor = PSProcessor.from_config(config)
     return config, processor
 

From dac26ece4853d98eaf12fe34d02f3bad593c2e6a Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 24 Oct 2025 12:47:32 +0200
Subject: [PATCH 13/38] add test for ps_processor with irm

---
 doubleml/irm/tests/test_irm_ps_processor.py | 59 +++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 doubleml/irm/tests/test_irm_ps_processor.py

diff --git a/doubleml/irm/tests/test_irm_ps_processor.py b/doubleml/irm/tests/test_irm_ps_processor.py
new file mode 100644
index 00000000..efd5fe0c
--- /dev/null
+++ b/doubleml/irm/tests/test_irm_ps_processor.py
@@ -0,0 +1,59 @@
+import numpy as np
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+from doubleml import DoubleMLData, DoubleMLIRM
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
+
+
+@pytest.mark.parametrize(
+    "ps_config",
+    [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ],
+)
+def test_irm_ml_m_predictions_ps_processor(generate_data_irm, ps_config):
+    x, y, d = generate_data_irm
+    dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)
+    np.random.seed(3141)
+    dml_irm = DoubleMLIRM(
+        obj_dml_data=dml_data,
+        ml_g=LinearRegression(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=ps_config,
+        n_rep=1,
+    )
+    dml_irm.fit(store_predictions=True)
+    ml_m_preds = dml_irm.predictions["ml_m"][:, 0, 0]
+    # Just check that predictions are within [clipping_threshold, 1-clipping_threshold]
+    assert np.all(ml_m_preds >= ps_config.clipping_threshold)
+    assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold)
+
+
+def test_irm_ml_m_predictions_ps_processor_differences(generate_data_irm):
+    x, y, d = generate_data_irm
+    dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)
+    np.random.seed(3141)
+    configs = [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ]
+    preds = []
+    for cfg in configs:
+        dml_irm = DoubleMLIRM(
+            obj_dml_data=dml_data,
+            ml_g=LinearRegression(),
+            ml_m=LogisticRegression(),
+            ps_processor_config=cfg,
+            n_rep=1,
+        )
+        dml_irm.fit(store_predictions=True)
+        preds.append(dml_irm.predictions["ml_m"][:, 0, 0])
+    # Check that at least two configurations yield different predictions (element-wise)
+    diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))]
+    assert any(diffs)

From 5ec4648e23775a8cc469aaec50c4250fd5e22c5f Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 24 Oct 2025 15:52:47 +0200
Subject: [PATCH 14/38] update apo and tests for ps_processor_config

---
 doubleml/irm/apo.py                           | 64 ++++++++++++++-----
 doubleml/irm/tests/_utils_apo_manual.py       |  8 +--
 doubleml/irm/tests/test_apo.py                | 18 ++++--
 doubleml/irm/tests/test_apo_classifier.py     |  9 +--
 doubleml/irm/tests/test_apo_exceptions.py     | 16 -----
 .../irm/tests/test_apo_weighted_scores.py     |  7 +-
 6 files changed, 73 insertions(+), 49 deletions(-)

diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py
index 0de311bc..be7e327e 100644
--- a/doubleml/irm/apo.py
+++ b/doubleml/irm/apo.py
@@ -1,4 +1,5 @@
 import warnings
+from typing import Optional
 
 import numpy as np
 import pandas as pd
@@ -9,14 +10,13 @@
 from doubleml.utils._checks import (
     _check_binary_predictions,
     _check_finite_predictions,
-    _check_is_propensity,
     _check_score,
-    _check_trimming,
     _check_weights,
 )
 from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls
-from doubleml.utils._propensity_score import _propensity_score_adjustment, _trimm
+from doubleml.utils._propensity_score import _propensity_score_adjustment
 from doubleml.utils.blp import DoubleMLBLP
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor
 
 
 class DoubleMLAPO(LinearScoreMixin, DoubleML):
@@ -66,13 +66,16 @@ class DoubleMLAPO(LinearScoreMixin, DoubleML):
         Indicates whether the inverse probability weights are normalized.
         Default is ``False``.
 
-    trimming_rule : str
-        A str (``'truncate'`` is the only choice) specifying the trimming approach.
-        Default is ``'truncate'``.
+    trimming_rule : str, optional, deprecated
+        (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach.
+        Use `ps_processor_config` instead. Will be removed in a future version.
 
-    trimming_threshold : float
-        The threshold used for trimming.
-        Default is ``1e-2``.
+    trimming_threshold : float, optional, deprecated
+        (DEPRECATED) The threshold used for trimming.
+        Use `ps_processor_config` instead. Will be removed in a future version.
+
+    ps_processor_config : PSProcessorConfig, optional
+        Configuration for propensity score processing (clipping, calibration, etc.). Default is ``None``.
 
     draw_sample_splitting : bool
         Indicates whether the sample splitting should be drawn during initialization of the object.
@@ -91,8 +94,9 @@ def __init__(
         score="APO",
         weights=None,
         normalize_ipw=False,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        trimming_rule="truncate",  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        trimming_threshold=1e-2,  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        ps_processor_config: Optional[PSProcessorConfig] = None,
         draw_sample_splitting=True,
     ):
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
@@ -131,9 +135,13 @@ def __init__(
             raise TypeError(
                 "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed."
             )
+
+        # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        self._ps_processor_config, self._ps_processor = init_ps_processor(
+            ps_processor_config, trimming_rule, trimming_threshold
+        )
         self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._trimming_threshold = self._ps_processor.clipping_threshold
 
         self._sensitivity_implemented = True
         self._external_predictions_implemented = True
@@ -163,19 +171,44 @@ def normalize_ipw(self):
         """
         return self._normalize_ipw
 
+    @property
+    def ps_processor_config(self):
+        """
+        Configuration for propensity score processing (clipping, calibration, etc.).
+        """
+        return self._ps_processor_config
+
+    @property
+    def ps_processor(self):
+        """
+        Propensity score processor.
+        """
+        return self._ps_processor
+
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_rule(self):
         """
         Specifies the used trimming rule.
         """
+        warnings.warn(
+            "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2
+        )
         return self._trimming_rule
 
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_threshold(self):
         """
         Specifies the used trimming threshold.
         """
-        return self._trimming_threshold
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._ps_processor.clipping_threshold
 
     @property
     def weights(self):
@@ -288,10 +321,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                 return_models=return_models,
             )
             _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
-            _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
 
         # also trimm external predictions
-        m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
+        m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], self.treated, cv=smpls)
 
         psi_a, psi_b = self._score_elements(y, treated, g_hat_d_lvl0["preds"], g_hat_d_lvl1["preds"], m_hat["preds"], smpls)
         psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
diff --git a/doubleml/irm/tests/_utils_apo_manual.py b/doubleml/irm/tests/_utils_apo_manual.py
index 0ec84417..8abcb029 100644
--- a/doubleml/irm/tests/_utils_apo_manual.py
+++ b/doubleml/irm/tests/_utils_apo_manual.py
@@ -21,7 +21,7 @@ def fit_apo(
     g1_params=None,
     m_params=None,
     normalize_ipw=False,
-    trimming_threshold=1e-2,
+    clipping_threshold=1e-2,
 ):
     n_obs = len(y)
     treated = d == treatment_level
@@ -46,7 +46,7 @@ def fit_apo(
             g0_params=g0_params,
             g1_params=g1_params,
             m_params=m_params,
-            trimming_threshold=trimming_threshold,
+            clipping_threshold=clipping_threshold,
         )
 
         all_g_hat0.append(g_hat0)
@@ -83,7 +83,7 @@ def fit_nuisance_apo(
     g0_params=None,
     g1_params=None,
     m_params=None,
-    trimming_threshold=1e-12,
+    clipping_threshold=1e-12,
 ):
     ml_g0 = clone(learner_g)
     ml_g1 = clone(learner_g)
@@ -102,7 +102,7 @@ def fit_nuisance_apo(
         g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1)
 
     ml_m = clone(learner_m)
-    m_hat_list = fit_predict_proba(treated, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold)
+    m_hat_list = fit_predict_proba(treated, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold)
 
     return g_hat0_list, g_hat1_list, m_hat_list
 
diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py
index 7558b7c1..1b41705f 100644
--- a/doubleml/irm/tests/test_apo.py
+++ b/doubleml/irm/tests/test_apo.py
@@ -9,6 +9,7 @@
 
 import doubleml as dml
 from doubleml.irm.datasets import make_irm_data, make_irm_data_discrete_treatments
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
 
 from ...tests._utils import draw_smpls
 from ._utils_apo_manual import boot_apo, fit_apo, fit_sensitivity_elements_apo
@@ -34,7 +35,7 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.2, 0.15])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
@@ -44,7 +45,7 @@ def treatment_level(request):
 
 
 @pytest.fixture(scope="module")
-def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level):
+def dml_apo_fixture(learner, normalize_ipw, clipping_threshold, treatment_level):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 499
@@ -76,7 +77,7 @@ def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level)
         score="APO",
         normalize_ipw=normalize_ipw,
         draw_sample_splitting=False,
-        trimming_threshold=trimming_threshold,
+        ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold),
     )
 
     # synchronize the sample splitting
@@ -94,7 +95,7 @@ def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level)
         all_smpls=all_smpls,
         score="APO",
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     np.random.seed(3141)
@@ -108,7 +109,7 @@ def dml_apo_fixture(learner, normalize_ipw, trimming_threshold, treatment_level)
         score="APO",
         normalize_ipw=normalize_ipw,
         draw_sample_splitting=False,
-        trimming_threshold=trimming_threshold,
+        ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold),
     )
 
     # synchronize the sample splitting
@@ -242,7 +243,12 @@ def test_dml_apo_capo_gapo(treatment_level, cov_type):
     ml_m = RandomForestClassifier(n_estimators=10)
 
     dml_obj = dml.DoubleMLAPO(
-        obj_dml_data, ml_m=ml_m, ml_g=ml_g, treatment_level=treatment_level, trimming_threshold=0.05, n_folds=5
+        obj_dml_data,
+        ml_m=ml_m,
+        ml_g=ml_g,
+        treatment_level=treatment_level,
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.05),
+        n_folds=5,
     )
 
     dml_obj.fit()
diff --git a/doubleml/irm/tests/test_apo_classifier.py b/doubleml/irm/tests/test_apo_classifier.py
index 042f3fe8..0b471956 100644
--- a/doubleml/irm/tests/test_apo_classifier.py
+++ b/doubleml/irm/tests/test_apo_classifier.py
@@ -7,6 +7,7 @@
 from sklearn.linear_model import LogisticRegression
 
 import doubleml as dml
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
 
 from ...tests._utils import draw_smpls
 from ._utils_apo_manual import boot_apo, fit_apo
@@ -32,12 +33,12 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.01, 0.05])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw, trimming_threshold):
+def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw, clipping_threshold):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 499
@@ -64,7 +65,7 @@ def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw,
         n_folds=n_folds,
         score=score,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold),
         draw_sample_splitting=False,
     )
     # synchronize the sample splitting
@@ -82,7 +83,7 @@ def dml_apo_classifier_fixture(generate_data_irm_binary, learner, normalize_ipw,
         all_smpls,
         score,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     res_dict = {
diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py
index 5991ee5e..f428de6b 100644
--- a/doubleml/irm/tests/test_apo_exceptions.py
+++ b/doubleml/irm/tests/test_apo_exceptions.py
@@ -76,22 +76,6 @@ def test_apo_exception_scores():
         _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, score="MAR")
 
 
-@pytest.mark.ci
-def test_apo_exception_trimming_rule():
-    msg = "Invalid trimming_rule discard. Valid trimming_rule truncate."
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule="discard")
-
-    # check the trimming_threshold exceptions
-    msg = "trimming_threshold has to be a float. Object of type <class 'str'> passed."
-    with pytest.raises(TypeError, match=msg):
-        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule="truncate", trimming_threshold="0.1")
-
-    msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5."
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLAPO(dml_data, ml_g, ml_m, treatment_level=0, trimming_rule="truncate", trimming_threshold=0.6)
-
-
 @pytest.mark.ci
 def test_apo_exception_ipw_normalization():
     msg = "Normalization indicator has to be boolean. Object of type <class 'int'> passed."
diff --git a/doubleml/irm/tests/test_apo_weighted_scores.py b/doubleml/irm/tests/test_apo_weighted_scores.py
index 63687ebd..b5ba8a32 100644
--- a/doubleml/irm/tests/test_apo_weighted_scores.py
+++ b/doubleml/irm/tests/test_apo_weighted_scores.py
@@ -5,6 +5,7 @@
 from sklearn.linear_model import LinearRegression, LogisticRegression
 
 import doubleml as dml
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
 
 from ...tests._utils import draw_smpls
 
@@ -39,7 +40,7 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.2, 0.15])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
@@ -49,7 +50,7 @@ def treatment_level(request):
 
 
 @pytest.fixture(scope="module")
-def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normalize_ipw, trimming_threshold, treatment_level):
+def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normalize_ipw, clipping_threshold, treatment_level):
     n_folds = 2
 
     # collect data
@@ -67,7 +68,7 @@ def weighted_apo_score_fixture(generate_data_irm, learner, score, n_rep, normali
         "n_rep": n_rep,
         "score": score,
         "normalize_ipw": normalize_ipw,
-        "trimming_threshold": trimming_threshold,
+        "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold),
         "draw_sample_splitting": False,
     }
 

From 8e57bc7303c88aa923ce366f3226058226cae527 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 24 Oct 2025 16:37:05 +0200
Subject: [PATCH 15/38] add ps_processor to apos

---
 doubleml/irm/apos.py                          | 48 +++++++++++++++----
 doubleml/irm/tests/_utils_apos_manual.py      |  8 ++--
 doubleml/irm/tests/test_apos.py               | 11 ++---
 doubleml/irm/tests/test_apos_classfier.py     | 11 ++---
 doubleml/irm/tests/test_apos_exceptions.py    | 16 -------
 .../irm/tests/test_apos_weighted_scores.py    |  8 ++--
 doubleml/irm/tests/test_irm_vs_apos.py        | 11 +++--
 7 files changed, 63 insertions(+), 50 deletions(-)

diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py
index 5a6d41fc..23e7085e 100644
--- a/doubleml/irm/apos.py
+++ b/doubleml/irm/apos.py
@@ -1,5 +1,7 @@
 import copy
+import warnings
 from collections.abc import Iterable
+from typing import Optional
 
 import numpy as np
 import pandas as pd
@@ -11,10 +13,11 @@
 from doubleml.double_ml_framework import concat
 from doubleml.double_ml_sampling_mixins import SampleSplittingMixin
 from doubleml.irm.apo import DoubleMLAPO
-from doubleml.utils._checks import _check_score, _check_trimming, _check_weights
+from doubleml.utils._checks import _check_score, _check_weights
 from doubleml.utils._descriptive import generate_summary
 from doubleml.utils._sensitivity import _compute_sensitivity_bias
 from doubleml.utils.gain_statistics import gain_statistics
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor
 
 
 class DoubleMLAPOS(SampleSplittingMixin):
@@ -31,8 +34,9 @@ def __init__(
         score="APO",
         weights=None,
         normalize_ipw=False,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        trimming_rule="truncate",  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        trimming_threshold=1e-2,  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        ps_processor_config: Optional[PSProcessorConfig] = None,
         draw_sample_splitting=True,
     ):
         self._dml_data = obj_dml_data
@@ -58,10 +62,12 @@ def __init__(
         # initialize framework which is constructed after the fit method is called
         self._framework = None
 
-        # initialize and check trimming
+        # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        self._ps_processor_config, self._ps_processor = init_ps_processor(
+            ps_processor_config, trimming_rule, trimming_threshold
+        )
         self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._trimming_threshold = self._ps_processor.clipping_threshold
 
         if not isinstance(self.normalize_ipw, bool):
             raise TypeError(
@@ -131,19 +137,44 @@ def normalize_ipw(self):
         """
         return self._normalize_ipw
 
+    @property
+    def ps_processor_config(self):
+        """
+        Configuration for propensity score processing (clipping, calibration, etc.), as returned by ``init_ps_processor``.
+        """
+        return self._ps_processor_config
+
+    @property
+    def ps_processor(self):
+        """
+        Propensity score processor created by ``init_ps_processor`` during initialization.
+        """
+        return self._ps_processor
+
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_rule(self):
         """
         Specifies the used trimming rule.
         """
+        warnings.warn(
+            "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2
+        )
         return self._trimming_rule
 
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_threshold(self):
         """
         Specifies the used trimming threshold.
         """
-        return self._trimming_threshold
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._ps_processor.clipping_threshold
 
     @property
     def weights(self):
@@ -819,8 +850,7 @@ def _initialize_models(self):
             "n_folds": self.n_folds,
             "n_rep": self.n_rep,
             "weights": self.weights,
-            "trimming_rule": self.trimming_rule,
-            "trimming_threshold": self.trimming_threshold,
+            "ps_processor_config": self.ps_processor_config,
             "normalize_ipw": self.normalize_ipw,
             "draw_sample_splitting": False,
         }
diff --git a/doubleml/irm/tests/_utils_apos_manual.py b/doubleml/irm/tests/_utils_apos_manual.py
index 88fc59c2..d1eb575f 100644
--- a/doubleml/irm/tests/_utils_apos_manual.py
+++ b/doubleml/irm/tests/_utils_apos_manual.py
@@ -1,6 +1,8 @@
 import numpy as np
 from sklearn.base import clone
 
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
+
 from ...data.base_data import DoubleMLData
 from ...tests._utils_boot import draw_weights
 from ..apo import DoubleMLAPO
@@ -16,9 +18,8 @@ def fit_apos(
     all_smpls,
     score,
     n_rep=1,
-    trimming_rule="truncate",
     normalize_ipw=False,
-    trimming_threshold=1e-2,
+    clipping_threshold=1e-2,
 ):
     n_obs = len(y)
     n_treatments = len(treatment_levels)
@@ -39,8 +40,7 @@ def fit_apos(
             n_folds=n_folds,
             n_rep=n_rep,
             score=score,
-            trimming_rule=trimming_rule,
-            trimming_threshold=trimming_threshold,
+            ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold),
             normalize_ipw=normalize_ipw,
             draw_sample_splitting=False,
         )
diff --git a/doubleml/irm/tests/test_apos.py b/doubleml/irm/tests/test_apos.py
index 55a48ced..a3897352 100644
--- a/doubleml/irm/tests/test_apos.py
+++ b/doubleml/irm/tests/test_apos.py
@@ -7,6 +7,7 @@
 
 import doubleml as dml
 from doubleml.irm.datasets import make_irm_data, make_irm_data_discrete_treatments
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
 
 from ...tests._utils import confint_manual
 from ._utils_apos_manual import boot_apos, fit_apos
@@ -90,7 +91,7 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.2, 0.15])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
@@ -100,7 +101,7 @@ def treatment_levels(request):
 
 
 @pytest.fixture(scope="module")
-def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatment_levels):
+def dml_apos_fixture(learner, n_rep, normalize_ipw, clipping_threshold, treatment_levels):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 499
@@ -124,8 +125,7 @@ def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatmen
         "n_rep": n_rep,
         "score": "APO",
         "normalize_ipw": normalize_ipw,
-        "trimming_rule": "truncate",
-        "trimming_threshold": trimming_threshold,
+        "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold),
     }
 
     unfitted_apos_model = dml.DoubleMLAPOS(**input_args)
@@ -151,9 +151,8 @@ def dml_apos_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatmen
         all_smpls=all_smpls,
         n_rep=n_rep,
         score="APO",
-        trimming_rule="truncate",
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     ci = dml_obj.confint(joint=False, level=0.95)
diff --git a/doubleml/irm/tests/test_apos_classfier.py b/doubleml/irm/tests/test_apos_classfier.py
index f9cfc10c..a044a979 100644
--- a/doubleml/irm/tests/test_apos_classfier.py
+++ b/doubleml/irm/tests/test_apos_classfier.py
@@ -7,6 +7,7 @@
 
 import doubleml as dml
 from doubleml.irm.datasets import make_irm_data_discrete_treatments
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
 
 from ...tests._utils import confint_manual
 from ._utils_apos_manual import boot_apos, fit_apos
@@ -37,7 +38,7 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.2, 0.15])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
@@ -47,7 +48,7 @@ def treatment_levels(request):
 
 
 @pytest.fixture(scope="module")
-def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshold, treatment_levels):
+def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, clipping_threshold, treatment_levels):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 499
@@ -71,8 +72,7 @@ def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshol
         "n_rep": n_rep,
         "score": "APO",
         "normalize_ipw": normalize_ipw,
-        "trimming_rule": "truncate",
-        "trimming_threshold": trimming_threshold,
+        "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold),
     }
 
     unfitted_apos_model = dml.DoubleMLAPOS(**input_args)
@@ -97,9 +97,8 @@ def dml_apos_classifier_fixture(learner, n_rep, normalize_ipw, trimming_threshol
         treatment_levels=treatment_levels,
         all_smpls=all_smpls,
         score="APO",
-        trimming_rule="truncate",
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     ci = dml_obj.confint(joint=False, level=0.95)
diff --git a/doubleml/irm/tests/test_apos_exceptions.py b/doubleml/irm/tests/test_apos_exceptions.py
index 93274cee..f5b741ef 100644
--- a/doubleml/irm/tests/test_apos_exceptions.py
+++ b/doubleml/irm/tests/test_apos_exceptions.py
@@ -59,22 +59,6 @@ def test_apos_exception_scores():
         _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, score="MAR")
 
 
-@pytest.mark.ci
-def test_apos_exception_trimming_rule():
-    msg = "Invalid trimming_rule discard. Valid trimming_rule truncate."
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule="discard")
-
-    # check the trimming_threshold exceptions
-    msg = "trimming_threshold has to be a float. Object of type <class 'str'> passed."
-    with pytest.raises(TypeError, match=msg):
-        _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule="truncate", trimming_threshold="0.1")
-
-    msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5."
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLAPOS(dml_data, ml_g, ml_m, treatment_levels=0, trimming_rule="truncate", trimming_threshold=0.6)
-
-
 @pytest.mark.ci
 def test_apos_exception_ipw_normalization():
     msg = "Normalization indicator has to be boolean. Object of type <class 'int'> passed."
diff --git a/doubleml/irm/tests/test_apos_weighted_scores.py b/doubleml/irm/tests/test_apos_weighted_scores.py
index 6d0a7f65..e400532d 100644
--- a/doubleml/irm/tests/test_apos_weighted_scores.py
+++ b/doubleml/irm/tests/test_apos_weighted_scores.py
@@ -7,6 +7,7 @@
 
 import doubleml as dml
 from doubleml.irm.datasets import make_irm_data_discrete_treatments
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
 
 
 @pytest.fixture(
@@ -39,7 +40,7 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.2, 0.15])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
@@ -49,7 +50,7 @@ def treatment_levels(request):
 
 
 @pytest.fixture(scope="module")
-def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, trimming_threshold, treatment_levels):
+def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, clipping_threshold, treatment_levels):
     n_obs = 500
     n_folds = 2
 
@@ -71,8 +72,7 @@ def weighted_apos_score_fixture(learner, score, n_rep, normalize_ipw, trimming_t
         "n_rep": n_rep,
         "score": score,
         "normalize_ipw": normalize_ipw,
-        "trimming_threshold": trimming_threshold,
-        "trimming_rule": "truncate",
+        "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold),
     }
 
     np.random.seed(42)
diff --git a/doubleml/irm/tests/test_irm_vs_apos.py b/doubleml/irm/tests/test_irm_vs_apos.py
index a91c8c05..aab0e09e 100644
--- a/doubleml/irm/tests/test_irm_vs_apos.py
+++ b/doubleml/irm/tests/test_irm_vs_apos.py
@@ -8,6 +8,7 @@
 
 import doubleml as dml
 from doubleml.utils._propensity_score import _propensity_score_adjustment
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
 
 
 @pytest.fixture(
@@ -35,12 +36,12 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.2, 0.15])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_threshold):
+def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, clipping_threshold):
 
     # collect data
     (x, y, d) = generate_data_irm
@@ -54,7 +55,7 @@ def dml_irm_apos_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimm
     kwargs = {
         "n_folds": n_folds,
         "n_rep": n_rep,
-        "trimming_threshold": trimming_threshold,
+        "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold),
         "normalize_ipw": normalize_ipw,
     }
 
@@ -159,7 +160,7 @@ def test_apos_vs_irm_sensitivity(dml_irm_apos_fixture):
 
 
 @pytest.fixture(scope="module")
-def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_ipw, trimming_threshold):
+def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_ipw, clipping_threshold):
 
     # collect data
     (x, y, d) = generate_data_irm
@@ -173,7 +174,7 @@ def dml_irm_apos_weighted_fixture(generate_data_irm, learner, n_rep, normalize_i
     kwargs = {
         "n_folds": n_folds,
         "n_rep": n_rep,
-        "trimming_threshold": trimming_threshold,
+        "ps_processor_config": PSProcessorConfig(clipping_threshold=clipping_threshold),
         "normalize_ipw": normalize_ipw,
     }
 

From 0493024298c721ac3eaa7aa7342c9a41d0c07644 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Fri, 24 Oct 2025 16:40:20 +0200
Subject: [PATCH 16/38] add ps_processor test for apo

---
 doubleml/irm/tests/test_apo_ps_processor.py | 72 +++++++++++++++++++++
 doubleml/irm/tests/test_irm_ps_processor.py |  2 +
 2 files changed, 74 insertions(+)
 create mode 100644 doubleml/irm/tests/test_apo_ps_processor.py

diff --git a/doubleml/irm/tests/test_apo_ps_processor.py b/doubleml/irm/tests/test_apo_ps_processor.py
new file mode 100644
index 00000000..d70f2553
--- /dev/null
+++ b/doubleml/irm/tests/test_apo_ps_processor.py
@@ -0,0 +1,72 @@
+import numpy as np
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+from doubleml import DoubleMLAPO, DoubleMLData
+from doubleml.irm.datasets import make_irm_data_discrete_treatments
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
+
+
+@pytest.fixture(scope="module")
+def generate_data_apo():
+    """Return ``(x, y, d)`` drawn from ``make_irm_data_discrete_treatments(n_obs=200)`` under a fixed seed."""
+    # module-scoped: the data is simulated once and shared by all tests in this file
+    np.random.seed(3141)
+    sim = make_irm_data_discrete_treatments(n_obs=200)
+    covariates, outcome, treatment = sim["x"], sim["y"], sim["d"]
+    return covariates, outcome, treatment
+
+
+@pytest.mark.ci
+@pytest.mark.parametrize(
+    "ps_config",
+    [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ],
+)
+def test_apo_ml_m_predictions_ps_processor(generate_data_apo, ps_config):  # ml_m predictions stay inside clipping bounds
+    x, y, d = generate_data_apo
+    dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)
+    np.random.seed(3141)  # fix NumPy's global random state before the model is built and fitted
+    dml_apo = DoubleMLAPO(
+        obj_dml_data=dml_data,
+        ml_g=LinearRegression(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=ps_config,
+        n_rep=1,
+        treatment_level=0,
+    )
+    dml_apo.fit(store_predictions=True)
+    ml_m_preds = dml_apo.predictions["ml_m"][:, 0, 0]  # NOTE(review): presumably first repetition/treatment slice; confirm
+    assert np.all(ml_m_preds >= ps_config.clipping_threshold)  # lower clipping bound
+    assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold)  # upper clipping bound
+
+
+@pytest.mark.ci
+def test_apo_ml_m_predictions_ps_processor_differences(generate_data_apo):
+    """At least one pair of propensity score processing configs must yield different stored ``ml_m`` predictions."""
+    x, y, d = generate_data_apo
+    dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)
+    configs = [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ]
+    preds = []
+    for cfg in configs:
+        np.random.seed(3141)  # reseed per config: identical random state, so only the ps processing can differ
+        dml_apo = DoubleMLAPO(
+            obj_dml_data=dml_data,
+            ml_g=LinearRegression(),
+            ml_m=LogisticRegression(),
+            ps_processor_config=cfg,
+            n_rep=1,
+            treatment_level=0,
+        )
+        dml_apo.fit(store_predictions=True)
+        preds.append(dml_apo.predictions["ml_m"][:, 0, 0])
+    assert any(not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds)))
diff --git a/doubleml/irm/tests/test_irm_ps_processor.py b/doubleml/irm/tests/test_irm_ps_processor.py
index efd5fe0c..33ae66f4 100644
--- a/doubleml/irm/tests/test_irm_ps_processor.py
+++ b/doubleml/irm/tests/test_irm_ps_processor.py
@@ -6,6 +6,7 @@
 from doubleml.utils.propensity_score_processing import PSProcessorConfig
 
 
+@pytest.mark.ci
 @pytest.mark.parametrize(
     "ps_config",
     [
@@ -33,6 +34,7 @@ def test_irm_ml_m_predictions_ps_processor(generate_data_irm, ps_config):
     assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold)
 
 
+@pytest.mark.ci
 def test_irm_ml_m_predictions_ps_processor_differences(generate_data_irm):
     x, y, d = generate_data_irm
     dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)

From 5b60bbf6cb991475a7eca4aa71f18645955015a9 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 08:00:45 +0100
Subject: [PATCH 17/38] update IIVM with psprocessor

---
 doubleml/irm/iivm.py                       | 67 ++++++++++++++++------
 doubleml/irm/tests/_utils_iivm_manual.py   |  8 +--
 doubleml/irm/tests/test_iivm.py            |  9 +--
 doubleml/irm/tests/test_iivm_classifier.py |  8 +--
 doubleml/irm/tests/test_iivm_subgroups.py  |  9 +--
 5 files changed, 69 insertions(+), 32 deletions(-)

diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py
index 4eaa1d50..7f330cfb 100644
--- a/doubleml/irm/iivm.py
+++ b/doubleml/irm/iivm.py
@@ -1,3 +1,6 @@
+import warnings
+from typing import Optional
+
 import numpy as np
 from scipy.stats import norm
 from sklearn.utils import check_X_y
@@ -11,10 +14,10 @@
     _check_finite_predictions,
     _check_is_propensity,
     _check_score,
-    _check_trimming,
 )
 from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _solve_quadratic_inequality
-from doubleml.utils._propensity_score import _normalize_ipw, _trimm
+from doubleml.utils._propensity_score import _normalize_ipw
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor
 
 
 class DoubleMLIIVM(LinearScoreMixin, DoubleML):
@@ -64,13 +67,16 @@ class DoubleMLIIVM(LinearScoreMixin, DoubleML):
         Indicates whether the inverse probability weights are normalized.
         Default is ``False``.
 
-    trimming_rule : str
-        A str (``'truncate'`` is the only choice) specifying the trimming approach.
-        Default is ``'truncate'``.
+    trimming_rule : str, optional, deprecated
+        (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach.
+        Use `ps_processor_config` instead. Will be removed in a future version.
+
+    trimming_threshold : float, optional, deprecated
+        (DEPRECATED) The threshold used for trimming.
+        Use `ps_processor_config` instead. Will be removed in a future version.
 
-    trimming_threshold : float
-        The threshold used for trimming.
-        Default is ``1e-2``.
+    ps_processor_config : PSProcessorConfig, optional
+        Configuration for propensity score processing (clipping, calibration, etc.).
 
     draw_sample_splitting : bool
         Indicates whether the sample splitting should be drawn during initialization of the object.
@@ -135,8 +141,9 @@ def __init__(
         score="LATE",
         subgroups=None,
         normalize_ipw=False,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        trimming_rule="truncate",  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        trimming_threshold=1e-2,  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        ps_processor_config: Optional[PSProcessorConfig] = None,
         draw_sample_splitting=True,
     ):
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
@@ -172,9 +179,13 @@ def __init__(
             raise TypeError(
                 "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed."
             )
+
+        # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        self._ps_processor_config, self._ps_processor = init_ps_processor(
+            ps_processor_config, trimming_rule, trimming_threshold
+        )
         self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._trimming_threshold = self._ps_processor.clipping_threshold
 
         if subgroups is None:
             # this is the default for subgroups; via None to prevent a mutable default argument
@@ -213,19 +224,44 @@ def normalize_ipw(self):
         """
         return self._normalize_ipw
 
+    @property
+    def ps_processor_config(self):
+        """
+        Configuration for propensity score processing (clipping, calibration, etc.), as returned by ``init_ps_processor``.
+        """
+        return self._ps_processor_config
+
+    @property
+    def ps_processor(self):
+        """
+        Propensity score processor; ``_nuisance_est`` applies its ``adjust_ps`` to the ``ml_m`` predictions.
+        """
+        return self._ps_processor
+
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_rule(self):
         """
         Specifies the used trimming rule.
         """
+        warnings.warn(
+            "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2
+        )
         return self._trimming_rule
 
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_threshold(self):
         """
         Specifies the used trimming threshold.
         """
-        return self._trimming_threshold
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._ps_processor.clipping_threshold
 
     def _initialize_ml_nuisance_params(self):
         valid_learner = ["ml_g0", "ml_g1", "ml_m", "ml_r0", "ml_r1"]
@@ -330,9 +366,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                 return_models=return_models,
             )
             _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
-            _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
-        # also trimm external predictions
-        m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
+
+        m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], z, cv=smpls)
 
         # nuisance r
         r0 = external_predictions["ml_r0"] is not None
diff --git a/doubleml/irm/tests/_utils_iivm_manual.py b/doubleml/irm/tests/_utils_iivm_manual.py
index b61526b2..601604ee 100644
--- a/doubleml/irm/tests/_utils_iivm_manual.py
+++ b/doubleml/irm/tests/_utils_iivm_manual.py
@@ -23,7 +23,7 @@ def fit_iivm(
     r0_params=None,
     r1_params=None,
     normalize_ipw=True,
-    trimming_threshold=1e-2,
+    clipping_threshold=1e-2,
     always_takers=True,
     never_takers=True,
 ):
@@ -53,7 +53,7 @@ def fit_iivm(
             m_params=m_params,
             r0_params=r0_params,
             r1_params=r1_params,
-            trimming_threshold=trimming_threshold,
+            clipping_threshold=clipping_threshold,
             always_takers=always_takers,
             never_takers=never_takers,
         )
@@ -98,7 +98,7 @@ def fit_nuisance_iivm(
     m_params=None,
     r0_params=None,
     r1_params=None,
-    trimming_threshold=1e-12,
+    clipping_threshold=1e-12,
     always_takers=True,
     never_takers=True,
 ):
@@ -117,7 +117,7 @@ def fit_nuisance_iivm(
         g_hat1_list = fit_predict(y, x, ml_g1, g1_params, smpls, train_cond=train_cond1)
 
     ml_m = clone(learner_m)
-    m_hat_list = fit_predict_proba(z, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold)
+    m_hat_list = fit_predict_proba(z, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold)
 
     ml_r0 = clone(learner_r)
     if always_takers:
diff --git a/doubleml/irm/tests/test_iivm.py b/doubleml/irm/tests/test_iivm.py
index 169f4175..1c049e26 100644
--- a/doubleml/irm/tests/test_iivm.py
+++ b/doubleml/irm/tests/test_iivm.py
@@ -34,12 +34,12 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.01, 0.05])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming_threshold):
+def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, clipping_threshold):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 491
@@ -72,7 +72,8 @@ def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming
         n_folds,
         draw_sample_splitting=False,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
+        score=score,
     )
     # synchronize the sample splitting
     dml_iivm_obj.set_sample_splitting(all_smpls=all_smpls)
@@ -91,7 +92,7 @@ def dml_iivm_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming
         all_smpls,
         score,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     res_dict = {
diff --git a/doubleml/irm/tests/test_iivm_classifier.py b/doubleml/irm/tests/test_iivm_classifier.py
index 983c34a7..78096031 100644
--- a/doubleml/irm/tests/test_iivm_classifier.py
+++ b/doubleml/irm/tests/test_iivm_classifier.py
@@ -34,12 +34,12 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.01, 0.05])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, normalize_ipw, trimming_threshold):
+def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, normalize_ipw, clipping_threshold):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 491
@@ -63,7 +63,7 @@ def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, norma
         ml_r,
         n_folds,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
         draw_sample_splitting=False,
     )
     # synchronize the sample splitting
@@ -83,7 +83,7 @@ def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, norma
         all_smpls,
         score,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     res_dict = {
diff --git a/doubleml/irm/tests/test_iivm_subgroups.py b/doubleml/irm/tests/test_iivm_subgroups.py
index 906ed897..8633f6c8 100644
--- a/doubleml/irm/tests/test_iivm_subgroups.py
+++ b/doubleml/irm/tests/test_iivm_subgroups.py
@@ -30,7 +30,7 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.01])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
@@ -47,7 +47,7 @@ def subgroups(request):
 
 
 @pytest.fixture(scope="module")
-def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw, trimming_threshold, subgroups):
+def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw, clipping_threshold, subgroups):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 491
@@ -73,9 +73,10 @@ def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw
         ml_m,
         ml_r,
         n_folds,
+        score=score,
         subgroups=subgroups,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
         draw_sample_splitting=False,
     )
     # synchronize the sample splitting
@@ -99,7 +100,7 @@ def dml_iivm_subgroups_fixture(generate_data_iivm, learner, score, normalize_ipw
         all_smpls,
         score,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
         always_takers=subgroups["always_takers"],
         never_takers=subgroups["never_takers"],
     )

From a30f1ae4db597e548b5219e0905104cd323af141 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 08:28:05 +0100
Subject: [PATCH 18/38] add ps_processor to ssm

---
 doubleml/irm/ssm.py                         | 62 +++++++++++++++-----
 doubleml/irm/tests/_utils_ssm_manual.py     | 10 ++--
 doubleml/irm/tests/test_ssm.py              | 28 +++++++--
 doubleml/irm/tests/test_ssm_exceptions.py   | 16 ------
 doubleml/irm/tests/test_ssm_ps_processor.py | 63 +++++++++++++++++++++
 5 files changed, 138 insertions(+), 41 deletions(-)
 create mode 100644 doubleml/irm/tests/test_ssm_ps_processor.py

diff --git a/doubleml/irm/ssm.py b/doubleml/irm/ssm.py
index 00a49191..fdc2ab6e 100644
--- a/doubleml/irm/ssm.py
+++ b/doubleml/irm/ssm.py
@@ -1,5 +1,6 @@
 import copy
 import warnings
+from typing import Optional
 
 import numpy as np
 from sklearn.base import clone
@@ -9,11 +10,12 @@
 from doubleml.data.ssm_data import DoubleMLSSMData
 from doubleml.double_ml import DoubleML
 from doubleml.double_ml_score_mixins import LinearScoreMixin
-from doubleml.utils._checks import _check_finite_predictions, _check_score, _check_trimming
+from doubleml.utils._checks import _check_finite_predictions, _check_score
 from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d, _predict_zero_one_propensity
-from doubleml.utils._propensity_score import _trimm
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor
 
 
+# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
 class DoubleMLSSM(LinearScoreMixin, DoubleML):
     """Double machine learning for sample selection models
 
@@ -50,13 +52,16 @@ class DoubleMLSSM(LinearScoreMixin, DoubleML):
         Indicates whether the inverse probability weights are normalized.
         Default is ``False``.
 
-    trimming_rule : str
-        A str (``'truncate'`` is the only choice) specifying the trimming approach.
-        Default is ``'truncate'``.
+    trimming_rule : str, optional, deprecated
+        (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach.
+        Use `ps_processor_config` instead. Will be removed in a future version.
 
-    trimming_threshold : float
-        The threshold used for trimming.
-        Default is ``1e-2``.
+    trimming_threshold : float, optional, deprecated
+        (DEPRECATED) The threshold used for trimming.
+        Use `ps_processor_config` instead. Will be removed in a future version.
+
+    ps_processor_config : PSProcessorConfig, optional
+        Configuration for propensity score processing (clipping, calibration, etc.).
 
     draw_sample_splitting : bool
         Indicates whether the sample splitting should be drawn during initialization of the object.
@@ -109,8 +114,9 @@ def __init__(
         n_rep=1,
         score="missing-at-random",
         normalize_ipw=False,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        trimming_rule="truncate",  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        trimming_threshold=1e-2,  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        ps_processor_config: Optional[PSProcessorConfig] = None,
         draw_sample_splitting=True,
     ):
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
@@ -119,9 +125,12 @@ def __init__(
         self._sensitivity_implemented = False
         self._normalize_ipw = normalize_ipw
 
+        # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        self._ps_processor_config, self._ps_processor = init_ps_processor(
+            ps_processor_config, trimming_rule, trimming_threshold
+        )
         self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._trimming_threshold = self._ps_processor.clipping_threshold
 
         self._check_data(self._dml_data)
         self._is_cluster_data = self._dml_data.is_cluster_data
@@ -165,19 +174,44 @@ def normalize_ipw(self):
         """
         return self._normalize_ipw
 
+    @property
+    def ps_processor_config(self):
+        """
+        Configuration for propensity score processing (clipping, calibration, etc.).
+        """
+        return self._ps_processor_config
+
+    @property
+    def ps_processor(self):
+        """
+        Propensity score processor.
+        """
+        return self._ps_processor
+
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_rule(self):
         """
         Specifies the used trimming rule.
         """
+        # Point users at the replacement; stacklevel=2 attributes the warning to the accessing code.
+        msg = "'trimming_rule' is deprecated and will be removed in a future version. Use 'ps_processor_config' instead."
+        warnings.warn(msg, DeprecationWarning, stacklevel=2)
         return self._trimming_rule
 
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_threshold(self):
         """
         Specifies the used trimming threshold.
         """
-        return self._trimming_threshold
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._ps_processor.clipping_threshold
 
     def _initialize_ml_nuisance_params(self):
         valid_learner = ["ml_g_d0", "ml_g_d1", "ml_pi", "ml_m"]
@@ -369,7 +403,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                     pi_hat["models"] = fitted_models["ml_pi"]
                     m_hat["models"] = fitted_models["ml_m"]
 
-        m_hat["preds"] = _trimm(m_hat["preds"], self._trimming_rule, self._trimming_threshold)
+        m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls)
 
         # treatment indicator
         dtreat = d == 1
diff --git a/doubleml/irm/tests/_utils_ssm_manual.py b/doubleml/irm/tests/_utils_ssm_manual.py
index f14a1f66..07014018 100644
--- a/doubleml/irm/tests/_utils_ssm_manual.py
+++ b/doubleml/irm/tests/_utils_ssm_manual.py
@@ -19,7 +19,7 @@ def fit_selection(
     all_smpls,
     score,
     trimming_rule="truncate",
-    trimming_threshold=1e-2,
+    clipping_threshold=1e-2,
     normalize_ipw=True,
     n_rep=1,
     g_d0_params=None,
@@ -55,7 +55,7 @@ def fit_selection(
             smpls,
             score,
             trimming_rule=trimming_rule,
-            trimming_threshold=trimming_threshold,
+            clipping_threshold=clipping_threshold,
             g_d0_params=g_d0_params,
             g_d1_params=g_d1_params,
             pi_params=pi_params,
@@ -108,7 +108,7 @@ def fit_nuisance_selection(
     smpls,
     score,
     trimming_rule="truncate",
-    trimming_threshold=1e-2,
+    clipping_threshold=1e-2,
     g_d0_params=None,
     g_d1_params=None,
     pi_params=None,
@@ -125,7 +125,7 @@ def fit_nuisance_selection(
         dx = np.column_stack((d, x, z))
 
     if score == "missing-at-random":
-        pi_hat_list = fit_predict_proba(s, dx, ml_pi, pi_params, smpls, trimming_threshold=trimming_threshold)
+        pi_hat_list = fit_predict_proba(s, dx, ml_pi, pi_params, smpls, clipping_threshold=clipping_threshold)
 
         m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls)
 
@@ -212,7 +212,7 @@ def fit_nuisance_selection(
             # predict conditional outcome
             g_hat_d0 = ml_g_d0.predict(xpi_test)
 
-            m_hat = _trimm(m_hat, trimming_rule, trimming_threshold)
+            m_hat = _trimm(m_hat, trimming_rule, clipping_threshold)
 
             # append predictions on test sample to final list of predictions
             g_hat_d1_list.append(g_hat_d1)
diff --git a/doubleml/irm/tests/test_ssm.py b/doubleml/irm/tests/test_ssm.py
index c561d9fe..735c6471 100644
--- a/doubleml/irm/tests/test_ssm.py
+++ b/doubleml/irm/tests/test_ssm.py
@@ -26,14 +26,14 @@ def normalize_ipw(request):
     return request.param
 
 
-@pytest.fixture(scope="module", params=[0.01])
-def trimming_threshold(request):
+@pytest.fixture(scope="module", params=[0.01, 0.05])
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
 def dml_selection_fixture(
-    generate_data_selection_mar, generate_data_selection_nonignorable, learner, score, trimming_threshold, normalize_ipw
+    generate_data_selection_mar, generate_data_selection_nonignorable, learner, score, clipping_threshold, normalize_ipw
 ):
     n_folds = 3
 
@@ -55,11 +55,27 @@ def dml_selection_fixture(
     np.random.seed(42)
     if score == "missing-at-random":
         obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s)
-        dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score)
+        dml_sel_obj = dml.DoubleMLSSM(
+            obj_dml_data,
+            ml_g,
+            ml_pi,
+            ml_m,
+            n_folds=n_folds,
+            score=score,
+            ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
+        )
     else:
         assert score == "nonignorable"
         obj_dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=z, s=s)
-        dml_sel_obj = dml.DoubleMLSSM(obj_dml_data, ml_g, ml_pi, ml_m, n_folds=n_folds, score=score)
+        dml_sel_obj = dml.DoubleMLSSM(
+            obj_dml_data,
+            ml_g,
+            ml_pi,
+            ml_m,
+            n_folds=n_folds,
+            score=score,
+            ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
+        )
 
     np.random.seed(42)
     dml_sel_obj.set_sample_splitting(all_smpls=all_smpls)
@@ -78,7 +94,7 @@ def dml_selection_fixture(
         all_smpls,
         score,
         trimming_rule="truncate",
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
         normalize_ipw=normalize_ipw,
     )
 
diff --git a/doubleml/irm/tests/test_ssm_exceptions.py b/doubleml/irm/tests/test_ssm_exceptions.py
index 6df76908..4ca9f263 100644
--- a/doubleml/irm/tests/test_ssm_exceptions.py
+++ b/doubleml/irm/tests/test_ssm_exceptions.py
@@ -62,22 +62,6 @@ def test_ssm_exception_scores():
         _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, score=0)
 
 
-@pytest.mark.ci
-def test_ssm_exception_trimming_rule():
-    msg = "Invalid trimming_rule discard. Valid trimming_rule truncate."
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule="discard")
-
-    # check the trimming_threshold exceptions
-    msg = "trimming_threshold has to be a float. Object of type <class 'str'> passed."
-    with pytest.raises(TypeError, match=msg):
-        _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule="truncate", trimming_threshold="0.1")
-
-    msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5."
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLSSM(dml_data_mar, ml_g, ml_pi, ml_m, trimming_rule="truncate", trimming_threshold=0.6)
-
-
 @pytest.mark.ci
 def test_ssm_exception_ipw_normalization():
     msg = "Normalization indicator has to be boolean. Object of type <class 'int'> passed."
diff --git a/doubleml/irm/tests/test_ssm_ps_processor.py b/doubleml/irm/tests/test_ssm_ps_processor.py
new file mode 100644
index 00000000..c0627699
--- /dev/null
+++ b/doubleml/irm/tests/test_ssm_ps_processor.py
@@ -0,0 +1,63 @@
+import numpy as np
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+import doubleml as dml
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
+
+
+@pytest.mark.ci
+@pytest.mark.parametrize(
+    "ps_config",
+    [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ],
+)
+def test_ssm_ml_m_predictions_ps_processor(generate_data_selection_mar, ps_config):
+    x, y, d, _, s = generate_data_selection_mar
+    dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s)
+    np.random.seed(3141)
+    dml_ssm = dml.DoubleMLSSM(
+        obj_dml_data=dml_data,
+        ml_g=LinearRegression(),
+        ml_pi=LogisticRegression(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=ps_config,
+        n_rep=1,
+    )
+    dml_ssm.fit(store_predictions=True)
+    ml_m_preds = dml_ssm.predictions["ml_m"][:, 0, 0]
+    # Stored ml_m predictions must lie in [clipping_threshold, 1 - clipping_threshold] for every config
+    assert np.all(ml_m_preds >= ps_config.clipping_threshold)
+    assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold)
+
+
+@pytest.mark.ci
+def test_ssm_ml_m_predictions_ps_processor_differences(generate_data_selection_mar):
+    x, y, d, _, s = generate_data_selection_mar
+    dml_data = dml.DoubleMLSSMData.from_arrays(x, y, d, z=None, s=s)
+    configs = [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ]
+    preds = []
+    for cfg in configs:
+        np.random.seed(3141)  # re-seed per config so every fit starts from the same random state
+        dml_ssm = dml.DoubleMLSSM(
+            obj_dml_data=dml_data,
+            ml_g=LinearRegression(),
+            ml_pi=LogisticRegression(),
+            ml_m=LogisticRegression(),
+            ps_processor_config=cfg,
+            n_rep=1,
+        )
+        dml_ssm.fit(store_predictions=True)
+        preds.append(dml_ssm.predictions["ml_m"][:, 0, 0])
+    # Check that at least two configurations yield different predictions (element-wise)
+    diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))]
+    assert any(diffs)

From 5d1a822c9406481c94703c2615a6900f070c3f1e Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 08:28:17 +0100
Subject: [PATCH 19/38] add test for iivm ps processor with fixture

---
 doubleml/irm/tests/test_iivm_ps_processor.py | 68 ++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 doubleml/irm/tests/test_iivm_ps_processor.py

diff --git a/doubleml/irm/tests/test_iivm_ps_processor.py b/doubleml/irm/tests/test_iivm_ps_processor.py
new file mode 100644
index 00000000..e8eed01f
--- /dev/null
+++ b/doubleml/irm/tests/test_iivm_ps_processor.py
@@ -0,0 +1,68 @@
+import numpy as np
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+from doubleml import DoubleMLData
+from doubleml.irm.iivm import DoubleMLIIVM
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
+
+
+@pytest.fixture
+def dml_data_iivm(generate_data_iivm):
+    data = generate_data_iivm
+    x_cols = data.columns[data.columns.str.startswith("X")].tolist()  # all columns whose name starts with "X"
+    dml_data = DoubleMLData(data, "y", ["d"], x_cols, "z")
+    return dml_data
+
+
+@pytest.mark.ci
+@pytest.mark.parametrize(
+    "ps_config",
+    [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ],
+)
+def test_iivm_ml_m_predictions_ps_processor(dml_data_iivm, ps_config):
+    np.random.seed(3141)
+    dml_iivm = DoubleMLIIVM(
+        obj_dml_data=dml_data_iivm,
+        ml_g=LinearRegression(),
+        ml_m=LogisticRegression(),
+        ml_r=LogisticRegression(),
+        ps_processor_config=ps_config,
+        n_rep=1,
+    )
+    dml_iivm.fit(store_predictions=True)
+    ml_m_preds = dml_iivm.predictions["ml_m"][:, 0, 0]
+    # Stored ml_m predictions must lie in [clipping_threshold, 1 - clipping_threshold] for every config
+    assert np.all(ml_m_preds >= ps_config.clipping_threshold)
+    assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold)
+
+
+@pytest.mark.ci
+def test_iivm_ml_m_predictions_ps_processor_differences(dml_data_iivm):
+    configs = [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ]
+    preds = []
+    for cfg in configs:
+        np.random.seed(3141)  # re-seed per config so every fit starts from the same random state
+        dml_iivm = DoubleMLIIVM(
+            obj_dml_data=dml_data_iivm,
+            ml_g=LinearRegression(),
+            ml_m=LogisticRegression(),
+            ml_r=LogisticRegression(),
+            ps_processor_config=cfg,
+            n_rep=1,
+        )
+        dml_iivm.fit(store_predictions=True)
+        preds.append(dml_iivm.predictions["ml_m"][:, 0, 0])
+    # Check that at least two configurations yield different predictions (element-wise)
+    diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))]
+    assert any(diffs)

From f97be67221219a240fdbd4c34dcb9936fece4e1b Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 09:04:28 +0100
Subject: [PATCH 20/38] add ps_processor to cvar

---
 doubleml/irm/cvar.py                         | 69 +++++++++++++++-----
 doubleml/irm/irm.py                          |  2 +-
 doubleml/irm/tests/_utils_cvar_manual.py     | 16 ++---
 doubleml/irm/tests/test_cvar.py              |  8 +--
 doubleml/irm/tests/test_cvar_ps_processor.py | 65 ++++++++++++++++++
 doubleml/irm/tests/test_cvar_tune.py         |  4 +-
 6 files changed, 131 insertions(+), 33 deletions(-)
 create mode 100644 doubleml/irm/tests/test_cvar_ps_processor.py

diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py
index 6d29f5e2..64e82ad8 100644
--- a/doubleml/irm/cvar.py
+++ b/doubleml/irm/cvar.py
@@ -1,3 +1,6 @@
+import warnings
+from typing import Optional
+
 import numpy as np
 from sklearn.base import clone
 from sklearn.model_selection import StratifiedKFold, train_test_split
@@ -11,7 +14,6 @@
     _check_quantile,
     _check_score,
     _check_treatment,
-    _check_trimming,
     _check_zero_one_treatment,
 )
 from doubleml.utils._estimation import (
@@ -22,9 +24,11 @@
     _predict_zero_one_propensity,
     _solve_ipw_score,
 )
-from doubleml.utils._propensity_score import _normalize_ipw, _trimm
+from doubleml.utils._propensity_score import _normalize_ipw
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor
 
 
+# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
 class DoubleMLCVAR(LinearScoreMixin, DoubleML):
     """Double machine learning for conditional value at risk for potential outcomes
 
@@ -66,13 +70,16 @@ class DoubleMLCVAR(LinearScoreMixin, DoubleML):
         Indicates whether the inverse probability weights are normalized.
         Default is ``True``.
 
-    trimming_rule : str
-        A str (``'truncate'`` is the only choice) specifying the trimming approach.
-        Default is ``'truncate'``.
+    trimming_rule : str, optional, deprecated
+        (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach.
+        Use `ps_processor_config` instead. Will be removed in a future version.
+
+    trimming_threshold : float, optional, deprecated
+        (DEPRECATED) The threshold used for trimming.
+        Use `ps_processor_config` instead. Will be removed in a future version.
 
-    trimming_threshold : float
-        The threshold used for trimming.
-        Default is ``1e-2``.
+    ps_processor_config : PSProcessorConfig, optional
+        Configuration for propensity score processing (clipping, calibration, etc.).
 
     draw_sample_splitting : bool
         Indicates whether the sample splitting should be drawn during initialization of the object.
@@ -107,8 +114,9 @@ def __init__(
         n_rep=1,
         score="CVaR",
         normalize_ipw=True,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        trimming_rule="truncate",  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        trimming_threshold=1e-2,  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        ps_processor_config: Optional[PSProcessorConfig] = None,
         draw_sample_splitting=True,
     ):
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
@@ -139,10 +147,12 @@ def __init__(
         if draw_sample_splitting:
             self.draw_sample_splitting()
 
-        # initialize and check trimming
+        # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        self._ps_processor_config, self._ps_processor = init_ps_processor(
+            ps_processor_config, trimming_rule, trimming_threshold
+        )
         self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._trimming_threshold = self._ps_processor.clipping_threshold
 
         _ = self._check_learner(ml_g, "ml_g", regressor=True, classifier=False)
         _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
@@ -172,19 +182,44 @@ def normalize_ipw(self):
         """
         return self._normalize_ipw
 
+    @property
+    def ps_processor_config(self):
+        """
+        Configuration for propensity score processing (clipping, calibration, etc.).
+        """
+        return self._ps_processor_config
+
+    @property
+    def ps_processor(self):
+        """
+        Propensity score processor.
+        """
+        return self._ps_processor
+
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_rule(self):
         """
         Specifies the used trimming rule.
         """
+        # Point users at the replacement; stacklevel=2 attributes the warning to the accessing code.
+        msg = "'trimming_rule' is deprecated and will be removed in a future version. Use 'ps_processor_config' instead."
+        warnings.warn(msg, DeprecationWarning, stacklevel=2)
         return self._trimming_rule
 
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_threshold(self):
         """
         Specifies the used trimming threshold.
         """
-        return self._trimming_threshold
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._ps_processor.clipping_threshold
 
     def _compute_ipw_score(self, theta, d, y, prop):
         score = (d == self.treatment) / prop * (y <= theta) - self.quantile
@@ -254,7 +289,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                 "preds"
             ]
 
-            m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold)
+            m_hat_prelim = self._ps_processor.adjust_ps(m_hat_prelim, d_train_1, cv=smpls_prelim)
 
             if self._normalize_ipw:
                 m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1)
@@ -304,9 +339,7 @@ def ipw_score(theta):
             g_hat["models"] = fitted_models["ml_g"]
             m_hat["models"] = fitted_models["ml_m"]
 
-        # clip propensities and normalize ipw weights
-        m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
-
+        m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls)
         # this is not done in the score to be equivalent to PQ models
         if self._normalize_ipw:
             m_hat_adj = _normalize_ipw(m_hat["preds"], d)
diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
index 29c90c87..e880c48e 100644
--- a/doubleml/irm/irm.py
+++ b/doubleml/irm/irm.py
@@ -362,7 +362,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
             )
             _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
 
-        m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], self._dml_data.d, cv=smpls)
+        m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls)
 
         psi_a, psi_b = self._score_elements(y, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], smpls)
         psi_elements = {"psi_a": psi_a, "psi_b": psi_b}
diff --git a/doubleml/irm/tests/_utils_cvar_manual.py b/doubleml/irm/tests/_utils_cvar_manual.py
index dd6935b6..8d9f0120 100644
--- a/doubleml/irm/tests/_utils_cvar_manual.py
+++ b/doubleml/irm/tests/_utils_cvar_manual.py
@@ -18,7 +18,7 @@ def fit_cvar(
     treatment,
     normalize_ipw=True,
     n_rep=1,
-    trimming_threshold=1e-2,
+    clipping_threshold=1e-2,
     g_params=None,
     m_params=None,
 ):
@@ -40,7 +40,7 @@ def fit_cvar(
             smpls,
             treatment,
             normalize_ipw=normalize_ipw,
-            trimming_threshold=trimming_threshold,
+            clipping_threshold=clipping_threshold,
             g_params=g_params,
             m_params=m_params,
         )
@@ -56,7 +56,7 @@ def fit_cvar(
 
 
 def fit_nuisance_cvar(
-    y, x, d, quantile, learner_g, learner_m, smpls, treatment, normalize_ipw, trimming_threshold, g_params, m_params
+    y, x, d, quantile, learner_g, learner_m, smpls, treatment, normalize_ipw, clipping_threshold, g_params, m_params
 ):
     n_folds = len(smpls)
     n_obs = len(y)
@@ -95,7 +95,7 @@ def fit_nuisance_cvar(
         x_train_1 = x[train_inds_1, :]
         # todo change prediction method
         m_hat_prelim_list = fit_predict_proba(
-            d_train_1, x_train_1, ml_m, params=None, trimming_threshold=trimming_threshold, smpls=smpls_prelim
+            d_train_1, x_train_1, ml_m, params=None, clipping_threshold=clipping_threshold, smpls=smpls_prelim
         )
 
         m_hat_prelim = np.full_like(y_train_1, np.nan, dtype="float64")
@@ -104,8 +104,8 @@ def fit_nuisance_cvar(
 
         m_hat_prelim = _dml_cv_predict(ml_m, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)["preds"]
 
-        m_hat_prelim[m_hat_prelim < trimming_threshold] = trimming_threshold
-        m_hat_prelim[m_hat_prelim > 1 - trimming_threshold] = 1 - trimming_threshold
+        m_hat_prelim[m_hat_prelim < clipping_threshold] = clipping_threshold
+        m_hat_prelim[m_hat_prelim > 1 - clipping_threshold] = 1 - clipping_threshold
 
         if normalize_ipw:
             m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1)
@@ -141,8 +141,8 @@ def ipw_score(theta):
         ml_m.fit(x[train_inds, :], d[train_inds])
         m_hat[test_inds] = ml_m.predict_proba(x[test_inds, :])[:, 1]
 
-    m_hat[m_hat < trimming_threshold] = trimming_threshold
-    m_hat[m_hat > 1 - trimming_threshold] = 1 - trimming_threshold
+    m_hat[m_hat < clipping_threshold] = clipping_threshold
+    m_hat[m_hat > 1 - clipping_threshold] = 1 - clipping_threshold
 
     if normalize_ipw:
         m_hat = _normalize_ipw(m_hat, d)
diff --git a/doubleml/irm/tests/test_cvar.py b/doubleml/irm/tests/test_cvar.py
index 0eee71c6..d6b08a1c 100644
--- a/doubleml/irm/tests/test_cvar.py
+++ b/doubleml/irm/tests/test_cvar.py
@@ -42,12 +42,12 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.01, 0.05])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, trimming_threshold):
+def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, clipping_threshold):
     n_folds = 3
 
     # Set machine learning methods for m & g
@@ -71,7 +71,7 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, norm
         n_folds=n_folds,
         n_rep=1,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=trimming_threshold,
+        ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
         draw_sample_splitting=False,
     )
 
@@ -91,7 +91,7 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner, norm
         treatment,
         normalize_ipw=normalize_ipw,
         n_rep=1,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     res_dict = {
diff --git a/doubleml/irm/tests/test_cvar_ps_processor.py b/doubleml/irm/tests/test_cvar_ps_processor.py
new file mode 100644
index 00000000..6c78162b
--- /dev/null
+++ b/doubleml/irm/tests/test_cvar_ps_processor.py
@@ -0,0 +1,65 @@
+import numpy as np
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+import doubleml as dml
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
+
+
+@pytest.mark.ci
+@pytest.mark.parametrize(
+    "ps_config",
+    [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ],
+)
+def test_cvar_ml_m_predictions_ps_processor(generate_data_quantiles, ps_config):
+    x, y, d = generate_data_quantiles
+    dml_data = dml.DoubleMLData.from_arrays(x=x, y=y, d=d)
+    np.random.seed(3141)
+    dml_cvar = dml.DoubleMLCVAR(
+        obj_dml_data=dml_data,
+        ml_g=LinearRegression(),
+        ml_m=LogisticRegression(),
+        treatment=1,
+        quantile=0.5,
+        ps_processor_config=ps_config,
+        n_rep=1,
+    )
+    dml_cvar.fit(store_predictions=True)
+    ml_m_preds = dml_cvar.predictions["ml_m"][:, 0, 0]
+    # Just check that predictions are within [clipping_threshold, 1-clipping_threshold]
+    assert np.all(ml_m_preds >= ps_config.clipping_threshold)
+    assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold)
+
+
+@pytest.mark.ci
+def test_cvar_ml_m_predictions_ps_processor_differences(generate_data_quantiles):
+    x, y, d = generate_data_quantiles
+    dml_data = dml.DoubleMLData.from_arrays(x=x, y=y, d=d)
+    configs = [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ]
+    preds = []
+    for cfg in configs:
+        np.random.seed(3141)  # reseed per config: identical random state, so only the ps processing differs
+        dml_cvar = dml.DoubleMLCVAR(
+            obj_dml_data=dml_data,
+            ml_g=LinearRegression(),
+            ml_m=LogisticRegression(),
+            treatment=1,
+            quantile=0.5,
+            ps_processor_config=cfg,
+            n_rep=1,
+        )
+        dml_cvar.fit(store_predictions=True)
+        preds.append(dml_cvar.predictions["ml_m"][:, 0, 0])
+    # Check that at least two configurations yield different predictions (element-wise)
+    diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))]
+    assert any(diffs)
diff --git a/doubleml/irm/tests/test_cvar_tune.py b/doubleml/irm/tests/test_cvar_tune.py
index ade84769..d51e7852 100644
--- a/doubleml/irm/tests/test_cvar_tune.py
+++ b/doubleml/irm/tests/test_cvar_tune.py
@@ -71,7 +71,7 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner_g, le
         n_folds=n_folds,
         n_rep=1,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=0.01,
+        ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=0.01),
         draw_sample_splitting=False,
     )
 
@@ -130,7 +130,7 @@ def dml_cvar_fixture(generate_data_quantiles, treatment, quantile, learner_g, le
         all_smpls=all_smpls,
         treatment=treatment,
         n_rep=1,
-        trimming_threshold=0.01,
+        clipping_threshold=0.01,
         normalize_ipw=normalize_ipw,
         g_params=g_params,
         m_params=m_params,

From c485731b599173a2b6a195ec47e091654c9dea99 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 09:16:39 +0100
Subject: [PATCH 21/38] add ps_processor to pq models

---
 doubleml/irm/pq.py                         | 71 ++++++++++++++++------
 doubleml/irm/tests/_utils_pq_manual.py     | 14 ++---
 doubleml/irm/tests/test_pq.py              |  8 +--
 doubleml/irm/tests/test_pq_ps_processor.py | 65 ++++++++++++++++++++
 doubleml/irm/tests/test_pq_tune.py         |  4 +-
 5 files changed, 131 insertions(+), 31 deletions(-)
 create mode 100644 doubleml/irm/tests/test_pq_ps_processor.py

diff --git a/doubleml/irm/pq.py b/doubleml/irm/pq.py
index baf43b7e..f3b72e2c 100644
--- a/doubleml/irm/pq.py
+++ b/doubleml/irm/pq.py
@@ -1,3 +1,6 @@
+import warnings
+from typing import Optional
+
 import numpy as np
 from sklearn.base import clone
 from sklearn.model_selection import StratifiedKFold, train_test_split
@@ -11,7 +14,6 @@
     _check_quantile,
     _check_score,
     _check_treatment,
-    _check_trimming,
     _check_zero_one_treatment,
 )
 from doubleml.utils._estimation import (
@@ -23,9 +25,11 @@
     _predict_zero_one_propensity,
     _solve_ipw_score,
 )
-from doubleml.utils._propensity_score import _normalize_ipw, _trimm
+from doubleml.utils._propensity_score import _normalize_ipw
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor
 
 
+# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
 class DoubleMLPQ(NonLinearScoreMixin, DoubleML):
     """Double machine learning for potential quantiles
 
@@ -74,13 +78,16 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML):
         Default is ``'None'``, which uses :py:class:`statsmodels.nonparametric.kde.KDEUnivariate` with a
         gaussian kernel and silverman for bandwidth determination.
 
-    trimming_rule : str
-        A str (``'truncate'`` is the only choice) specifying the trimming approach.
-        Default is ``'truncate'``.
+    trimming_rule : str, optional, deprecated
+        (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach.
+        Use `ps_processor_config` instead. Will be removed in a future version.
+
+    trimming_threshold : float, optional, deprecated
+        (DEPRECATED) The threshold used for trimming.
+        Use `ps_processor_config` instead. Will be removed in a future version.
 
-    trimming_threshold : float
-        The threshold used for trimming.
-        Default is ``1e-2``.
+    ps_processor_config : PSProcessorConfig, optional
+        Configuration for propensity score processing (clipping, calibration, etc.).
 
     draw_sample_splitting : bool
         Indicates whether the sample splitting should be drawn during initialization of the object.
@@ -115,8 +122,9 @@ def __init__(
         score="PQ",
         normalize_ipw=True,
         kde=None,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        trimming_rule="truncate",  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        trimming_threshold=1e-2,  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        ps_processor_config: Optional[PSProcessorConfig] = None,
         draw_sample_splitting=True,
     ):
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
@@ -155,10 +163,12 @@ def __init__(
 
         self._external_predictions_implemented = True
 
-        # initialize and check trimming
+        # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        self._ps_processor_config, self._ps_processor = init_ps_processor(
+            ps_processor_config, trimming_rule, trimming_threshold
+        )
         self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._trimming_threshold = self._ps_processor.clipping_threshold
 
         _ = self._check_learner(ml_g, "ml_g", regressor=False, classifier=True)
         _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
@@ -195,19 +205,44 @@ def normalize_ipw(self):
         """
         return self._normalize_ipw
 
+    @property
+    def ps_processor_config(self):
+        """
+        Configuration for propensity score processing (clipping, calibration, etc.).
+        """
+        return self._ps_processor_config
+
+    @property
+    def ps_processor(self):
+        """
+        Propensity score processor.
+        """
+        return self._ps_processor
+
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_rule(self):
         """
         Specifies the used trimming rule.
         """
+        # Deprecated accessor: warn on every access, then return the rule that was passed to the constructor.
+        msg = "'trimming_rule' is deprecated and will be removed in a future version. Use 'ps_processor_config' instead."
+        warnings.warn(msg, DeprecationWarning, stacklevel=2)
         return self._trimming_rule
 
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_threshold(self):
         """
         Specifies the used trimming threshold.
         """
-        return self._trimming_threshold
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._ps_processor.clipping_threshold
 
     @property
     def _score_element_names(self):
@@ -326,7 +361,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                     )["preds"]
                 else:
                     m_hat_prelim = m_hat["preds"][np.concatenate([test for _, test in smpls_prelim])]
-                m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold)
+                m_hat_prelim = self._ps_processor.adjust_ps(m_hat_prelim, d_train_1, cv=smpls_prelim)
+
                 if self._normalize_ipw:
                     m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1)
                 if self.treatment == 0:
@@ -370,11 +406,10 @@ def ipw_score(theta):
             g_hat["models"] = fitted_models["ml_g"]
             m_hat["models"] = fitted_models["ml_m"]
 
-        # clip propensities and normalize ipw weights
-        m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
-
+        m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls)
         # this is not done in the score to save computation due to multiple score evaluations
         # to be able to evaluate the raw models the m_hat['preds'] are not changed
+
         if self._normalize_ipw:
             m_hat_adj = _normalize_ipw(m_hat["preds"], d)
         else:
diff --git a/doubleml/irm/tests/_utils_pq_manual.py b/doubleml/irm/tests/_utils_pq_manual.py
index b5b27c7c..526854a0 100644
--- a/doubleml/irm/tests/_utils_pq_manual.py
+++ b/doubleml/irm/tests/_utils_pq_manual.py
@@ -18,7 +18,7 @@ def fit_pq(
     all_smpls,
     treatment,
     n_rep=1,
-    trimming_threshold=1e-2,
+    clipping_threshold=1e-2,
     normalize_ipw=True,
     g_params=None,
     m_params=None,
@@ -40,7 +40,7 @@ def fit_pq(
             learner_m,
             smpls,
             treatment,
-            trimming_threshold=trimming_threshold,
+            clipping_threshold=clipping_threshold,
             normalize_ipw=normalize_ipw,
             g_params=g_params,
             m_params=m_params,
@@ -57,7 +57,7 @@ def fit_pq(
 
 
 def fit_nuisance_pq(
-    y, x, d, quantile, learner_g, learner_m, smpls, treatment, trimming_threshold, normalize_ipw, g_params, m_params
+    y, x, d, quantile, learner_g, learner_m, smpls, treatment, clipping_threshold, normalize_ipw, g_params, m_params
 ):
     n_folds = len(smpls)
     n_obs = len(y)
@@ -96,8 +96,8 @@ def fit_nuisance_pq(
         # todo change prediction method
         m_hat_prelim = _dml_cv_predict(clone(ml_m), x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)["preds"]
 
-        m_hat_prelim[m_hat_prelim < trimming_threshold] = trimming_threshold
-        m_hat_prelim[m_hat_prelim > 1 - trimming_threshold] = 1 - trimming_threshold
+        m_hat_prelim[m_hat_prelim < clipping_threshold] = clipping_threshold
+        m_hat_prelim[m_hat_prelim > 1 - clipping_threshold] = 1 - clipping_threshold
 
         if normalize_ipw:
             m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1)
@@ -129,8 +129,8 @@ def ipw_score(theta):
         ml_m.fit(x[train_inds, :], d[train_inds])
         m_hat[test_inds] = ml_m.predict_proba(x[test_inds, :])[:, 1]
 
-    m_hat[m_hat < trimming_threshold] = trimming_threshold
-    m_hat[m_hat > 1 - trimming_threshold] = 1 - trimming_threshold
+    m_hat[m_hat < clipping_threshold] = clipping_threshold
+    m_hat[m_hat > 1 - clipping_threshold] = 1 - clipping_threshold
 
     if normalize_ipw:
         m_hat = _normalize_ipw(m_hat, d)
diff --git a/doubleml/irm/tests/test_pq.py b/doubleml/irm/tests/test_pq.py
index 62e69d53..b3505cf5 100644
--- a/doubleml/irm/tests/test_pq.py
+++ b/doubleml/irm/tests/test_pq.py
@@ -35,12 +35,12 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.01, 0.05])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, trimming_threshold):
+def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normalize_ipw, clipping_threshold):
     n_folds = 3
 
     # collect data
@@ -59,7 +59,7 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normal
         quantile=quantile,
         n_folds=n_folds,
         n_rep=1,
-        trimming_threshold=trimming_threshold,
+        ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
         normalize_ipw=normalize_ipw,
         draw_sample_splitting=False,
     )
@@ -80,7 +80,7 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner, normal
         all_smpls,
         treatment,
         n_rep=1,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
         normalize_ipw=normalize_ipw,
     )
 
diff --git a/doubleml/irm/tests/test_pq_ps_processor.py b/doubleml/irm/tests/test_pq_ps_processor.py
new file mode 100644
index 00000000..c40786d7
--- /dev/null
+++ b/doubleml/irm/tests/test_pq_ps_processor.py
@@ -0,0 +1,65 @@
+import numpy as np
+import pytest
+from sklearn.linear_model import LogisticRegression
+
+from doubleml import DoubleMLData, DoubleMLPQ
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
+
+
+@pytest.mark.ci
+@pytest.mark.parametrize(
+    "ps_config",
+    [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ],
+)
+def test_pq_ml_m_predictions_ps_processor(generate_data_quantiles, ps_config):
+    x, y, d = generate_data_quantiles
+    dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)
+    np.random.seed(3141)
+    dml_pq = DoubleMLPQ(
+        obj_dml_data=dml_data,
+        ml_g=LogisticRegression(),
+        ml_m=LogisticRegression(),
+        treatment=1,
+        quantile=0.5,
+        ps_processor_config=ps_config,
+        n_rep=1,
+    )
+    dml_pq.fit(store_predictions=True)
+    ml_m_preds = dml_pq.predictions["ml_m"][:, 0, 0]
+    # Just check that predictions are within [clipping_threshold, 1-clipping_threshold]
+    assert np.all(ml_m_preds >= ps_config.clipping_threshold)
+    assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold)
+
+
+@pytest.mark.ci
+def test_pq_ml_m_predictions_ps_processor_differences(generate_data_quantiles):
+    x, y, d = generate_data_quantiles
+    dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d)
+    configs = [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ]
+    preds = []
+    for cfg in configs:
+        np.random.seed(3141)  # reseed per config: identical random state, so only the ps processing differs
+        dml_pq = DoubleMLPQ(
+            obj_dml_data=dml_data,
+            ml_g=LogisticRegression(),
+            ml_m=LogisticRegression(),
+            treatment=1,
+            quantile=0.5,
+            ps_processor_config=cfg,
+            n_rep=1,
+        )
+        dml_pq.fit(store_predictions=True)
+        preds.append(dml_pq.predictions["ml_m"][:, 0, 0])
+    # Check that at least two configurations yield different predictions (element-wise)
+    diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))]
+    assert any(diffs)
diff --git a/doubleml/irm/tests/test_pq_tune.py b/doubleml/irm/tests/test_pq_tune.py
index 815c17d4..47e7bc18 100644
--- a/doubleml/irm/tests/test_pq_tune.py
+++ b/doubleml/irm/tests/test_pq_tune.py
@@ -73,7 +73,7 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner_g, lear
         n_folds=n_folds,
         n_rep=1,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=0.01,
+        ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=0.01),
         draw_sample_splitting=False,
     )
 
@@ -132,7 +132,7 @@ def dml_pq_fixture(generate_data_quantiles, treatment, quantile, learner_g, lear
         all_smpls=all_smpls,
         treatment=treatment,
         n_rep=1,
-        trimming_threshold=0.01,
+        clipping_threshold=0.01,
         normalize_ipw=normalize_ipw,
         g_params=g_params,
         m_params=m_params,

From 22212014837d7aa92bc5bc809ecb9ac97f1e12b0 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 09:34:35 +0100
Subject: [PATCH 22/38] add ps_processor to LPQ models

---
 doubleml/irm/lpq.py                         | 75 +++++++++++++++------
 doubleml/irm/tests/_utils_lpq_manual.py     | 10 +--
 doubleml/irm/tests/test_lpq.py              | 12 ++--
 doubleml/irm/tests/test_lpq_ps_processor.py | 65 ++++++++++++++++++
 doubleml/irm/tests/test_lpq_tune.py         |  4 +-
 5 files changed, 134 insertions(+), 32 deletions(-)
 create mode 100644 doubleml/irm/tests/test_lpq_ps_processor.py

diff --git a/doubleml/irm/lpq.py b/doubleml/irm/lpq.py
index 962b383b..bd62794c 100644
--- a/doubleml/irm/lpq.py
+++ b/doubleml/irm/lpq.py
@@ -1,3 +1,6 @@
+import warnings
+from typing import Optional
+
 import numpy as np
 from sklearn.base import clone
 from sklearn.model_selection import StratifiedKFold, train_test_split
@@ -7,7 +10,7 @@
 from doubleml.data.base_data import DoubleMLData
 from doubleml.double_ml import DoubleML
 from doubleml.double_ml_score_mixins import NonLinearScoreMixin
-from doubleml.utils._checks import _check_quantile, _check_score, _check_treatment, _check_trimming, _check_zero_one_treatment
+from doubleml.utils._checks import _check_quantile, _check_score, _check_treatment, _check_zero_one_treatment
 from doubleml.utils._estimation import (
     _cond_targets,
     _default_kde,
@@ -17,9 +20,11 @@
     _predict_zero_one_propensity,
     _solve_ipw_score,
 )
-from doubleml.utils._propensity_score import _normalize_ipw, _trimm
+from doubleml.utils._propensity_score import _normalize_ipw
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor
 
 
+# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
 class DoubleMLLPQ(NonLinearScoreMixin, DoubleML):
     """Double machine learning for local potential quantiles
 
@@ -67,13 +72,16 @@ class DoubleMLLPQ(NonLinearScoreMixin, DoubleML):
         Default is ``'None'``, which uses :py:class:`statsmodels.nonparametric.kde.KDEUnivariate` with a
         gaussian kernel and silverman for bandwidth determination.
 
-    trimming_rule : str
-        A str (``'truncate'`` is the only choice) specifying the trimming approach.
-        Default is ``'truncate'``.
+    trimming_rule : str, optional, deprecated
+        (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach.
+        Use `ps_processor_config` instead. Will be removed in a future version.
+
+    trimming_threshold : float, optional, deprecated
+        (DEPRECATED) The threshold used for trimming.
+        Use `ps_processor_config` instead. Will be removed in a future version.
 
-    trimming_threshold : float
-        The threshold used for trimming.
-        Default is ``1e-2``.
+    ps_processor_config : PSProcessorConfig, optional
+        Configuration for propensity score processing (clipping, calibration, etc.).
 
     draw_sample_splitting : bool
         Indicates whether the sample splitting should be drawn during initialization of the object.
@@ -108,8 +116,9 @@ def __init__(
         score="LPQ",
         normalize_ipw=True,
         kde=None,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        trimming_rule="truncate",  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        trimming_threshold=1e-2,  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        ps_processor_config: Optional[PSProcessorConfig] = None,
         draw_sample_splitting=True,
     ):
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
@@ -148,10 +157,12 @@ def __init__(
 
         self._external_predictions_implemented = True
 
-        # initialize and check trimming
+        # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        self._ps_processor_config, self._ps_processor = init_ps_processor(
+            ps_processor_config, trimming_rule, trimming_threshold
+        )
         self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._trimming_threshold = self._ps_processor.clipping_threshold
 
         _ = self._check_learner(ml_g, "ml_g", regressor=False, classifier=True)
         _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True)
@@ -200,19 +211,44 @@ def normalize_ipw(self):
         """
         return self._normalize_ipw
 
+    @property
+    def ps_processor_config(self):
+        """
+        Configuration for propensity score processing (clipping, calibration, etc.).
+        """
+        return self._ps_processor_config
+
+    @property
+    def ps_processor(self):
+        """
+        Propensity score processor.
+        """
+        return self._ps_processor
+
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_rule(self):
         """
         Specifies the used trimming rule.
         """
+        # Deprecated accessor: warn on every access, then return the rule that was passed to the constructor.
+        msg = "'trimming_rule' is deprecated and will be removed in a future version. Use 'ps_processor_config' instead."
+        warnings.warn(msg, DeprecationWarning, stacklevel=2)
         return self._trimming_rule
 
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_threshold(self):
         """
         Specifies the used trimming threshold.
         """
-        return self._trimming_threshold
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._ps_processor.clipping_threshold
 
     @property
     def _score_element_names(self):
@@ -386,7 +422,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                     ml_m_z_prelim, x_train_1, z_train_1, method="predict_proba", smpls=smpls_prelim
                 )["preds"]
 
-                m_z_hat_prelim = _trimm(m_z_hat_prelim, self.trimming_rule, self.trimming_threshold)
+                m_z_hat_prelim = self._ps_processor.adjust_ps(m_z_hat_prelim, z_train_1, cv=smpls_prelim)
                 if self._normalize_ipw:
                     m_z_hat_prelim = _normalize_ipw(m_z_hat_prelim, z_train_1)
 
@@ -501,11 +537,12 @@ def ipw_score(theta):
             g_du_z0_hat["models"] = fitted_models["ml_g_du_z0"]
             g_du_z1_hat["models"] = fitted_models["ml_g_du_z1"]
 
-        # clip propensities
-        m_z_hat_adj = _trimm(m_z_hat["preds"], self.trimming_rule, self.trimming_threshold)
-
+        # adjust propensity scores
+        m_z_hat["preds"] = self._ps_processor.adjust_ps(m_z_hat["preds"], z, cv=smpls)
         if self._normalize_ipw:
-            m_z_hat_adj = _normalize_ipw(m_z_hat_adj, z)
+            m_z_hat_adj = _normalize_ipw(m_z_hat["preds"], z)
+        else:
+            m_z_hat_adj = m_z_hat["preds"]
 
         # this could be adjusted to be compatible with dml1
         # estimate final nuisance parameter
diff --git a/doubleml/irm/tests/_utils_lpq_manual.py b/doubleml/irm/tests/_utils_lpq_manual.py
index 376c7c46..839025fd 100644
--- a/doubleml/irm/tests/_utils_lpq_manual.py
+++ b/doubleml/irm/tests/_utils_lpq_manual.py
@@ -20,7 +20,7 @@ def fit_lpq(
     treatment,
     n_rep=1,
     trimming_rule="truncate",
-    trimming_threshold=1e-2,
+    clipping_threshold=1e-2,
     kde=_default_kde,
     normalize_ipw=True,
     m_z_params=None,
@@ -48,7 +48,7 @@ def fit_lpq(
             smpls,
             treatment,
             trimming_rule=trimming_rule,
-            trimming_threshold=trimming_threshold,
+            clipping_threshold=clipping_threshold,
             normalize_ipw=normalize_ipw,
             m_z_params=m_z_params,
             m_d_z0_params=m_d_z0_params,
@@ -80,7 +80,7 @@ def fit_nuisance_lpq(
     smpls,
     treatment,
     trimming_rule,
-    trimming_threshold,
+    clipping_threshold,
     normalize_ipw,
     m_z_params,
     m_d_z0_params,
@@ -144,7 +144,7 @@ def fit_nuisance_lpq(
             "preds"
         ]
 
-        m_z_hat_prelim = _trimm(m_z_hat_prelim, trimming_rule, trimming_threshold)
+        m_z_hat_prelim = _trimm(m_z_hat_prelim, trimming_rule, clipping_threshold)
         if normalize_ipw:
             m_z_hat_prelim = _normalize_ipw(m_z_hat_prelim, z_train_1)
 
@@ -222,7 +222,7 @@ def ipw_score(theta):
         m_d_z1_hat[test_inds] = ml_m_d_z1.predict_proba(x[test_inds, :])[:, 1]
 
     # clip propensities
-    m_z_hat = _trimm(m_z_hat, trimming_rule, trimming_threshold)
+    m_z_hat = _trimm(m_z_hat, trimming_rule, clipping_threshold)
 
     if normalize_ipw:
         m_z_hat = _normalize_ipw(m_z_hat, z)
diff --git a/doubleml/irm/tests/test_lpq.py b/doubleml/irm/tests/test_lpq.py
index 3e0049b8..2f90156e 100644
--- a/doubleml/irm/tests/test_lpq.py
+++ b/doubleml/irm/tests/test_lpq.py
@@ -41,7 +41,7 @@ def normalize_ipw(request):
 
 
 @pytest.fixture(scope="module", params=[0.05])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
@@ -51,7 +51,7 @@ def kde(request):
 
 
 @pytest.fixture(scope="module")
-def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw, trimming_threshold, kde):
+def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw, clipping_threshold, kde):
     n_folds = 3
 
     # collect data
@@ -73,7 +73,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner,
             n_folds=n_folds,
             n_rep=1,
             normalize_ipw=normalize_ipw,
-            trimming_threshold=trimming_threshold,
+            ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
             draw_sample_splitting=False,
         )
         # synchronize the sample splitting
@@ -94,7 +94,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner,
             normalize_ipw=normalize_ipw,
             kde=_default_kde,
             n_rep=1,
-            trimming_threshold=trimming_threshold,
+            clipping_threshold=clipping_threshold,
         )
     else:
         dml_lpq_obj = dml.DoubleMLLPQ(
@@ -107,7 +107,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner,
             n_rep=1,
             normalize_ipw=normalize_ipw,
             kde=kde,
-            trimming_threshold=trimming_threshold,
+            ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
             draw_sample_splitting=False,
         )
 
@@ -129,7 +129,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner,
             normalize_ipw=normalize_ipw,
             kde=kde,
             n_rep=1,
-            trimming_threshold=trimming_threshold,
+            clipping_threshold=clipping_threshold,
         )
 
     res_dict = {
diff --git a/doubleml/irm/tests/test_lpq_ps_processor.py b/doubleml/irm/tests/test_lpq_ps_processor.py
new file mode 100644
index 00000000..acd539ab
--- /dev/null
+++ b/doubleml/irm/tests/test_lpq_ps_processor.py
@@ -0,0 +1,65 @@
+import numpy as np
+import pytest
+from sklearn.linear_model import LogisticRegression
+
+import doubleml as dml
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
+
+
+@pytest.mark.ci
+@pytest.mark.parametrize(
+    "ps_config",
+    [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ],
+)
+def test_lpq_ml_m_predictions_ps_processor(generate_data_local_quantiles, ps_config):
+    x, y, d, z = generate_data_local_quantiles
+    dml_data = dml.DoubleMLData.from_arrays(x=x, y=y, d=d, z=z)
+    np.random.seed(3141)
+    dml_lpq = dml.DoubleMLLPQ(
+        obj_dml_data=dml_data,
+        ml_g=LogisticRegression(),
+        ml_m=LogisticRegression(),
+        treatment=1,
+        quantile=0.5,
+        ps_processor_config=ps_config,
+        n_rep=1,
+    )
+    dml_lpq.fit(store_predictions=True)
+    ml_m_preds = dml_lpq.predictions["ml_m_z"][:, 0, 0]
+    # Just check that predictions are within [clipping_threshold, 1-clipping_threshold]
+    assert np.all(ml_m_preds >= ps_config.clipping_threshold)
+    assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold)
+
+
+@pytest.mark.ci
+def test_lpq_ml_m_predictions_ps_processor_differences(generate_data_local_quantiles):
+    x, y, d, z = generate_data_local_quantiles
+    dml_data = dml.DoubleMLData.from_arrays(x=x, y=y, d=d, z=z)
+    np.random.seed(3141)
+    configs = [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ]
+    preds = []
+    for cfg in configs:
+        dml_lpq = dml.DoubleMLLPQ(
+            obj_dml_data=dml_data,
+            ml_g=LogisticRegression(),
+            ml_m=LogisticRegression(),
+            treatment=1,
+            quantile=0.5,
+            ps_processor_config=cfg,
+            n_rep=1,
+        )
+        dml_lpq.fit(store_predictions=True)
+        preds.append(dml_lpq.predictions["ml_m_z"][:, 0, 0])
+    # Check that at least two configurations yield different predictions (element-wise)
+    diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))]
+    assert any(diffs)
diff --git a/doubleml/irm/tests/test_lpq_tune.py b/doubleml/irm/tests/test_lpq_tune.py
index c2b7d192..30c9b718 100644
--- a/doubleml/irm/tests/test_lpq_tune.py
+++ b/doubleml/irm/tests/test_lpq_tune.py
@@ -73,7 +73,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner,
         n_folds=n_folds,
         n_rep=1,
         normalize_ipw=normalize_ipw,
-        trimming_threshold=0.01,
+        ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=0.01),
         draw_sample_splitting=False,
     )
 
@@ -150,7 +150,7 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner,
         all_smpls=all_smpls,
         treatment=treatment,
         n_rep=1,
-        trimming_threshold=0.01,
+        clipping_threshold=0.01,
         normalize_ipw=normalize_ipw,
         m_z_params=m_z_params,
         m_d_z0_params=m_d_z0_params,

From d800d3f92f2f213fe96929ffacc2c09a7b174a13 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 10:05:40 +0100
Subject: [PATCH 23/38] add ps_processor to qte

---
 doubleml/irm/qte.py                       | 65 +++++++++++++++++------
 doubleml/irm/tests/test_qte_exceptions.py | 19 -------
 doubleml/utils/tests/test_ps_processor.py | 12 ++---
 3 files changed, 53 insertions(+), 43 deletions(-)

diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py
index f896b078..46c8f316 100644
--- a/doubleml/irm/qte.py
+++ b/doubleml/irm/qte.py
@@ -1,3 +1,6 @@
+import warnings
+from typing import Optional
+
 import numpy as np
 import pandas as pd
 from joblib import Parallel, delayed
@@ -9,11 +12,13 @@
 from doubleml.irm.cvar import DoubleMLCVAR
 from doubleml.irm.lpq import DoubleMLLPQ
 from doubleml.irm.pq import DoubleMLPQ
-from doubleml.utils._checks import _check_score, _check_trimming, _check_zero_one_treatment
+from doubleml.utils._checks import _check_score, _check_zero_one_treatment
 from doubleml.utils._descriptive import generate_summary
 from doubleml.utils._estimation import _default_kde
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor
 
 
+# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
 class DoubleMLQTE(SampleSplittingMixin):
     """Double machine learning for quantile treatment effects
 
@@ -56,13 +61,16 @@ class DoubleMLQTE(SampleSplittingMixin):
         Default is ``'None'``, which uses :py:class:`statsmodels.nonparametric.kde.KDEUnivariate` with a
         gaussian kernel and silverman for bandwidth determination.
 
-    trimming_rule : str
-        A str (``'truncate'`` is the only choice) specifying the trimming approach.
-        Default is ``'truncate'``.
+    trimming_rule : str, optional, deprecated
+        (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach.
+        Use `ps_processor_config` instead. Will be removed in a future version.
+
+    trimming_threshold : float, optional, deprecated
+        (DEPRECATED) The threshold used for trimming.
+        Use `ps_processor_config` instead. Will be removed in a future version.
 
-    trimming_threshold : float
-        The threshold used for trimming.
-        Default is ``1e-2``.
+    ps_processor_config : PSProcessorConfig, optional
+        Configuration for propensity score processing (clipping, calibration, etc.).
 
     draw_sample_splitting : bool
         Indicates whether the sample splitting should be drawn during initialization of the object.
@@ -98,8 +106,9 @@ def __init__(
         score="PQ",
         normalize_ipw=True,
         kde=None,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        trimming_rule="truncate",  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        trimming_threshold=1e-2,  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        ps_processor_config: Optional[PSProcessorConfig] = None,
         draw_sample_splitting=True,
     ):
         self._dml_data = obj_dml_data
@@ -130,10 +139,12 @@ def __init__(
         # initialize framework which is constructed after the fit method is called
         self._framework = None
 
-        # initialize and check trimming
+        # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        self._ps_processor_config, self._ps_processor = init_ps_processor(
+            ps_processor_config, trimming_rule, trimming_threshold
+        )
         self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._trimming_threshold = self._ps_processor.clipping_threshold
 
         if not isinstance(self.normalize_ipw, bool):
             raise TypeError(
@@ -250,19 +261,44 @@ def normalize_ipw(self):
         """
         return self._normalize_ipw
 
+    @property
+    def ps_processor_config(self):
+        """
+        Configuration for propensity score processing (clipping, calibration, etc.).
+        """
+        return self._ps_processor_config
+
+    @property
+    def ps_processor(self):
+        """
+        Propensity score processor.
+        """
+        return self._ps_processor
+
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_rule(self):
         """
         Specifies the used trimming rule.
         """
+        warnings.warn(
+            "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2
+        )
         return self._trimming_rule
 
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_threshold(self):
         """
         Specifies the used trimming threshold.
         """
-        return self._trimming_threshold
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._ps_processor.clipping_threshold
 
     @property
     def coef(self):
@@ -530,8 +566,7 @@ def _initialize_models(self):
             "ml_m": self._learner["ml_m"],
             "n_folds": self.n_folds,
             "n_rep": self.n_rep,
-            "trimming_rule": self.trimming_rule,
-            "trimming_threshold": self.trimming_threshold,
+            "ps_processor_config": self.ps_processor_config,
             "normalize_ipw": self.normalize_ipw,
             "draw_sample_splitting": False,
         }
diff --git a/doubleml/irm/tests/test_qte_exceptions.py b/doubleml/irm/tests/test_qte_exceptions.py
index f4e95110..75f08f6b 100644
--- a/doubleml/irm/tests/test_qte_exceptions.py
+++ b/doubleml/irm/tests/test_qte_exceptions.py
@@ -56,25 +56,6 @@ def test_exception_score():
         _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), score=2)
 
 
-@pytest.mark.ci
-def test_exception_trimming_rule():
-    msg = "Invalid trimming_rule discard. Valid trimming_rule truncate."
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="discard")
-
-    msg = "trimming_threshold has to be a float. Object of type <class 'str'> passed."
-    with pytest.raises(TypeError, match=msg):
-        _ = DoubleMLQTE(
-            dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1"
-        )
-
-    msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5."
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLQTE(
-            dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6
-        )
-
-
 @pytest.mark.ci
 def test_exception_quantiles():
     msg = r"Quantiles have be between 0 or 1. Quantiles \[0.2 2. \] passed."
diff --git a/doubleml/utils/tests/test_ps_processor.py b/doubleml/utils/tests/test_ps_processor.py
index d48a4b7d..a04354f1 100644
--- a/doubleml/utils/tests/test_ps_processor.py
+++ b/doubleml/utils/tests/test_ps_processor.py
@@ -1,14 +1,12 @@
-from unittest.mock import patch
 import warnings
+from unittest.mock import patch
 
 import numpy as np
 import pytest
 from sklearn.isotonic import IsotonicRegression
 from sklearn.model_selection import KFold, cross_val_predict
 
-from doubleml.utils.propensity_score_processing import (
-    PSProcessorConfig, PSProcessor, init_ps_processor
-)
+from doubleml.utils.propensity_score_processing import PSProcessor, PSProcessorConfig, init_ps_processor
 
 
 # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
@@ -109,13 +107,9 @@ def test_isotonic_calibration_with_cv(cv):
         cv = [(train, test) for train, test in KFold(n_splits=3).split(ps)]
     elif cv == "splitter":
         cv = KFold(n_splits=3)
-    else:
-        cv = cv
 
     clipping_threshold = 0.01
-    processor = PSProcessor(
-        calibration_method="isotonic", cv_calibration=True, clipping_threshold=clipping_threshold
-    )
+    processor = PSProcessor(calibration_method="isotonic", cv_calibration=True, clipping_threshold=clipping_threshold)
 
     isotonic_manual = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0)
     ps_cv = cross_val_predict(isotonic_manual, ps.reshape(-1, 1), treatment, cv=cv)

From eecb6231b162cfa233eb452ffa8f1e6bd798d40a Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 10:27:12 +0100
Subject: [PATCH 24/38] update for sklearn warning

---
 doubleml/irm/tests/test_ssm_exceptions.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doubleml/irm/tests/test_ssm_exceptions.py b/doubleml/irm/tests/test_ssm_exceptions.py
index 4ca9f263..039ed921 100644
--- a/doubleml/irm/tests/test_ssm_exceptions.py
+++ b/doubleml/irm/tests/test_ssm_exceptions.py
@@ -7,6 +7,7 @@
 from doubleml import DoubleMLSSM
 from doubleml.data.base_data import DoubleMLBaseData
 from doubleml.irm.datasets import make_ssm_data
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
 
 np.random.seed(3141)
 n = 100
@@ -184,7 +185,7 @@ def set_params(self):
         pass
 
 
-class _DummyNoClassifier(_DummyNoGetParams):
+class _DummyNoClassifier(_DummyNoGetParams, BaseEstimator):
     def get_params(self):
         pass
 
@@ -291,7 +292,7 @@ def test_double_ml_exception_evaluate_learner():
         ml_g=Lasso(),
         ml_pi=LogisticRegression(),
         ml_m=LogisticRegression(),
-        trimming_threshold=0.05,
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.05),
         n_folds=5,
         score="missing-at-random",
     )

From bd3d795442a94f479f7a88d23feffb359921fef2 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 11:51:33 +0100
Subject: [PATCH 25/38] update exception tests for trimming and remove/suppress
 warnings

---
 doubleml/tests/test_exceptions.py             | 234 ++++++++----------
 doubleml/utils/propensity_score_processing.py |   3 +-
 pytest.ini                                    |   1 +
 3 files changed, 102 insertions(+), 136 deletions(-)

diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py
index e725a562..94b5f824 100644
--- a/doubleml/tests/test_exceptions.py
+++ b/doubleml/tests/test_exceptions.py
@@ -24,6 +24,7 @@
 from doubleml.did.datasets import make_did_SZ2020
 from doubleml.irm.datasets import make_iivm_data, make_irm_data
 from doubleml.plm.datasets import make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, make_plr_CCDDHNR2018
+from doubleml.utils import PSProcessorConfig
 
 from ._utils import DummyDataClass
 
@@ -378,114 +379,6 @@ def test_doubleml_exception_scores():
         _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), score=2)
 
 
-@pytest.mark.ci
-def test_doubleml_exception_trimming_rule():
-    msg = "Invalid trimming_rule discard. Valid trimming_rule truncate."
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule="discard")
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLIIVM(dml_data_iivm, Lasso(), LogisticRegression(), LogisticRegression(), trimming_rule="discard")
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLPQ(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule="discard")
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLLPQ(dml_data_iivm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule="discard")
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLCVAR(dml_data_irm, LogisticRegression(), LogisticRegression(), treatment=1, trimming_rule="discard")
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLQTE(dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="discard")
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule="discard")
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule="discard")
-
-    # check the trimming_threshold exceptions
-    msg = "trimming_threshold has to be a float. Object of type <class 'str'> passed."
-    with pytest.raises(TypeError, match=msg):
-        _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1")
-    with pytest.raises(TypeError, match=msg):
-        _ = DoubleMLIIVM(
-            dml_data_iivm,
-            Lasso(),
-            LogisticRegression(),
-            LogisticRegression(),
-            trimming_rule="truncate",
-            trimming_threshold="0.1",
-        )
-    with pytest.raises(TypeError, match=msg):
-        _ = DoubleMLPQ(
-            dml_data_irm,
-            LogisticRegression(),
-            LogisticRegression(),
-            treatment=1,
-            trimming_rule="truncate",
-            trimming_threshold="0.1",
-        )
-    with pytest.raises(TypeError, match=msg):
-        _ = DoubleMLLPQ(
-            dml_data_iivm,
-            LogisticRegression(),
-            LogisticRegression(),
-            treatment=1,
-            trimming_rule="truncate",
-            trimming_threshold="0.1",
-        )
-    with pytest.raises(TypeError, match=msg):
-        _ = DoubleMLCVAR(
-            dml_data_irm, Lasso(), LogisticRegression(), treatment=1, trimming_rule="truncate", trimming_threshold="0.1"
-        )
-    with pytest.raises(TypeError, match=msg):
-        _ = DoubleMLQTE(
-            dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1"
-        )
-    with pytest.raises(TypeError, match=msg):
-        _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1")
-    with pytest.raises(TypeError, match=msg):
-        _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold="0.1")
-
-    msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5."
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6)
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLIIVM(
-            dml_data_iivm,
-            Lasso(),
-            LogisticRegression(),
-            LogisticRegression(),
-            trimming_rule="truncate",
-            trimming_threshold=0.6,
-        )
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLPQ(
-            dml_data_irm,
-            LogisticRegression(),
-            LogisticRegression(),
-            treatment=1,
-            trimming_rule="truncate",
-            trimming_threshold=0.6,
-        )
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLLPQ(
-            dml_data_iivm,
-            LogisticRegression(),
-            LogisticRegression(),
-            treatment=1,
-            trimming_rule="truncate",
-            trimming_threshold=0.6,
-        )
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLCVAR(
-            dml_data_irm, Lasso(), LogisticRegression(), treatment=1, trimming_rule="truncate", trimming_threshold=0.6
-        )
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLQTE(
-            dml_data_irm, LogisticRegression(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6
-        )
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLDID(dml_data_did, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6)
-    with pytest.raises(ValueError, match=msg):
-        _ = DoubleMLDIDCS(dml_data_did_cs, Lasso(), LogisticRegression(), trimming_rule="truncate", trimming_threshold=0.6)
-
-
 @pytest.mark.ci
 def test_doubleml_exception_weights():
     msg = "weights must be a numpy array or dictionary. weights of type <class 'int'> was passed."
@@ -961,9 +854,9 @@ def set_params(self):
         pass
 
 
-class _DummyNoClassifier(_DummyNoGetParams):
-    def get_params(self):
-        pass
+class _DummyNoClassifier(_DummyNoGetParams, BaseEstimator):
+    def get_params(self, deep=True):
+        return {}
 
     def predict_proba(self):
         pass
@@ -1063,28 +956,25 @@ def test_doubleml_exception_learner():
     # construct a classifier which is not identifiable as classifier via is_classifier by sklearn
     # it then predicts labels and therefore an exception will be thrown
     log_reg = LogisticRegressionManipulatedPredict()
-    # TODO(0.11) can be removed if the sklearn dependency is bumped to 1.6.0
-    log_reg._estimator_type = None
-    msg = (
+    msg_warn = (
         r"Learner provided for ml_m is probably invalid: LogisticRegressionManipulatedPredict\(\) is \(probably\) "
         "neither a regressor nor a classifier. Method predict is used for prediction."
     )
-    with pytest.warns(UserWarning, match=msg):
+    with pytest.warns(UserWarning, match=msg_warn):
         dml_plr_hidden_classifier = DoubleMLPLR(dml_data_irm, Lasso(), log_reg)
     msg = (
         r"For the binary variable d, predictions obtained with the ml_m learner LogisticRegressionManipulatedPredict\(\) "
         "are also observed to be binary with values 0 and 1. Make sure that for classifiers probabilities and not "
         "labels are predicted."
     )
-    with pytest.raises(ValueError, match=msg):
-        dml_plr_hidden_classifier.fit()
+    with pytest.warns(UserWarning, match=msg_warn):
+        with pytest.raises(ValueError, match=msg):
+            dml_plr_hidden_classifier.fit()
 
     # construct a classifier which is not identifiable as classifier via is_classifier by sklearn
     # it then predicts labels and therefore an exception will be thrown
     # whether predict() or predict_proba() is being called can also be manipulated via the unrelated max_iter variable
     log_reg = LogisticRegressionManipulatedPredict()
-    # TODO(0.11) can be removed if the sklearn dependency is bumped to 1.6.0
-    log_reg._estimator_type = None
     msg = (
         r"Learner provided for ml_g is probably invalid: LogisticRegressionManipulatedPredict\(\) is \(probably\) "
         "neither a regressor nor a classifier. Method predict is used for prediction."
@@ -1151,7 +1041,12 @@ def test_doubleml_sensitivity_not_yet_implemented():
 
 @pytest.mark.ci
 def test_doubleml_sensitivity_inputs():
-    dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1)
+    dml_irm = DoubleMLIRM(
+        dml_data_irm,
+        Lasso(),
+        LogisticRegression(),
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.1),
+    )
     dml_irm.fit()
 
     # test cf_y
@@ -1231,7 +1126,9 @@ def test_doubleml_sensitivity_inputs():
 
 
 def test_doubleml_sensitivity_reestimation_warning():
-    dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1)
+    dml_irm = DoubleMLIRM(
+        dml_data_irm, Lasso(), LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1)
+    )
     dml_irm.fit()
 
     dml_irm.sensitivity_elements["nu2"] = -1.0 * dml_irm.sensitivity_elements["nu2"]
@@ -1242,7 +1139,9 @@ def test_doubleml_sensitivity_reestimation_warning():
 
 
 def test_doubleml_sensitivity_summary():
-    dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1)
+    dml_irm = DoubleMLIRM(
+        dml_data_irm, Lasso(), LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1)
+    )
     msg = r"Apply sensitivity_analysis\(\) before sensitivity_summary."
     with pytest.raises(ValueError, match=msg):
         _ = dml_irm.sensitivity_summary
@@ -1250,7 +1149,9 @@ def test_doubleml_sensitivity_summary():
 
 @pytest.mark.ci
 def test_doubleml_sensitivity_benchmark():
-    dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1)
+    dml_irm = DoubleMLIRM(
+        dml_data_irm, Lasso(), LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1)
+    )
     dml_irm.fit()
 
     # test input
@@ -1272,7 +1173,12 @@ def test_doubleml_sensitivity_benchmark():
 
 @pytest.mark.ci
 def test_doubleml_sensitivity_plot_input():
-    dml_irm = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), trimming_threshold=0.1)
+    dml_irm = DoubleMLIRM(
+        dml_data_irm,
+        Lasso(),
+        LogisticRegression(),
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.1),
+    )
     dml_irm.fit()
 
     msg = r"Apply sensitivity_analysis\(\) to include senario in sensitivity_plot. "
@@ -1411,7 +1317,9 @@ def test_doubleml_warning_blp():
 
 @pytest.mark.ci
 def test_doubleml_exception_gate():
-    dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5)
+    dml_irm_obj = DoubleMLIRM(
+        dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1)
+    )
     dml_irm_obj.fit()
 
     msg = "Groups must be of DataFrame type. Groups of type <class 'int'> was passed."
@@ -1426,7 +1334,12 @@ def test_doubleml_exception_gate():
         dml_irm_obj.gate(groups=groups)
 
     dml_irm_obj = DoubleMLIRM(
-        dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE"
+        dml_data_irm,
+        ml_g=Lasso(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.1),
+        n_folds=5,
+        score="ATTE",
     )
     dml_irm_obj.fit()
     groups = pd.DataFrame(np.random.choice([True, False], size=dml_data_irm.n_obs))
@@ -1435,7 +1348,13 @@ def test_doubleml_exception_gate():
         dml_irm_obj.gate(groups=groups)
 
     dml_irm_obj = DoubleMLIRM(
-        dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2
+        dml_data_irm,
+        ml_g=Lasso(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.1),
+        n_folds=5,
+        score="ATE",
+        n_rep=2,
     )
     dml_irm_obj.fit()
 
@@ -1447,7 +1366,12 @@ def test_doubleml_exception_gate():
 @pytest.mark.ci
 def test_doubleml_exception_cate():
     dml_irm_obj = DoubleMLIRM(
-        dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE"
+        dml_data_irm,
+        ml_g=Lasso(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.05),
+        n_folds=5,
+        score="ATTE",
     )
     dml_irm_obj.fit()
 
@@ -1456,7 +1380,13 @@ def test_doubleml_exception_cate():
         dml_irm_obj.cate(basis=2)
 
     dml_irm_obj = DoubleMLIRM(
-        dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2
+        dml_data_irm,
+        ml_g=Lasso(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.05),
+        n_folds=5,
+        score="ATE",
+        n_rep=2,
     )
     dml_irm_obj.fit()
     msg = "Only implemented for one repetition. Number of repetitions is 2."
@@ -1504,7 +1434,12 @@ def test_doubleml_exception_plr_gate():
 @pytest.mark.ci
 def test_double_ml_exception_evaluate_learner():
     dml_irm_obj = DoubleMLIRM(
-        dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE"
+        dml_data_irm,
+        ml_g=Lasso(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.05),
+        n_folds=5,
+        score="ATTE",
     )
 
     msg = r"Apply fit\(\) before evaluate_learners\(\)."
@@ -1532,7 +1467,13 @@ def eval_fct(y_pred, y_true):
 
 @pytest.mark.ci
 def test_doubleml_exception_policytree():
-    dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5)
+    dml_irm_obj = DoubleMLIRM(
+        dml_data_irm,
+        ml_g=Lasso(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.05),
+        n_folds=5,
+    )
     dml_irm_obj.fit()
 
     msg = "Covariates must be of DataFrame type. Covariates of type <class 'int'> was passed."
@@ -1546,7 +1487,12 @@ def test_doubleml_exception_policytree():
         dml_irm_obj.policy_tree(features=pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3))), depth=0.1)
 
     dml_irm_obj = DoubleMLIRM(
-        dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATTE"
+        dml_data_irm,
+        ml_g=Lasso(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.05),
+        n_folds=5,
+        score="ATTE",
     )
     dml_irm_obj.fit()
 
@@ -1555,7 +1501,13 @@ def test_doubleml_exception_policytree():
         dml_irm_obj.policy_tree(features=2, depth=1)
 
     dml_irm_obj = DoubleMLIRM(
-        dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2
+        dml_data_irm,
+        ml_g=Lasso(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.05),
+        n_folds=5,
+        score="ATE",
+        n_rep=2,
     )
     dml_irm_obj.fit()
     msg = "Only implemented for one repetition. Number of repetitions is 2."
@@ -1566,7 +1518,13 @@ def test_doubleml_exception_policytree():
 @pytest.mark.ci
 def test_double_ml_external_predictions():
     dml_irm_obj = DoubleMLIRM(
-        dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=2
+        dml_data_irm,
+        ml_g=Lasso(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.05),
+        n_folds=5,
+        score="ATE",
+        n_rep=2,
     )
 
     msg = "external_predictions must be a dictionary. ml_m of type <class 'str'> was passed."
@@ -1574,7 +1532,13 @@ def test_double_ml_external_predictions():
         dml_irm_obj.fit(external_predictions="ml_m")
 
     dml_irm_obj = DoubleMLIRM(
-        dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), trimming_threshold=0.05, n_folds=5, score="ATE", n_rep=1
+        dml_data_irm,
+        ml_g=Lasso(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.05),
+        n_folds=5,
+        score="ATE",
+        n_rep=1,
     )
 
     predictions = {"d": "test", "d_f": "test"}
diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index 3bec652f..e560bd41 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -66,9 +66,10 @@ class PSProcessor:
     Examples
     --------
     >>> import numpy as np
+    >>> from doubleml.utils import PSProcessor
     >>> ps = np.array([0.001, 0.2, 0.5, 0.8, 0.999])
     >>> treatment = np.array([0, 1, 1, 0, 1])
-    >>> processor = PropensityScoreProcessor(clipping_threshold=0.01)
+    >>> processor = PSProcessor(clipping_threshold=0.01)
     >>> adjusted = processor.adjust_ps(ps, treatment)
     >>> print(np.round(adjusted, 3))
     [0.01 0.2  0.5  0.8  0.99]
diff --git a/pytest.ini b/pytest.ini
index 3582830c..f7125f42 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -15,3 +15,4 @@ filterwarnings =
     ignore:.*Sensitivity analysis not implemented for callable scores.*:UserWarning
     ignore:.*Subsample has not common support. Results are based on adjusted propensities.*:UserWarning
     ignore:.*Treatment probability within bandwidth left from cutoff higher than right from cutoff.\nTreatment assignment might be based on the wrong side of the cutoff.*:UserWarning
+    ignore:.*The estimated nu2 for d is not positive.*:UserWarning

From 07e5c279c0dcd1c450c2f5c18318e360a297fcdd Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 12:32:21 +0100
Subject: [PATCH 26/38] add ps_processor to did binary rename trimming to
 clipping in did

---
 doubleml/did/did.py                           | 31 +++------
 doubleml/did/did_binary.py                    | 63 ++++++++++++-----
 doubleml/did/tests/_utils_did_manual.py       |  8 +--
 doubleml/did/tests/test_did.py                |  8 +--
 .../did/tests/test_did_binary_ps_processor.py | 67 +++++++++++++++++++
 .../did/tests/test_did_binary_vs_did_panel.py |  8 ++-
 .../test_did_binary_vs_did_two_period.py      |  9 +--
 7 files changed, 140 insertions(+), 54 deletions(-)
 create mode 100644 doubleml/did/tests/test_did_binary_ps_processor.py

diff --git a/doubleml/did/did.py b/doubleml/did/did.py
index 9307ae78..50270e60 100644
--- a/doubleml/did/did.py
+++ b/doubleml/did/did.py
@@ -7,9 +7,8 @@
 from doubleml.data.did_data import DoubleMLDIDData
 from doubleml.double_ml import DoubleML
 from doubleml.double_ml_score_mixins import LinearScoreMixin
-from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming
+from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score
 from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls
-from doubleml.utils._propensity_score import _trimm
 
 
 class DoubleMLDID(LinearScoreMixin, DoubleML):
@@ -50,12 +49,8 @@ class DoubleMLDID(LinearScoreMixin, DoubleML):
         Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
         Default is ``True``.
 
-    trimming_rule : str
-        A str (``'truncate'`` is the only choice) specifying the trimming approach.
-        Default is ``'truncate'``.
-
-    trimming_threshold : float
-        The threshold used for trimming.
+    clipping_threshold : float
+        The threshold used for clipping.
         Default is ``1e-2``.
 
     draw_sample_splitting : bool
@@ -89,8 +84,7 @@ def __init__(
         n_rep=1,
         score="observational",
         in_sample_normalization=True,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        clipping_threshold=1e-2,
         draw_sample_splitting=True,
     ):
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
@@ -142,9 +136,7 @@ def __init__(
             self._predict_method["ml_m"] = "predict_proba"
         self._initialize_ml_nuisance_params()
 
-        self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._clipping_threshold = clipping_threshold
         self._sensitivity_implemented = True
         self._external_predictions_implemented = True
 
@@ -156,18 +148,11 @@ def in_sample_normalization(self):
         return self._in_sample_normalization
 
     @property
-    def trimming_rule(self):
-        """
-        Specifies the used trimming rule.
-        """
-        return self._trimming_rule
-
-    @property
-    def trimming_threshold(self):
+    def clipping_threshold(self):
         """
-        Specifies the used trimming threshold.
+        Specifies the used clipping threshold.
         """
-        return self._trimming_threshold
+        return self._clipping_threshold
 
     def _initialize_ml_nuisance_params(self):
         if self.score == "observational":
@@ -271,7 +256,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                 )
             _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
             _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
-            m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
+            m_hat["preds"] = np.clip(m_hat["preds"], self.clipping_threshold, 1 - self.clipping_threshold)
 
         # nuisance estimates of the uncond. treatment prob.
         p_hat = np.full_like(d, d.mean(), dtype="float64")
diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py
index 6e3a95f2..3e94ff13 100644
--- a/doubleml/did/did_binary.py
+++ b/doubleml/did/did_binary.py
@@ -1,4 +1,5 @@
 import warnings
+from typing import Optional
 
 import numpy as np
 from sklearn.utils import check_X_y
@@ -19,14 +20,13 @@
 from doubleml.utils._checks import (
     _check_bool,
     _check_finite_predictions,
-    _check_is_propensity,
     _check_score,
-    _check_trimming,
 )
 from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls
-from doubleml.utils._propensity_score import _trimm
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor
 
 
+# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
 class DoubleMLDIDBinary(LinearScoreMixin, DoubleML):
     """Double machine learning for difference-in-differences models with panel data (binary setting in terms of group and time
      combinations).
@@ -83,13 +83,16 @@ class DoubleMLDIDBinary(LinearScoreMixin, DoubleML):
         Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
         Default is ``True``.
 
-    trimming_rule : str
-        A str (``'truncate'`` is the only choice) specifying the trimming approach.
-        Default is ``'truncate'``.
+    trimming_rule : str, optional, deprecated
+        (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach.
+        Use `ps_processor_config` instead. Will be removed in a future version.
 
-    trimming_threshold : float
-        The threshold used for trimming.
-        Default is ``1e-2``.
+    trimming_threshold : float, optional, deprecated
+        (DEPRECATED) The threshold used for trimming.
+        Use `ps_processor_config` instead. Will be removed in a future version.
+
+    ps_processor_config : PSProcessorConfig, optional
+        Configuration for propensity score processing (clipping, calibration, etc.).
 
     draw_sample_splitting : bool
         Indicates whether the sample splitting should be drawn during initialization of the object.
@@ -115,8 +118,9 @@ def __init__(
         n_rep=1,
         score="observational",
         in_sample_normalization=True,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        trimming_rule="truncate",  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        trimming_threshold=1e-2,  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        ps_processor_config: Optional[PSProcessorConfig] = None,
         draw_sample_splitting=True,
         print_periods=False,
     ):
@@ -232,9 +236,12 @@ def __init__(
             self._predict_method["ml_m"] = "predict_proba"
         self._initialize_ml_nuisance_params()
 
+        # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        self._ps_processor_config, self._ps_processor = init_ps_processor(
+            ps_processor_config, trimming_rule, trimming_threshold
+        )
         self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._trimming_threshold = self._ps_processor.clipping_threshold
 
         self._sensitivity_implemented = True
         self._external_predictions_implemented = True
@@ -321,19 +328,44 @@ def in_sample_normalization(self):
         """
         return self._in_sample_normalization
 
+    @property
+    def ps_processor_config(self):
+        """
+        Configuration for propensity score processing (clipping, calibration, etc.).
+        """
+        return self._ps_processor_config
+
+    @property
+    def ps_processor(self):
+        """
+        Propensity score processor.
+        """
+        return self._ps_processor
+
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_rule(self):
         """
         Specifies the used trimming rule.
         """
+        warnings.warn(
+            "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2
+        )
         return self._trimming_rule
 
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_threshold(self):
         """
         Specifies the used trimming threshold.
         """
-        return self._trimming_threshold
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._ps_processor.clipping_threshold
 
     @property
     def n_obs_subset(self):
@@ -500,8 +532,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                     return_models=return_models,
                 )
             _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
-            _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
-            m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
+            m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls, learner_name="ml_m")
 
         # nuisance estimates of the uncond. treatment prob.
         p_hat = np.full_like(d, d.mean(), dtype="float64")
diff --git a/doubleml/did/tests/_utils_did_manual.py b/doubleml/did/tests/_utils_did_manual.py
index b067e44d..f0713332 100644
--- a/doubleml/did/tests/_utils_did_manual.py
+++ b/doubleml/did/tests/_utils_did_manual.py
@@ -18,7 +18,7 @@ def fit_did(
     g0_params=None,
     g1_params=None,
     m_params=None,
-    trimming_threshold=1e-2,
+    clipping_threshold=1e-2,
 ):
     n_obs = len(y)
 
@@ -44,7 +44,7 @@ def fit_did(
             g0_params=g0_params,
             g1_params=g1_params,
             m_params=m_params,
-            trimming_threshold=trimming_threshold,
+            clipping_threshold=clipping_threshold,
         )
 
         all_g_hat0.append(g_hat0_list)
@@ -83,7 +83,7 @@ def fit_did(
 
 
 def fit_nuisance_did(
-    y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, trimming_threshold=1e-12
+    y, x, d, learner_g, learner_m, smpls, score, g0_params=None, g1_params=None, m_params=None, clipping_threshold=1e-12
 ):
     ml_g0 = clone(learner_g)
     ml_g1 = clone(learner_g)
@@ -101,7 +101,7 @@ def fit_nuisance_did(
     else:
         assert score == "observational"
         ml_m = clone(learner_m)
-        m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold)
+        m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold)
 
     p_hat_list = []
     for _ in smpls:
diff --git a/doubleml/did/tests/test_did.py b/doubleml/did/tests/test_did.py
index 79feb110..a476adb4 100644
--- a/doubleml/did/tests/test_did.py
+++ b/doubleml/did/tests/test_did.py
@@ -37,12 +37,12 @@ def in_sample_normalization(request):
 
 
 @pytest.fixture(scope="module", params=[0.1])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, trimming_threshold):
+def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization, clipping_threshold):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 499
@@ -68,7 +68,7 @@ def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization,
         score=score,
         in_sample_normalization=in_sample_normalization,
         draw_sample_splitting=False,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     # synchronize the sample splitting
@@ -85,7 +85,7 @@ def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization,
         all_smpls,
         score,
         in_sample_normalization,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     res_dict = {
diff --git a/doubleml/did/tests/test_did_binary_ps_processor.py b/doubleml/did/tests/test_did_binary_ps_processor.py
new file mode 100644
index 00000000..226e6932
--- /dev/null
+++ b/doubleml/did/tests/test_did_binary_ps_processor.py
@@ -0,0 +1,67 @@
+import numpy as np
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+from doubleml.did import DoubleMLDIDBinary
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
+
+
+@pytest.mark.ci
+@pytest.mark.parametrize(
+    "ps_config",
+    [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ],
+)
+def test_did_binary_ml_m_predictions_ps_processor(generate_data_did_binary, ps_config):
+    dml_data = generate_data_did_binary
+    np.random.seed(3141)
+    dml_did = DoubleMLDIDBinary(
+        obj_dml_data=dml_data,
+        g_value=1,
+        t_value_pre=0,
+        t_value_eval=1,
+        ml_g=LinearRegression(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=ps_config,
+        n_rep=1,
+        score="observational",
+    )
+    dml_did.fit(store_predictions=True)
+    ml_m_preds = dml_did.predictions["ml_m"][:, 0, 0]
+    # Just check that predictions are within [clipping_threshold, 1-clipping_threshold]
+    assert np.all(ml_m_preds >= ps_config.clipping_threshold)
+    assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold)
+
+
+@pytest.mark.ci
+def test_did_binary_ml_m_predictions_ps_processor_differences(generate_data_did_binary):
+    dml_data = generate_data_did_binary
+    np.random.seed(3141)
+    configs = [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ]
+    preds = []
+    for cfg in configs:
+        dml_did = DoubleMLDIDBinary(
+            obj_dml_data=dml_data,
+            g_value=1,
+            t_value_pre=0,
+            t_value_eval=1,
+            ml_g=LinearRegression(),
+            ml_m=LogisticRegression(),
+            ps_processor_config=cfg,
+            n_rep=1,
+            score="observational",
+        )
+        dml_did.fit(store_predictions=True)
+        preds.append(dml_did.predictions["ml_m"][:, 0, 0])
+    # Check that at least two configurations yield different predictions (element-wise)
+    diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))]
+    assert any(diffs)
diff --git a/doubleml/did/tests/test_did_binary_vs_did_panel.py b/doubleml/did/tests/test_did_binary_vs_did_panel.py
index 2eddccaf..9abee475 100644
--- a/doubleml/did/tests/test_did_binary_vs_did_panel.py
+++ b/doubleml/did/tests/test_did_binary_vs_did_panel.py
@@ -9,6 +9,7 @@
 import doubleml as dml
 from doubleml.did.datasets import make_did_CS2021
 from doubleml.did.utils._did_utils import _get_id_positions
+from doubleml.utils import PSProcessorConfig
 
 
 @pytest.fixture(
@@ -36,7 +37,7 @@ def in_sample_normalization(request):
 
 
 @pytest.fixture(scope="module", params=[0.1])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
@@ -46,7 +47,7 @@ def time_type(request):
 
 
 @pytest.fixture(scope="module")
-def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold):
+def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, clipping_threshold):
     n_obs = 500
     dpg = 1
 
@@ -65,7 +66,6 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza
         "n_folds": 3,
         "score": score,
         "in_sample_normalization": in_sample_normalization,
-        "trimming_threshold": trimming_threshold,
         "draw_sample_splitting": True,
     }
 
@@ -74,6 +74,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza
         g_value=dml_panel_data.g_values[0],
         t_value_pre=dml_panel_data.t_values[0],
         t_value_eval=dml_panel_data.t_values[1],
+        ps_processor_config=PSProcessorConfig(clipping_threshold=clipping_threshold),
         **dml_args,
     )
     dml_did_binary_obj.fit()
@@ -82,6 +83,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza
     dml_data = dml.data.DoubleMLDIDData(df_wide, y_col="y_diff", d_cols="G_indicator", x_cols=["Z1", "Z2", "Z3", "Z4"])
     dml_did_obj = dml.DoubleMLDID(
         dml_data,
+        clipping_threshold=clipping_threshold,
         **dml_args,
     )
 
diff --git a/doubleml/did/tests/test_did_binary_vs_did_two_period.py b/doubleml/did/tests/test_did_binary_vs_did_two_period.py
index 74575664..25416a20 100644
--- a/doubleml/did/tests/test_did_binary_vs_did_two_period.py
+++ b/doubleml/did/tests/test_did_binary_vs_did_two_period.py
@@ -37,12 +37,12 @@ def in_sample_normalization(request):
 
 
 @pytest.fixture(scope="module", params=[0.1])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_sample_normalization, trimming_threshold):
+def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_sample_normalization, clipping_threshold):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 499
@@ -68,7 +68,6 @@ def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_s
         "n_folds": n_folds,
         "score": score,
         "in_sample_normalization": in_sample_normalization,
-        "trimming_threshold": trimming_threshold,
         "draw_sample_splitting": False,
     }
 
@@ -77,11 +76,13 @@ def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_s
         g_value=1,
         t_value_pre=0,
         t_value_eval=1,
+        ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
         **dml_args,
     )
 
     dml_did_obj = dml.DoubleMLDID(
         obj_dml_data,
+        clipping_threshold=clipping_threshold,
         **dml_args,
     )
 
@@ -107,7 +108,7 @@ def dml_did_binary_vs_did_fixture(generate_data_did_binary, learner, score, in_s
         all_smpls,
         score,
         in_sample_normalization,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     res_dict = {

From 118bc8538acf6a27eca7d1b9fad98681ed6cc094 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 13:29:25 +0100
Subject: [PATCH 27/38] add ps_processor to did_cs_binary and clipping to
 did_cs

---
 doubleml/did/did_cs.py                        | 39 ++++--------
 doubleml/did/did_cs_binary.py                 | 63 ++++++++++++++-----
 doubleml/did/tests/_utils_did_cs_manual.py    |  8 +--
 doubleml/did/tests/test_did_cs.py             |  8 +--
 .../test_did_cs_binary_vs_did_cs_panel.py     |  7 ++-
 ...test_did_cs_binary_vs_did_cs_two_period.py |  9 +--
 6 files changed, 76 insertions(+), 58 deletions(-)

diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py
index 11c467b5..706cfd8e 100644
--- a/doubleml/did/did_cs.py
+++ b/doubleml/did/did_cs.py
@@ -7,9 +7,8 @@
 from doubleml.data.did_data import DoubleMLDIDData
 from doubleml.double_ml import DoubleML
 from doubleml.double_ml_score_mixins import LinearScoreMixin
-from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score, _check_trimming
+from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score
 from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d
-from doubleml.utils._propensity_score import _trimm
 
 
 class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
@@ -50,12 +49,8 @@ class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
         Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
         Default is ``True``.
 
-    trimming_rule : str
-        A str (``'truncate'`` is the only choice) specifying the trimming approach.
-        Default is ``'truncate'``.
-
-    trimming_threshold : float
-        The threshold used for trimming.
+    clipping_threshold : float
+        The threshold used for clipping.
         Default is ``1e-2``.
 
     draw_sample_splitting : bool
@@ -87,8 +82,7 @@ def __init__(
         n_rep=1,
         score="observational",
         in_sample_normalization=True,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        clipping_threshold=1e-2,
         draw_sample_splitting=True,
     ):
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
@@ -140,10 +134,7 @@ def __init__(
             self._predict_method["ml_m"] = "predict_proba"
         self._initialize_ml_nuisance_params()
 
-        self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
-
+        self._clipping_threshold = clipping_threshold
         self._sensitivity_implemented = True
         self._external_predictions_implemented = True
 
@@ -155,18 +146,11 @@ def in_sample_normalization(self):
         return self._in_sample_normalization
 
     @property
-    def trimming_rule(self):
+    def clipping_threshold(self):
         """
-        Specifies the used trimming rule.
+        Specifies the used clipping threshold.
         """
-        return self._trimming_rule
-
-    @property
-    def trimming_threshold(self):
-        """
-        Specifies the used trimming threshold.
-        """
-        return self._trimming_threshold
+        return self._clipping_threshold
 
     def _initialize_ml_nuisance_params(self):
         if self.score == "observational":
@@ -312,9 +296,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                     method=self._predict_method["ml_m"],
                     return_models=return_models,
                 )
-                _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
-                _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
-            m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
+
+            _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
+            _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
+            m_hat["preds"] = np.clip(m_hat["preds"], self.clipping_threshold, 1 - self.clipping_threshold)
 
         psi_a, psi_b = self._score_elements(
             y,
diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py
index 5375011d..c547ff40 100644
--- a/doubleml/did/did_cs_binary.py
+++ b/doubleml/did/did_cs_binary.py
@@ -1,4 +1,5 @@
 import warnings
+from typing import Optional
 
 import numpy as np
 from sklearn.utils import check_X_y
@@ -19,14 +20,13 @@
 from doubleml.utils._checks import (
     _check_bool,
     _check_finite_predictions,
-    _check_is_propensity,
     _check_score,
-    _check_trimming,
 )
 from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d
-from doubleml.utils._propensity_score import _trimm
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor
 
 
+# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
 class DoubleMLDIDCSBinary(LinearScoreMixin, DoubleML):
     """Double machine learning for difference-in-differences models with repeated cross sections
     (binary setting in terms of group and time combinations).
@@ -83,13 +83,16 @@ class DoubleMLDIDCSBinary(LinearScoreMixin, DoubleML):
         Indicates whether to use a slightly different normalization from Sant'Anna and Zhao (2020).
         Default is ``True``.
 
-    trimming_rule : str
-        A str (``'truncate'`` is the only choice) specifying the trimming approach.
-        Default is ``'truncate'``.
+    trimming_rule : str, optional, deprecated
+        (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach.
+        Use `ps_processor_config` instead. Will be removed in a future version.
 
-    trimming_threshold : float
-        The threshold used for trimming.
-        Default is ``1e-2``.
+    trimming_threshold : float, optional, deprecated
+        (DEPRECATED) The threshold used for trimming.
+        Use `ps_processor_config` instead. Will be removed in a future version.
+
+    ps_processor_config : PSProcessorConfig, optional
+        Configuration for propensity score processing (clipping, calibration, etc.).
 
     draw_sample_splitting : bool
         Indicates whether the sample splitting should be drawn during initialization of the object.
@@ -115,8 +118,9 @@ def __init__(
         n_rep=1,
         score="observational",
         in_sample_normalization=True,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        trimming_rule="truncate",  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        trimming_threshold=1e-2,  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        ps_processor_config: Optional[PSProcessorConfig] = None,
         draw_sample_splitting=True,
         print_periods=False,
     ):
@@ -221,9 +225,12 @@ def __init__(
             self._predict_method["ml_m"] = "predict_proba"
         self._initialize_ml_nuisance_params()
 
+        # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        self._ps_processor_config, self._ps_processor = init_ps_processor(
+            ps_processor_config, trimming_rule, trimming_threshold
+        )
         self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._trimming_threshold = self._ps_processor.clipping_threshold
 
         self._sensitivity_implemented = True
         self._external_predictions_implemented = True
@@ -312,19 +319,44 @@ def in_sample_normalization(self):
         """
         return self._in_sample_normalization
 
+    @property
+    def ps_processor_config(self):
+        """
+        Configuration for propensity score processing (clipping, calibration, etc.).
+        """
+        return self._ps_processor_config
+
+    @property
+    def ps_processor(self):
+        """
+        Propensity score processor.
+        """
+        return self._ps_processor
+
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_rule(self):
         """
         Specifies the used trimming rule.
         """
+        warnings.warn(
+            "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2
+        )
         return self._trimming_rule
 
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_threshold(self):
         """
         Specifies the used trimming threshold.
         """
-        return self._trimming_threshold
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._ps_processor.clipping_threshold
 
     @property
     def n_obs_subset(self):
@@ -480,8 +512,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                 )
 
             _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
-            _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
-            m_hat["preds"] = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold)
+            m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls, learner_name="ml_m")
 
         psi_a, psi_b = self._score_elements(
             y,
diff --git a/doubleml/did/tests/_utils_did_cs_manual.py b/doubleml/did/tests/_utils_did_cs_manual.py
index ce6f8870..0353efef 100644
--- a/doubleml/did/tests/_utils_did_cs_manual.py
+++ b/doubleml/did/tests/_utils_did_cs_manual.py
@@ -21,7 +21,7 @@ def fit_did_cs(
     g_d1_t0_params=None,
     g_d1_t1_params=None,
     m_params=None,
-    trimming_threshold=1e-2,
+    clipping_threshold=1e-2,
 ):
     n_obs = len(y)
 
@@ -54,7 +54,7 @@ def fit_did_cs(
                 g_d1_t0_params=g_d1_t0_params,
                 g_d1_t1_params=g_d1_t1_params,
                 m_params=m_params,
-                trimming_threshold=trimming_threshold,
+                clipping_threshold=clipping_threshold,
             )
         )
 
@@ -149,7 +149,7 @@ def fit_nuisance_did_cs(
     g_d1_t0_params=None,
     g_d1_t1_params=None,
     m_params=None,
-    trimming_threshold=1e-12,
+    clipping_threshold=1e-12,
 ):
     ml_g_d0_t0 = clone(learner_g)
     ml_g_d0_t1 = clone(learner_g)
@@ -169,7 +169,7 @@ def fit_nuisance_did_cs(
     g_hat_d1_t1_list = fit_predict(y, x, ml_g_d1_t1, g_d1_t1_params, smpls, train_cond=train_cond_d1_t1)
     if score == "observational":
         ml_m = clone(learner_m)
-        m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, trimming_threshold=trimming_threshold)
+        m_hat_list = fit_predict_proba(d, x, ml_m, m_params, smpls, clipping_threshold=clipping_threshold)
     else:
         assert score == "experimental"
         m_hat_list = list()
diff --git a/doubleml/did/tests/test_did_cs.py b/doubleml/did/tests/test_did_cs.py
index bc8e2da6..bc56def2 100644
--- a/doubleml/did/tests/test_did_cs.py
+++ b/doubleml/did/tests/test_did_cs.py
@@ -38,12 +38,12 @@ def in_sample_normalization(request):
 
 
 @pytest.fixture(scope="module", params=[0.1])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normalization, trimming_threshold):
+def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normalization, clipping_threshold):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 499
@@ -70,7 +70,7 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza
         score=score,
         in_sample_normalization=in_sample_normalization,
         draw_sample_splitting=False,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     # synchronize the sample splitting
@@ -88,7 +88,7 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza
         all_smpls,
         score,
         in_sample_normalization,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     res_dict = {
diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py
index da7db085..eaedba49 100644
--- a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py
+++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_panel.py
@@ -36,7 +36,7 @@ def in_sample_normalization(request):
 
 
 @pytest.fixture(scope="module", params=[0.1])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
@@ -46,7 +46,7 @@ def time_type(request):
 
 
 @pytest.fixture(scope="module")
-def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold):
+def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normalization, clipping_threshold):
     n_obs = 500
     dpg = 1
 
@@ -62,7 +62,6 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza
         "n_folds": 3,
         "score": score,
         "in_sample_normalization": in_sample_normalization,
-        "trimming_threshold": trimming_threshold,
         "draw_sample_splitting": True,
     }
 
@@ -71,6 +70,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza
         g_value=dml_panel_data.g_values[0],
         t_value_pre=dml_panel_data.t_values[0],
         t_value_eval=dml_panel_data.t_values[1],
+        ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
         **dml_args,
     )
     dml_did_binary_obj.fit()
@@ -81,6 +81,7 @@ def dml_did_binary_vs_did_fixture(time_type, learner, score, in_sample_normaliza
     )
     dml_did_obj = dml.DoubleMLDIDCS(
         dml_data,
+        clipping_threshold=clipping_threshold,
         **dml_args,
     )
 
diff --git a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py
index b9e267ce..a9ba726e 100644
--- a/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py
+++ b/doubleml/did/tests/test_did_cs_binary_vs_did_cs_two_period.py
@@ -38,12 +38,12 @@ def in_sample_normalization(request):
 
 
 @pytest.fixture(scope="module", params=[0.1])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
 @pytest.fixture(scope="module")
-def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score, in_sample_normalization, trimming_threshold):
+def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score, in_sample_normalization, clipping_threshold):
     boot_methods = ["normal"]
     n_folds = 2
     n_rep_boot = 499
@@ -70,7 +70,6 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score
         "n_folds": n_folds,
         "score": score,
         "in_sample_normalization": in_sample_normalization,
-        "trimming_threshold": trimming_threshold,
         "draw_sample_splitting": False,
     }
 
@@ -79,11 +78,13 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score
         g_value=1,
         t_value_pre=0,
         t_value_eval=1,
+        ps_processor_config=dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
         **dml_args,
     )
 
     dml_did_obj = dml.DoubleMLDIDCS(
         obj_dml_data,
+        clipping_threshold=clipping_threshold,
         **dml_args,
     )
 
@@ -111,7 +112,7 @@ def dml_did_cs_binary_vs_did_cs_fixture(generate_data_did_binary, learner, score
         all_smpls,
         score,
         in_sample_normalization,
-        trimming_threshold=trimming_threshold,
+        clipping_threshold=clipping_threshold,
     )
 
     res_dict = {

From 5fbfa26534fa547bbacb20c6da53e678f0f74b11 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 13:30:01 +0100
Subject: [PATCH 28/38] update clipping terminology and enhance ps_processor
 usage in DID classes

---
 doubleml/did/did.py        | 3 ++-
 doubleml/did/did_binary.py | 1 +
 doubleml/irm/irm.py        | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/doubleml/did/did.py b/doubleml/did/did.py
index 50270e60..5ca19a93 100644
--- a/doubleml/did/did.py
+++ b/doubleml/did/did.py
@@ -150,7 +150,7 @@ def in_sample_normalization(self):
     @property
     def clipping_threshold(self):
         """
-        Specifies the used trimming threshold.
+        Specifies the used clipping threshold.
         """
         return self._clipping_threshold
 
@@ -254,6 +254,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                     method=self._predict_method["ml_m"],
                     return_models=return_models,
                 )
+
             _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
             _check_is_propensity(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls, eps=1e-12)
             m_hat["preds"] = np.clip(m_hat["preds"], self.clipping_threshold, 1 - self.clipping_threshold)
diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py
index 3e94ff13..d92ebf19 100644
--- a/doubleml/did/did_binary.py
+++ b/doubleml/did/did_binary.py
@@ -531,6 +531,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
                     method=self._predict_method["ml_m"],
                     return_models=return_models,
                 )
+
             _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
             m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls, learner_name="ml_m")
 
diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
index e880c48e..01d288bd 100644
--- a/doubleml/irm/irm.py
+++ b/doubleml/irm/irm.py
@@ -362,7 +362,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa
             )
             _check_finite_predictions(m_hat["preds"], self._learner["ml_m"], "ml_m", smpls)
 
-        m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls)
+        m_hat["preds"] = self._ps_processor.adjust_ps(m_hat["preds"], d, cv=smpls, learner_name="ml_m")
 
         psi_a, psi_b = self._score_elements(y, d, g_hat0["preds"], g_hat1["preds"], m_hat["preds"], smpls)
         psi_elements = {"psi_a": psi_a, "psi_b": psi_b}

From 474b81fe70f77a78b8171647290861fa61ad23ef Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 13:34:34 +0100
Subject: [PATCH 29/38] add ps_processor test for did_cs_binary

---
 .../tests/test_did_cs_binary_ps_processor.py  | 67 +++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 doubleml/did/tests/test_did_cs_binary_ps_processor.py

diff --git a/doubleml/did/tests/test_did_cs_binary_ps_processor.py b/doubleml/did/tests/test_did_cs_binary_ps_processor.py
new file mode 100644
index 00000000..1594ee93
--- /dev/null
+++ b/doubleml/did/tests/test_did_cs_binary_ps_processor.py
@@ -0,0 +1,67 @@
+import numpy as np
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+from doubleml.did import DoubleMLDIDCSBinary
+from doubleml.utils.propensity_score_processing import PSProcessorConfig
+
+
+@pytest.mark.ci
+@pytest.mark.parametrize(
+    "ps_config",
+    [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ],
+)
+def test_did_cs_binary_ml_m_predictions_ps_processor(generate_data_did_binary, ps_config):
+    dml_data = generate_data_did_binary
+    np.random.seed(3141)
+    dml_did = DoubleMLDIDCSBinary(
+        obj_dml_data=dml_data,
+        g_value=1,
+        t_value_pre=0,
+        t_value_eval=1,
+        ml_g=LinearRegression(),
+        ml_m=LogisticRegression(),
+        ps_processor_config=ps_config,
+        n_rep=1,
+        score="observational",
+    )
+    dml_did.fit(store_predictions=True)
+    ml_m_preds = dml_did.predictions["ml_m"][:, 0, 0]
+    # Just check that predictions are within [clipping_threshold, 1-clipping_threshold]
+    assert np.all(ml_m_preds >= ps_config.clipping_threshold)
+    assert np.all(ml_m_preds <= 1 - ps_config.clipping_threshold)
+
+
+@pytest.mark.ci
+def test_did_cs_binary_ml_m_predictions_ps_processor_differences(generate_data_did_binary):
+    dml_data = generate_data_did_binary
+    np.random.seed(3141)
+    configs = [
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=0.05, calibration_method=None, cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=False),
+        PSProcessorConfig(clipping_threshold=1e-2, calibration_method="isotonic", cv_calibration=True),
+    ]
+    preds = []
+    for cfg in configs:
+        dml_did = DoubleMLDIDCSBinary(
+            obj_dml_data=dml_data,
+            g_value=1,
+            t_value_pre=0,
+            t_value_eval=1,
+            ml_g=LinearRegression(),
+            ml_m=LogisticRegression(),
+            ps_processor_config=cfg,
+            n_rep=1,
+            score="observational",
+        )
+        dml_did.fit(store_predictions=True)
+        preds.append(dml_did.predictions["ml_m"][:, 0, 0])
+    # Check that at least two configurations yield different predictions (element-wise)
+    diffs = [not np.allclose(preds[i], preds[j], atol=1e-6) for i in range(len(preds)) for j in range(i + 1, len(preds))]
+    assert any(diffs)

From adfe40d91eac15f433af0bb87130281b5b0ce33c Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 14:00:22 +0100
Subject: [PATCH 30/38] add ps_processor to did_multi

---
 doubleml/did/did_multi.py                     | 63 ++++++++++++++-----
 .../test_did_multi_aggregation_single_gt.py   |  6 +-
 .../did/tests/test_did_multi_exceptions.py    | 16 -----
 .../did/tests/test_did_multi_vs_binary.py     |  6 +-
 .../did/tests/test_did_multi_vs_cs_binary.py  |  6 +-
 5 files changed, 57 insertions(+), 40 deletions(-)

diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py
index 66e7f837..a9e9e790 100644
--- a/doubleml/did/did_multi.py
+++ b/doubleml/did/did_multi.py
@@ -1,5 +1,6 @@
 import copy
 import warnings
+from typing import Optional
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -33,11 +34,13 @@
 from doubleml.did.utils._plot import add_jitter
 from doubleml.double_ml import DoubleML
 from doubleml.double_ml_framework import concat
-from doubleml.utils._checks import _check_bool, _check_score, _check_trimming
+from doubleml.utils._checks import _check_bool, _check_score
 from doubleml.utils._descriptive import generate_summary
 from doubleml.utils.gain_statistics import gain_statistics
+from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor
 
 
+# TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
 class DoubleMLDIDMulti:
     """Double machine learning for multi-period difference-in-differences models.
 
@@ -96,13 +99,16 @@ class DoubleMLDIDMulti:
         A str (``'truncate'`` is the only choice) specifying the trimming approach.
         Default is ``'truncate'``.
 
-    trimming_threshold : float
-        The threshold used for trimming.
-        Default is ``1e-2``.
+    trimming_rule : str, optional, deprecated
+        (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach.
+        Use `ps_processor_config` instead. Will be removed in a future version.
 
-    draw_sample_splitting : bool
-        Indicates whether the sample splitting should be drawn during initialization.
-        Default is ``True``.
+    trimming_threshold : float, optional, deprecated
+        (DEPRECATED) The threshold used for trimming.
+        Use `ps_processor_config` instead. Will be removed in a future version.
+
+    ps_processor_config : PSProcessorConfig, optional
+        Configuration for propensity score processing (clipping, calibration, etc.).
 
     print_periods : bool
         Indicates whether to print information about the evaluated periods.
@@ -165,8 +171,9 @@ def __init__(
         score="observational",
         panel=True,
         in_sample_normalization=True,
-        trimming_rule="truncate",
-        trimming_threshold=1e-2,
+        trimming_rule="truncate",  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        trimming_threshold=1e-2,  # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        ps_processor_config: Optional[PSProcessorConfig] = None,
         draw_sample_splitting=True,
         print_periods=False,
     ):
@@ -214,10 +221,12 @@ def __init__(
         # initialize framework which is constructed after the fit method is called
         self._framework = None
 
-        # initialize and check trimming
+        # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
+        self._ps_processor_config, self._ps_processor = init_ps_processor(
+            ps_processor_config, trimming_rule, trimming_threshold
+        )
         self._trimming_rule = trimming_rule
-        self._trimming_threshold = trimming_threshold
-        _check_trimming(self._trimming_rule, self._trimming_threshold)
+        self._trimming_threshold = self._ps_processor.clipping_threshold
 
         ml_g_is_classifier = DoubleML._check_learner(ml_g, "ml_g", regressor=True, classifier=True)
         if self.score == "observational":
@@ -378,19 +387,44 @@ def in_sample_normalization(self):
         """
         return self._in_sample_normalization
 
+    @property
+    def ps_processor_config(self):
+        """
+        Configuration for propensity score processing (clipping, calibration, etc.).
+        """
+        return self._ps_processor_config
+
+    @property
+    def ps_processor(self):
+        """
+        Propensity score processor.
+        """
+        return self._ps_processor
+
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_rule(self):
         """
         Specifies the used trimming rule.
         """
+        warnings.warn(
+            "'trimming_rule' is deprecated and will be removed in a future version. ", DeprecationWarning, stacklevel=2
+        )
         return self._trimming_rule
 
+    # TODO [v0.12.0]: Remove support for 'trimming_rule' and 'trimming_threshold' (deprecated).
     @property
     def trimming_threshold(self):
         """
         Specifies the used trimming threshold.
         """
-        return self._trimming_threshold
+        warnings.warn(
+            "'trimming_threshold' is deprecated and will be removed in a future version. "
+            "Use 'ps_processor_config.clipping_threshold' or 'ps_processor.clipping_threshold' instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self._ps_processor.clipping_threshold
 
     @property
     def n_folds(self):
@@ -1355,8 +1389,7 @@ def _initialize_models(self):
             "score": self.score,
             "n_folds": self.n_folds,
             "n_rep": self.n_rep,
-            "trimming_rule": self.trimming_rule,
-            "trimming_threshold": self.trimming_threshold,
+            "ps_processor_config": self.ps_processor_config,
             "in_sample_normalization": self.in_sample_normalization,
             "draw_sample_splitting": True,
             "print_periods": self._print_periods,
diff --git a/doubleml/did/tests/test_did_multi_aggregation_single_gt.py b/doubleml/did/tests/test_did_multi_aggregation_single_gt.py
index a6ffcd49..ede8ed74 100644
--- a/doubleml/did/tests/test_did_multi_aggregation_single_gt.py
+++ b/doubleml/did/tests/test_did_multi_aggregation_single_gt.py
@@ -38,7 +38,7 @@ def in_sample_normalization(request):
 
 
 @pytest.fixture(scope="module", params=[0.1])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
@@ -48,7 +48,7 @@ def time_type(request):
 
 
 @pytest.fixture(scope="module")
-def dml_single_gt_aggregation(aggregation, time_type, learner, score, panel, in_sample_normalization, trimming_threshold):
+def dml_single_gt_aggregation(aggregation, time_type, learner, score, panel, in_sample_normalization, clipping_threshold):
     n_obs = 500
     dpg = 1
 
@@ -63,7 +63,7 @@ def dml_single_gt_aggregation(aggregation, time_type, learner, score, panel, in_
         "score": score,
         "panel": panel,
         "in_sample_normalization": in_sample_normalization,
-        "trimming_threshold": trimming_threshold,
+        "ps_processor_config": dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
         "draw_sample_splitting": True,
     }
     gt_combination = [(dml_panel_data.g_values[0], dml_panel_data.t_values[0], dml_panel_data.t_values[3])]
diff --git a/doubleml/did/tests/test_did_multi_exceptions.py b/doubleml/did/tests/test_did_multi_exceptions.py
index c53d79d3..a9e432a5 100644
--- a/doubleml/did/tests/test_did_multi_exceptions.py
+++ b/doubleml/did/tests/test_did_multi_exceptions.py
@@ -62,22 +62,6 @@ def test_input():
         invalid_arguments = {"score": "test"}
         _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments))
 
-    # trimming
-    msg = "Invalid trimming_rule discard. Valid trimming_rule truncate."
-    with pytest.raises(ValueError, match=msg):
-        invalid_arguments = {"trimming_rule": "discard"}
-        _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments))
-
-    msg = "trimming_threshold has to be a float. Object of type <class 'str'> passed."
-    with pytest.raises(TypeError, match=msg):
-        invalid_arguments = {"trimming_threshold": "test"}
-        _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments))
-
-    msg = "Invalid trimming_threshold 0.6. trimming_threshold has to be between 0 and 0.5."
-    with pytest.raises(ValueError, match=msg):
-        invalid_arguments = {"trimming_threshold": 0.6}
-        _ = dml.did.DoubleMLDIDMulti(**(valid_arguments | invalid_arguments))
-
 
 @pytest.mark.ci
 def test_exception_learners():
diff --git a/doubleml/did/tests/test_did_multi_vs_binary.py b/doubleml/did/tests/test_did_multi_vs_binary.py
index 15d3fd0c..86cb2ae4 100644
--- a/doubleml/did/tests/test_did_multi_vs_binary.py
+++ b/doubleml/did/tests/test_did_multi_vs_binary.py
@@ -35,7 +35,7 @@ def in_sample_normalization(request):
 
 
 @pytest.fixture(scope="module", params=[0.1])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
@@ -45,7 +45,7 @@ def time_type(request):
 
 
 @pytest.fixture(scope="module")
-def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_normalization, trimming_threshold):
+def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_normalization, clipping_threshold):
     n_obs = 500
     dpg = 1
     boot_methods = ["normal"]
@@ -61,7 +61,7 @@ def dml_did_binary_vs_did_multi_fixture(time_type, learner, score, in_sample_nor
         "n_folds": 3,
         "score": score,
         "in_sample_normalization": in_sample_normalization,
-        "trimming_threshold": trimming_threshold,
+        "ps_processor_config": dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
         "draw_sample_splitting": True,
     }
     gt_combination = [(dml_panel_data.g_values[0], dml_panel_data.t_values[0], dml_panel_data.t_values[1])]
diff --git a/doubleml/did/tests/test_did_multi_vs_cs_binary.py b/doubleml/did/tests/test_did_multi_vs_cs_binary.py
index 7af8d74d..a658aeee 100644
--- a/doubleml/did/tests/test_did_multi_vs_cs_binary.py
+++ b/doubleml/did/tests/test_did_multi_vs_cs_binary.py
@@ -35,7 +35,7 @@ def in_sample_normalization(request):
 
 
 @pytest.fixture(scope="module", params=[0.1])
-def trimming_threshold(request):
+def clipping_threshold(request):
     return request.param
 
 
@@ -50,7 +50,7 @@ def lambda_t(request):
 
 
 @pytest.fixture(scope="module")
-def dml_did_binary_vs_did_multi_fixture(time_type, lambda_t, learner, score, in_sample_normalization, trimming_threshold):
+def dml_did_binary_vs_did_multi_fixture(time_type, lambda_t, learner, score, in_sample_normalization, clipping_threshold):
     n_obs = 500
     dpg = 1
     boot_methods = ["normal"]
@@ -66,7 +66,7 @@ def dml_did_binary_vs_did_multi_fixture(time_type, lambda_t, learner, score, in_
         "n_folds": 3,
         "score": score,
         "in_sample_normalization": in_sample_normalization,
-        "trimming_threshold": trimming_threshold,
+        "ps_processor_config": dml.utils.PSProcessorConfig(clipping_threshold=clipping_threshold),
         "draw_sample_splitting": True,
     }
     gt_combination = [(dml_panel_data.g_values[0], dml_panel_data.t_values[0], dml_panel_data.t_values[1])]

From ca07266ee83f6e5795bae99c47d88b893e9717a5 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 14:07:46 +0100
Subject: [PATCH 31/38] update defaults tests for PSProcessor and
 clipping_threshold

---
 doubleml/tests/test_model_defaults.py | 44 +++++++++++++--------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/doubleml/tests/test_model_defaults.py b/doubleml/tests/test_model_defaults.py
index b04117eb..14650f39 100644
--- a/doubleml/tests/test_model_defaults.py
+++ b/doubleml/tests/test_model_defaults.py
@@ -92,8 +92,8 @@ def test_irm_defaults():
     _fit_bootstrap(dml_irm)
     _assert_resampling_default_settings(dml_irm)
     assert dml_irm.score == "ATE"
-    assert dml_irm.trimming_rule == "truncate"
-    assert dml_irm.trimming_threshold == 1e-2
+    assert isinstance(dml_irm.ps_processor_config, dml.utils.PSProcessorConfig)
+    assert isinstance(dml_irm.ps_processor, dml.utils.PSProcessor)
     assert not dml_irm.normalize_ipw
     assert set(dml_irm.weights.keys()) == set(["weights"])
     assert np.array_equal(dml_irm.weights["weights"], np.ones((dml_irm._dml_data.n_obs,)))
@@ -106,8 +106,8 @@ def test_iivm_defaults():
     _assert_resampling_default_settings(dml_iivm)
     assert dml_iivm.score == "LATE"
     assert dml_iivm.subgroups == {"always_takers": True, "never_takers": True}
-    assert dml_iivm.trimming_rule == "truncate"
-    assert dml_iivm.trimming_threshold == 1e-2
+    assert isinstance(dml_iivm.ps_processor_config, dml.utils.PSProcessorConfig)
+    assert isinstance(dml_iivm.ps_processor, dml.utils.PSProcessor)
     assert not dml_iivm.normalize_ipw
 
 
@@ -119,8 +119,8 @@ def test_cvar_defaults():
     assert dml_cvar.quantile == 0.5
     assert dml_cvar.treatment == 1
     assert dml_cvar.score == "CVaR"
-    assert dml_cvar.trimming_rule == "truncate"
-    assert dml_cvar.trimming_threshold == 1e-2
+    assert isinstance(dml_cvar.ps_processor_config, dml.utils.PSProcessorConfig)
+    assert isinstance(dml_cvar.ps_processor, dml.utils.PSProcessor)
 
 
 @pytest.mark.ci
@@ -131,8 +131,8 @@ def test_pq_defaults():
     assert dml_pq.quantile == 0.5
     assert dml_pq.treatment == 1
     assert dml_pq.score == "PQ"
-    assert dml_pq.trimming_rule == "truncate"
-    assert dml_pq.trimming_threshold == 1e-2
+    assert isinstance(dml_pq.ps_processor_config, dml.utils.PSProcessorConfig)
+    assert isinstance(dml_pq.ps_processor, dml.utils.PSProcessor)
     assert dml_pq.normalize_ipw
 
 
@@ -144,8 +144,8 @@ def test_lpq_defaults():
     assert dml_lpq.quantile == 0.5
     assert dml_lpq.treatment == 1
     assert dml_lpq.score == "LPQ"
-    assert dml_lpq.trimming_rule == "truncate"
-    assert dml_lpq.trimming_threshold == 1e-2
+    assert isinstance(dml_lpq.ps_processor_config, dml.utils.PSProcessorConfig)
+    assert isinstance(dml_lpq.ps_processor, dml.utils.PSProcessor)
     assert dml_lpq.normalize_ipw
 
 
@@ -159,8 +159,8 @@ def test_qte_defaults():
     # not fix since its a differen object added in future versions _assert_resampling_default_settings(dml_qte)
     assert dml_qte.quantiles == 0.5
     assert dml_qte.score == "PQ"
-    assert dml_qte.trimming_rule == "truncate"
-    assert dml_qte.trimming_threshold == 1e-2
+    assert isinstance(dml_qte.ps_processor_config, dml.utils.PSProcessorConfig)
+    assert isinstance(dml_qte.ps_processor, dml.utils.PSProcessor)
     assert dml_qte.normalize_ipw
 
 
@@ -171,8 +171,7 @@ def test_did_defaults():
     _assert_resampling_default_settings(dml_did)
     assert dml_did.score == "observational"
     assert dml_did.in_sample_normalization
-    assert dml_did.trimming_rule == "truncate"
-    assert dml_did.trimming_threshold == 1e-2
+    assert dml_did.clipping_threshold == 1e-2
 
 
 @pytest.mark.ci
@@ -182,8 +181,7 @@ def test_did_cs_defaults():
     _assert_resampling_default_settings(dml_did_cs)
     assert dml_did.score == "observational"
     assert dml_did_cs.in_sample_normalization
-    assert dml_did_cs.trimming_rule == "truncate"
-    assert dml_did_cs.trimming_threshold == 1e-2
+    assert dml_did_cs.clipping_threshold == 1e-2
 
 
 @pytest.mark.ci
@@ -192,8 +190,8 @@ def test_ssm_defaults():
     _fit_bootstrap(dml_ssm)
     _assert_resampling_default_settings(dml_ssm)
     assert dml_ssm.score == "missing-at-random"
-    assert dml_ssm.trimming_rule == "truncate"
-    assert dml_ssm.trimming_threshold == 1e-2
+    assert isinstance(dml_ssm.ps_processor_config, dml.utils.PSProcessorConfig)
+    assert isinstance(dml_ssm.ps_processor, dml.utils.PSProcessor)
     assert not dml_ssm.normalize_ipw
 
 
@@ -203,8 +201,8 @@ def test_apo_defaults():
     _fit_bootstrap(dml_apo)
     _assert_resampling_default_settings(dml_apo)
     assert dml_apo.score == "APO"
-    assert dml_apo.trimming_rule == "truncate"
-    assert dml_apo.trimming_threshold == 1e-2
+    assert isinstance(dml_apo.ps_processor_config, dml.utils.PSProcessorConfig)
+    assert isinstance(dml_apo.ps_processor, dml.utils.PSProcessor)
     assert not dml_apo.normalize_ipw
     assert set(dml_apo.weights.keys()) == set(["weights"])
     assert np.array_equal(dml_apo.weights["weights"], np.ones((dml_apo._dml_data.n_obs,)))
@@ -216,10 +214,10 @@ def test_apos_defaults():
     assert dml_apos.boot_method is None
     assert dml_apos.framework is None
     assert dml_apos.boot_t_stat is None
-    _fit_bootstrap(dml_qte)
+    _fit_bootstrap(dml_apos)
     assert dml_apos.score == "APO"
-    assert dml_apos.trimming_rule == "truncate"
-    assert dml_apos.trimming_threshold == 1e-2
+    assert isinstance(dml_apos.ps_processor_config, dml.utils.PSProcessorConfig)
+    assert isinstance(dml_apos.ps_processor, dml.utils.PSProcessor)
     assert not dml_apos.normalize_ipw
     assert np.array_equal(dml_apos.weights, np.ones((dml_apos._dml_data.n_obs,)))
 

From 5d13b0a6e0e441f7865aa4a82be6c0baa0a42692 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 14:21:36 +0100
Subject: [PATCH 32/38] add deprecation warnings for DoubleMLDID and
 DoubleMLDIDCS and DoubleMLDIDData

---
 doubleml/data/__init__.py                     |  1 +
 doubleml/data/did_data.py                     | 10 ++++++-
 doubleml/did/did.py                           |  6 ++++
 doubleml/did/did_cs.py                        |  6 ++++
 .../tests/test_did_deprecation_warnings.py    | 29 +++++++++++++++++++
 5 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 doubleml/did/tests/test_did_deprecation_warnings.py

diff --git a/doubleml/data/__init__.py b/doubleml/data/__init__.py
index 8343c228..73fd71d3 100644
--- a/doubleml/data/__init__.py
+++ b/doubleml/data/__init__.py
@@ -11,6 +11,7 @@
 from .ssm_data import DoubleMLSSMData
 
 
+# TODO: Remove DoubleMLClusterData with version 0.12.0
 class DoubleMLClusterData(DoubleMLData):
     """
     Backwards compatibility wrapper for DoubleMLData with cluster_cols.
diff --git a/doubleml/data/did_data.py b/doubleml/data/did_data.py
index 57d486a3..30cc2900 100644
--- a/doubleml/data/did_data.py
+++ b/doubleml/data/did_data.py
@@ -1,4 +1,5 @@
 import io
+import warnings
 
 import pandas as pd
 from sklearn.utils import assert_all_finite
@@ -7,6 +8,7 @@
 from doubleml.data.base_data import DoubleMLData
 
 
+# TODO: Remove DoubleMLDIDData with version 0.12.0
 class DoubleMLDIDData(DoubleMLData):
     """Double machine learning data-backend for Difference-in-Differences models.
 
@@ -81,7 +83,13 @@ def __init__(
         use_other_treat_as_covariate=True,
         force_all_x_finite=True,
         force_all_d_finite=True,
-    ):  # Initialize _t_col to None first to avoid AttributeError during parent init
+    ):
+        warnings.warn(
+            "DoubleMLDIDData is deprecated and will be removed with version 0.12.0. " "Use DoubleMLPanelData instead.",
+            FutureWarning,
+            stacklevel=2,
+        )
+        # Initialize _t_col to None first to avoid AttributeError during parent init
         self._t_col = None
 
         # Store whether x_cols was originally None to reset it later
diff --git a/doubleml/did/did.py b/doubleml/did/did.py
index 5ca19a93..87eb4aaa 100644
--- a/doubleml/did/did.py
+++ b/doubleml/did/did.py
@@ -11,6 +11,7 @@
 from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls
 
 
+# TODO: Remove DoubleMLDID with version 0.12.0
 class DoubleMLDID(LinearScoreMixin, DoubleML):
     """Double machine learning for difference-in-differences models with panel data (two time periods).
 
@@ -87,6 +88,11 @@ def __init__(
         clipping_threshold=1e-2,
         draw_sample_splitting=True,
     ):
+        warnings.warn(
+            "DoubleMLDID is deprecated and will be removed with version 0.12.0. " "Please use DoubleMLDIDBinary instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
 
         self._check_data(self._dml_data)
diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py
index 706cfd8e..da833fd5 100644
--- a/doubleml/did/did_cs.py
+++ b/doubleml/did/did_cs.py
@@ -11,6 +11,7 @@
 from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d
 
 
+# TODO: Remove DoubleMLDIDCS with version 0.12.0
 class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
     """Double machine learning for difference-in-difference with repeated cross-sections.
 
@@ -85,6 +86,11 @@ def __init__(
         clipping_threshold=1e-2,
         draw_sample_splitting=True,
     ):
+        warnings.warn(
+            "DoubleMLDIDCS is deprecated and will be removed with version 0.12.0. " "Please use DoubleMLDIDCSBinary instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         super().__init__(obj_dml_data, n_folds, n_rep, score, draw_sample_splitting)
 
         self._check_data(self._dml_data)
diff --git a/doubleml/did/tests/test_did_deprecation_warnings.py b/doubleml/did/tests/test_did_deprecation_warnings.py
new file mode 100644
index 00000000..75158fdb
--- /dev/null
+++ b/doubleml/did/tests/test_did_deprecation_warnings.py
@@ -0,0 +1,29 @@
+import pytest
+from sklearn.linear_model import LinearRegression, LogisticRegression
+
+from doubleml.data.did_data import DoubleMLDIDData
+from doubleml.did.did import DoubleMLDID
+from doubleml.did.did_cs import DoubleMLDIDCS
+
+
+@pytest.mark.ci
+def test_deprecation_DoubleMLDIDData(generate_data_did):
+    (x, y, d, _) = generate_data_did
+    with pytest.warns(FutureWarning, match="DoubleMLDIDData is deprecated"):
+        _ = DoubleMLDIDData.from_arrays(x, y, d)
+
+
+@pytest.mark.ci
+def test_deprecation_DoubleMLDID(generate_data_did):
+    (x, y, d, _) = generate_data_did
+    obj_dml_data = DoubleMLDIDData.from_arrays(x, y, d)
+    with pytest.warns(DeprecationWarning, match="DoubleMLDID is deprecated"):
+        _ = DoubleMLDID(obj_dml_data, ml_g=LinearRegression(), ml_m=LogisticRegression())
+
+
+@pytest.mark.ci
+def test_deprecation_DoubleMLDIDCS(generate_data_did_cs):
+    (x, y, d, t) = generate_data_did_cs
+    obj_dml_data = DoubleMLDIDData.from_arrays(x, y, d, t=t)
+    with pytest.warns(DeprecationWarning, match="DoubleMLDIDCS is deprecated"):
+        _ = DoubleMLDIDCS(obj_dml_data, ml_g=LinearRegression(), ml_m=LogisticRegression())

From c69af49811258896d3afed16a36fe2a68e945600 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 16:46:39 +0100
Subject: [PATCH 33/38] update type hint for cv parameter in PSProcessor to use
 Union[int, list]

---
 doubleml/utils/propensity_score_processing.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index e560bd41..c5a50c88 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -1,6 +1,6 @@
 import warnings
 from dataclasses import dataclass
-from typing import Optional
+from typing import Optional, Union
 
 import numpy as np
 from sklearn.isotonic import IsotonicRegression
@@ -131,7 +131,7 @@ def adjust_ps(
         self,
         propensity_scores: np.ndarray,
         treatment: np.ndarray,
-        cv: Optional[int | list] = None,
+        cv: Optional[Union[int, list]] = None,
         learner_name: Optional[str] = None,
     ) -> np.ndarray:
         """

From f7dcc983ad71d42b5508d7f6250dfd129955e74b Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Mon, 27 Oct 2025 17:14:31 +0100
Subject: [PATCH 34/38] update type hint for cv parameter in PSProcessor to use
 Union[int, list]

---
 doubleml/utils/propensity_score_processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index c5a50c88..c04c8f18 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -171,7 +171,7 @@ def _apply_calibration(
         self,
         propensity_scores: np.ndarray,
         treatment: np.ndarray,
-        cv: Optional[int | list] = None,
+        cv: Optional[Union[int, list]] = None,
     ) -> np.ndarray:
         """Apply calibration method to propensity scores if specified."""
         if self.calibration_method is None:

From 915ec4b24649f70ad1f0223b0ef7bfc45185dd4a Mon Sep 17 00:00:00 2001
From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Tue, 4 Nov 2025 11:01:52 +0100
Subject: [PATCH 35/38] correct docstring

---
 doubleml/utils/propensity_score_processing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index c04c8f18..37b42634 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -135,7 +135,7 @@ def adjust_ps(
         learner_name: Optional[str] = None,
     ) -> np.ndarray:
         """
-        Adjust propensity scores via validation, clipping, and warnings.
+        Adjust propensity scores via calibration and clipping.
 
         Parameters
         ----------

From 8a79909c3c3e090ed6a26c9e1566797431423257 Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Nov 2025 01:13:06 +0100
Subject: [PATCH 36/38] add docstring for data class

---
 doubleml/utils/propensity_score_processing.py | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index 37b42634..fdc6f6f4 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -10,6 +10,45 @@
 
 @dataclass
 class PSProcessorConfig:
+    """
+    Configuration for propensity score processing.
+
+    This dataclass holds the configuration parameters used by PSProcessor
+    for propensity score calibration, clipping, and validation.
+
+    Parameters
+    ----------
+    clipping_threshold : float, default=1e-2
+        Minimum and maximum bound for propensity scores after clipping.
+        Must be between 0 and 0.5.
+
+    extreme_threshold : float, default=1e-12
+        Threshold below which propensity scores are considered extreme.
+        Propensity scores are clipped based on this value when scores are too close to 0 or 1
+        to avoid numerical instability.
+        Must be between 0 and 0.5.
+
+    calibration_method : {'isotonic', None}, optional
+        If provided, applies the specified calibration method to
+        the propensity scores before clipping. Currently supports:
+        - 'isotonic': Isotonic regression calibration
+        - None: No calibration applied
+
+    cv_calibration : bool, default=False
+        Whether to use cross-validation for calibration.
+        Only applies if a calibration method is specified.
+        Requires calibration_method to be set.
+
+    Examples
+    --------
+    >>> from doubleml.utils import PSProcessorConfig, PSProcessor
+    >>> config = PSProcessorConfig(
+    ...     clipping_threshold=0.05,
+    ...     calibration_method='isotonic',
+    ...     cv_calibration=True
+    ... )
+    >>> processor = PSProcessor.from_config(config)
+    """
     clipping_threshold: float = 1e-2
     extreme_threshold: float = 1e-12
     calibration_method: Optional[str] = None

From ec55d304e870076a01539878c97b48aa90688cca Mon Sep 17 00:00:00 2001
From: SvenKlaassen <sven.klaassen@uni-hamburg.de>
Date: Wed, 5 Nov 2025 01:14:02 +0100
Subject: [PATCH 37/38] add docstring to from_config method in PSProcessor
 class

---
 doubleml/utils/propensity_score_processing.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index fdc6f6f4..f57f17cd 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -132,6 +132,7 @@ def __init__(
 
     @classmethod
     def from_config(cls, config: PSProcessorConfig):
+        """Create PSProcessor from PSProcessorConfig."""
         return cls(
             clipping_threshold=config.clipping_threshold,
             extreme_threshold=config.extreme_threshold,

From c09660423babe7613c37062591a6c9352ad7f663 Mon Sep 17 00:00:00 2001
From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com>
Date: Wed, 5 Nov 2025 01:56:55 +0100
Subject: [PATCH 38/38] add missing newline after docstring in
 PSProcessorConfig class

---
 doubleml/utils/propensity_score_processing.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doubleml/utils/propensity_score_processing.py b/doubleml/utils/propensity_score_processing.py
index f57f17cd..2c5428c9 100644
--- a/doubleml/utils/propensity_score_processing.py
+++ b/doubleml/utils/propensity_score_processing.py
@@ -49,6 +49,7 @@ class PSProcessorConfig:
     ... )
     >>> processor = PSProcessor.from_config(config)
     """
+
     clipping_threshold: float = 1e-2
     extreme_threshold: float = 1e-12
     calibration_method: Optional[str] = None