Draft
Changes from all commits
36 commits
fe6375e
static panel model, data, and util
JulianDiefenbacher Jun 23, 2025
2031923
update plpr and dataset
JulianDiefenbacher Jun 26, 2025
5f6a374
add basic simulations
JulianDiefenbacher Jun 26, 2025
215a5e5
add plpr __str__, and checks
JulianDiefenbacher Jul 10, 2025
4189671
update example_sim
JulianDiefenbacher Jul 10, 2025
2c7e238
add model descriptions
JulianDiefenbacher Oct 9, 2025
7708f9b
fix typo
JulianDiefenbacher Oct 9, 2025
d5bf9e9
fix notation consistency
JulianDiefenbacher Oct 9, 2025
d8c3039
update description numbering
JulianDiefenbacher Oct 9, 2025
375325f
Merge branch 'main' into j-static-panel
JulianDiefenbacher Oct 27, 2025
753f68a
update from ClusterData to base Data class
JulianDiefenbacher Oct 27, 2025
058da4e
add static_panel flag in PanelData
JulianDiefenbacher Oct 27, 2025
85da2eb
add static_panel property
JulianDiefenbacher Nov 4, 2025
08cfd8c
add static_panel property and update tests for panel data handling
SvenKlaassen Nov 4, 2025
1ecab66
update plpr model, include data transformation
JulianDiefenbacher Nov 6, 2025
353e148
Merge branch 'main' into j-static-panel
JulianDiefenbacher Nov 6, 2025
5dc1a44
refactor: simplify string representation and add additional info meth…
SvenKlaassen Nov 7, 2025
fdc4330
correct score info string newline spacing
JulianDiefenbacher Nov 10, 2025
bccc81c
data transform update, add transform_col_names property
JulianDiefenbacher Nov 10, 2025
eb68557
add transformed data arrays for nuisance estimation
JulianDiefenbacher Nov 10, 2025
c404f06
add _initialize_fd_model because of n_obs and smpls issue
JulianDiefenbacher Nov 11, 2025
3ccfb34
clearer TODO description
JulianDiefenbacher Nov 11, 2025
ab9f83f
move data transformation before init
JulianDiefenbacher Nov 13, 2025
f27ebeb
update logic for cre_normal approach in estimation and tuning
JulianDiefenbacher Nov 13, 2025
011f8cb
add simulation replication
JulianDiefenbacher Nov 24, 2025
3f8eef6
update PLPR model
JulianDiefenbacher Nov 24, 2025
677c5a6
allow binary treatment for PLPR
JulianDiefenbacher Nov 24, 2025
7c7bd43
update plpr dgp function
JulianDiefenbacher Nov 27, 2025
ecce2b5
update make_plpr use
JulianDiefenbacher Nov 27, 2025
122e97d
Merge branch 'main' into j-static-panel
JulianDiefenbacher Nov 27, 2025
453cbf6
add basic plpr tests
JulianDiefenbacher Nov 30, 2025
86c947a
remove notebooks
JulianDiefenbacher Dec 1, 2025
7d01cd1
fix id var issue
JulianDiefenbacher Dec 1, 2025
7c24a99
complete plpr dataset tests
JulianDiefenbacher Dec 2, 2025
fb56a27
fix formatting
JulianDiefenbacher Dec 2, 2025
b15571c
add external pred test, complete model default test
JulianDiefenbacher Dec 3, 2025
2 changes: 2 additions & 0 deletions doubleml/__init__.py
@@ -15,6 +15,7 @@
from .irm.ssm import DoubleMLSSM
from .plm.lplr import DoubleMLLPLR
from .plm.pliv import DoubleMLPLIV
from .plm.plpr import DoubleMLPLPR
from .plm.plr import DoubleMLPLR
from .utils.blp import DoubleMLBLP
from .utils.policytree import DoubleMLPolicyTree
@@ -44,6 +45,7 @@
"DoubleMLPolicyTree",
"DoubleMLSSM",
"DoubleMLLPLR",
"DoubleMLPLPR",
]

__version__ = importlib.metadata.version("doubleml")
36 changes: 29 additions & 7 deletions doubleml/data/panel_data.py
@@ -41,6 +41,13 @@ class DoubleMLPanelData(DoubleMLData):
The instrumental variable(s).
Default is ``None``.

static_panel : bool
Indicates whether the data model corresponds to a static
panel data approach (``True``) or to staggered adoption panel data
(``False``). In the latter case, the treatment groups/values are defined in terms of the first time of
treatment exposure.
Default is ``False``.

use_other_treat_as_covariate : bool
Indicates whether in the multiple-treatment case the other treatment variables should be added as covariates.
Default is ``True``.
@@ -82,31 +89,40 @@ def __init__(
id_col,
x_cols=None,
z_cols=None,
static_panel=False,
use_other_treat_as_covariate=True,
force_all_x_finite=True,
datetime_unit="M",
):
DoubleMLBaseData.__init__(self, data)

self._static_panel = static_panel

# we need to set id_col (needs _data) before calling the super __init__ because of the x_cols setter
self.id_col = id_col
self._datetime_unit = _is_valid_datetime_unit(datetime_unit)
self._set_id_var()

# Set time column before calling parent constructor
self.t_col = t_col
self._datetime_unit = _is_valid_datetime_unit(datetime_unit)

if not self.static_panel:
cluster_cols = None
force_all_d_finite = False
else:
cluster_cols = id_col
force_all_d_finite = True

# Call parent constructor
DoubleMLData.__init__(
self,
data=data,
y_col=y_col,
d_cols=d_cols,
x_cols=x_cols,
z_cols=z_cols,
cluster_cols=cluster_cols,
use_other_treat_as_covariate=use_other_treat_as_covariate,
force_all_x_finite=force_all_x_finite,
force_all_d_finite=False,
force_all_d_finite=force_all_d_finite,
)

# reset index to ensure a simple RangeIndex
@@ -115,15 +131,15 @@ def __init__(
# Set time variable array after data is loaded
self._set_time_var()

if self.n_treat != 1:
raise ValueError("Only one treatment column is allowed for panel data.")

self._check_disjoint_sets_id_col()

# intialize the unique values of g and t
self._g_values = np.sort(np.unique(self.d)) # unique values of g
self._t_values = np.sort(np.unique(self.t)) # unique values of t

if self.n_treat != 1:
raise ValueError("Only one treatment column is allowed for panel data.")

def __str__(self):
data_summary = self._data_summary_str()
buf = io.StringIO()
@@ -146,6 +162,7 @@ def _data_summary_str(self):
f"Instrument variable(s): {self.z_cols}\n"
f"Time variable: {self.t_col}\n"
f"Id variable: {self.id_col}\n"
f"Static panel data: {self.static_panel}\n"
)

data_summary += f"No. Unique Ids: {self.n_ids}\n" f"No. Observations: {self.n_obs}\n"
@@ -296,6 +313,11 @@ def n_t_periods(self):
"""
return len(self.t_values)

@property
def static_panel(self):
"""Indicates whether the data model corresponds to a static panel data approach."""
return self._static_panel

def _get_optional_col_sets(self):
base_optional_col_sets = super()._get_optional_col_sets()
id_col_set = {self.id_col}
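The new ``static_panel`` flag can be exercised with a small hand-rolled panel. A minimal sketch (toy data and column names chosen for illustration, not taken from this diff; the import path assumes the public ``doubleml.data`` namespace):

import numpy as np
import pandas as pd
from doubleml.data import DoubleMLPanelData

# toy balanced panel: 5 units observed over 4 periods (hypothetical data)
n_ids, n_periods = 5, 4
df = pd.DataFrame(
    {
        "id": np.repeat(np.arange(n_ids), n_periods),
        "t": np.tile(np.arange(n_periods), n_ids),
        "y": np.random.standard_normal(n_ids * n_periods),
        "d": np.random.standard_normal(n_ids * n_periods),
        "x1": np.random.standard_normal(n_ids * n_periods),
    }
)

dml_data = DoubleMLPanelData(
    data=df, y_col="y", d_cols="d", t_col="t", id_col="id", x_cols=["x1"], static_panel=True
)
print(dml_data.static_panel)  # True
print(dml_data.cluster_cols)  # ['id'] -- in the static panel case the id column doubles as the cluster column

In the non-static (staggered adoption) case, ``static_panel=False`` leaves ``cluster_cols`` unset, matching the behavior asserted in the tests below.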
23 changes: 21 additions & 2 deletions doubleml/data/tests/test_panel_data.py
@@ -157,14 +157,26 @@ def test_panel_data_str():
assert "Time variable: t" in dml_str
assert "Id variable: id" in dml_str
assert "No. Observations:" in dml_str
assert "Static panel data:" in dml_str


@pytest.fixture(scope="module", params=[True, False])
def static_panel(request):
return request.param


@pytest.mark.ci
def test_panel_data_properties():
def test_panel_data_properties(static_panel):
np.random.seed(3141)
df = make_did_SZ2020(n_obs=100, return_type="DoubleMLPanelData")._data
dml_data = DoubleMLPanelData(
data=df, y_col="y", d_cols="d", t_col="t", id_col="id", x_cols=[f"Z{i + 1}" for i in np.arange(4)]
data=df,
y_col="y",
d_cols="d",
t_col="t",
id_col="id",
x_cols=[f"Z{i + 1}" for i in np.arange(4)],
static_panel=static_panel,
)

assert np.array_equal(dml_data.id_var, df["id"].values)
@@ -176,3 +188,10 @@ def test_panel_data_properties():
assert dml_data.n_groups == len(np.unique(df["d"].values))
assert np.array_equal(dml_data.t_values, np.sort(np.unique(df["t"].values)))
assert dml_data.n_t_periods == len(np.unique(df["t"].values))

if static_panel:
assert dml_data.static_panel is True
assert dml_data.cluster_cols == ["id"]
else:
assert dml_data.static_panel is False
assert dml_data.cluster_cols is None
3 changes: 2 additions & 1 deletion doubleml/plm/__init__.py
@@ -4,6 +4,7 @@

from .lplr import DoubleMLLPLR
from .pliv import DoubleMLPLIV
from .plpr import DoubleMLPLPR
from .plr import DoubleMLPLR

__all__ = ["DoubleMLPLR", "DoubleMLPLIV", "DoubleMLLPLR"]
__all__ = ["DoubleMLPLR", "DoubleMLPLIV", "DoubleMLLPLR", "DoubleMLPLPR"]
2 changes: 2 additions & 0 deletions doubleml/plm/datasets/__init__.py
@@ -7,6 +7,7 @@
from .dgp_lplr_LZZ2020 import make_lplr_LZZ2020
from .dgp_pliv_CHS2015 import make_pliv_CHS2015
from .dgp_pliv_multiway_cluster_CKMS2021 import make_pliv_multiway_cluster_CKMS2021
from .dgp_plpr_CP2025 import make_plpr_CP2025
from .dgp_plr_CCDDHNR2018 import make_plr_CCDDHNR2018
from .dgp_plr_turrell2018 import make_plr_turrell2018

@@ -18,4 +19,5 @@
"make_pliv_multiway_cluster_CKMS2021",
"make_lplr_LZZ2020",
"_make_pliv_data",
"make_plpr_CP2025",
]
134 changes: 134 additions & 0 deletions doubleml/plm/datasets/dgp_plpr_CP2025.py
@@ -0,0 +1,134 @@
import numpy as np
import pandas as pd


def make_plpr_CP2025(num_id=250, num_t=10, dim_x=30, theta=0.5, dgp_type="dgp1"):
"""
Generates synthetic data for a partially linear panel regression model, based on Clarke and Polselli (2025).
The data generating process is defined as

.. math::

Y_{it} &= D_{it} \\theta + l_0(X_{it}) + \\alpha_i + U_{it}, & &U_{it} \\sim \\mathcal{N}(0,1),

D_{it} &= m_0(X_{it}) + c_i + V_{it}, & &V_{it} \\sim \\mathcal{N}(0,1),

\\alpha_i &= 0.25 \\left(\\frac{1}{T} \\sum_{t=1}^{T} D_{it} - \\bar{D} \\right)
+ 0.25 \\frac{1}{T} \\sum_{t=1}^{T} \\sum_{k \\in \\mathcal{K}} X_{it,k} + a_i


    with :math:`a_i \\sim \\mathcal{N}(0,0.95)`, :math:`X_{it,p} \\sim \\mathcal{N}(0,5)` and :math:`c_i \\sim \\mathcal{N}(0,1)`,
    where :math:`\\mathcal{K} = \\{1,3\\}` is the set of relevant (non-zero) confounding variables and :math:`p` indexes the
    full set of confounding variables.

    Clarke and Polselli (2025) consider three functional forms for the nuisance functions :math:`l_0`
    and :math:`m_0` in the confounders, with varying levels of non-linearity and non-smoothness:

Design 1. (dgp1): Linear nuisance functions

.. math::

l_0(X_{it}) &= a X_{it,1} + X_{it,3}

m_0(X_{it}) &= a X_{it,1} + X_{it,3}

Design 2. (dgp2): Non-linear and smooth nuisance functions

.. math::

l_0(X_{it}) &= \\frac{\\exp(X_{it,1})}{1 + \\exp(X_{it,1})} + a \\cos(X_{it,3})

m_0(X_{it}) &= \\cos(X_{it,1}) + a \\frac{\\exp(X_{it,3})}{1 + \\exp(X_{it,3})}

Design 3. (dgp3): Non-linear and discontinuous nuisance functions

.. math::

l_0(X_{it}) &= b (X_{it,1} \\cdot X_{it,3}) + a (X_{it,3} \\cdot 1\\{X_{it,3} > 0\\})

m_0(X_{it}) &= a (X_{it,1} \\cdot 1\\{X_{it,1} > 0\\}) + b (X_{it,1} \\cdot X_{it,3}),

where :math:`a = 0.25`, :math:`b = 0.5`.

Parameters
----------
num_id : int
The number of units in the panel. Default is ``250``.
num_t : int
The number of time periods per unit. Default is ``10``.
dim_x : int
The number of confounding variables. Default is ``30``.
theta : float
The value of the causal parameter. Default is ``0.5``.
dgp_type : str
The type of DGP design to be used. Default is ``'dgp1'``; other options are ``'dgp2'`` and ``'dgp3'``.

Returns
-------
pandas.DataFrame
DataFrame containing the simulated static panel data.

References
----------
Clarke, P. S. and Polselli, A. (2025),
Double machine learning for static panel models with fixed effects. The Econometrics Journal, utaf011,
doi:`10.1093/ectj/utaf011 <https://doi.org/10.1093/ectj/utaf011>`_.
"""

# parameters
a = 0.25
b = 0.5
sigma2_a = 0.95
sigma2_x = 5

# id and time vectors
id_var = np.repeat(np.arange(1, num_id + 1), num_t)
time = np.tile(np.arange(1, num_t + 1), num_id)

# individual fixed effects
a_i = np.repeat(np.random.normal(0, np.sqrt(sigma2_a), num_id), num_t)
c_i = np.repeat(np.random.standard_normal(num_id), num_t)

# covariates and errors
x_mean = 0
x_it = np.random.normal(loc=x_mean, scale=np.sqrt(sigma2_x), size=(num_id * num_t, dim_x))
u_it = np.random.standard_normal(num_id * num_t)
v_it = np.random.standard_normal(num_id * num_t)

# functional forms in nuisance functions
if dgp_type == "dgp1":
l_0 = a * x_it[:, 0] + x_it[:, 2]
m_0 = a * x_it[:, 0] + x_it[:, 2]
elif dgp_type == "dgp2":
l_0 = np.divide(np.exp(x_it[:, 0]), 1 + np.exp(x_it[:, 0])) + a * np.cos(x_it[:, 2])
m_0 = np.cos(x_it[:, 0]) + a * np.divide(np.exp(x_it[:, 2]), 1 + np.exp(x_it[:, 2]))
elif dgp_type == "dgp3":
l_0 = b * (x_it[:, 0] * x_it[:, 2]) + a * (x_it[:, 2] * np.where(x_it[:, 2] > 0, 1, 0))
m_0 = a * (x_it[:, 0] * np.where(x_it[:, 0] > 0, 1, 0)) + b * (x_it[:, 0] * x_it[:, 2])
else:
raise ValueError("Invalid dgp type.")

# treatment
d_it = m_0 + c_i + v_it

def alpha_i(x_it, d_it, a_i, num_n, num_t):
d_i = np.array_split(d_it, num_n)
d_i_term = np.repeat(np.mean(d_i, axis=1), num_t) - np.mean(d_it)

x_i = np.array_split(np.sum(x_it[:, [0, 2]], axis=1), num_n)
x_i_mean = np.mean(x_i, axis=1)
x_i_term = np.repeat(x_i_mean, num_t)

alpha_term = 0.25 * d_i_term + 0.25 * x_i_term + a_i
return alpha_term

# outcome
y_it = d_it * theta + l_0 + alpha_i(x_it, d_it, a_i, num_id, num_t) + u_it

x_cols = [f"x{i + 1}" for i in np.arange(dim_x)]

data = pd.DataFrame(np.column_stack((id_var, time, y_it, d_it, x_it)), columns=["id", "time", "y", "d"] + x_cols).astype(
{"id": "int64", "time": "int64"}
)

return data
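End to end, the generator can be paired with the static panel data backend added in this PR. A minimal sketch (the random forest learners, their hyperparameters, and the ``DoubleMLPLPR(dml_data, ml_l, ml_m)`` signature are assumptions made for illustration, mirroring the other PLM models; they are not taken from this diff):

import numpy as np
from doubleml import DoubleMLPLPR
from doubleml.data import DoubleMLPanelData
from doubleml.plm.datasets import make_plpr_CP2025
from sklearn.ensemble import RandomForestRegressor

np.random.seed(3141)
df = make_plpr_CP2025(num_id=100, num_t=5, dim_x=30, theta=0.5, dgp_type="dgp1")

# column names follow the generator output: id, time, y, d, x1..x30
dml_data = DoubleMLPanelData(
    data=df,
    y_col="y",
    d_cols="d",
    t_col="time",
    id_col="id",
    x_cols=[f"x{i + 1}" for i in range(30)],
    static_panel=True,
)

# assumed learner setup; the actual DoubleMLPLPR defaults and arguments may differ
ml_l = RandomForestRegressor(n_estimators=100, max_depth=5)
ml_m = RandomForestRegressor(n_estimators=100, max_depth=5)
dml_plpr = DoubleMLPLPR(dml_data, ml_l, ml_m)
dml_plpr.fit()
print(dml_plpr.summary)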