Merge pull request #253 from DoubleML/jh-logistic

SvenKlaassen · web-flow · commit c1495d48c161 · 2025-11-20T09:48:53.000+01:00
Added documentation for LPLR model.
diff --git a/doc/api/datasets.rst b/doc/api/datasets.rst
@@ -31,6 +31,7 @@ Dataset Generators
 
    plm.datasets.make_plr_CCDDHNR2018
    plm.datasets.make_plr_turrell2018
+   plm.datasets.make_lplr_LZZ2020
    plm.datasets.make_pliv_CHS2015
    plm.datasets.make_pliv_multiway_cluster_CKMS2021
    plm.datasets.make_confounded_plr_data
diff --git a/doc/api/dml_models.rst b/doc/api/dml_models.rst
@@ -16,6 +16,7 @@ doubleml.plm
     :template: class.rst
 
     DoubleMLPLR
+    DoubleMLLPLR
     DoubleMLPLIV
 
 
diff --git a/doc/conf.py b/doc/conf.py
@@ -274,6 +274,8 @@
     "https://doi.org/10.1097%2FEDE.0b013e3181f74493",
     # Valid DOI; Causes 403 Client Error: Forbidden for url:...
     "https://doi.org/10.3982/ECTA15732",
+    # Valid DOI; Causes 403 Client Error: Forbidden for url:...
+    "https://doi.org/10.1093/ectj/utab019"
 ]
 
 # To execute R code via jupyter-execute one needs to install the R kernel for jupyter
diff --git a/doc/examples/index.rst b/doc/examples/index.rst
@@ -22,10 +22,11 @@ General Examples
     py_double_ml_sensitivity.ipynb
     py_double_ml_apo.ipynb
     py_double_ml_irm_vs_apo.ipynb
+    py_double_ml_lplr.ipynb
+    py_double_ml_ssm.ipynb
     py_double_ml_learner.ipynb
     py_double_ml_firststage.ipynb
     py_double_ml_multiway_cluster.ipynb
-    py_double_ml_ssm.ipynb
     py_double_ml_sensitivity_booking.ipynb
     learners/py_tabpfn.ipynb
     py_double_ml_basic_iv.ipynb
diff --git a/doc/examples/py_double_ml_lplr.ipynb b/doc/examples/py_double_ml_lplr.ipynb
diff --git a/doc/guide/models/plm/lplr.rst b/doc/guide/models/plm/lplr.rst
@@ -0,0 +1,13 @@
+**Logistic partially linear regression (LPLR)** models take the form
+
+.. math::
+
+    \mathbb{E} [Y | D, X] = \mathbb{P} (Y=1 | D, X) = \text{expit} \{\beta_0 D + r_0 (X) \}
+
+where :math:`Y` is the binary outcome variable and :math:`D` is the policy variable of interest.
+The high-dimensional vector :math:`X = (X_1, \ldots, X_p)` consists of confounding covariates and
+:math:`\text{expit}` is the logistic link function
+
+.. math::
+    \text{expit} ( x ) = \frac{1}{1 + e^{-x}}
+
diff --git a/doc/guide/models/plm/plm_models.inc b/doc/guide/models/plm/plm_models.inc
@@ -64,6 +64,40 @@ Partially linear regression model (PLR)
             dml_plr_obj$fit()
             print(dml_plr_obj)
 
+.. _lplr-model:
+
+Logistic partially linear regression model (LPLR)
+*************************************************
+
+.. include:: /guide/models/plm/lplr.rst
+
+.. include:: /shared/causal_graphs/plr_irm_causal_graph.rst
+
+``DoubleMLLPLR`` implements LPLR models. Estimation is conducted via its ``fit()`` method.
+
+.. note::
+    Note that the treatment effects are not additive in this model. The partially linear term enters the model through a logistic link function.
+
+.. tab-set::
+
+    .. tab-item:: Python
+        :sync: py
+
+        .. ipython:: python
+
+            import numpy as np
+            import doubleml as dml
+            from doubleml.plm.datasets import make_lplr_LZZ2020
+            from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+            from sklearn.base import clone
+            np.random.seed(3141)
+            ml_t = RandomForestRegressor(n_estimators=100, max_features=15, max_depth=15, min_samples_leaf=5)
+            ml_m = RandomForestRegressor(n_estimators=100, max_features=15, max_depth=15, min_samples_leaf=5)
+            ml_M = RandomForestClassifier(n_estimators=100, max_features=15, max_depth=15, min_samples_leaf=5)
+            obj_dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=1000, dim_x=15)
+            dml_lplr_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m)
+            dml_lplr_obj.fit().summary
+
 
 .. _pliv-model:
 
@@ -91,12 +125,12 @@ Estimation is conducted via its ``fit()`` method:
             from sklearn.ensemble import RandomForestRegressor
             from sklearn.base import clone
 
-            learner = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2)
+            learner = RandomForestRegressor(n_estimators=100, max_features=5, max_depth=5, min_samples_leaf=5)
             ml_l = clone(learner)
             ml_m = clone(learner)
             ml_r = clone(learner)
             np.random.seed(2222)
-            data = make_pliv_CHS2015(alpha=0.5, n_obs=500, dim_x=20, dim_z=1, return_type='DataFrame')
+            data = make_pliv_CHS2015(alpha=0.5, n_obs=500, dim_x=5, dim_z=1, return_type='DataFrame')
             obj_dml_data = dml.DoubleMLData(data, 'y', 'd', z_cols='Z1')
             dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_l, ml_m, ml_r)
             print(dml_pliv_obj.fit())
@@ -120,4 +154,4 @@ Estimation is conducted via its ``fit()`` method:
             obj_dml_data = DoubleMLData$new(data, y_col="y", d_col = "d", z_cols= "Z1")
             dml_pliv_obj = DoubleMLPLIV$new(obj_dml_data, ml_l, ml_m, ml_r)
             dml_pliv_obj$fit()
-            print(dml_pliv_obj)
+            print(dml_pliv_obj)
diff --git a/doc/guide/scores/plm/lplr_score.rst b/doc/guide/scores/plm/lplr_score.rst
@@ -0,0 +1,47 @@
+For the LPLR model implemented in ``DoubleMLLPLR`` one can choose between
+``score='nuisance_space'`` and ``score='instrument'``.
+
+``score='nuisance_space'`` implements the score function:
+
+.. math::
+
+    \psi(W; \beta, \eta) := \psi(X) \{Y e^{-\beta D} -(1-Y)e^{r_0(X)} \} \{ D - m_0(X)\}
+
+with nuisance elements :math:`\eta = \{ r(\cdot), m(\cdot), \psi(\cdot) \}`, where
+
+.. math::
+
+    r_0(X) = t_0(X) - \breve \beta a_0(X),
+
+    m_0(X) = \mathbb{E} [D | X, Y=0],
+
+    \psi(X) = \text{expit} (-r_0(X)).
+
+For the estimation of :math:`r_0(X)`, we further need a preliminary estimate :math:`\breve \beta` and an estimate of
+:math:`M (D, X) = \mathbb{P} [Y=1 | D, X]`, both obtained as described in `Liu et al. (2021) <https://doi.org/10.1093/ectj/utab019>`_,
+as well as estimates of the following quantities:
+
+.. math::
+
+    t_0(X) = \mathbb{E} [\text{logit}(M (D, X)) | X],
+
+    a_0(X) = \mathbb{E} [D | X].
+
+
+
+``score='instrument'`` implements the score function:
+
+.. math::
+
+    \psi(W; \beta, \eta) := \{Y - \text{expit} (\beta D + r_0(X)) \} Z_0
+
+
+with :math:`Z_0=D-m_0(X)` and :math:`\eta = \{ r(\cdot), m(\cdot), \psi(\cdot) \}`, where
+
+.. math::
+
+    r_0(X) = t_0(X) - \breve \beta a_0(X),
+
+    m_0(X) = \mathbb{E} [D | X].
+
+Here, :math:`r_0(X)` is computed as for ``score='nuisance_space'``.
diff --git a/doc/guide/scores/plm/plm_scores.inc b/doc/guide/scores/plm/plm_scores.inc
@@ -7,6 +7,12 @@ Partially linear regression model (PLR)
 
 .. include:: /guide/scores/plm/plr_score.rst
 
+.. _lplr-score:
+
+Logistic partially linear regression model (LPLR)
+=================================================
+
+.. include:: /guide/scores/plm/lplr_score.rst
 
 .. _pliv-score:
 
diff --git a/doc/literature/literature.rst b/doc/literature/literature.rst
@@ -136,6 +136,12 @@ Double Machine Learning Literature
         :octicon:`link` :bdg-link-dark:`URL <https://jmlr.org/papers/volume21/19-827/19-827.pdf>` 
         |hr|
 
+      - Molei Liu, Yi Zhang, Doudou Zhou |br|
+        **Double/Debiased Machine Learning for Logistic Partially Linear Model** |br|
+        *The Econometrics Journal, 24(3), Pages 559–588, 2021* |br|
+        :octicon:`link` :bdg-link-dark:`URL <https://doi.org/10.1093/ectj/utab019>`
+        |hr|
+
       - Yusuke Narita, Shota Yasui, Kohei Yata |br|
         **Debiased Off-Policy Evaluation for Recommendation Systems** |br|
         *RecSys '21: Fifteenth ACM Conference on Recommender Systems, 372–379, 2021* |br|

Original file line number	Diff line number	Diff line change
`@@ -274,6 +274,8 @@`
`274`	`274`	`"https://doi.org/10.1097%2FEDE.0b013e3181f74493",`
`275`	`275`	`# Valid DOI; Causes 403 Client Error: Forbidden for url:...`
`276`	`276`	`"https://doi.org/10.3982/ECTA15732",`
	`277`	`+ # Valid DOI; Causes 403 Client Error: Forbidden for url:...`
	`278`	`+ "https://doi.org/10.1093/ectj/utab019"`
`277`	`279`	`]`
`278`	`280`
`279`	`281`	`# To execute R code via jupyter-execute one needs to install the R kernel for jupyter`