Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
[![Conda Version](https://img.shields.io/conda/vn/conda-forge/doubleml.svg)](https://anaconda.org/conda-forge/doubleml)
[![codecov](https://codecov.io/gh/DoubleML/doubleml-for-py/branch/main/graph/badge.svg?token=0BjlFPgdGk)](https://codecov.io/gh/DoubleML/doubleml-for-py)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/1c08ec7d782c451784293c996537de14)](https://www.codacy.com/gh/DoubleML/doubleml-for-py/dashboard?utm_source=github.com&utm_medium=referral&utm_content=DoubleML/doubleml-for-py&utm_campaign=Badge_Grade)
[![Python version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)](https://www.python.org/)
[![Python version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue)](https://www.python.org/)

The Python package **DoubleML** provides an implementation of the double / debiased machine learning framework of
[Chernozhukov et al. (2018)](https://doi.org/10.1111/ectj.12097).
Expand Down
4 changes: 2 additions & 2 deletions doubleml/data/base_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,8 @@ def from_arrays(
elif not isinstance(force_all_d_finite, bool):
raise TypeError("Invalid force_all_d_finite. " + "force_all_d_finite must be True, False or 'allow-nan'.")

x = check_array(x, ensure_2d=False, allow_nd=False, force_all_finite=force_all_x_finite)
d = check_array(d, ensure_2d=False, allow_nd=False, force_all_finite=force_all_x_finite)
x = check_array(x, ensure_2d=False, allow_nd=False, ensure_all_finite=force_all_x_finite)
d = check_array(d, ensure_2d=False, allow_nd=False, ensure_all_finite=force_all_x_finite)
y = column_or_1d(y, warn=True)

x = _assure_2d_array(x)
Expand Down
8 changes: 4 additions & 4 deletions doubleml/did/did.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ def _check_data(self, obj_dml_data):
return

def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)

# nuisance g
# get train indices for d == 0
Expand Down Expand Up @@ -372,8 +372,8 @@ def _sensitivity_element_est(self, preds):
def _nuisance_tuning(
self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)
# get train indices for d == 0 and d == 1
smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d)

Expand Down
8 changes: 4 additions & 4 deletions doubleml/did/did_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,8 @@ def _preprocess_data(self, g_value, pre_t, eval_t):
def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):

# Here: d is a binary treatment indicator
x, y = check_X_y(self._x_data_subset, self._y_data_subset, force_all_finite=False)
x, d = check_X_y(x, self._g_data_subset, force_all_finite=False)
x, y = check_X_y(self._x_data_subset, self._y_data_subset, ensure_all_finite=False)
x, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False)
# nuisance g
# get train indices for d == 0
smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d)
Expand Down Expand Up @@ -570,8 +570,8 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, p_hat):
def _nuisance_tuning(
self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
):
x, y = check_X_y(self._x_data_subset, self._y_data_subset, force_all_finite=False)
x, d = check_X_y(x, self._g_data_subset, force_all_finite=False)
x, y = check_X_y(self._x_data_subset, self._y_data_subset, ensure_all_finite=False)
x, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False)

# get train indices for d == 0 and d == 1
smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d)
Expand Down
12 changes: 6 additions & 6 deletions doubleml/did/did_cs.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,9 @@ def _check_data(self, obj_dml_data):
return

def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, t = check_X_y(x, self._dml_data.t, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)
x, t = check_X_y(x, self._dml_data.t, ensure_all_finite=False)

# THIS DIFFERS FROM THE PAPER due to stratified splitting this should be the same for each fold
# nuisance estimates of the uncond. treatment prob.
Expand Down Expand Up @@ -547,9 +547,9 @@ def _sensitivity_element_est(self, preds):
def _nuisance_tuning(
self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, t = check_X_y(x, self._dml_data.t, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)
x, t = check_X_y(x, self._dml_data.t, ensure_all_finite=False)

if scoring_methods is None:
scoring_methods = {"ml_g": None, "ml_m": None}
Expand Down
12 changes: 6 additions & 6 deletions doubleml/did/did_cs_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,9 +435,9 @@ def _estimate_conditional_g(
def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):

# Here: d is a binary treatment indicator
x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, force_all_finite=False)
_, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # (d is the G_indicator)
_, t = check_X_y(x, self._t_data_subset, force_all_finite=False)
x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, ensure_all_finite=False)
_, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False) # (d is the G_indicator)
_, t = check_X_y(x, self._t_data_subset, ensure_all_finite=False)

# THIS DIFFERS FROM THE PAPER due to stratified splitting this should be the same for each fold
# nuisance estimates of the uncond. treatment prob.
Expand Down Expand Up @@ -621,9 +621,9 @@ def _score_elements(self, y, d, t, g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_
def _nuisance_tuning(
self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
):
x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, force_all_finite=False)
_, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # (d is the G_indicator)
_, t = check_X_y(x, self._t_data_subset, force_all_finite=False)
x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, ensure_all_finite=False)
_, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False) # (d is the G_indicator)
_, t = check_X_y(x, self._t_data_subset, ensure_all_finite=False)

if scoring_methods is None:
scoring_methods = {"ml_g": None, "ml_m": None}
Expand Down
11 changes: 0 additions & 11 deletions doubleml/did/tests/test_did_aggregation_plot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pytest
Expand Down Expand Up @@ -180,13 +178,4 @@ def test_joint_ci_bootstrap_warning(mock_framework):
with pytest.warns(UserWarning, match="Joint confidence intervals require bootstrapping"):
_ = aggregation.plot_effects(joint=True)

# Verify that bootstrap was performed
assert aggregation.aggregated_frameworks.boot_t_stat is not None

# No warning should be raised when plotting again
with warnings.catch_warnings(record=True) as recorded_warnings:
warnings.simplefilter("always") # Ensure all warnings are recorded
_ = aggregation.plot_effects(joint=True)

assert len(recorded_warnings) == 0
plt.close("all")
8 changes: 4 additions & 4 deletions doubleml/irm/apo.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,8 @@ def _get_weights(self):
return weights, weights_bar

def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)
dx = np.column_stack((d, x))
# use the treated indicator to get the correct sample splits
treated = self.treated
Expand Down Expand Up @@ -361,8 +361,8 @@ def _sensitivity_element_est(self, preds):
def _nuisance_tuning(
self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)
dx = np.column_stack((d, x))
# use the treated indicator to get the correct sample splits
treated = self.treated
Expand Down
8 changes: 4 additions & 4 deletions doubleml/irm/cvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,8 @@ def _initialize_ml_nuisance_params(self):
self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ["ml_g", "ml_m"]}

def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)

# initialize nuisance predictions, targets and models
g_hat = {
Expand Down Expand Up @@ -330,8 +330,8 @@ def ipw_score(theta):
def _nuisance_tuning(
self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)

if scoring_methods is None:
scoring_methods = {"ml_g": None, "ml_m": None}
Expand Down
12 changes: 6 additions & 6 deletions doubleml/irm/iivm.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,9 @@ def _check_data(self, obj_dml_data):
return

def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)

# get train indices for z == 0 and z == 1
smpls_z0, smpls_z1 = _get_cond_smpls(smpls, z)
Expand Down Expand Up @@ -448,9 +448,9 @@ def _score_elements(self, y, z, d, g_hat0, g_hat1, m_hat, r_hat0, r_hat1, smpls)
def _nuisance_tuning(
self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)

# get train indices for z == 0 and z == 1
smpls_z0, smpls_z1 = _get_cond_smpls(smpls, z)
Expand Down
8 changes: 4 additions & 4 deletions doubleml/irm/irm.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,8 @@ def _check_data(self, obj_dml_data):
return

def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)
# get train indices for d == 0 and d == 1
smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d)
g0_external = external_predictions["ml_g0"] is not None
Expand Down Expand Up @@ -401,8 +401,8 @@ def _sensitivity_element_est(self, preds):
def _nuisance_tuning(
self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)
# get train indices for d == 0 and d == 1
smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d)

Expand Down
12 changes: 6 additions & 6 deletions doubleml/irm/lpq.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,9 @@ def _initialize_ml_nuisance_params(self):
}

def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)
x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False)

m_z = external_predictions["ml_m_z"] is not None
m_d_d0 = external_predictions["ml_m_d_z0"] is not None
Expand Down Expand Up @@ -557,9 +557,9 @@ def ipw_score(theta):
def _nuisance_tuning(
self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)
x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False)

if scoring_methods is None:
scoring_methods = {"ml_m_z": None, "ml_m_d_z0": None, "ml_m_d_z1": None, "ml_g_du_z0": None, "ml_g_du_z1": None}
Expand Down
8 changes: 4 additions & 4 deletions doubleml/irm/pq.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,8 @@ def _initialize_ml_nuisance_params(self):
self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ["ml_g", "ml_m"]}

def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)

g_external = external_predictions["ml_g"] is not None
m_external = external_predictions["ml_m"] is not None
Expand Down Expand Up @@ -398,8 +398,8 @@ def ipw_score(theta):
def _nuisance_tuning(
self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)

if scoring_methods is None:
scoring_methods = {"ml_g": None, "ml_m": None}
Expand Down
16 changes: 8 additions & 8 deletions doubleml/irm/ssm.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,12 +203,12 @@ def _check_data(self, obj_dml_data):
return

def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, s = check_X_y(x, self._dml_data.s, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)
x, s = check_X_y(x, self._dml_data.s, ensure_all_finite=False)

if self._score == "nonignorable":
z, _ = check_X_y(self._dml_data.z, y, force_all_finite=False)
z, _ = check_X_y(self._dml_data.z, y, ensure_all_finite=False)
dx = np.column_stack((x, d, z))
else:
dx = np.column_stack((x, d))
Expand Down Expand Up @@ -427,12 +427,12 @@ def _score_elements(self, dtreat, dcontrol, g_d1, g_d0, pi, m, s, y):
def _nuisance_tuning(
self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search
):
x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, force_all_finite=False)
x, s = check_X_y(x, self._dml_data.s, force_all_finite=False)
x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False)
x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False)
x, s = check_X_y(x, self._dml_data.s, ensure_all_finite=False)

if self._score == "nonignorable":
z, _ = check_X_y(self._dml_data.z, y, force_all_finite=False)
z, _ = check_X_y(self._dml_data.z, y, ensure_all_finite=False)

if scoring_methods is None:
scoring_methods = {"ml_g": None, "ml_pi": None, "ml_m": None}
Expand Down
Loading