diff --git a/README.md b/README.md index a5dfa3cb..aadb136f 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Conda Version](https://img.shields.io/conda/vn/conda-forge/doubleml.svg)](https://anaconda.org/conda-forge/doubleml) [![codecov](https://codecov.io/gh/DoubleML/doubleml-for-py/branch/main/graph/badge.svg?token=0BjlFPgdGk)](https://codecov.io/gh/DoubleML/doubleml-for-py) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/1c08ec7d782c451784293c996537de14)](https://www.codacy.com/gh/DoubleML/doubleml-for-py/dashboard?utm_source=github.com&utm_medium=referral&utm_content=DoubleML/doubleml-for-py&utm_campaign=Badge_Grade) -[![Python version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)](https://www.python.org/) +[![Python version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue)](https://www.python.org/) The Python package **DoubleML** provides an implementation of the double / debiased machine learning framework of [Chernozhukov et al. (2018)](https://doi.org/10.1111/ectj.12097). diff --git a/doubleml/data/base_data.py b/doubleml/data/base_data.py index 93543e8b..c5b92fa3 100644 --- a/doubleml/data/base_data.py +++ b/doubleml/data/base_data.py @@ -286,8 +286,8 @@ def from_arrays( elif not isinstance(force_all_d_finite, bool): raise TypeError("Invalid force_all_d_finite. " + "force_all_d_finite must be True, False or 'allow-nan'.") - x = check_array(x, ensure_2d=False, allow_nd=False, force_all_finite=force_all_x_finite) - d = check_array(d, ensure_2d=False, allow_nd=False, force_all_finite=force_all_x_finite) + x = check_array(x, ensure_2d=False, allow_nd=False, ensure_all_finite=force_all_x_finite) + d = check_array(d, ensure_2d=False, allow_nd=False, ensure_all_finite=force_all_x_finite) y = column_or_1d(y, warn=True) x = _assure_2d_array(x) diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 62c75a42..9307ae78 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -201,8 +201,8 @@ def _check_data(self, obj_dml_data): return def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) # nuisance g # get train indices for d == 0 @@ -372,8 +372,8 @@ def _sensitivity_element_est(self, preds): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) # get train indices for d == 0 and d == 1 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index 83e49cd0..6e3a95f2 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -429,8 +429,8 @@ def _preprocess_data(self, g_value, pre_t, eval_t): def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): # Here: d is a binary treatment indicator - x, y = check_X_y(self._x_data_subset, self._y_data_subset, force_all_finite=False) - x, d = check_X_y(x, self._g_data_subset, force_all_finite=False) + x, y = check_X_y(self._x_data_subset, self._y_data_subset, ensure_all_finite=False) + x, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False) # nuisance g # get train indices for d == 0 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) @@ -570,8 +570,8 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, p_hat): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._x_data_subset, self._y_data_subset, force_all_finite=False) - x, d = check_X_y(x, self._g_data_subset, force_all_finite=False) + x, y = check_X_y(self._x_data_subset, self._y_data_subset, ensure_all_finite=False) + x, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False) # get train indices for d == 0 and d == 1 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index 354fa666..11c467b5 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -212,9 +212,9 @@ def _check_data(self, obj_dml_data): return def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - x, t = check_X_y(x, self._dml_data.t, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x, t = check_X_y(x, self._dml_data.t, ensure_all_finite=False) # THIS DIFFERS FROM THE PAPER due to stratified splitting this should be the same for each fold # nuisance estimates of the uncond. treatment prob. @@ -547,9 +547,9 @@ def _sensitivity_element_est(self, preds): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - x, t = check_X_y(x, self._dml_data.t, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x, t = check_X_y(x, self._dml_data.t, ensure_all_finite=False) if scoring_methods is None: scoring_methods = {"ml_g": None, "ml_m": None} diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index 11419c40..5375011d 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -435,9 +435,9 @@ def _estimate_conditional_g( def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): # Here: d is a binary treatment indicator - x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, force_all_finite=False) - _, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # (d is the G_indicator) - _, t = check_X_y(x, self._t_data_subset, force_all_finite=False) + x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, ensure_all_finite=False) + _, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False) # (d is the G_indicator) + _, t = check_X_y(x, self._t_data_subset, ensure_all_finite=False) # THIS DIFFERS FROM THE PAPER due to stratified splitting this should be the same for each fold # nuisance estimates of the uncond. treatment prob. @@ -621,9 +621,9 @@ def _score_elements(self, y, d, t, g_hat_d0_t0, g_hat_d0_t1, g_hat_d1_t0, g_hat_ def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, force_all_finite=False) - _, d = check_X_y(x, self._g_data_subset, force_all_finite=False) # (d is the G_indicator) - _, t = check_X_y(x, self._t_data_subset, force_all_finite=False) + x, y = check_X_y(X=self._x_data_subset, y=self._y_data_subset, ensure_all_finite=False) + _, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False) # (d is the G_indicator) + _, t = check_X_y(x, self._t_data_subset, ensure_all_finite=False) if scoring_methods is None: scoring_methods = {"ml_g": None, "ml_m": None} diff --git a/doubleml/did/tests/test_did_aggregation_plot.py b/doubleml/did/tests/test_did_aggregation_plot.py index 1079b144..692a0e68 100644 --- a/doubleml/did/tests/test_did_aggregation_plot.py +++ b/doubleml/did/tests/test_did_aggregation_plot.py @@ -1,5 +1,3 @@ -import warnings - import matplotlib.pyplot as plt import numpy as np import pytest @@ -180,13 +178,4 @@ def test_joint_ci_bootstrap_warning(mock_framework): with pytest.warns(UserWarning, match="Joint confidence intervals require bootstrapping"): _ = aggregation.plot_effects(joint=True) - # Verify that bootstrap was performed - assert aggregation.aggregated_frameworks.boot_t_stat is not None - - # No warning should be raised when plotting again - with warnings.catch_warnings(record=True) as recorded_warnings: - warnings.simplefilter("always") # Ensure all warnings are recorded - _ = aggregation.plot_effects(joint=True) - - assert len(recorded_warnings) == 0 plt.close("all") diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index 0de311bc..93379e90 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -208,8 +208,8 @@ def _get_weights(self): return weights, weights_bar def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) dx = np.column_stack((d, x)) # use the treated indicator to get the correct sample splits treated = self.treated @@ -361,8 +361,8 @@ def _sensitivity_element_est(self, preds): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) dx = np.column_stack((d, x)) # use the treated indicator to get the correct sample splits treated = self.treated diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py index 5701c5f2..6d29f5e2 100644 --- a/doubleml/irm/cvar.py +++ b/doubleml/irm/cvar.py @@ -204,8 +204,8 @@ def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ["ml_g", "ml_m"]} def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) # initialize nuisance predictions, targets and models g_hat = { @@ -330,8 +330,8 @@ def ipw_score(theta): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) if scoring_methods is None: scoring_methods = {"ml_g": None, "ml_m": None} diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py index 54dcdd6a..4eaa1d50 100644 --- a/doubleml/irm/iivm.py +++ b/doubleml/irm/iivm.py @@ -263,9 +263,9 @@ def _check_data(self, obj_dml_data): return def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) # get train indices for z == 0 and z == 1 smpls_z0, smpls_z1 = _get_cond_smpls(smpls, z) @@ -448,9 +448,9 @@ def _score_elements(self, y, z, d, g_hat0, g_hat1, m_hat, r_hat0, r_hat1, smpls) def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) # get train indices for z == 0 and z == 1 smpls_z0, smpls_z1 = _get_cond_smpls(smpls, z) diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index 5e2d693b..343b7878 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -261,8 +261,8 @@ def _check_data(self, obj_dml_data): return def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) # get train indices for d == 0 and d == 1 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) g0_external = external_predictions["ml_g0"] is not None @@ -401,8 +401,8 @@ def _sensitivity_element_est(self, preds): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) # get train indices for d == 0 and d == 1 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) diff --git a/doubleml/irm/lpq.py b/doubleml/irm/lpq.py index fd51792b..962b383b 100644 --- a/doubleml/irm/lpq.py +++ b/doubleml/irm/lpq.py @@ -277,9 +277,9 @@ def _initialize_ml_nuisance_params(self): } def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False) m_z = external_predictions["ml_m_z"] is not None m_d_d0 = external_predictions["ml_m_d_z0"] is not None @@ -557,9 +557,9 @@ def ipw_score(theta): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False) if scoring_methods is None: scoring_methods = {"ml_m_z": None, "ml_m_d_z0": None, "ml_m_d_z1": None, "ml_g_du_z0": None, "ml_g_du_z1": None} diff --git a/doubleml/irm/pq.py b/doubleml/irm/pq.py index e515e578..baf43b7e 100644 --- a/doubleml/irm/pq.py +++ b/doubleml/irm/pq.py @@ -253,8 +253,8 @@ def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ["ml_g", "ml_m"]} def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) g_external = external_predictions["ml_g"] is not None m_external = external_predictions["ml_m"] is not None @@ -398,8 +398,8 @@ def ipw_score(theta): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) if scoring_methods is None: scoring_methods = {"ml_g": None, "ml_m": None} diff --git a/doubleml/irm/ssm.py b/doubleml/irm/ssm.py index 7e27b52a..00a49191 100644 --- a/doubleml/irm/ssm.py +++ b/doubleml/irm/ssm.py @@ -203,12 +203,12 @@ def _check_data(self, obj_dml_data): return def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - x, s = check_X_y(x, self._dml_data.s, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x, s = check_X_y(x, self._dml_data.s, ensure_all_finite=False) if self._score == "nonignorable": - z, _ = check_X_y(self._dml_data.z, y, force_all_finite=False) + z, _ = check_X_y(self._dml_data.z, y, ensure_all_finite=False) dx = np.column_stack((x, d, z)) else: dx = np.column_stack((x, d)) @@ -427,12 +427,12 @@ def _score_elements(self, dtreat, dcontrol, g_d1, g_d0, pi, m, s, y): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - x, s = check_X_y(x, self._dml_data.s, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x, s = check_X_y(x, self._dml_data.s, ensure_all_finite=False) if self._score == "nonignorable": - z, _ = check_X_y(self._dml_data.z, y, force_all_finite=False) + z, _ = check_X_y(self._dml_data.z, y, ensure_all_finite=False) if scoring_methods is None: scoring_methods = {"ml_g": None, "ml_pi": None, "ml_m": None} diff --git a/doubleml/plm/pliv.py b/doubleml/plm/pliv.py index f933ce7d..d2b348c5 100644 --- a/doubleml/plm/pliv.py +++ b/doubleml/plm/pliv.py @@ -248,8 +248,8 @@ def _nuisance_tuning( return res def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) # nuisance l if external_predictions["ml_l"] is not None: @@ -273,7 +273,7 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return # nuisance m if self._dml_data.n_instr == 1: # one instrument: just identified - x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False) if external_predictions["ml_m"] is not None: m_hat = {"preds": external_predictions["ml_m"], "targets": None, "models": None} else: @@ -299,7 +299,7 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return } for i_instr in range(self._dml_data.n_instr): z = self._dml_data.z - x, this_z = check_X_y(x, z[:, i_instr], force_all_finite=False) + x, this_z = check_X_y(x, z[:, i_instr], ensure_all_finite=False) if external_predictions["ml_m_" + self._dml_data.z_cols[i_instr]] is not None: m_hat["preds"][:, i_instr] = external_predictions["ml_m_" + self._dml_data.z_cols[i_instr]] predictions["ml_m_" + self._dml_data.z_cols[i_instr]] = external_predictions[ @@ -415,7 +415,7 @@ def _score_elements(self, y, z, d, l_hat, m_hat, r_hat, g_hat, smpls): def _nuisance_est_partial_z(self, smpls, n_jobs_cv, return_models=False): y = self._dml_data.y - xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, force_all_finite=False) + xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, ensure_all_finite=False) # nuisance m r_hat = _dml_cv_predict( @@ -448,9 +448,9 @@ def _nuisance_est_partial_z(self, smpls, n_jobs_cv, return_models=False): return psi_elements, preds def _nuisance_est_partial_xz(self, smpls, n_jobs_cv, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) # nuisance l l_hat = _dml_cv_predict( @@ -516,8 +516,8 @@ def _nuisance_est_partial_xz(self, smpls, n_jobs_cv, return_models=False): def _nuisance_tuning_partial_x( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) if scoring_methods is None: scoring_methods = {"ml_l": None, "ml_m": None, "ml_r": None, "ml_g": None} @@ -541,7 +541,7 @@ def _nuisance_tuning_partial_x( m_tune_res = {instr_var: list() for instr_var in self._dml_data.z_cols} z = self._dml_data.z for i_instr in range(self._dml_data.n_instr): - x, this_z = check_X_y(x, z[:, i_instr], force_all_finite=False) + x, this_z = check_X_y(x, z[:, i_instr], ensure_all_finite=False) m_tune_res[self._dml_data.z_cols[i_instr]] = _dml_tune( this_z, x, @@ -556,7 +556,7 @@ def _nuisance_tuning_partial_x( ) else: # one instrument: just identified - x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False) m_tune_res = _dml_tune( z, x, @@ -632,7 +632,7 @@ def _nuisance_tuning_partial_x( def _nuisance_tuning_partial_z( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, force_all_finite=False) + xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, ensure_all_finite=False) if scoring_methods is None: scoring_methods = {"ml_r": None} @@ -664,9 +664,9 @@ def _nuisance_tuning_partial_z( def _nuisance_tuning_partial_xz( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) if scoring_methods is None: scoring_methods = {"ml_l": None, "ml_m": None, "ml_r": None} diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py index 0e29df0d..faf0399f 100644 --- a/doubleml/plm/plr.py +++ b/doubleml/plm/plr.py @@ -151,8 +151,8 @@ def _check_data(self, obj_dml_data): return def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) m_external = external_predictions["ml_m"] is not None l_external = external_predictions["ml_l"] is not None if "ml_g" in self._learner: @@ -287,8 +287,8 @@ def _sensitivity_element_est(self, preds): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) if scoring_methods is None: scoring_methods = {"ml_l": None, "ml_m": None, "ml_g": None} diff --git a/doubleml/rdd/rdd.py b/doubleml/rdd/rdd.py index 0d97ed0a..da54d61c 100644 --- a/doubleml/rdd/rdd.py +++ b/doubleml/rdd/rdd.py @@ -94,12 +94,12 @@ class RDFlex: >>> print(rdflex_obj.fit()) Method Coef. S.E. t-stat P>|t| 95% CI ------------------------------------------------------------------------- - Conventional 0.950 0.225 4.230 2.333e-05 [0.510, 1.391] - Robust - - 3.653 2.589e-04 [0.431, 1.429] + Conventional 0.939 0.225 4.168 3.071e-05 [0.498, 1.381] + Robust - - 3.589 3.316e-04 [0.416, 1.417] Design Type: Fuzzy Cutoff: 0 First Stage Kernel: triangular - Final Bandwidth: [0.74746872] + Final Bandwidth: [0.74754257] """ diff --git a/doubleml/tests/test_nonlinear_score_mixin.py b/doubleml/tests/test_nonlinear_score_mixin.py index d4e9a695..0fce08c3 100644 --- a/doubleml/tests/test_nonlinear_score_mixin.py +++ b/doubleml/tests/test_nonlinear_score_mixin.py @@ -83,8 +83,8 @@ def _check_data(self, obj_dml_data): pass def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) # nuisance l l_hat = _dml_cv_predict( diff --git a/doubleml/utils/_checks.py b/doubleml/utils/_checks.py index db1fbf94..0eabf53b 100644 --- a/doubleml/utils/_checks.py +++ b/doubleml/utils/_checks.py @@ -1,8 +1,8 @@ -import inspect import warnings import numpy as np from sklearn.utils.multiclass import type_of_target +from sklearn.utils.validation import has_fit_parameter def _check_in_zero_one(value, name, include_zero=True, include_one=True): @@ -514,7 +514,7 @@ def _check_sample_splitting(all_smpls, all_smpls_cluster, dml_data, is_cluster_d def _check_supports_sample_weights(learner, learner_name): - if "sample_weight" not in inspect.signature(learner.fit).parameters: + if not has_fit_parameter(learner, "sample_weight"): raise ValueError( f"The {learner_name} learner {str(learner)} does not support sample weights. " "Please choose a learner that supports sample weights." diff --git a/doubleml/utils/global_learner.py b/doubleml/utils/global_learner.py index b445f655..efc1eeec 100644 --- a/doubleml/utils/global_learner.py +++ b/doubleml/utils/global_learner.py @@ -1,17 +1,6 @@ -from sklearn import __version__ as sklearn_version from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin, clone, is_classifier, is_regressor from sklearn.utils.multiclass import unique_labels -from sklearn.utils.validation import _check_sample_weight, check_is_fitted - - -def parse_version(version): - return tuple(map(int, version.split(".")[:2])) - - -# TODO(0.11) can be removed if the sklearn dependency is bumped to 1.6.0 -sklearn_supports_validation = parse_version(sklearn_version) >= (1, 6) -if sklearn_supports_validation: - from sklearn.utils.validation import validate_data +from sklearn.utils.validation import _check_sample_weight, check_is_fitted, validate_data class GlobalRegressor(RegressorMixin, BaseEstimator): @@ -45,11 +34,7 @@ def fit(self, X, y, sample_weight=None): if not is_regressor(self.base_estimator): raise ValueError(f"base_estimator must be a regressor. Got {self.base_estimator.__class__.__name__} instead.") - # TODO(0.11) can be removed if the sklearn dependency is bumped to 1.6.0 - if sklearn_supports_validation: - X, y = validate_data(self, X, y) - else: - X, y = self._validate_data(X, y) + X, y = validate_data(self, X, y) _check_sample_weight(sample_weight, X) self._fitted_learner = clone(self.base_estimator) self._fitted_learner.fit(X, y) @@ -101,11 +86,7 @@ def fit(self, X, y, sample_weight=None): if not is_classifier(self.base_estimator): raise ValueError(f"base_estimator must be a classifier. Got {self.base_estimator.__class__.__name__} instead.") - # TODO(0.11) can be removed if the sklearn dependency is bumped to 1.6.0 - if sklearn_supports_validation: - X, y = validate_data(self, X, y) - else: - X, y = self._validate_data(X, y) + X, y = validate_data(self, X, y) _check_sample_weight(sample_weight, X) self.classes_ = unique_labels(y) self._fitted_learner = clone(self.base_estimator) diff --git a/pyproject.toml b/pyproject.toml index 1da863b4..06cb6502 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,15 +15,15 @@ maintainers = [ ] requires-python = ">=3.9" dependencies = [ - "joblib", - "numpy", - "pandas", - "scipy", - "scikit-learn>=1.4.0", - "statsmodels", - "matplotlib", + "joblib>=1.2.0", + "numpy>=2.0.0", + "pandas>=2.0.0", + "scipy>=1.7.0", + "scikit-learn>=1.6.0", + "statsmodels>=0.14.0", + "matplotlib>=3.9.0", "seaborn>=0.13", - "plotly" + "plotly>=5.0.0" ] classifiers = [ "Programming Language :: Python :: 3", @@ -31,6 +31,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent" ] @@ -42,8 +43,8 @@ rdd = [ dev = [ "pytest>=8.3.0", "pytest-cov>=6.0.0", - "xgboost", - "lightgbm", + "xgboost>=2.1.0", + "lightgbm>=4.5.0", "black>=25.1.0", "ruff>=0.11.1", "pre-commit>=4.2.0", @@ -67,7 +68,7 @@ doctest_optionflags = [ [tool.black] line-length = 127 -target-version = ['py39', 'py310', 'py311', 'py312'] +target-version = ['py39', 'py310', 'py311', 'py312', 'py313'] preview = true exclude = ''' /(