From 2272a18b5759fe214cac8654446d0318f923fb6a Mon Sep 17 00:00:00 2001 From: Shrey Dixit Date: Fri, 8 Jul 2022 11:40:10 +0200 Subject: [PATCH] Added notebook --- doc/examples/DoubleML meets FLAML.ipynb | 1329 +++++++++++++++++++++++ 1 file changed, 1329 insertions(+) create mode 100644 doc/examples/DoubleML meets FLAML.ipynb diff --git a/doc/examples/DoubleML meets FLAML.ipynb b/doc/examples/DoubleML meets FLAML.ipynb new file mode 100644 index 00000000..9a3b2eed --- /dev/null +++ b/doc/examples/DoubleML meets FLAML.ipynb @@ -0,0 +1,1329 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DoubleML meets FLAML: Tuning XGBoost learners\n", + "\n", + "In this example notebook, we illustrate how to use [FLAML (Fast Library for Automated Machine Learning & Tuning)](https://github.com/microsoft/FLAML) to tune the hyperparameters of an XGBoost learner in the [DoubleML](https://docs.doubleml.org/stable/index.html) framework. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data\n", + "\n", + "For the first part of this tutorial, we're going to use simulated data by calling [`make_plr_CCDDHNR2018`](https://docs.doubleml.org/stable/api/generated/doubleml.datasets.make_plr_CCDDHNR2018.html). This function generates data from a partially linear regression model used in [Chernozhukov et al. (2018)](https://onlinelibrary.wiley.com/doi/abs/10.1111/ectj.12097) for Figure 1. We generate 1000 observations, out of which 500 are used for tuning the XGB learners using FLAML and the remaining 500 are used to train the DoubleML model. The generated data will have 50 covariates, 1 treatment variable and 1 outcome variable. We have set the treatment effect to be $ 0.5 $ for this particular example." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# !
pip install flaml;" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import xgboost as xgb\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "\n", + "from doubleml.datasets import make_plr_CCDDHNR2018\n", + "\n", + "import doubleml as dml\n", + "from flaml import AutoML\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.dummy import DummyRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "sns.set()\n", + "colors = sns.color_palette()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "plt.rcParams['figure.figsize'] = 10., 7.5\n", + "sns.set(font_scale=1.5)\n", + "sns.set_style('whitegrid', {'axes.spines.top': False,\n", + " 'axes.spines.bottom': False,\n", + " 'axes.spines.left': False,\n", + " 'axes.spines.right': False})" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "data = make_plr_CCDDHNR2018(alpha=0.5, n_obs=1000, dim_x=50, return_type=\"DataFrame\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
X1X2X3X4X5X6X7X8X9X10...X43X44X45X46X47X48X49X50yd
01.6572220.9467160.3779601.2325670.548446-0.601472-0.639983-0.888613-0.1098920.865485...1.6623510.0568520.345201-0.933119-0.864177-1.4558190.2774210.4325553.8207682.871799
1-0.1403730.3181781.326577-0.038081-0.0815260.5250320.1739630.9889392.0520821.225609...2.3662731.6606310.1691730.1153050.0050490.129702-0.2571080.7174112.782493-0.683040
2-1.230496-1.1479150.0573840.8222330.3169330.321593-0.138514-0.9990750.231421-0.651108...-1.443120-0.648600-1.081062-0.834230-1.232478-0.582722-0.313851-1.4241370.643945-0.527346
3-0.0332240.8265160.348916-0.558627-1.092446-0.445529-0.404820-0.860091-1.155255-2.058850...-0.282183-1.089843-0.759434-1.019423-1.391079-1.443890-0.215652-0.836138-0.0030380.383504
4-1.542770-1.374956-0.221876-0.104524-0.618885-0.505555-0.7648990.402509-0.308056-0.364675...-0.185257-0.9118340.7071090.5379420.4875480.7400120.3611880.309118-2.316022-2.028819
\n", + "

5 rows × 52 columns

\n", + "
" + ], + "text/plain": [ + " X1 X2 X3 X4 X5 X6 X7 \\\n", + "0 1.657222 0.946716 0.377960 1.232567 0.548446 -0.601472 -0.639983 \n", + "1 -0.140373 0.318178 1.326577 -0.038081 -0.081526 0.525032 0.173963 \n", + "2 -1.230496 -1.147915 0.057384 0.822233 0.316933 0.321593 -0.138514 \n", + "3 -0.033224 0.826516 0.348916 -0.558627 -1.092446 -0.445529 -0.404820 \n", + "4 -1.542770 -1.374956 -0.221876 -0.104524 -0.618885 -0.505555 -0.764899 \n", + "\n", + " X8 X9 X10 ... X43 X44 X45 X46 \\\n", + "0 -0.888613 -0.109892 0.865485 ... 1.662351 0.056852 0.345201 -0.933119 \n", + "1 0.988939 2.052082 1.225609 ... 2.366273 1.660631 0.169173 0.115305 \n", + "2 -0.999075 0.231421 -0.651108 ... -1.443120 -0.648600 -1.081062 -0.834230 \n", + "3 -0.860091 -1.155255 -2.058850 ... -0.282183 -1.089843 -0.759434 -1.019423 \n", + "4 0.402509 -0.308056 -0.364675 ... -0.185257 -0.911834 0.707109 0.537942 \n", + "\n", + " X47 X48 X49 X50 y d \n", + "0 -0.864177 -1.455819 0.277421 0.432555 3.820768 2.871799 \n", + "1 0.005049 0.129702 -0.257108 0.717411 2.782493 -0.683040 \n", + "2 -1.232478 -0.582722 -0.313851 -1.424137 0.643945 -0.527346 \n", + "3 -1.391079 -1.443890 -0.215652 -0.836138 -0.003038 0.383504 \n", + "4 0.487548 0.740012 0.361188 0.309118 -2.316022 -2.028819 \n", + "\n", + "[5 rows x 52 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "data_flaml, data_dml = train_test_split(data, test_size=0.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hyperparameter Tuning using FLAML\n", + "\n", + "We will be training two [`XGBRegressor`](https://xgboost.readthedocs.io/en/stable/python/python_api.html#xgboost.XGBRegressor) models: the first one for estimating the nuisance function $ l_{\\theta}(X) = E[Y|X] $, and the second one to estimate the nuisance function $
m_{\\theta}(X) = E[D|X] $. We will be doing so by minimizing the mean squared error between the true values and the predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[flaml.automl: 07-08 11:24:41] {2322} WARNING - Time taken to find the best model is 83% of the provided time budget and not all estimators' hyperparameter search converged. Consider increasing the time budget.\n" + ] + } + ], + "source": [ + "automl_l = AutoML()\n", + "settings = {\n", + " \"time_budget\": 120, # total running time in seconds\n", + " \"metric\": 'mse', # primary metrics for regression can be chosen from: ['mae','mse','r2','rmse','mape']\n", + " \"estimator_list\": ['xgboost'], # list of ML learners; we tune xgboost in this example\n", + " \"task\": 'regression', # task type \n", + "}\n", + "\n", + "automl_l.fit(X_train=data_flaml.drop(columns = [\"y\", \"d\"]).values, y_train=data_flaml[\"y\"].values, verbose=2, **settings)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best hyperparmeter config: {'n_estimators': 9, 'max_leaves': 8, 'min_child_weight': 45.63982015489896, 'learning_rate': 0.8271174453352466, 'subsample': 0.8887851542660552, 'colsample_bylevel': 0.8861407627871792, 'colsample_bytree': 0.6360227175852697, 'reg_alpha': 0.24333444176621818, 'reg_lambda': 0.149045122688325}\n", + "Best mse on validation data: 1.37\n", + "Training duration of best run: 0.01917 s\n" + ] + } + ], + "source": [ + "# retrieve best config\n", + "print('Best hyperparmeter config:', automl_l.best_config)\n", + "print('Best mse on validation data: {0:.4g}'.format(automl_l.best_loss))\n", + "print('Training duration of best run: {0:.4g} s'.format(automl_l.best_config_train_time))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "XGBRegressor(base_score=0.5, booster='gbtree',\n", + " colsample_bylevel=0.8861407627871792, colsample_bynode=1,\n", + " colsample_bytree=0.6360227175852697, gamma=0, gpu_id=-1,\n", + " grow_policy='lossguide', importance_type='gain',\n", + " interaction_constraints='', learning_rate=0.8271174453352466,\n", + " max_delta_step=0, max_depth=0, max_leaves=8,\n", + " min_child_weight=45.63982015489896, missing=nan,\n", + " monotone_constraints='()', n_estimators=1, n_jobs=-1,\n", + " num_parallel_tree=1, random_state=0, reg_alpha=0.24333444176621818,\n", + " reg_lambda=0.149045122688325, scale_pos_weight=1,\n", + " subsample=0.8887851542660552, tree_method='hist',\n", + " use_label_encoder=False, validate_parameters=1, verbosity=0)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl_l.model.estimator" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "automl_m = AutoML()\n", + "settings = {\n", + " \"time_budget\": 120, # total running time in seconds\n", + " \"metric\": 'mse', # primary metrics for regression can be chosen from: ['mae','mse','r2','rmse','mape']\n", + " \"estimator_list\": ['xgboost'], # list of ML learners; we tune xgboost in this example\n", + " \"task\": 'regression', # task type \n", + "}\n", + "\n", + "automl_m.fit(X_train=data_flaml.drop(columns = [\"y\", \"d\"]).values, y_train=data_flaml[\"d\"].values, verbose=2, **settings)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best hyperparmeter config: {'n_estimators': 23, 'max_leaves': 9, 'min_child_weight': 42.84308837737722, 'learning_rate': 1.0, 'subsample': 0.9908960911521747, 'colsample_bylevel': 0.7908024341286631, 'colsample_bytree': 0.7922331300866388, 'reg_alpha': 0.02047249913685125, 'reg_lambda': 
0.5100784572580507}\n", + "Best mse on validation data: 1.173\n", + "Training duration of best run: 0.01911 s\n" + ] + } + ], + "source": [ + "# retrieve best config\n", + "print('Best hyperparmeter config:', automl_m.best_config)\n", + "print('Best mse on validation data: {0:.4g}'.format(automl_m.best_loss))\n", + "print('Training duration of best run: {0:.4g} s'.format(automl_m.best_config_train_time))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "XGBRegressor(base_score=0.5, booster='gbtree',\n", + " colsample_bylevel=0.7908024341286631, colsample_bynode=1,\n", + " colsample_bytree=0.7922331300866388, gamma=0, gpu_id=-1,\n", + " grow_policy='lossguide', importance_type='gain',\n", + " interaction_constraints='', learning_rate=1.0, max_delta_step=0,\n", + " max_depth=0, max_leaves=9, min_child_weight=42.84308837737722,\n", + " missing=nan, monotone_constraints='()', n_estimators=1, n_jobs=-1,\n", + " num_parallel_tree=1, random_state=0, reg_alpha=0.02047249913685125,\n", + " reg_lambda=0.5100784572580507, scale_pos_weight=1,\n", + " subsample=0.9908960911521747, tree_method='hist',\n", + " use_label_encoder=False, validate_parameters=1, verbosity=0)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "automl_m.model.estimator" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overfit Check\n", + "Before continuing with the DML experiments, we make sure that the estimators trained by FLAML are not overfitted to the data they were trained on. For this, we compare the in-sample error (MSE on the FLAML data) with the out-of-sample error (MSE on the DML data) and check whether they differ substantially."
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# Check for Overfitting: Compare in-sample (train), out-of-sample (test) MSE\n", + "\n", + "# ml_l\n", + "pres_ins_ml_l = automl_l.model.estimator.predict(data_flaml.drop(columns = [\"y\", \"d\"]).values)\n", + "mse_ins_ml_l = np.mean((pres_ins_ml_l - data_flaml.y.values)**2)\n", + "pres_oos_ml_l = automl_l.model.estimator.predict(data_dml.drop(columns = [\"y\", \"d\"]).values)\n", + "mse_oos_ml_l = np.mean((pres_oos_ml_l - data_dml.y.values)**2)\n", + "\n", + "# ml_m\n", + "pres_ins_ml_m = automl_m.model.estimator.predict(data_flaml.drop(columns = [\"y\", \"d\"]).values)\n", + "mse_ins_ml_m = np.mean((pres_ins_ml_m - data_flaml.d.values)**2)\n", + "pres_oos_ml_m = automl_m.model.estimator.predict(data_dml.drop(columns = [\"y\", \"d\"]).values)\n", + "mse_oos_ml_m = np.mean((pres_oos_ml_m - data_dml.d.values)**2)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ins mse, ml_l: 1.2759536289191706\n", + "oos mse, ml_l: 1.3618780092147345\n", + "ins mse, ml_m: 0.9963568938504447\n", + "oos mse, ml_m: 1.0363425121122045\n" + ] + } + ], + "source": [ + "print(\"ins mse, ml_l:\", mse_ins_ml_l)\n", + "print(\"oos mse, ml_l:\", mse_oos_ml_l)\n", + "\n", + "print(\"ins mse, ml_m:\", mse_ins_ml_m)\n", + "print(\"oos mse, ml_m:\", mse_oos_ml_m)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can observe, the MSE on training data is a little lower than validation data (DML data). However, this is not very significant and we can use these estimators for DML." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Double / Debiased Machine Learning\n", + "\n", + "Now that we have our tuned XGB Learners, we can train our Double Machine Learning model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== DoubleMLData Object ==================\n", + "\n", + "------------------ Data summary ------------------\n", + "Outcome variable: y\n", + "Treatment variable(s): ['d']\n", + "Covariates: ['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11', 'X12', 'X13', 'X14', 'X15', 'X16', 'X17', 'X18', 'X19', 'X20', 'X21', 'X22', 'X23', 'X24', 'X25', 'X26', 'X27', 'X28', 'X29', 'X30', 'X31', 'X32', 'X33', 'X34', 'X35', 'X36', 'X37', 'X38', 'X39', 'X40', 'X41', 'X42', 'X43', 'X44', 'X45', 'X46', 'X47', 'X48', 'X49', 'X50']\n", + "Instrument variable(s): None\n", + "No. Observations: 500\n", + "\n", + "------------------ DataFrame info ------------------\n", + "\n", + "Int64Index: 500 entries, 261 to 256\n", + "Columns: 52 entries, X1 to d\n", + "dtypes: float64(52)\n", + "memory usage: 207.0 KB\n", + "\n" + ] + } + ], + "source": [ + "obj_dml_data = dml.DoubleMLData(data_dml, \"y\", \"d\")\n", + "print(obj_dml_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "def print_scores(dml_plr_obj, return_scores=False):\n", + "    \"\"\"Print the MSE of the stored ml_l and ml_m predictions, or return them as (mse_l, mse_m) if return_scores is True.\"\"\"\n", + "    y = dml_plr_obj._dml_data.y  # target of ml_l, the learner for l(X) = E[Y|X]\n", + "    d = dml_plr_obj._dml_data.d  # target of ml_m, the learner for m(X) = E[D|X]\n", + "\n", + "    mse_ml_l = mean_squared_error(y, dml_plr_obj.predictions['ml_l'][:, 0, 0])  # outcome predictions are scored against y\n", + "    mse_ml_m = mean_squared_error(d, dml_plr_obj.predictions['ml_m'][:, 0, 0])  # treatment predictions are scored against d, not y\n", + "\n", + "    if return_scores:\n", + "        return mse_ml_l, mse_ml_m\n", + "\n", + "    print(f\"The mean squared error for l_of_X: {mse_ml_l:.3f}\")\n", + "    print(f\"The mean squared error for m_of_X: {mse_ml_m:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tuned Learner" + ] + }, + { +
"cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
coefstd errtP>|t|2.5 %97.5 %
d0.4777970.0504829.4646612.945135e-210.3788540.576741
\n", + "
" + ], + "text/plain": [ + " coef std err t P>|t| 2.5 % 97.5 %\n", + "d 0.477797 0.050482 9.464661 2.945135e-21 0.378854 0.576741" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dml_plr_obj_tuned = dml.DoubleMLPLR(obj_dml_data, automl_l.model.estimator, automl_m.model.estimator)\n", + "automl_tuned_summary = dml_plr_obj_tuned.fit(store_predictions = True).summary\n", + "automl_tuned_summary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can observe, the coefficient, i.e. the treatment effect estimated by our model, is very close to the true treatment effect of $ 0.5 $, and the standard error is also low. Now we will also evaluate the learners themselves by calculating their mean squared error." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The mean squared error for l_of_X: 1.368\n", + "The mean squared error for m_of_X: 1.665\n" + ] + } + ], + "source": [ + "print_scores(dml_plr_obj_tuned)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Untuned XGB Learners\n", + "\n", + "Now we compare these metrics with a DoubleML model that uses untuned XGB learners." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
coefstd errtP>|t|2.5 %97.5 %
d0.4196970.0483358.6830283.853698e-180.3249620.514433
\n", + "
" + ], + "text/plain": [ + " coef std err t P>|t| 2.5 % 97.5 %\n", + "d 0.419697 0.048335 8.683028 3.853698e-18 0.324962 0.514433" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ml_l = ml_m = xgb.XGBRegressor()\n", + "\n", + "dml_plr_obj_untuned = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m)\n", + "untuned_summary = dml_plr_obj_untuned.fit(store_predictions = True).summary\n", + "untuned_summary" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The mean squared error for l_of_X: 1.544\n", + "The mean squared error for m_of_X: 1.806\n" + ] + } + ], + "source": [ + "print_scores(dml_plr_obj_untuned)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can observe, the estimated coefficient is not as close to the true treatment effect as in the tuned case. We also calculated the mean squared error of the learners themselves and found it to be higher than that of their tuned counterparts. This supports the hypothesis that tuning the learners improves the estimate of the treatment effect.\n", + "\n", + "Now, we'll run some more experiments to check whether the FLAML approach is indeed superior." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dummy Learner" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
coefstd errtP>|t|2.5 %97.5 %
d0.7233190.03372721.4465384.919298e-1020.6572160.789422
\n", + "
" + ], + "text/plain": [ + " coef std err t P>|t| 2.5 % 97.5 %\n", + "d 0.723319 0.033727 21.446538 4.919298e-102 0.657216 0.789422" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ml_l = ml_m = DummyRegressor()\n", + "\n", + "dml_plr_obj_dummy = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m)\n", + "dummy_summary = dml_plr_obj_dummy.fit(store_predictions = True).summary\n", + "dummy_summary" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The mean squared error for l_of_X: 2.036\n", + "The mean squared error for m_of_X: 2.335\n" + ] + } + ], + "source": [ + "print_scores(dml_plr_obj_dummy)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### AutoML Untuned Learner" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best hyperparmeter config: {'n_estimators': 4, 'max_leaves': 4, 'min_child_weight': 0.9999999999999993, 'learning_rate': 0.09999999999999995, 'subsample': 1.0, 'colsample_bylevel': 1.0, 'colsample_bytree': 1.0, 'reg_alpha': 0.0009765625, 'reg_lambda': 1.0}\n", + "Best mse on validation data: 2.105\n", + "Training duration of best run: 0.03583 s\n" + ] + } + ], + "source": [ + "automl_untuned_l = AutoML()\n", + "settings = {\n", + " \"time_budget\": 0.01, # total running time in seconds\n", + " \"metric\": 'mse', # primary metrics for regression can be chosen from: ['mae','mse','r2','rmse','mape']\n", + " \"estimator_list\": ['xgboost'], # list of ML learners; we tune xgboost in this example\n", + " \"task\": 'regression', # task type \n", + "}\n", + "\n", + "automl_untuned_l.fit(X_train=data_flaml.drop(columns = [\"y\", \"d\"]).values, y_train=data_flaml[\"y\"].values, verbose=0, **settings)\n", + "\n", + "# retrieve best config\n", + "print('Best 
hyperparmeter config:', automl_untuned_l.best_config)\n", + "print('Best mse on validation data: {0:.4g}'.format(automl_untuned_l.best_loss))\n", + "print('Training duration of best run: {0:.4g} s'.format(automl_untuned_l.best_config_train_time))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best hyperparmeter config: {'n_estimators': 4, 'max_leaves': 4, 'min_child_weight': 0.9999999999999993, 'learning_rate': 0.09999999999999995, 'subsample': 1.0, 'colsample_bylevel': 1.0, 'colsample_bytree': 1.0, 'reg_alpha': 0.0009765625, 'reg_lambda': 1.0}\n", + "Best mse on validation data: 2.039\n", + "Training duration of best run: 0.02979 s\n" + ] + } + ], + "source": [ + "automl_untuned_m = AutoML()\n", + "settings = {\n", + " \"time_budget\": 0.01, # total running time in seconds\n", + " \"metric\": 'mse', # primary metrics for regression can be chosen from: ['mae','mse','r2','rmse','mape']\n", + " \"estimator_list\": ['xgboost'], # list of ML learners; we tune xgboost in this example\n", + " \"task\": 'regression', # task type \n", + "}\n", + "\n", + "automl_untuned_m.fit(X_train=data_flaml.drop(columns = [\"y\", \"d\"]).values, y_train=data_flaml[\"d\"].values, verbose=0, **settings)\n", + "\n", + "# retrieve best config\n", + "print('Best hyperparmeter config:', automl_untuned_m.best_config)\n", + "print('Best mse on validation data: {0:.4g}'.format(automl_untuned_m.best_loss))\n", + "print('Training duration of best run: {0:.4g} s'.format(automl_untuned_m.best_config_train_time))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
coefstd errtP>|t|2.5 %97.5 %
d0.6169570.0344117.9296226.924914e-720.5495150.684399
\n", + "
" + ], + "text/plain": [ + " coef std err t P>|t| 2.5 % 97.5 %\n", + "d 0.616957 0.03441 17.929622 6.924914e-72 0.549515 0.684399" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dml_plr_obj_untuned_automl = dml.DoubleMLPLR(obj_dml_data, automl_untuned_l.model.estimator, automl_untuned_m.model.estimator)\n", + "untuned_automl_summary = dml_plr_obj_untuned_automl.fit(store_predictions = True).summary\n", + "untuned_automl_summary" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The mean squared error for l_of_X: 1.894\n", + "The mean squared error for m_of_X: 1.892\n" + ] + } + ], + "source": [ + "print_scores(dml_plr_obj_untuned_automl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results and Conclusion" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
coefstd errtP>|t|2.5 %97.5 %
automl_tuned0.4777970.0504829.4646612.945135e-210.3788540.576741
untuned0.4196970.0483358.6830283.853698e-180.3249620.514433
dummy0.7233190.03372721.4465384.919298e-1020.6572160.789422
untuned_automl0.6169570.03441017.9296226.924914e-720.5495150.684399
\n", + "
" + ], + "text/plain": [ + " coef std err t P>|t| 2.5 % \\\n", + "automl_tuned 0.477797 0.050482 9.464661 2.945135e-21 0.378854 \n", + "untuned 0.419697 0.048335 8.683028 3.853698e-18 0.324962 \n", + "dummy 0.723319 0.033727 21.446538 4.919298e-102 0.657216 \n", + "untuned_automl 0.616957 0.034410 17.929622 6.924914e-72 0.549515 \n", + "\n", + " 97.5 % \n", + "automl_tuned 0.576741 \n", + "untuned 0.514433 \n", + "dummy 0.789422 \n", + "untuned_automl 0.684399 " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "summary = pd.concat((automl_tuned_summary, untuned_summary, dummy_summary, untuned_automl_summary))\n", + "summary.index = ('automl_tuned', 'untuned', 'dummy', 'untuned_automl')\n", + "summary" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "summary.to_pickle(\"summary_simulated_data_split.pkl\")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "errors = np.full((2, summary.shape[0]), np.nan)\n", + "errors[0, :] = summary['coef'] - summary['2.5 %']\n", + "errors[1, :] = summary['97.5 %'] - summary['coef']\n", + "plt.errorbar(summary.index, summary.coef, fmt='o', yerr=errors)\n", + "plt.axhline(0.5)\n", + "\n", + "plt.xlabel('ML method')\n", + "plt.ylabel('Coefficients and 95%-CI')\n", + "plt.xticks(rotation = 20)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The learners tuned with FLAML lead to the coefficient estimate that is closest to the true coefficient of $ 0.5 $, and the corresponding 95% confidence interval covers the true value. The confidence intervals of the dummy and the barely tuned AutoML learners are narrower, but they are centered around biased estimates and do not cover $ 0.5 $." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
automl_tuneduntuneddummyautoml_untuned
MSE l_of_X1.3676881.5442242.0358791.893867
MSE m_of_X1.6647191.8064712.3354721.892149
\n", + "
" + ], + "text/plain": [ + " automl_tuned untuned dummy automl_untuned\n", + "MSE l_of_X 1.367688 1.544224 2.035879 1.893867\n", + "MSE m_of_X 1.664719 1.806471 2.335472 1.892149" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scores = {}\n", + "scores[\"automl_tuned\"] = print_scores(dml_plr_obj_tuned, True)\n", + "scores[\"untuned\"] = print_scores(dml_plr_obj_untuned, True)\n", + "scores[\"dummy\"] = print_scores(dml_plr_obj_dummy, True)\n", + "scores[\"automl_untuned\"] = print_scores(dml_plr_obj_untuned_automl, True)\n", + "scores = pd.DataFrame(scores, index = [\"MSE l_of_X\", \"MSE m_of_X\"])\n", + "scores" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "scores.to_pickle(\"scores_simulated_data_split.pkl\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "scores.iloc[0].plot(kind=\"bar\", title=\"MSE for l_of_X\");" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "scores.iloc[1].plot(kind=\"bar\", title=\"MSE for m_of_X\");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In all of our tests, we found out that the FLAML AutoML approach leads to the closest coefficient to the true one with the lowest MSE for $ l_{\\theta}(X) $ and $ m_{\\theta}(X) $. In conclusion, this approach can save a lot of time spent on hyperparameter tuning without compromising performance." + ] + } + ], + "metadata": { + "interpreter": { + "hash": "78ea0f640d7ba82461c68eabb12a1cf14510fff2c8bf1b2902d34c8551bd83b1" + }, + "kernelspec": { + "display_name": "Python 3.8.8 ('base')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}