# Install packages

In [8]:
from doubleml import DoubleMLData
from doubleml.datasets import fetch_401K
import numpy as np

# Construct DoubleMLData object

In [3]:
# Fetch the 401(k) data set as a pandas DataFrame.
data = fetch_401K(return_type='DataFrame')

# Columns passed to DoubleML as control variables.
covariate_cols = ['age', 'inc', 'educ', 'fsize', 'marr',
                  'twoearn', 'db', 'pira', 'hown']

# Wrap the DataFrame for DoubleML: 'net_tfa' is the outcome column,
# 'e401' the treatment column, and covariate_cols the controls.
dml_data = DoubleMLData(
    data,
    y_col='net_tfa',
    d_cols='e401',
    x_cols=covariate_cols,
)

# Machine Learning applied for estimation 

In [6]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# Random-forest learners. ml_l_rf and ml_m_rf are later passed to DoubleMLPLR
# as its ml_l and ml_m arguments, respectively.
ml_l_rf = RandomForestRegressor(
    n_estimators=500,
    max_depth=7,
    max_features=3,
    min_samples_leaf=3,
)

ml_m_rf = RandomForestClassifier(
    n_estimators=500,
    max_depth=5,
    max_features=4,
    min_samples_leaf=7,
)

from xgboost import XGBClassifier, XGBRegressor

# Gradient-boosted alternatives to the two learners above. They are defined
# here but not referenced in the cells visible in this notebook.
ml_l_xgb = XGBRegressor(
    objective="reg:squarederror",
    eta=0.1,
    n_estimators=35,
)

ml_m_xgb = XGBClassifier(
    use_label_encoder=False,
    objective="binary:logistic",
    eval_metric="logloss",
    eta=0.1,
    n_estimators=34,
)
 

# Partially Linear Regression (PLR)

In [10]:
from doubleml import DoubleMLPLR

# Example 1: PLR model built with the library's default settings.
# It is stored under its own name so that it is not silently overwritten by
# the configured model below and can still be inspected or fitted.
np.random.seed(123)
dml_plr_tree_default = DoubleMLPLR(dml_data,
                                   ml_l=ml_l_rf,
                                   ml_m=ml_m_rf)

# Example 2: PLR model with explicit settings. This is the model that the
# following cells fit and report on.
# The seed is reset so this model starts from the same random state no matter
# whether Example 1 was constructed first.
np.random.seed(123)
dml_plr_tree = DoubleMLPLR(dml_data,
                           ml_l=ml_l_rf,
                           ml_m=ml_m_rf,
                           n_folds=3,
                           n_rep=1,
                           score='partialling out')  # option: 'partialling out' or 'IV-type'
# NOTE(review): dml_procedure='dml2' is no longer passed explicitly. 'dml2' is
# the documented default, so the estimate is unchanged, and the argument is
# deprecated in later DoubleML releases - confirm against the installed version.

# Estimation

In [12]:
# Fit the model and estimate the coefficient; fit() returns the model object
# itself, which is why its repr is echoed as the cell output.
dml_plr_tree.fit()

<doubleml.double_ml_plr.DoubleMLPLR at 0x1759edfd0>

In [13]:
# Point estimate of the coefficient ("theta-zero-check"), returned as an array
# with one entry for the single treatment variable.
dml_plr_tree.coef

array([8909.63407762])

In [14]:
# Standard error of the coefficient estimate.
dml_plr_tree.se

array([1321.82228913])

In [16]:
# Confidence interval for the coefficient; the output table reports the
# 2.5 % and 97.5 % bounds for the treatment variable.
dml_plr_tree.confint()

Unnamed: 0,2.5 %,97.5 %
e401,6318.909997,11500.358158


In [15]:
# Summary table: coefficient, standard error, t statistic, p-value and
# confidence bounds in a single row per treatment variable.
dml_plr_tree.summary

Unnamed: 0,coef,std err,t,P>|t|,2.5 %,97.5 %
e401,8909.634078,1321.822289,6.740417,1.579322e-11,6318.909997,11500.358158


# Inference

In [18]:
# Multiplier bootstrap (relevant when there are multiple treatment variables).
# DoubleML supports several approaches to valid simultaneous inference, some
# of which are based on a multiplier bootstrap.
dml_plr_tree.bootstrap()

<doubleml.double_ml_plr.DoubleMLPLR at 0x1759edfd0>

### we find a positive and significant effect: Being eligible for such a pension plan increases the amount of net financial assets by approximately $9000. 