In [1]:
import numpy as np
import pandas as pd
from doubleml.data.cluster_data import DoubleMLClusterData
from doubleml.plm.plpr import DoubleMLPLPR
from sklearn.linear_model import LassoCV, LinearRegression
from sklearn.base import clone
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor
from doubleml.plm.utils._plpr_util import extend_data, cre_fct, fd_fct, wd_fct
from doubleml.plm.datasets.dgp_static_panel_CP2025 import make_static_panel_CP2025
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import warnings
warnings.filterwarnings("ignore")

In [2]:
ml_ols = LinearRegression()

ml_lasso = make_pipeline(StandardScaler(), LassoCV())

ml_cart = DecisionTreeRegressor()

ml_rf = RandomForestRegressor(n_estimators=100, 
                              max_features=1.0, 
                              min_samples_leaf=5)

# Rf
# ml_rf_grid = {'ml_l': {'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)},
#               'ml_m': {'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)}}

# dml_plpr = DoubleMLPLPR(data_pdml, clone(ml_rf), clone(ml_rf), pdml_approach='cre', n_folds=5)
# dml_plpr.tune(param_grids=ml_rf_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
# dml_plpr.fit(n_jobs_cv=5)
# res_cre_rf[i, 0] = dml_plpr.coef[0] - theta
# res_cre_rf[i, 1] = dml_plpr.se[0] 
# confint = dml_plpr.confint()
# res_cre_rf[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

ml_boost = LGBMRegressor(verbose=-1, 
                         n_estimators=100, 
                         learning_rate=0.3,
                         min_child_samples=1) 

In [7]:
data = make_static_panel_CP2025(num_n=100, dgp_type='dgp1', x_var=5**2, a_var=0.95**2)
data

Unnamed: 0,id,time,d,y,x1,x2,x3,x4,x5,x6,...,x21,x22,x23,x24,x25,x26,x27,x28,x29,x30
0,1,1,-2.213736,-5.293206,-0.428742,7.394317,-3.040323,-0.556956,-1.971048,3.811521,...,6.212741,-4.062078,3.238729,-9.268694,-5.648472,9.005437,-1.537681,-4.250953,4.311248,1.155672
1,1,2,-0.705639,-1.099439,6.859486,-3.476090,-2.070501,0.951224,-0.037587,5.537579,...,-3.596578,2.580089,1.015682,-11.813514,6.059815,9.918398,-0.825656,-1.491334,2.950680,4.083369
2,1,3,7.975096,10.156811,8.308916,2.863192,4.956786,-2.209816,-3.509712,-5.863826,...,5.826138,-0.334149,1.253619,-6.735010,0.705128,-3.978092,3.948464,-3.216404,0.912388,-1.348336
3,1,4,-1.259776,-2.755462,-8.534541,4.878212,1.080527,9.905302,4.336665,7.272071,...,7.800067,6.354467,-3.945007,-6.898544,-0.649614,2.045477,4.110676,7.025232,4.912177,-1.291772
4,1,5,-14.969484,-24.316493,2.135777,1.402142,-15.818097,6.619429,-2.092901,-0.523111,...,-2.314368,6.764651,-1.790039,-5.611779,-0.821312,-4.314345,-2.108172,1.211618,4.972882,-3.570822
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,100,6,-1.542279,-3.390828,5.548290,0.599051,-1.009998,-2.323957,3.695354,-3.960357,...,-4.302796,5.029354,-3.867411,-5.779731,1.022882,-1.610741,-6.182220,3.790532,1.378363,-3.250114
996,100,7,-5.195300,-6.324920,0.875120,-1.827447,-3.364818,0.381971,-2.868032,-0.474953,...,-7.145188,-2.962602,6.051157,-0.310512,3.926997,6.150247,-0.424644,-0.768921,1.381692,-4.279590
997,100,8,-4.935131,-6.861080,-5.428853,2.489955,-0.062190,5.694679,-8.452585,-13.783593,...,5.129312,6.157375,-5.336022,-7.142420,6.714065,-6.922350,4.991919,6.219515,-5.687230,-3.842934
998,100,9,-10.656389,-17.293887,-8.934547,2.080141,-6.928987,1.259232,-7.253584,1.381321,...,-0.607923,10.114989,-4.271365,-8.707851,1.071853,-1.960183,0.646585,4.832729,3.747999,0.701658


In [8]:
wd_fct(data).columns

Index(['id', 'time', 'd', 'y', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8',
       'x9', 'x10', 'x11', 'x12', 'x13', 'x14', 'x15', 'x16', 'x17', 'x18',
       'x19', 'x20', 'x21', 'x22', 'x23', 'x24', 'x25', 'x26', 'x27', 'x28',
       'x29', 'x30'],
      dtype='object')

In [None]:
n_reps = 20
theta = 0.5
dgp = 'dgp3'

res_cre_ols = np.full((n_reps, 3), np.nan)
res_cre_lasso = np.full((n_reps, 3), np.nan)
res_cre_cart = np.full((n_reps, 3), np.nan)
res_cre_boost = np.full((n_reps, 3), np.nan)

res_fd_ols = np.full((n_reps, 3), np.nan)
res_fd_lasso = np.full((n_reps, 3), np.nan)
res_fd_cart = np.full((n_reps, 3), np.nan)
res_fd_boost = np.full((n_reps, 3), np.nan)

res_wd_ols = np.full((n_reps, 3), np.nan)
res_wd_lasso = np.full((n_reps, 3), np.nan)
res_wd_cart = np.full((n_reps, 3), np.nan)
res_wd_boost = np.full((n_reps, 3), np.nan)


np.random.seed(123)

for i in range(n_reps):
    print(f"\rProcessing: {round((i+1)/n_reps*100, 3)} %", end="")

    ml_cart_grid = {'ml_l': {'ccp_alpha': np.random.choice(np.arange(0.002, 0.052 , 0.002), 5, replace=False),
                             'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)},
                    'ml_m': {'ccp_alpha': np.random.choice(np.arange(0.002, 0.052 , 0.002), 5, replace=False),
                             'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)}}

    ml_boost_grid= {'ml_l': {'reg_lambda': np.random.choice(np.arange(0.2, 2 , 0.2), 5, replace=False), 
                                'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)},
                        'ml_m': {'reg_lambda': np.random.choice(np.arange(0.2, 2 , 0.2), 5, replace=False),
                                'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)}}
    
    data = make_static_panel_CP2025(num_n=100, theta=theta, dgp_type=dgp, x_var=5**2, a_var=0.95**2)

    ## CRE
    cre_data = cre_fct(data)

    data_cre_pdml = DoubleMLClusterData(cre_data,
                                    y_col='y',
                                    d_cols='d',
                                    cluster_cols='id',
                                    x_cols=[col for col in cre_data.columns if "x" in col])

    # OLS
    dml_plpr = DoubleMLPLPR(data_cre_pdml, clone(ml_ols), clone(ml_ols), pdml_approach='cre_general', n_folds=5)
    dml_plpr.fit()
    res_cre_ols[i, 0] = dml_plpr.coef[0] - theta
    res_cre_ols[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_ols[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Lasso
    cre_data_ext = extend_data(cre_data)
    data_cre_pdml_ext = DoubleMLClusterData(cre_data_ext,
                                        y_col='y',
                                        d_cols='d',
                                        cluster_cols='id',
                                        x_cols=[col for col in cre_data_ext.columns if "x" in col]
                                        )

    dml_plpr = DoubleMLPLPR(data_cre_pdml_ext, clone(ml_lasso), clone(ml_lasso), pdml_approach='cre_general', n_folds=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_cre_lasso[i, 0] = dml_plpr.coef[0] - theta
    res_cre_lasso[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_lasso[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Cart
    dml_plpr = DoubleMLPLPR(data_cre_pdml, clone(ml_cart), clone(ml_cart), pdml_approach='cre_general', n_folds=5)
    dml_plpr.tune(param_grids=ml_cart_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_cre_cart[i, 0] = dml_plpr.coef[0] - theta
    res_cre_cart[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_cart[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Boost
    dml_plpr = DoubleMLPLPR(data_cre_pdml, clone(ml_boost), clone(ml_boost), pdml_approach='cre_general', n_folds=5)
    dml_plpr.tune(param_grids=ml_boost_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_cre_boost[i, 0] = dml_plpr.coef[0] - theta
    res_cre_boost[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_boost[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    ## FD
    fd_data = fd_fct(data)

    data_fd_pdml = DoubleMLClusterData(fd_data,
                                       y_col='y_diff',
                                       d_cols='d_diff',
                                       cluster_cols='id',
                                       x_cols=[col for col in fd_data.columns if "x" in col])

    # OLS
    dml_plpr = DoubleMLPLPR(data_fd_pdml, clone(ml_ols), clone(ml_ols), pdml_approach='transform', n_folds=5)
    dml_plpr.fit()
    res_fd_ols[i, 0] = dml_plpr.coef[0] - theta
    res_fd_ols[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_fd_ols[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Lasso
    fd_data_ext = extend_data(fd_data)
    data_fd_pdml_ext = DoubleMLClusterData(fd_data_ext,
                                           y_col='y_diff',
                                           d_cols='d_diff',
                                           cluster_cols='id',
                                           x_cols=[col for col in fd_data_ext.columns if "x" in col]
                                           )

    dml_plpr = DoubleMLPLPR(data_fd_pdml_ext, clone(ml_lasso), clone(ml_lasso), pdml_approach='transform', n_folds=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_fd_lasso[i, 0] = dml_plpr.coef[0] - theta
    res_fd_lasso[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_fd_lasso[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Cart
    dml_plpr = DoubleMLPLPR(data_fd_pdml, clone(ml_cart), clone(ml_cart), pdml_approach='transform', n_folds=5)
    dml_plpr.tune(param_grids=ml_cart_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_fd_cart[i, 0] = dml_plpr.coef[0] - theta
    res_fd_cart[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_fd_cart[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Boost
    dml_plpr = DoubleMLPLPR(data_fd_pdml, clone(ml_boost), clone(ml_boost), pdml_approach='transform', n_folds=5)
    dml_plpr.tune(param_grids=ml_boost_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_fd_boost[i, 0] = dml_plpr.coef[0] - theta
    res_fd_boost[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_fd_boost[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    ## WD
    wd_data = wd_fct(data)

    data_wd_pdml = DoubleMLClusterData(wd_data,
                                       y_col='y',
                                       d_cols='d',
                                       cluster_cols='id',
                                       x_cols=[col for col in wd_data.columns if "x" in col])

    # OLS
    dml_plpr = DoubleMLPLPR(data_wd_pdml, clone(ml_ols), clone(ml_ols), pdml_approach='transform', n_folds=5)
    dml_plpr.fit()
    res_wd_ols[i, 0] = dml_plpr.coef[0] - theta
    res_wd_ols[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_wd_ols[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Lasso
    wd_data_ext = extend_data(wd_data)
    data_wd_pdml_ext = DoubleMLClusterData(wd_data_ext,
                                           y_col='y',
                                           d_cols='d',
                                           cluster_cols='id',
                                           x_cols=[col for col in wd_data_ext.columns if "x" in col]
                                           )

    dml_plpr = DoubleMLPLPR(data_wd_pdml_ext, clone(ml_lasso), clone(ml_lasso), pdml_approach='transform', n_folds=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_wd_lasso[i, 0] = dml_plpr.coef[0] - theta
    res_wd_lasso[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_wd_lasso[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Cart
    dml_plpr = DoubleMLPLPR(data_wd_pdml, clone(ml_cart), clone(ml_cart), pdml_approach='transform', n_folds=5)
    dml_plpr.tune(param_grids=ml_cart_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_wd_cart[i, 0] = dml_plpr.coef[0] - theta
    res_wd_cart[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_wd_cart[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Boost
    dml_plpr = DoubleMLPLPR(data_wd_pdml, clone(ml_boost), clone(ml_boost), pdml_approach='transform', n_folds=5)
    dml_plpr.tune(param_grids=ml_boost_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_wd_boost[i, 0] = dml_plpr.coef[0] - theta
    res_wd_boost[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_wd_boost[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

Processing: 100.0 %

In [10]:
# cre general, dgp square
tab_dat = np.vstack([res_cre_ols.mean(axis=0), res_cre_lasso.mean(axis=0), 
                     res_cre_cart.mean(axis=0), res_cre_boost.mean(axis=0),
                     res_fd_ols.mean(axis=0), res_fd_lasso.mean(axis=0), 
                     res_fd_cart.mean(axis=0), res_fd_boost.mean(axis=0),
                     res_wd_ols.mean(axis=0), res_wd_lasso.mean(axis=0), 
                     res_wd_cart.mean(axis=0), res_wd_boost.mean(axis=0)])

tab_sd = np.vstack([res_cre_ols[:,1].std(), res_cre_lasso[:,1].std(), 
                    res_cre_cart[:,1].std(), res_cre_boost[:,1].std(),
                    res_fd_ols[:,1].std(), res_fd_lasso[:,1].std(), 
                    res_fd_cart[:,1].std(), res_fd_boost[:,1].std(),
                    res_wd_ols[:,1].std(), res_wd_lasso[:,1].std(), 
                    res_wd_cart[:,1].std(), res_wd_boost[:,1].std()])

tab_se = np.column_stack([res_cre_ols[:,1], res_cre_lasso[:,1], 
                          res_cre_cart[:,1], res_cre_boost[:,1],
                          res_fd_ols[:,1], res_fd_lasso[:,1], 
                          res_fd_cart[:,1], res_fd_boost[:,1],
                          res_wd_ols[:,1], res_wd_lasso[:,1], 
                          res_wd_cart[:,1], res_wd_boost[:,1]])

tab_rmse = np.vstack([np.sqrt(np.mean(res_cre_ols[:,0]**2)), np.sqrt(np.mean(res_cre_lasso[:,0]**2)), 
                      np.sqrt(np.mean(res_cre_cart[:,0]**2)), np.sqrt(np.mean(res_cre_boost[:,0]**2)),
                      np.sqrt(np.mean(res_fd_ols[:,0]**2)), np.sqrt(np.mean(res_fd_lasso[:,0]**2)), 
                      np.sqrt(np.mean(res_fd_cart[:,0]**2)), np.sqrt(np.mean(res_fd_boost[:,0]**2)),
                      np.sqrt(np.mean(res_wd_ols[:,0]**2)), np.sqrt(np.mean(res_wd_lasso[:,0]**2)), 
                      np.sqrt(np.mean(res_wd_cart[:,0]**2)), np.sqrt(np.mean(res_wd_boost[:,0]**2))])

se_sd = tab_sd / tab_dat[:,1].reshape((-1,1))

tab_dat = np.column_stack((tab_dat, se_sd, tab_rmse))

pd.DataFrame(tab_dat, columns=['Bias', 'SE', 'Coverage', 'SE/SD', 'RMSE'], 
                      index=['OLS (CRE)', 'Lasso (CRE)', 'Cart (CRE)', 'Boost (CRE)',
                             'OLS (FD)', 'Lasso (FD)', 'Cart (FD)', 'Boost (FD)',
                             'OLS (WD)', 'Lasso (WD)', 'Cart (WD)', 'Boost (WD)'])

Unnamed: 0,Bias,SE,Coverage,SE/SD,RMSE
OLS (CRE),0.992168,0.004074,0.0,0.111677,0.992171
Lasso (CRE),0.006154,0.033672,1.0,0.090553,0.028267
Cart (CRE),0.665847,0.087393,0.0,0.405625,0.702282
Boost (CRE),0.667262,0.066499,0.0,0.225313,0.672432
OLS (FD),0.990554,0.004807,0.0,0.096511,0.990562
Lasso (FD),0.025886,0.039459,1.0,0.104909,0.037748
Cart (FD),0.851024,0.045228,0.0,0.402826,0.854973
Boost (FD),0.83522,0.040769,0.0,0.087226,0.837982
OLS (WD),0.992407,0.004118,0.0,0.111584,0.992413
Lasso (WD),0.969836,0.010887,0.0,0.084016,0.969867


In [None]:
n_reps = 20
theta = 0.5
dgp = 'dgp3'

res_cre_ols = np.full((n_reps, 3), np.nan)
res_cre_lasso = np.full((n_reps, 3), np.nan)
res_cre_cart = np.full((n_reps, 3), np.nan)
res_cre_boost = np.full((n_reps, 3), np.nan)

np.random.seed(123)

for i in range(n_reps):
    print(f"\rProcessing: {round((i+1)/n_reps*100, 3)} %", end="")

    ml_cart_grid = {'ml_l': {'ccp_alpha': np.random.choice(np.arange(0.002, 0.052 , 0.002), 5, replace=False),
                             'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)},
                    'ml_m': {'ccp_alpha': np.random.choice(np.arange(0.002, 0.052 , 0.002), 5, replace=False),
                             'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)}}

    ml_boost_grid= {'ml_l': {'reg_lambda': np.random.choice(np.arange(0.2, 2 , 0.2), 5, replace=False), 
                                'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)},
                        'ml_m': {'reg_lambda': np.random.choice(np.arange(0.2, 2 , 0.2), 5, replace=False),
                                'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)}}
    
    data = make_static_panel_CP2025(num_n=4000, theta=theta, dgp_type=dgp, x_var=5**2, a_var=0.95**2)

    ## CRE
    cre_data = cre_fct(data)

    data_cre_pdml = DoubleMLClusterData(cre_data,
                                    y_col='y',
                                    d_cols='d',
                                    cluster_cols='id',
                                    x_cols=[col for col in cre_data.columns if "x" in col])

    # OLS
    dml_plpr = DoubleMLPLPR(data_cre_pdml, clone(ml_ols), clone(ml_ols), pdml_approach='cre_general', n_folds=5)
    dml_plpr.fit()
    res_cre_ols[i, 0] = dml_plpr.coef[0] - theta
    res_cre_ols[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_ols[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Lasso
    cre_data_ext = extend_data(cre_data)
    data_cre_pdml_ext = DoubleMLClusterData(cre_data_ext,
                                        y_col='y',
                                        d_cols='d',
                                        cluster_cols='id',
                                        x_cols=[col for col in cre_data_ext.columns if "x" in col]
                                        )

    dml_plpr = DoubleMLPLPR(data_cre_pdml_ext, clone(ml_lasso), clone(ml_lasso), pdml_approach='cre_general', n_folds=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_cre_lasso[i, 0] = dml_plpr.coef[0] - theta
    res_cre_lasso[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_lasso[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Cart
    dml_plpr = DoubleMLPLPR(data_cre_pdml, clone(ml_cart), clone(ml_cart), pdml_approach='cre_general', n_folds=5)
    dml_plpr.tune(param_grids=ml_cart_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_cre_cart[i, 0] = dml_plpr.coef[0] - theta
    res_cre_cart[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_cart[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Boost
    dml_plpr = DoubleMLPLPR(data_cre_pdml, clone(ml_boost), clone(ml_boost), pdml_approach='cre_general', n_folds=5)
    dml_plpr.tune(param_grids=ml_boost_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_cre_boost[i, 0] = dml_plpr.coef[0] - theta
    res_cre_boost[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_boost[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

Processing: 100.0 %

In [15]:
# cre normal, dgp square
tab_dat = np.vstack([res_cre_ols.mean(axis=0), res_cre_lasso.mean(axis=0), 
                     res_cre_cart.mean(axis=0), res_cre_boost.mean(axis=0)])

tab_sd = np.vstack([res_cre_ols[:,1].std(), res_cre_lasso[:,1].std(), 
                    res_cre_cart[:,1].std(), res_cre_boost[:,1].std()])

tab_se = np.column_stack([res_cre_ols[:,1], res_cre_lasso[:,1], 
                          res_cre_cart[:,1], res_cre_boost[:,1]])

tab_rmse = np.vstack([np.sqrt(np.mean(res_cre_ols[:,0]**2)), np.sqrt(np.mean(res_cre_lasso[:,0]**2)), 
                      np.sqrt(np.mean(res_cre_cart[:,0]**2)), np.sqrt(np.mean(res_cre_boost[:,0]**2))])

se_sd = tab_sd / tab_dat[:,1].reshape((-1,1))

tab_dat = np.column_stack((tab_dat, se_sd, tab_rmse))

pd.DataFrame(tab_dat, columns=['Bias', 'SE', 'Coverage', 'SE/SD', 'RMSE'], 
                      index=['OLS (CRE)', 'Lasso (CRE)', 'Cart (CRE)', 'Boost (CRE)'])

Unnamed: 0,Bias,SE,Coverage,SE/SD,RMSE
OLS (CRE),0.992847,0.000642,0.0,0.010548,0.992848
Lasso (CRE),0.025631,0.012231,0.4,0.024428,0.027179
Cart (CRE),-0.008529,0.035518,0.2,0.293714,0.174186
Boost (CRE),-0.194521,0.018691,0.0,0.235885,0.197435


In [17]:
# cre general, dgp square
tab_dat = np.vstack([res_cre_ols.mean(axis=0), res_cre_lasso.mean(axis=0), 
                     res_cre_cart.mean(axis=0), res_cre_boost.mean(axis=0)])

tab_sd = np.vstack([res_cre_ols[:,1].std(), res_cre_lasso[:,1].std(), 
                    res_cre_cart[:,1].std(), res_cre_boost[:,1].std()])

tab_se = np.column_stack([res_cre_ols[:,1], res_cre_lasso[:,1], 
                          res_cre_cart[:,1], res_cre_boost[:,1]])

tab_rmse = np.vstack([np.sqrt(np.mean(res_cre_ols[:,0]**2)), np.sqrt(np.mean(res_cre_lasso[:,0]**2)), 
                      np.sqrt(np.mean(res_cre_cart[:,0]**2)), np.sqrt(np.mean(res_cre_boost[:,0]**2))])

se_sd = tab_sd / tab_dat[:,1].reshape((-1,1))

tab_dat = np.column_stack((tab_dat, se_sd, tab_rmse))

pd.DataFrame(tab_dat, columns=['Bias', 'SE', 'Coverage', 'SE/SD', 'RMSE'], 
                      index=['OLS (CRE)', 'Lasso (CRE)', 'Cart (CRE)', 'Boost (CRE)'])

Unnamed: 0,Bias,SE,Coverage,SE/SD,RMSE
OLS (CRE),0.992847,0.000642,0.0,0.010548,0.992848
Lasso (CRE),-0.008373,0.005419,0.6,0.008168,0.009689
Cart (CRE),0.059338,0.059713,0.4,0.452969,0.187953
Boost (CRE),0.036213,0.031977,0.7,0.247513,0.057652
