In [1]:
import numpy as np
import pandas as pd
from doubleml.data.base_data import DoubleMLData
from doubleml.plm.plpr import DoubleMLPLPR
from sklearn.linear_model import LassoCV, LinearRegression
from sklearn.base import clone
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor
from doubleml.plm.utils._plpr_util import extend_data, cre_fct, fd_fct, wd_fct
from doubleml.plm.datasets.dgp_static_panel_CP2025 import make_static_panel_CP2025
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import warnings
warnings.filterwarnings("ignore")

In [2]:
ml_ols = LinearRegression()

ml_lasso = make_pipeline(StandardScaler(), LassoCV())

ml_cart = DecisionTreeRegressor()

ml_rf = RandomForestRegressor(n_estimators=100, 
                              max_features=1.0, 
                              min_samples_leaf=5)

# Rf
# ml_rf_grid = {'ml_l': {'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)},
#               'ml_m': {'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)}}

# dml_plpr = DoubleMLPLPR(data_pdml, clone(ml_rf), clone(ml_rf), pdml_approach='cre', n_folds=5)
# dml_plpr.tune(param_grids=ml_rf_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
# dml_plpr.fit(n_jobs_cv=5)
# res_cre_rf[i, 0] = dml_plpr.coef[0] - theta
# res_cre_rf[i, 1] = dml_plpr.se[0] 
# confint = dml_plpr.confint()
# res_cre_rf[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

ml_boost = LGBMRegressor(verbose=-1, 
                         n_estimators=100, 
                         learning_rate=0.3,
                         min_child_samples=1) 

In [3]:
data = make_static_panel_CP2025(num_n=100, dgp_type='dgp1', x_var=5**2, a_var=0.95**2)
data

Unnamed: 0,id,time,d,y,x1,x2,x3,x4,x5,x6,...,x21,x22,x23,x24,x25,x26,x27,x28,x29,x30
0,1,1,-8.112787,-8.912584,-5.796365,-0.601492,-3.487003,4.357256,-3.527997,-7.455948,...,5.577388,-1.605127,-0.814059,-3.103182,2.631538,-4.643003,5.162550,3.740774,2.113925,2.026183
1,1,2,-6.949439,-11.955038,-3.906188,2.728437,-4.309356,4.652335,4.837147,5.113480,...,-6.215166,-1.291356,1.542859,-5.832660,-6.999235,-1.041017,0.388897,0.135666,-5.257444,-4.460909
2,1,3,-4.068573,-6.083197,1.199280,1.113007,-3.238536,5.611841,-3.096405,7.262224,...,-6.793106,5.217539,4.765350,3.238961,-3.244586,0.046503,7.297417,5.151098,0.353556,-6.192547
3,1,4,4.268473,8.099756,-3.690119,-3.551698,7.695905,3.349990,-3.575687,-9.272200,...,2.183245,-9.719218,-3.691420,-4.724887,-2.681429,-3.256659,2.039591,-5.688881,-1.675406,-1.537060
4,1,5,-8.490611,-13.074335,-8.383416,1.125561,-4.826987,1.226380,0.565376,1.337693,...,-1.622405,-11.514240,-4.995206,-0.293343,5.670162,5.218059,-10.535997,-0.007612,4.940226,-2.512659
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,100,6,7.979518,13.313478,0.743929,0.479841,8.463661,-3.785925,3.066799,-5.972398,...,-8.675939,-0.339098,0.200580,4.741587,3.884253,0.082965,-3.765886,2.210837,-2.203842,9.350995
996,100,7,4.525037,7.323752,2.795891,-0.028399,3.351155,-13.480410,4.504775,2.866025,...,2.935810,-6.909156,-6.092518,7.090190,-0.192387,-0.971816,2.114409,7.572450,-3.337941,4.831238
997,100,8,2.510815,3.504373,4.272010,-3.236265,1.253958,1.062489,-7.690689,6.750913,...,-9.397734,1.931898,7.888287,0.276521,3.114361,4.152857,0.079838,2.297878,9.451616,-1.324771
998,100,9,-4.087541,-3.451450,0.115834,-2.387410,-1.961343,-4.106975,4.037239,-3.903956,...,-5.021652,1.694328,-1.283313,7.283484,8.015243,6.879811,-7.213541,-2.226587,-0.305480,-1.568153


In [4]:
wd_fct(data).columns

Index(['id', 'time', 'd', 'y', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8',
       'x9', 'x10', 'x11', 'x12', 'x13', 'x14', 'x15', 'x16', 'x17', 'x18',
       'x19', 'x20', 'x21', 'x22', 'x23', 'x24', 'x25', 'x26', 'x27', 'x28',
       'x29', 'x30'],
      dtype='object')

In [None]:
n_reps = 20
theta = 0.5
dgp = 'dgp3'

res_cre_ols = np.full((n_reps, 3), np.nan)
res_cre_lasso = np.full((n_reps, 3), np.nan)
res_cre_cart = np.full((n_reps, 3), np.nan)
res_cre_boost = np.full((n_reps, 3), np.nan)

res_fd_ols = np.full((n_reps, 3), np.nan)
res_fd_lasso = np.full((n_reps, 3), np.nan)
res_fd_cart = np.full((n_reps, 3), np.nan)
res_fd_boost = np.full((n_reps, 3), np.nan)

res_wd_ols = np.full((n_reps, 3), np.nan)
res_wd_lasso = np.full((n_reps, 3), np.nan)
res_wd_cart = np.full((n_reps, 3), np.nan)
res_wd_boost = np.full((n_reps, 3), np.nan)


np.random.seed(123)

for i in range(n_reps):
    print(f"\rProcessing: {round((i+1)/n_reps*100, 3)} %", end="")

    ml_cart_grid = {'ml_l': {'ccp_alpha': np.random.choice(np.arange(0.002, 0.052 , 0.002), 5, replace=False),
                             'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)},
                    'ml_m': {'ccp_alpha': np.random.choice(np.arange(0.002, 0.052 , 0.002), 5, replace=False),
                             'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)}}

    ml_boost_grid= {'ml_l': {'reg_lambda': np.random.choice(np.arange(0.2, 2 , 0.2), 5, replace=False), 
                                'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)},
                        'ml_m': {'reg_lambda': np.random.choice(np.arange(0.2, 2 , 0.2), 5, replace=False),
                                'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)}}
    
    data = make_static_panel_CP2025(num_n=100, theta=theta, dgp_type=dgp, x_var=5**2, a_var=0.95**2)

    ## CRE
    cre_data = cre_fct(data)

    data_cre_pdml = DoubleMLData(cre_data,
                                 y_col='y',
                                 d_cols='d',
                                 cluster_cols='id',
                                 x_cols=[col for col in cre_data.columns if "x" in col])

    # OLS
    dml_plpr = DoubleMLPLPR(data_cre_pdml, clone(ml_ols), clone(ml_ols), pdml_approach='cre_general', n_folds=5)
    dml_plpr.fit()
    res_cre_ols[i, 0] = dml_plpr.coef[0] - theta
    res_cre_ols[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_ols[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Lasso
    cre_data_ext = extend_data(cre_data)
    data_cre_pdml_ext = DoubleMLData(cre_data_ext,
                                     y_col='y',
                                     d_cols='d',
                                     cluster_cols='id',
                                     x_cols=[col for col in cre_data_ext.columns if "x" in col]
                                     )

    dml_plpr = DoubleMLPLPR(data_cre_pdml_ext, clone(ml_lasso), clone(ml_lasso), pdml_approach='cre_general', n_folds=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_cre_lasso[i, 0] = dml_plpr.coef[0] - theta
    res_cre_lasso[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_lasso[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Cart
    dml_plpr = DoubleMLPLPR(data_cre_pdml, clone(ml_cart), clone(ml_cart), pdml_approach='cre_general', n_folds=5)
    dml_plpr.tune(param_grids=ml_cart_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_cre_cart[i, 0] = dml_plpr.coef[0] - theta
    res_cre_cart[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_cart[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Boost
    dml_plpr = DoubleMLPLPR(data_cre_pdml, clone(ml_boost), clone(ml_boost), pdml_approach='cre_general', n_folds=5)
    dml_plpr.tune(param_grids=ml_boost_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_cre_boost[i, 0] = dml_plpr.coef[0] - theta
    res_cre_boost[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_boost[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    ## FD
    fd_data = fd_fct(data)

    data_fd_pdml = DoubleMLData(fd_data,
                                y_col='y_diff',
                                d_cols='d_diff',
                                cluster_cols='id',
                                x_cols=[col for col in fd_data.columns if "x" in col])

    # OLS
    dml_plpr = DoubleMLPLPR(data_fd_pdml, clone(ml_ols), clone(ml_ols), pdml_approach='transform', n_folds=5)
    dml_plpr.fit()
    res_fd_ols[i, 0] = dml_plpr.coef[0] - theta
    res_fd_ols[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_fd_ols[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Lasso
    fd_data_ext = extend_data(fd_data)
    data_fd_pdml_ext = DoubleMLData(fd_data_ext,
                                    y_col='y_diff',
                                    d_cols='d_diff',
                                    cluster_cols='id',
                                    x_cols=[col for col in fd_data_ext.columns if "x" in col]
                                    )

    dml_plpr = DoubleMLPLPR(data_fd_pdml_ext, clone(ml_lasso), clone(ml_lasso), pdml_approach='transform', n_folds=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_fd_lasso[i, 0] = dml_plpr.coef[0] - theta
    res_fd_lasso[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_fd_lasso[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Cart
    dml_plpr = DoubleMLPLPR(data_fd_pdml, clone(ml_cart), clone(ml_cart), pdml_approach='transform', n_folds=5)
    dml_plpr.tune(param_grids=ml_cart_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_fd_cart[i, 0] = dml_plpr.coef[0] - theta
    res_fd_cart[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_fd_cart[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Boost
    dml_plpr = DoubleMLPLPR(data_fd_pdml, clone(ml_boost), clone(ml_boost), pdml_approach='transform', n_folds=5)
    dml_plpr.tune(param_grids=ml_boost_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_fd_boost[i, 0] = dml_plpr.coef[0] - theta
    res_fd_boost[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_fd_boost[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    ## WD
    wd_data = wd_fct(data)

    data_wd_pdml = DoubleMLData(wd_data,
                                y_col='y',
                                d_cols='d',
                                cluster_cols='id',
                                x_cols=[col for col in wd_data.columns if "x" in col])

    # OLS
    dml_plpr = DoubleMLPLPR(data_wd_pdml, clone(ml_ols), clone(ml_ols), pdml_approach='transform', n_folds=5)
    dml_plpr.fit()
    res_wd_ols[i, 0] = dml_plpr.coef[0] - theta
    res_wd_ols[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_wd_ols[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Lasso
    wd_data_ext = extend_data(wd_data)
    data_wd_pdml_ext = DoubleMLData(wd_data_ext,
                                    y_col='y',
                                    d_cols='d',
                                    cluster_cols='id',
                                    x_cols=[col for col in wd_data_ext.columns if "x" in col]
                                    )

    dml_plpr = DoubleMLPLPR(data_wd_pdml_ext, clone(ml_lasso), clone(ml_lasso), pdml_approach='transform', n_folds=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_wd_lasso[i, 0] = dml_plpr.coef[0] - theta
    res_wd_lasso[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_wd_lasso[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Cart
    dml_plpr = DoubleMLPLPR(data_wd_pdml, clone(ml_cart), clone(ml_cart), pdml_approach='transform', n_folds=5)
    dml_plpr.tune(param_grids=ml_cart_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_wd_cart[i, 0] = dml_plpr.coef[0] - theta
    res_wd_cart[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_wd_cart[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Boost
    dml_plpr = DoubleMLPLPR(data_wd_pdml, clone(ml_boost), clone(ml_boost), pdml_approach='transform', n_folds=5)
    dml_plpr.tune(param_grids=ml_boost_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_wd_boost[i, 0] = dml_plpr.coef[0] - theta
    res_wd_boost[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_wd_boost[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

Processing: 100.0 %

In [10]:
# cre general, dgp square
tab_dat = np.vstack([res_cre_ols.mean(axis=0), res_cre_lasso.mean(axis=0), 
                     res_cre_cart.mean(axis=0), res_cre_boost.mean(axis=0),
                     res_fd_ols.mean(axis=0), res_fd_lasso.mean(axis=0), 
                     res_fd_cart.mean(axis=0), res_fd_boost.mean(axis=0),
                     res_wd_ols.mean(axis=0), res_wd_lasso.mean(axis=0), 
                     res_wd_cart.mean(axis=0), res_wd_boost.mean(axis=0)])

tab_sd = np.vstack([res_cre_ols[:,1].std(), res_cre_lasso[:,1].std(), 
                    res_cre_cart[:,1].std(), res_cre_boost[:,1].std(),
                    res_fd_ols[:,1].std(), res_fd_lasso[:,1].std(), 
                    res_fd_cart[:,1].std(), res_fd_boost[:,1].std(),
                    res_wd_ols[:,1].std(), res_wd_lasso[:,1].std(), 
                    res_wd_cart[:,1].std(), res_wd_boost[:,1].std()])

tab_se = np.column_stack([res_cre_ols[:,1], res_cre_lasso[:,1], 
                          res_cre_cart[:,1], res_cre_boost[:,1],
                          res_fd_ols[:,1], res_fd_lasso[:,1], 
                          res_fd_cart[:,1], res_fd_boost[:,1],
                          res_wd_ols[:,1], res_wd_lasso[:,1], 
                          res_wd_cart[:,1], res_wd_boost[:,1]])

tab_rmse = np.vstack([np.sqrt(np.mean(res_cre_ols[:,0]**2)), np.sqrt(np.mean(res_cre_lasso[:,0]**2)), 
                      np.sqrt(np.mean(res_cre_cart[:,0]**2)), np.sqrt(np.mean(res_cre_boost[:,0]**2)),
                      np.sqrt(np.mean(res_fd_ols[:,0]**2)), np.sqrt(np.mean(res_fd_lasso[:,0]**2)), 
                      np.sqrt(np.mean(res_fd_cart[:,0]**2)), np.sqrt(np.mean(res_fd_boost[:,0]**2)),
                      np.sqrt(np.mean(res_wd_ols[:,0]**2)), np.sqrt(np.mean(res_wd_lasso[:,0]**2)), 
                      np.sqrt(np.mean(res_wd_cart[:,0]**2)), np.sqrt(np.mean(res_wd_boost[:,0]**2))])

se_sd = tab_sd / tab_dat[:,1].reshape((-1,1))

tab_dat = np.column_stack((tab_dat, se_sd, tab_rmse))

pd.DataFrame(tab_dat, columns=['Bias', 'SE', 'Coverage', 'SE/SD', 'RMSE'], 
                      index=['OLS (CRE)', 'Lasso (CRE)', 'Cart (CRE)', 'Boost (CRE)',
                             'OLS (FD)', 'Lasso (FD)', 'Cart (FD)', 'Boost (FD)',
                             'OLS (WD)', 'Lasso (WD)', 'Cart (WD)', 'Boost (WD)'])

Unnamed: 0,Bias,SE,Coverage,SE/SD,RMSE
OLS (CRE),0.992168,0.004074,0.0,0.111677,0.992171
Lasso (CRE),0.006154,0.033672,1.0,0.090553,0.028267
Cart (CRE),0.665847,0.087393,0.0,0.405625,0.702282
Boost (CRE),0.667262,0.066499,0.0,0.225313,0.672432
OLS (FD),0.990554,0.004807,0.0,0.096511,0.990562
Lasso (FD),0.025886,0.039459,1.0,0.104909,0.037748
Cart (FD),0.851024,0.045228,0.0,0.402826,0.854973
Boost (FD),0.83522,0.040769,0.0,0.087226,0.837982
OLS (WD),0.992407,0.004118,0.0,0.111584,0.992413
Lasso (WD),0.969836,0.010887,0.0,0.084016,0.969867


In [None]:
n_reps = 20
theta = 0.5
dgp = 'dgp3'

res_cre_ols = np.full((n_reps, 3), np.nan)
res_cre_lasso = np.full((n_reps, 3), np.nan)
res_cre_cart = np.full((n_reps, 3), np.nan)
res_cre_boost = np.full((n_reps, 3), np.nan)

np.random.seed(123)

for i in range(n_reps):
    print(f"\rProcessing: {round((i+1)/n_reps*100, 3)} %", end="")

    ml_cart_grid = {'ml_l': {'ccp_alpha': np.random.choice(np.arange(0.002, 0.052 , 0.002), 5, replace=False),
                             'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)},
                    'ml_m': {'ccp_alpha': np.random.choice(np.arange(0.002, 0.052 , 0.002), 5, replace=False),
                             'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)}}

    ml_boost_grid= {'ml_l': {'reg_lambda': np.random.choice(np.arange(0.2, 2 , 0.2), 5, replace=False), 
                                'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)},
                        'ml_m': {'reg_lambda': np.random.choice(np.arange(0.2, 2 , 0.2), 5, replace=False),
                                'max_depth': np.random.choice(np.arange(2, 11 , 1), 5, replace=False)}}
    
    data = make_static_panel_CP2025(num_n=4000, theta=theta, dgp_type=dgp, x_var=5**2, a_var=0.95**2)

    ## CRE
    cre_data = cre_fct(data)

    data_cre_pdml = DoubleMLData(cre_data,
                                 y_col='y',
                                 d_cols='d',
                                 cluster_cols='id',
                                 x_cols=[col for col in cre_data.columns if "x" in col]
                                 )

    # OLS
    dml_plpr = DoubleMLPLPR(data_cre_pdml, clone(ml_ols), clone(ml_ols), pdml_approach='cre_general', n_folds=5)
    dml_plpr.fit()
    res_cre_ols[i, 0] = dml_plpr.coef[0] - theta
    res_cre_ols[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_ols[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Lasso
    cre_data_ext = extend_data(cre_data)
    data_cre_pdml_ext = DoubleMLData(cre_data_ext,
                                     y_col='y',
                                     d_cols='d',
                                     cluster_cols='id',
                                     x_cols=[col for col in cre_data_ext.columns if "x" in col]
                                     )

    dml_plpr = DoubleMLPLPR(data_cre_pdml_ext, clone(ml_lasso), clone(ml_lasso), pdml_approach='cre_general', n_folds=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_cre_lasso[i, 0] = dml_plpr.coef[0] - theta
    res_cre_lasso[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_lasso[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Cart
    dml_plpr = DoubleMLPLPR(data_cre_pdml, clone(ml_cart), clone(ml_cart), pdml_approach='cre_general', n_folds=5)
    dml_plpr.tune(param_grids=ml_cart_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_cre_cart[i, 0] = dml_plpr.coef[0] - theta
    res_cre_cart[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_cart[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

    # Boost
    dml_plpr = DoubleMLPLPR(data_cre_pdml, clone(ml_boost), clone(ml_boost), pdml_approach='cre_general', n_folds=5)
    dml_plpr.tune(param_grids=ml_boost_grid, search_mode='randomized_search', n_iter_randomized_search=5, n_jobs_cv=5)
    dml_plpr.fit(n_jobs_cv=5)
    res_cre_boost[i, 0] = dml_plpr.coef[0] - theta
    res_cre_boost[i, 1] = dml_plpr.se[0] 
    confint = dml_plpr.confint()
    res_cre_boost[i, 2] = (confint['2.5 %'].iloc[0] <= theta) & (confint['97.5 %'].iloc[0] >= theta)

Processing: 100.0 %

In [15]:
# cre normal, dgp square
tab_dat = np.vstack([res_cre_ols.mean(axis=0), res_cre_lasso.mean(axis=0), 
                     res_cre_cart.mean(axis=0), res_cre_boost.mean(axis=0)])

tab_sd = np.vstack([res_cre_ols[:,1].std(), res_cre_lasso[:,1].std(), 
                    res_cre_cart[:,1].std(), res_cre_boost[:,1].std()])

tab_se = np.column_stack([res_cre_ols[:,1], res_cre_lasso[:,1], 
                          res_cre_cart[:,1], res_cre_boost[:,1]])

tab_rmse = np.vstack([np.sqrt(np.mean(res_cre_ols[:,0]**2)), np.sqrt(np.mean(res_cre_lasso[:,0]**2)), 
                      np.sqrt(np.mean(res_cre_cart[:,0]**2)), np.sqrt(np.mean(res_cre_boost[:,0]**2))])

se_sd = tab_sd / tab_dat[:,1].reshape((-1,1))

tab_dat = np.column_stack((tab_dat, se_sd, tab_rmse))

pd.DataFrame(tab_dat, columns=['Bias', 'SE', 'Coverage', 'SE/SD', 'RMSE'], 
                      index=['OLS (CRE)', 'Lasso (CRE)', 'Cart (CRE)', 'Boost (CRE)'])

Unnamed: 0,Bias,SE,Coverage,SE/SD,RMSE
OLS (CRE),0.992847,0.000642,0.0,0.010548,0.992848
Lasso (CRE),0.025631,0.012231,0.4,0.024428,0.027179
Cart (CRE),-0.008529,0.035518,0.2,0.293714,0.174186
Boost (CRE),-0.194521,0.018691,0.0,0.235885,0.197435


In [17]:
# cre general, dgp square
tab_dat = np.vstack([res_cre_ols.mean(axis=0), res_cre_lasso.mean(axis=0), 
                     res_cre_cart.mean(axis=0), res_cre_boost.mean(axis=0)])

tab_sd = np.vstack([res_cre_ols[:,1].std(), res_cre_lasso[:,1].std(), 
                    res_cre_cart[:,1].std(), res_cre_boost[:,1].std()])

tab_se = np.column_stack([res_cre_ols[:,1], res_cre_lasso[:,1], 
                          res_cre_cart[:,1], res_cre_boost[:,1]])

tab_rmse = np.vstack([np.sqrt(np.mean(res_cre_ols[:,0]**2)), np.sqrt(np.mean(res_cre_lasso[:,0]**2)), 
                      np.sqrt(np.mean(res_cre_cart[:,0]**2)), np.sqrt(np.mean(res_cre_boost[:,0]**2))])

se_sd = tab_sd / tab_dat[:,1].reshape((-1,1))

tab_dat = np.column_stack((tab_dat, se_sd, tab_rmse))

pd.DataFrame(tab_dat, columns=['Bias', 'SE', 'Coverage', 'SE/SD', 'RMSE'], 
                      index=['OLS (CRE)', 'Lasso (CRE)', 'Cart (CRE)', 'Boost (CRE)'])

Unnamed: 0,Bias,SE,Coverage,SE/SD,RMSE
OLS (CRE),0.992847,0.000642,0.0,0.010548,0.992848
Lasso (CRE),-0.008373,0.005419,0.6,0.008168,0.009689
Cart (CRE),0.059338,0.059713,0.4,0.452969,0.187953
Boost (CRE),0.036213,0.031977,0.7,0.247513,0.057652
