In [72]:
import doubleml as dml
import numpy as np

from doubleml.datasets import make_irm_data

# Simulation settings: true treatment effect, sample size and number of covariates.
theta = 0
n_obs = 200
dim_x = 5

# Seed NumPy's global random state before drawing the synthetic data set so the
# draw is repeatable.
SEED = 42
np.random.seed(SEED)

# Draw one synthetic data set as a DataFrame and wrap it for DoubleML with
# 'y' as the outcome column and 'd' as the treatment column.
data = make_irm_data(
    theta=theta,
    n_obs=n_obs,
    dim_x=dim_x,
    return_type='DataFrame',
)
dml_data = dml.DoubleMLData(data, y_col='y', d_cols='d')

In [73]:
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from lightgbm import LGBMClassifier, LGBMRegressor

# Grid of boosting learning rates tried for each nuisance learner. The values
# are passed as `learning_rate` below (n_estimators is fixed at 50), so the
# grid is named accordingly; `n_estimators_list` is kept as an alias because
# the plotting cell still reads that name.
learning_rates = [0.1, 0.3, 0.5, 0.7, 0.9]
n_estimators_list = learning_rates
n_est = len(n_estimators_list)
n_rep = 100

# Result arrays indexed as [repetition, ml_l learning rate, ml_m learning rate].
# They start as NaN so any cell that was never filled is easy to spot.
result_shape = (n_rep, n_est, n_est)
coefs = np.full(shape=result_shape, fill_value=np.nan)
rmses_ml_g = np.full(shape=result_shape, fill_value=np.nan)
rmses_ml_m = np.full(shape=result_shape, fill_value=np.nan)

coverage = np.full(shape=result_shape, fill_value=np.nan)

# NOTE(review): `dml_data` is built once in an earlier cell, so every
# repetition refits on the same sample. Averaging `coverage` over repetitions
# is therefore not coverage over repeated data draws -- confirm this is the
# intended design, or redraw the data inside the loop.
for i_rep in range(n_rep):
    print(i_rep)
    for i_ml_g, lr_ml_g in enumerate(learning_rates):
        for i_ml_m, lr_ml_m in enumerate(learning_rates):

            ml_g = LGBMRegressor(n_estimators=50, learning_rate=lr_ml_g)
            ml_m = LGBMClassifier(n_estimators=50, learning_rate=lr_ml_m)

            dml_plr = dml.DoubleMLPLR(dml_data, ml_g, ml_m)
            dml_plr.fit()

            idx = (i_rep, i_ml_g, i_ml_m)

            # Store explicit scalars. Assigning a size-1 array or Series to a
            # single array element relies on implicit size-1 -> scalar
            # conversion, which NumPy 1.25+ deprecates.
            coefs[idx] = dml_plr.coef[0]
            rmses_ml_g[idx] = np.asarray(dml_plr.rmses['ml_l']).item()
            rmses_ml_m[idx] = np.asarray(dml_plr.rmses['ml_m']).item()

            # 1.0 if the confidence interval contains the true effect, else 0.0.
            ci = dml_plr.confint()
            ci_lower = ci['2.5 %'].iloc[0]
            ci_upper = ci['97.5 %'].iloc[0]
            coverage[idx] = ci_lower <= theta <= ci_upper
        


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [74]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# `n_estimators_list` holds the learning rates swept in the simulation cell.
# The result arrays are indexed [ml_l learning rate, ml_m learning rate] after
# averaging over repetitions. With meshgrid's default 'xy' indexing, x varies
# along the second axis and y along the first, so x is the ml_m learning rate
# and y is the ml_l learning rate.
x, y = np.meshgrid(n_estimators_list, n_estimators_list)

fig = make_subplots(rows=1, cols=2,
                    specs=[[{'is_3d': True}, {'is_3d': True}]],
                    subplot_titles=['Coverage', 'Product of RMSEs'],
                    )

# Left: share of repetitions whose confidence interval contains theta.
fig.add_trace(go.Surface(x=x, y=y, z=coverage.mean(0)), 1, 1)
# Right: product of the repetition-averaged RMSEs of the two nuisance learners.
fig.add_trace(go.Surface(x=x, y=y, z=rmses_ml_g.mean(0) * rmses_ml_m.mean(0)), 1, 2)

fig.update_traces(contours_z=dict(show=True, usecolormap=True,
                                  highlightcolor="limegreen", project_z=True))

# Each 3D subplot has its own scene (`scene`, `scene2`), so both are labelled
# explicitly; styling only `scene` would leave the second plot without titles.
axis_titles = dict(xaxis_title='learning_rate for ml_m',
                   yaxis_title='learning_rate for ml_l')

fig.update_layout(scene=dict(zaxis_title='Coverage', **axis_titles),
                  scene2=dict(zaxis_title='Product of RMSEs', **axis_titles),
                  width=700,
                  margin=dict(r=20, b=10, l=10, t=10))

fig.show()