# Python: GATE Sensitivity Analysis


In [170]:
import numpy as np
import pandas as pd
import doubleml as dml

from doubleml.datasets import make_irm_data
from lightgbm import LGBMRegressor, LGBMClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import Lasso, LogisticRegression

In [171]:
n_obs = 5000
p=5

x, y, d = make_irm_data(n_obs=n_obs, dim_x=p, R2_d=0.2, return_type='array')
x_add = np.random.normal(0, 1, size=(n_obs, 1))

data = pd.DataFrame({'y': y, 'd': d, 'z': x_add[:, 0]})
data = pd.concat([data, pd.DataFrame(x, columns=[f'X_{i}' for i in range(p)])], axis=1)
data

Unnamed: 0,y,d,z,X_0,X_1,X_2,X_3,X_4
0,1.616086,1.0,-0.211625,1.093350,1.468724,1.861200,0.109481,-0.447881
1,-0.173095,0.0,-0.327309,-0.369182,-0.617565,-0.573169,1.045328,-0.540476
2,0.259623,0.0,0.947759,0.220728,-0.442928,-1.086717,-1.183534,-1.189916
3,-1.156828,0.0,0.415699,-1.621338,-0.397609,0.916703,0.970026,0.062995
4,0.835840,0.0,0.401570,-0.558913,-0.319334,-0.073372,1.079542,2.631487
...,...,...,...,...,...,...,...,...
4995,-1.010232,1.0,-0.792577,-0.106349,-1.332315,-0.735812,-1.977642,-1.033334
4996,-1.554110,1.0,-0.107234,0.031514,-0.270187,-0.759946,-1.439273,-1.653805
4997,0.929441,0.0,-0.773312,-0.204930,0.807529,0.099922,-0.167826,0.983901
4998,-0.173641,1.0,-0.473332,-0.360392,-0.624195,-0.647965,-1.096721,-1.262595


In [172]:
dml_data = dml.DoubleMLData(data, 'y', 'd')
print(dml_data)


------------------ Data summary      ------------------
Outcome variable: y
Treatment variable(s): ['d']
Covariates: ['z', 'X_0', 'X_1', 'X_2', 'X_3', 'X_4']
Instrument variable(s): None
No. Observations: 5000

------------------ DataFrame info    ------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Columns: 8 entries, y to X_4
dtypes: float64(8)
memory usage: 312.6 KB



In [173]:
ml_g = LGBMRegressor()
ml_m = LGBMClassifier()

#ml_g = RandomForestRegressor()
#ml_m = RandomForestClassifier()

#ml_g = Lasso()
#ml_m = LogisticRegression()

In [174]:
dml_irm_obj = dml.DoubleMLIRM(
    dml_data,
    ml_g,
    ml_m,
    n_folds=5,
    n_rep=5,
    trimming_threshold=0.01, 
    weights=np.ones(n_obs,))

In [175]:
dml_irm_obj.fit()
print(dml_irm_obj)


------------------ Data summary      ------------------
Outcome variable: y
Treatment variable(s): ['d']
Covariates: ['z', 'X_0', 'X_1', 'X_2', 'X_3', 'X_4']
Instrument variable(s): None
No. Observations: 5000

------------------ Score & algorithm ------------------
Score function: ATE
DML algorithm: dml2

------------------ Machine learner   ------------------
Learner ml_g: LGBMRegressor()
Learner ml_m: LGBMClassifier()
Out-of-sample Performance:
Learner ml_g0 RMSE: [[1.06693086]
 [1.0597129 ]
 [1.06510768]
 [1.06885258]
 [1.06177779]]
Learner ml_g1 RMSE: [[1.07668569]
 [1.07321053]
 [1.05876816]
 [1.06007865]
 [1.07045144]]
Learner ml_m RMSE: [[0.47663561]
 [0.48353266]
 [0.48152412]
 [0.4833191 ]
 [0.4836403 ]]

------------------ Resampling        ------------------
No. folds: 5
No. repeated sample splits: 5
Apply cross-fitting: True

------------------ Fit summary       ------------------
       coef   std err         t     P>|t|     2.5 %    97.5 %
d  0.010758  0.051616  0.20841

In [176]:
dml_irm_obj.sensitivity_benchmark(benchmarking_set=['z'])

Unnamed: 0,cf_y,cf_d,rho,delta_theta
d,0.005084,0.0,0.069511,0.00157


In [177]:
dml_irm_obj

<doubleml.double_ml_irm.DoubleMLIRM at 0x261b75f6510>