# Python: GATE Sensitivity Analysis

In this simple example, we illustrate how the [DoubleML](https://docs.doubleml.org/stable/index.html) package can be used to perform a sensitivity analysis for group average treatment effects in the [DoubleMLIRM](https://docs.doubleml.org/stable/guide/models.html#interactive-regression-model-irm) model.


## Data

In [75]:
import numpy as np
import pandas as pd
import doubleml as dml

from doubleml.datasets import make_heterogeneous_data
from lightgbm import LGBMRegressor, LGBMClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import Lasso, LogisticRegression

from sklearn.isotonic import IsotonicRegression
from sklearn.base import clone

In [76]:
# Fix NumPy's global seed so that the random covariate below (and the simulated
# data, assuming make_heterogeneous_data draws from NumPy's global RNG -- TODO
# confirm) is identical after every "Restart Kernel -> Run All".
np.random.seed(42)

n_obs = 10000
p = 5

# The returned dict holds the DataFrame under 'data' and the simulated
# individual treatment effects under 'effects'.
data_dict = make_heterogeneous_data(n_obs, p, binary_treatment=True, n_x=2)
data = data_dict['data']
# add random covariate: standard-normal noise drawn separately from the
# simulated data; it is the default benchmarking variable further down
data['Z'] = np.random.normal(size=n_obs)
ite = data_dict['effects']

# Boolean indicator of the subgroup whose average effect (GATE) is analysed
group = data['X_0'] >= 0.6

In [77]:
# Mean of the simulated individual effects: reference value for the ATE estimates.
ite.mean()

4.471158510805745

In [78]:
# Wrap the DataFrame for DoubleML with outcome 'y' and treatment 'd'.
# No x_cols are passed; the printed summary lists the columns used as covariates.
dml_data = dml.DoubleMLData(data, 'y', 'd')
print(dml_data)


------------------ Data summary      ------------------
Outcome variable: y
Treatment variable(s): ['d']
Covariates: ['X_0', 'X_1', 'X_2', 'X_3', 'X_4', 'Z']
Instrument variable(s): None
No. Observations: 10000

------------------ DataFrame info    ------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Columns: 8 entries, y to Z
dtypes: float64(8)
memory usage: 625.1 KB



In [79]:
# Nuisance learners handed to every DoubleMLIRM model in this notebook:
# a regressor as `ml_g` and a classifier as `ml_m`, both with default settings.
ml_g = LGBMRegressor()
ml_m = LGBMClassifier()

# Alternative learner pairs; uncomment one pair to replace LightGBM.
#ml_g = RandomForestRegressor()
#ml_m = RandomForestClassifier()

#ml_g = Lasso()
#ml_m = LogisticRegression()

In [80]:
# Passed as `trimming_threshold` to the DoubleMLIRM models and used as the
# bounds (y_min=threshold, y_max=1-threshold) of the isotonic calibration.
threshold = 0.05
# Covariate passed as `benchmarking_set` to sensitivity_benchmark and dropped
# from the covariate list of the short model.
benchmarking_variable = 'Z'
#benchmarking_variable = 'X_4'

## ATE estimation

In [81]:
# IRM model for the ATE (weights=None): 5 folds, 5 repetitions.
# `threshold` is defined once in the configuration cell above; it is not
# redefined here so that the trimming and calibration bounds cannot drift apart.
dml_irm_obj = dml.DoubleMLIRM(
    dml_data,
    ml_g,
    ml_m,
    n_folds=5,
    n_rep=5,
    trimming_threshold=threshold,
    weights=None)

In [82]:
# Estimate the model and print the coefficient table.
dml_irm_obj.fit()
print(dml_irm_obj.summary)

       coef   std err           t  P>|t|   2.5 %    97.5 %
d  4.465126  0.027412  162.889346    0.0  4.4114  4.518853


In [83]:
# Sensitivity analysis with default arguments; the scenario that was used
# (cf_y, cf_d, rho, level) is echoed at the top of the printed summary.
dml_irm_obj.sensitivity_analysis()
print(dml_irm_obj.sensitivity_summary)


------------------ Scenario          ------------------
Significance Level: level=0.95
Sensitivity parameters: cf_y=0.03; cf_d=0.03, rho=1.0

------------------ Bounds with CI    ------------------
   CI lower  theta lower     theta  theta upper  CI upper
d  4.383087     4.428184  4.465126     4.502069  4.547158

------------------ Robustness Values ------------------
   H_0     RV (%)    RVa (%)
d  0.0  93.524845  93.278662


In [84]:
# Benchmark the sensitivity parameters against the covariate in `benchmarking_variable`.
dml_irm_obj.sensitivity_benchmark(benchmarking_set=[benchmarking_variable])

Unnamed: 0,cf_y,cf_d,rho,delta_theta
d,0.0,0.008838,1.0,0.00098


In [85]:
# Collect the nuisance predictions of the fitted model so later models can be
# fitted on them. Index 0 on the last axis leaves a 2-D array per learner
# (presumably the last axis enumerates treatment variables -- TODO confirm).
ext_predictions = {
    "d": {
        learner: dml_irm_obj.predictions[learner][:, :, 0]
        for learner in ("ml_m", "ml_g0", "ml_g1")
    }
}

In [86]:
# Fresh, unfitted model with the same specification as before; it is fitted in
# the next cell on the externally supplied predictions.
dml_irm_obj = dml.DoubleMLIRM(
    dml_data,
    ml_g,
    ml_m,
    n_folds=5,
    n_rep=5,
    trimming_threshold=threshold,
    weights=None)

# Propensity predictions of the first fit (2-D, one column per loop iteration
# below). They are read from `ext_predictions` because `dml_irm_obj` was just
# re-created and holds no predictions yet. This name was previously undefined,
# which raised a NameError on a fresh kernel.
pred_ml_m = ext_predictions["d"]["ml_m"]
# Observed treatment, taken from the public data-backend property.
outcome_d = dml_data.d

# Isotonic calibration of the propensity predictions, with the fitted values
# bounded to [threshold, 1 - threshold].
calib_model = IsotonicRegression(y_min=threshold, y_max=1-threshold)

calib_ml_m = np.full_like(pred_ml_m, np.nan)
for i in range(pred_ml_m.shape[1]):
    # Calibrate every column separately with its own copy of the model.
    model = clone(calib_model)
    model.fit(pred_ml_m[:, i], outcome_d)
    calib_ml_m[:, i] = model.predict(pred_ml_m[:, i])

# NOTE: this overwrites the raw propensity predictions; re-running only this
# cell would calibrate the already calibrated values.
ext_predictions["d"]["ml_m"] = calib_ml_m

In [87]:
# Fit on the predictions stored in `ext_predictions` (calibrated propensity
# predictions, original outcome predictions) and print the coefficient table.
dml_irm_obj.fit(external_predictions=ext_predictions)
print(dml_irm_obj.summary)

       coef   std err           t  P>|t|   2.5 %    97.5 %
d  4.465126  0.027412  162.889346    0.0  4.4114  4.518853


In [88]:
# Same default sensitivity analysis, now for the model fitted on external predictions.
dml_irm_obj.sensitivity_analysis()
print(dml_irm_obj.sensitivity_summary)


------------------ Scenario          ------------------
Significance Level: level=0.95
Sensitivity parameters: cf_y=0.03; cf_d=0.03, rho=1.0

------------------ Bounds with CI    ------------------
   CI lower  theta lower     theta  theta upper  CI upper
d  4.383087     4.428184  4.465126     4.502069  4.547158

------------------ Robustness Values ------------------
   H_0     RV (%)    RVa (%)
d  0.0  93.524845  93.278662


In [89]:
# Benchmark against `benchmarking_variable` for the model fitted on external predictions.
dml_irm_obj.sensitivity_benchmark(benchmarking_set=[benchmarking_variable])

Unnamed: 0,cf_y,cf_d,rho,delta_theta
d,0.0,0.001088,1.0,0.003704


## GATE estimation

In [90]:
# Reference value for the GATE: mean simulated effect over observations in the group.
true_group_effect = ite[group].mean()
print(true_group_effect)

6.322531621471547


In [91]:
# Group indicator divided by the group share: the weights average to one and
# are zero for observations outside the group.
weights = group.to_numpy() / group.mean()
print(weights)

[0.         0.         0.         ... 2.48756219 2.48756219 0.        ]


In [92]:
# Same specification as the ATE model, but with the group weights passed as
# `weights`, so the estimate refers to the group defined by `group`.
dml_irm_obj = dml.DoubleMLIRM(
    dml_data,
    ml_g,
    ml_m,
    n_folds=5,
    n_rep=5,
    trimming_threshold=threshold, 
    weights=weights)

In [93]:
# Fit the weighted model on the predictions stored in `ext_predictions`.
dml_irm_obj.fit(external_predictions=ext_predictions)
print(dml_irm_obj.summary)

       coef   std err          t  P>|t|     2.5 %   97.5 %
d  6.295059  0.086007  73.192366    0.0  6.126489  6.46363


In [94]:
# Benchmark against `benchmarking_variable` for the group-weighted model.
dml_irm_obj.sensitivity_benchmark(benchmarking_set=[benchmarking_variable])

Unnamed: 0,cf_y,cf_d,rho,delta_theta
d,0.0,0.011791,-1.0,-0.004441


In [95]:
# Default sensitivity analysis for the group-weighted model.
dml_irm_obj.sensitivity_analysis()
print(dml_irm_obj.sensitivity_summary)


------------------ Scenario          ------------------
Significance Level: level=0.95
Sensitivity parameters: cf_y=0.03; cf_d=0.03, rho=1.0

------------------ Bounds with CI    ------------------
   CI lower  theta lower     theta  theta upper  CI upper
d  6.094266     6.236329  6.295059      6.35379  6.494681

------------------ Robustness Values ------------------
   H_0     RV (%)    RVa (%)
d  0.0  92.055234  91.480086


## GATE ATTE

In [96]:
# Treated observations inside the group (elementwise product of two boolean Series).
group_atte = (data['d'] == 1) * group
# Reference value: mean simulated effect among the treated group members.
print(ite[group_atte].mean())

6.302728138378982


In [97]:
# Indicator "treated and in the group", divided by its share so it averages to one.
weights_atte = group_atte.to_numpy() / group_atte.mean()
# Propensity predictions currently stored in `ext_predictions` (a 2-D array).
m_0 = ext_predictions["d"]["ml_m"]

# weights_bar: group indicator times the propensity prediction, with the same
# normalisation as `weights_atte`. The indicator is reshaped to a column so it
# broadcasts across the columns of `m_0`.
weights_bar_atte =  group.to_numpy().reshape(-1,1) * m_0 / group_atte.mean()
weight_dict = {'weights': weights_atte, 'weights_bar': weights_bar_atte}

In [98]:
# Same specification as before, with the weights supplied as a dict holding
# both 'weights' and 'weights_bar'.
dml_irm_obj_atte = dml.DoubleMLIRM(
    dml_data,
    ml_g,
    ml_m,
    n_folds=5,
    n_rep=5,
    trimming_threshold=threshold, 
    weights=weight_dict)

In [99]:
# Fit on the predictions stored in `ext_predictions` and print the coefficient table.
dml_irm_obj_atte.fit(external_predictions=ext_predictions)
print(dml_irm_obj_atte.summary)

       coef   std err          t  P>|t|     2.5 %    97.5 %
d  6.280159  0.115627  54.313975    0.0  6.053535  6.506784


In [100]:
# Built-in benchmark against `benchmarking_variable`; the section
# "Benchmarking GATE ATTE" below repeats this computation by hand.
dml_irm_obj_atte.sensitivity_benchmark(benchmarking_set=[benchmarking_variable])

Unnamed: 0,cf_y,cf_d,rho,delta_theta
d,0.0,0.0,0.112033,0.006748


In [101]:
# Default sensitivity analysis for the model with the ATTE-type group weights.
dml_irm_obj_atte.sensitivity_analysis()
print(dml_irm_obj_atte.sensitivity_summary)


------------------ Scenario          ------------------
Significance Level: level=0.95
Sensitivity parameters: cf_y=0.03; cf_d=0.03, rho=1.0

------------------ Bounds with CI    ------------------
   CI lower  theta lower     theta  theta upper  CI upper
d  6.032506     6.224768  6.280159      6.33555  6.523701

------------------ Robustness Values ------------------
   H_0     RV (%)    RVa (%)
d  0.0  92.782454  91.612626


## Benchmarking GATE ATTE

The problem is that, for the short model, we would ideally like to use the weights

$$\bar{\omega}_{short}(D,X_{long}) = \mathbb{E}[\omega(D,X_{long})|X_{short}],$$

for $X_{short}\subset X_{long}$, but the implementation relies on the weight definition

$$\bar{\omega}_{short}(D,X_{long}) = \bar{\omega}(D,X_{long}) = \mathbb{E}[\omega(D,X_{long})|X_{long}].$$

Due to the confounding in the propensity score, this might lead to biased estimates.

In [102]:
# Covariate names of the long model, and the same list without the benchmarking variable.
x_list_long = dml_irm_obj_atte._dml_data.x_cols
x_list_short = [x for x in x_list_long if x != benchmarking_variable]

In [103]:
# Propensity model on the short covariate set: a fresh copy of `ml_m`, fitted
# and evaluated on the same full sample.
X_short = data[x_list_short]
learner_weights = clone(ml_m)
learner_weights.fit(X_short, data['d'])

# Predicted probability of the second class, as a column vector.
m_short = learner_weights.predict_proba(X_short)[:, 1].reshape(-1, 1)
in_group = group.to_numpy().reshape(-1, 1)

# Same construction as the long-model weights_bar, with the short propensity.
weights_bar_atte_2 = in_group * m_short / group_atte.mean()
weight_dict_short = {'weights': weights_atte, 'weights_bar': weights_bar_atte_2}

In [104]:
# Data backend restricted to the short covariate list.
dml_data_short = dml.DoubleMLData(data, 'y', 'd', x_cols=x_list_short)

In [105]:
# Short model: same learners on the short data, with the short weights.
# NOTE(review): n_rep=1 here while the long model uses n_rep=5. The short
# weights_bar has a single column, which presumably has to match n_rep -- confirm
# before changing either.
dml_irm_obj_short = dml.DoubleMLIRM(
    dml_data_short,
    ml_g,
    ml_m,
    n_folds=5,
    n_rep=1,
    trimming_threshold=threshold, 
    weights=weight_dict_short)

# No external predictions are passed for the short model.
dml_irm_obj_short.fit()
print(dml_irm_obj_short.summary)

       coef   std err          t  P>|t|    2.5 %    97.5 %
d  6.275906  0.115092  54.529643    0.0  6.05033  6.501481


In [106]:
dml_long, dml_short = dml_irm_obj_atte, dml_irm_obj_short

# Pull out the pieces needed below: outcome variance plus the 'sigma2' and
# 'nu2' sensitivity elements of both models, each with its leading axis removed.
var_y = np.var(dml_long._dml_data.y)
elements_long = dml_long.sensitivity_elements
elements_short = dml_short.sensitivity_elements
var_y_residuals_long = np.squeeze(elements_long['sigma2'], axis=0)
nu2_long = np.squeeze(elements_long['nu2'], axis=0)
var_y_residuals_short = np.squeeze(elements_short['sigma2'], axis=0)
nu2_short = np.squeeze(elements_short['nu2'], axis=0)

# Nonparametric R2 of the outcome for each model, and the ratio of the short
# to the long nu2.
R2_y_long = 1.0 - var_y_residuals_long / var_y
R2_y_short = 1.0 - var_y_residuals_short / var_y
R2_riesz = nu2_short / nu2_long

In [107]:
# Gain statistics
all_cf_y_benchmark = np.clip(np.divide((R2_y_long - R2_y_short), (1.0 - R2_y_long)), 0, 1)
all_cf_d_benchmark = np.clip(np.divide((1.0 - R2_riesz), R2_riesz), 0, 1)
cf_y_benchmark = np.median(all_cf_y_benchmark, axis=0)
cf_d_benchmark = np.median(all_cf_d_benchmark, axis=0)

In [108]:
# Manually computed benchmark values, to compare with the cf_y / cf_d columns
# returned by sensitivity_benchmark for the same model.
print(f'cf_y_benchmark: {cf_y_benchmark}')
print(f'cf_d_benchmark: {cf_d_benchmark}')

cf_y_benchmark: [0.00371926]
cf_d_benchmark: [0.]
