# Simulation studies
This notebook can be used to replicate the simulation studies in Curth, Alaa and van der Schaar (2020). Note: it requires a working installation of rpy2.

## Simulation study 1
Simulation studies using one-dimensional data, based on the motivating example of Kennedy (2020).


In [None]:
# Value passed as `n_jobs` to the experiment runners in this notebook
# (presumably the number of parallel workers -- confirm against the runners).
N_JOBS = 4

In [None]:
# Value passed as `repeats` to the simulation study 1 runners
# (presumably the number of simulation replications -- confirm against the runners).
N_REPEATS_SIM1 = 500

### The IF-learner

In [None]:
# Make sure the output directory for the IF-learner results exists.
import os

IF_PATH = 'paper_utils/if_paper/paper_results/if-learner/'
# exist_ok=True turns this into a no-op when the directory is already present
# and avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(IF_PATH, exist_ok=True)

In [None]:
from paper_utils.if_paper.helper_classes import RSmoothingSpline, AdaptiveLogisticGAM
from paper_utils.if_paper.if_learner_experiments import eval_range_bias, eval_range_n

from iflearn.simulation_utils.base import binary_gyorfi_baseline

In [None]:
# set range of training observations to consider
# (passed as the second positional argument to eval_range_n; the name is
# rebound later in the notebook for the other experiments)
range_n = [200, 500, 1000, 2000, 3000, 5000, 10000, 30000]

#### Constant propensity (p=0.5)

In [None]:
# Smoothing-spline learner evaluated over range_n; no propensity_model is
# passed, so eval_range_n's default is used. Results are written to CSV.
res_n = eval_range_n(
    RSmoothingSpline(),
    range_n,
    repeats=N_REPEATS_SIM1,
    n_jobs=N_JOBS,
    verbose=1,
)
res_n.to_csv(os.path.join(IF_PATH, 'CATE_spline_p05.csv'))

In [None]:
# 'RR' setting with binary outcomes: AdaptiveLogisticGAM as the first-stage
# model, RSmoothingSpline as the treatment-effect estimator; no
# propensity_model is passed, so eval_range_n's default is used.
res_n = eval_range_n(
    AdaptiveLogisticGAM(),
    range_n,
    repeats=N_REPEATS_SIM1,
    n_jobs=N_JOBS,
    setting='RR',
    binary_y=True,
    baseline_model=binary_gyorfi_baseline,
    te_estimator=RSmoothingSpline(),
    verbose=1,
)
res_n.to_csv(os.path.join(IF_PATH, 'RR_gam_p05.csv'))

#### Propensity score from Kennedy (2020)

In [None]:
from iflearn.simulation_utils.treatment_effects import propensity_kennedy

In [None]:
# Same smoothing-spline experiment over range_n, now with propensity_kennedy
# supplied as the propensity model. Results are written to CSV.
res_n = eval_range_n(
    RSmoothingSpline(),
    range_n,
    repeats=N_REPEATS_SIM1,
    n_jobs=N_JOBS,
    propensity_model=propensity_kennedy,
    verbose=1,
)
res_n.to_csv(os.path.join(IF_PATH, 'CATE_spline_withpropensity.csv'))

In [None]:
# 'RR' setting with binary outcomes and propensity_kennedy as the propensity
# model; RSmoothingSpline is used as the treatment-effect estimator.
res_n = eval_range_n(
    AdaptiveLogisticGAM(),
    range_n,
    repeats=N_REPEATS_SIM1,
    n_jobs=N_JOBS,
    setting='RR',
    binary_y=True,
    propensity_model=propensity_kennedy,
    baseline_model=binary_gyorfi_baseline,
    te_estimator=RSmoothingSpline(),
    verbose=1,
)
res_n.to_csv(os.path.join(IF_PATH, 'RR_gam_withpropensity.csv'))

#### Unknown selection bias

In [None]:
import numpy as np

# Grid handed to eval_range_bias: roughly 0.10 up to 0.95 in steps of 0.05
# (np.arange excludes the stop value 1).
range_b = list(np.arange(0.1, 1, 0.05))
res_b = eval_range_bias(
    RSmoothingSpline(),
    range_b,
    repeats=N_REPEATS_SIM1,
    n_train=500,
    n_jobs=N_JOBS,
    verbose=1,
)
res_b.to_csv(os.path.join(IF_PATH, 'CATE_spline_withbias.csv'))

### The Group-IF-learner

In [None]:
# Make sure the output directory for the Group-IF-learner results exists.
import os

GROUP_PATH = 'paper_utils/if_paper/paper_results/group-if-learner/'
# exist_ok=True turns this into a no-op when the directory is already present
# and avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(GROUP_PATH, exist_ok=True)

In [None]:
from paper_utils.if_paper.group_if_learner_experiments import eval_range_n_group

In [None]:
# Rebinds range_n for the Group-IF-learner runs: passed as the second
# positional argument to eval_range_n_group
# (presumably training-set sizes, as in the earlier cells -- confirm).
range_n = [100, 200, 500, 750, 1000, 2000]

In [None]:
# experiment not in paper
# Group experiment with a smoothing spline; no propensity_model is passed,
# so eval_range_n_group's default is used.
res_n = eval_range_n_group(
    RSmoothingSpline(),
    range_n,
    repeats=N_REPEATS_SIM1,
    n_jobs=N_JOBS,
    verbose=1,
)
res_n.to_csv(os.path.join(GROUP_PATH, 'CATE_spline_p05_group.csv'))

In [None]:
# experiment in paper
# Group experiment with a smoothing spline and propensity_kennedy as the
# propensity model.
res_n = eval_range_n_group(
    RSmoothingSpline(),
    range_n,
    repeats=N_REPEATS_SIM1,
    n_jobs=N_JOBS,
    propensity_model=propensity_kennedy,
    verbose=1,
)
res_n.to_csv(os.path.join(GROUP_PATH, 'CATE_spline_withpropensity_group.csv'))

## Simulation study 2: GRFs 

In [None]:
# Make sure the output directory for the GRF experiment results exists.
import os

GRF_PATH = 'paper_utils/if_paper/paper_results/grf-if-learner/'
# exist_ok=True turns this into a no-op when the directory is already present
# and avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(GRF_PATH, exist_ok=True)

In [None]:
from paper_utils.if_paper.grf_experiments import eval_range_grf

from iflearn.simulation_utils.base import constant_baseline, baseline_wa, uniform_covariate_model,\
                                         normal_error_model, ModelCaller
from iflearn.simulation_utils.treatment_effects import te_interaction_baseline, te_multiple_baseline,\
                                                        propensity_wa, nonlinear_treatment_effect_wa1

In [None]:
# Value passed as `repeats` to eval_range_grf in simulation study 2
# (presumably the number of simulation replications -- confirm against the runner).
N_REPEATS_SIM2 = 200

In [None]:
# Wrap uniform_covariate_model in a ModelCaller that supplies low=0, high=1,
# i.e. covariates on [0, 1] rather than the model's default [-1, 1] range.
unif_01 = ModelCaller(uniform_covariate_model, args=dict(high=1, low=0))

In [None]:
# Rebinds range_n for the GRF runs: passed as the first positional argument
# to eval_range_grf (presumably training-set sizes -- confirm).
range_n = [800, 1600]

In [None]:
# GRF run: nonlinear treatment effect, constant baseline, propensity_model=None.
res_n = eval_range_grf(
    range_n,
    dimension_range=False,
    # data-generating process
    d=10,
    covariate_model=unif_01,
    propensity_model=None,
    baseline_model=constant_baseline,
    te_function=nonlinear_treatment_effect_wa1,
    error_model=normal_error_model,
    # evaluation / execution settings
    n_test=1000,
    repeats=N_REPEATS_SIM2,
    n_jobs=N_JOBS,
    pre_dispatch='2*n_jobs',
    verbose=1,
)
res_n.to_csv(os.path.join(GRF_PATH, 'GRF_nonlinearTE_noconfounding.csv'))

In [None]:
# GRF run: te_function=None, with propensity_wa and baseline_wa.
res_n = eval_range_grf(
    range_n,
    dimension_range=False,
    # data-generating process
    d=10,
    covariate_model=unif_01,
    propensity_model=propensity_wa,
    baseline_model=baseline_wa,
    te_function=None,
    error_model=normal_error_model,
    # evaluation / execution settings
    n_test=1000,
    repeats=N_REPEATS_SIM2,
    n_jobs=N_JOBS,
    verbose=1,
)
res_n.to_csv(os.path.join(GRF_PATH, 'GRF_noTE_confounding.csv'))

In [None]:
# GRF run: nonlinear treatment effect, with propensity_wa and baseline_wa.
res_n = eval_range_grf(
    range_n,
    dimension_range=False,
    # data-generating process
    d=10,
    covariate_model=unif_01,
    propensity_model=propensity_wa,
    baseline_model=baseline_wa,
    te_function=nonlinear_treatment_effect_wa1,
    error_model=normal_error_model,
    # evaluation / execution settings
    n_test=1000,
    repeats=N_REPEATS_SIM2,
    n_jobs=N_JOBS,
    verbose=1,
)
res_n.to_csv(os.path.join(GRF_PATH, 'GRF_nonlinearTE_confounding.csv'))

In [None]:
# GRF run: te_multiple_baseline as the treatment-effect function, with
# propensity_wa and baseline_wa.
res_n = eval_range_grf(
    range_n,
    dimension_range=False,
    # data-generating process
    d=10,
    covariate_model=unif_01,
    propensity_model=propensity_wa,
    baseline_model=baseline_wa,
    te_function=te_multiple_baseline,
    error_model=normal_error_model,
    # evaluation / execution settings
    n_test=1000,
    repeats=N_REPEATS_SIM2,
    n_jobs=N_JOBS,
    verbose=1,
)
res_n.to_csv(os.path.join(GRF_PATH, 'GRF_multipleTE_confounding.csv'))

In [None]:
# GRF run: te_interaction_baseline as the treatment-effect function, with
# propensity_wa and baseline_wa.
res_n = eval_range_grf(
    range_n,
    dimension_range=False,
    # data-generating process
    d=10,
    covariate_model=unif_01,
    propensity_model=propensity_wa,
    baseline_model=baseline_wa,
    te_function=te_interaction_baseline,
    error_model=normal_error_model,
    # evaluation / execution settings
    n_test=1000,
    repeats=N_REPEATS_SIM2,
    n_jobs=N_JOBS,
    verbose=1,
)
res_n.to_csv(os.path.join(GRF_PATH, 'GRF_interactionTE_confounding.csv'))