# Experiments for IF-learner

In [1]:
from paper_simulations.if_paper.helper_classes import RSmoothingSpline
from paper_simulations.if_paper.if_learner_experiments import eval_range_n, eval_range_bias, eval_range_d
from paper_simulations.if_paper.grf_experiments import eval_range_grf
from iflearn.simulation_utils.base import constant_baseline

In [2]:
# Values handed to eval_range_n below; the run log labels them "number of train-samples".
range_n = [
    500,
    1_000,
    3_000,
    5_000,
    10_000,
    30_000,
]

In [3]:
# Evaluate a smoothing-spline learner for every entry of `range_n`
# (6 parallel joblib workers), then persist the returned results as CSV.
res_n = eval_range_n(
    RSmoothingSpline(),
    range_n,
    n_jobs=6,
    verbose=1,
)
res_n.to_csv('paper_simulations/if_paper/simulations/CATE_rangen_nobias.csv')

number of train-samples: 500


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    6.4s
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed:   13.7s
[Parallel(n_jobs=6)]: Done 438 tasks      | elapsed:   25.4s
[Parallel(n_jobs=6)]: Done 500 out of 500 | elapsed:   28.0s finished
[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.


number of train-samples: 1000


[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    2.3s
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed:   10.8s
[Parallel(n_jobs=6)]: Done 438 tasks      | elapsed:   24.9s
[Parallel(n_jobs=6)]: Done 500 out of 500 | elapsed:   28.3s finished
[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.


number of train-samples: 3000


[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    4.8s
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed:   27.4s
[Parallel(n_jobs=6)]: Done 438 tasks      | elapsed:  1.1min
[Parallel(n_jobs=6)]: Done 500 out of 500 | elapsed:  1.3min finished
[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.


number of train-samples: 5000


[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:    8.0s
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed:   40.1s
[Parallel(n_jobs=6)]: Done 438 tasks      | elapsed:  1.6min
[Parallel(n_jobs=6)]: Done 500 out of 500 | elapsed:  1.8min finished
[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.


number of train-samples: 10000


[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:   16.7s
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed:  1.3min
[Parallel(n_jobs=6)]: Done 438 tasks      | elapsed:  2.9min
[Parallel(n_jobs=6)]: Done 500 out of 500 | elapsed:  3.3min finished
[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.


number of train-samples: 30000


[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed:   47.0s
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed:  3.6min
[Parallel(n_jobs=6)]: Done 438 tasks      | elapsed:  8.3min
[Parallel(n_jobs=6)]: Done 500 out of 500 | elapsed:  9.4min finished


# GRF experiments

In [2]:
import numpy as np
def _nonlinear_effect_1(x):
    """
    Shifted logistic curve: 1 + 1 / (1 + exp(-20 * (x - 1/3))).

    Parameters
    ----------
    x: scalar or array-like
        input value(s); evaluated element-wise through np.exp

    Returns
    -------
    The curve evaluated at x (equals 1.5 at x = 1/3)
    """
    shifted = x - 1/3
    return 1 + 1/(1 + np.exp(-20 * shifted))


def nonlinear_treatment_effect_1(X, dim_0=0, dim_1=1):
    """
    Treatment effect built as the product of two shifted logistic curves
    (example from Wager and Athey (2018)).

    Parameters
    ----------
    X: array-like (2-dimensional, indexed as X[:, j])
        input data to use
    dim_0: int, default 0
        column of X fed to the first factor
    dim_1: int, default 1
        column of X fed to the second factor

    Returns
    -------
    One effect value for each row of X
    """
    first_factor = _nonlinear_effect_1(X[:, dim_0])
    second_factor = _nonlinear_effect_1(X[:, dim_1])
    return first_factor * second_factor


def normal_error_model(X, sd: float = 1, dim: int = 0):
    """
    Draw one N(0, sd) error per row of X using numpy's global random state.

    Parameters
    ----------
    X: array-like
        input data; only its number of rows (X.shape[0]) is used
    sd: float, default 1
        standard deviation passed to np.random.normal
    dim: int, default 0
        Dimension of X to use (accepted but not referenced by this function)

    Returns
    -------
    np.array of length X.shape[0] with one error value per row of X
    """
    n_rows = X.shape[0]
    return np.random.normal(loc=0, scale=sd, size=n_rows)


def uniform_covariate_model(n: int, d: int = 1, low: float = 0, high: float = 1):
    """
    Sample covariates uniformly on the hypercube [low, high)^d using
    numpy's global random state.

    Parameters
    ----------
    n: int
        number of observations (rows) to generate
    d: int, default 1
        number of dimensions (columns)
    low: float, default 0
        lower bound of hypercube
    high: float, default 1
        upper bound of hypercube

    Returns
    -------
    np.array (n x d) with generated covariates
    """
    # Draw all d * n values as one flat vector, then fold it into d columns.
    flat_draws = np.random.uniform(low=low, high=high, size=d * n)
    return flat_draws.reshape((-1, d))

In [3]:
# GRF run with the nonlinear treatment effect, a constant baseline and
# no propensity model supplied (propensity_model=None); results are saved as CSV.
range_dim = [10]
res_d = eval_range_grf(
    range_dim,
    dimension_range=True,
    propensity_model=None,
    repeats=200,
    covariate_model=uniform_covariate_model,
    n_test=1000,
    n_train=1600,
    d=1,
    te_function=nonlinear_treatment_effect_1,
    baseline_model=constant_baseline,
    error_model=normal_error_model,
    pre_dispatch='2*n_jobs',
    n_jobs=6,
    verbose=1,
)
res_d.to_csv('paper_simulations/if_paper/simulations/GRFnew_nonlinear_train1600.csv')

Ambient dimension: 10


[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed: 24.9min
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed: 106.0min
[Parallel(n_jobs=6)]: Done 200 out of 200 | elapsed: 110.6min finished


In [4]:
# Confounded setting: the baseline and the propensity both depend on the same covariate (column 2 by default)
from scipy.stats import beta
def wa_baseline(X, dim: int = 2):
    """
    Linear baseline 2 * x - 1 evaluated on a single column of X.

    Parameters
    ----------
    X: array-like (2-dimensional, indexed as X[:, dim])
        input data to use
    dim: int, default 2
        column of X the baseline depends on

    Returns
    -------
    One baseline value for each row of X
    """
    column = X[:, dim]
    return 2 * column - 1

def wa_propensity(X, dim: int = 2):
    """
    Propensity 0.25 * (1 + Beta(2, 4) density) evaluated on a single column of X.

    Parameters
    ----------
    X: array-like (2-dimensional, indexed as X[:, dim])
        input data to use
    dim: int, default 2
        column of X the propensity depends on

    Returns
    -------
    One propensity value for each row of X
    """
    density = beta.pdf(X[:, dim], 2, 4)
    return 0.25 * (density + 1)

In [5]:
# GRF run with wa_propensity and wa_baseline; no treatment-effect function
# is supplied (te_function=None). Results are saved as CSV.
range_dim = [10]
res_d = eval_range_grf(
    range_dim,
    dimension_range=True,
    propensity_model=wa_propensity,
    repeats=200,
    covariate_model=uniform_covariate_model,
    n_test=1000,
    n_train=1600,
    d=1,
    te_function=None,
    baseline_model=wa_baseline,
    error_model=normal_error_model,
    n_jobs=6,
    verbose=1,
)
res_d.to_csv('paper_simulations/if_paper/simulations/GRFnew_confounding_train1600.csv')

[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.


Ambient dimension: 10


[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed: 23.6min
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed: 106.3min
[Parallel(n_jobs=6)]: Done 200 out of 200 | elapsed: 111.0min finished


In [6]:
# GRF run combining wa_propensity / wa_baseline with the nonlinear
# treatment effect; results are saved as CSV.
range_dim = [10]
res_d = eval_range_grf(
    range_dim,
    dimension_range=True,
    propensity_model=wa_propensity,
    repeats=200,
    covariate_model=uniform_covariate_model,
    n_test=1000,
    n_train=1600,
    d=1,
    te_function=nonlinear_treatment_effect_1,
    baseline_model=wa_baseline,
    error_model=normal_error_model,
    n_jobs=6,
    verbose=1,
)
res_d.to_csv('paper_simulations/if_paper/simulations/GRFnew_confoundingTE_train1600.csv')

[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.


Ambient dimension: 10


[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed: 23.0min
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed: 104.6min
[Parallel(n_jobs=6)]: Done 200 out of 200 | elapsed: 109.3min finished


In [7]:
def te_wa_baseline_multiple(X, dim: int = 2):
    """
    Treatment effect equal to three times the linear expression 2 * x - 1
    evaluated on a single column of X.

    Parameters
    ----------
    X: array-like (2-dimensional, indexed as X[:, dim])
        input data to use
    dim: int, default 2
        column of X the effect depends on

    Returns
    -------
    One effect value for each row of X
    """
    linear_part = 2 * X[:, dim] - 1
    return 3 * linear_part
# GRF run where the treatment effect is te_wa_baseline_multiple, used together
# with wa_propensity and wa_baseline; results are saved as CSV.
range_dim = [10]
res_d = eval_range_grf(
    range_dim,
    dimension_range=True,
    propensity_model=wa_propensity,
    repeats=200,
    covariate_model=uniform_covariate_model,
    n_test=1000,
    n_train=1600,
    d=1,
    te_function=te_wa_baseline_multiple,
    baseline_model=wa_baseline,
    error_model=normal_error_model,
    n_jobs=6,
    verbose=1,
)
res_d.to_csv('paper_simulations/if_paper/simulations/GRFnew_confoundingTEmultiple_train1600.csv')

[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.


Ambient dimension: 10


[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed: 22.6min
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed: 102.6min
[Parallel(n_jobs=6)]: Done 200 out of 200 | elapsed: 107.1min finished


In [8]:
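# Let the baseline and propensity depend on covariate 0, which the treatment effect also uses (redefines wa_baseline / wa_propensity with dim=0 as the default)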
def wa_baseline(X, dim: int = 0):
    """
    Linear baseline 2 * x - 1 evaluated on a single column of X.

    NOTE: this rebinds the name `wa_baseline`; the only difference from the
    definition in the earlier cell is the default column (0 instead of 2).

    Parameters
    ----------
    X: array-like (2-dimensional, indexed as X[:, dim])
        input data to use
    dim: int, default 0
        column of X the baseline depends on

    Returns
    -------
    One baseline value for each row of X
    """
    column = X[:, dim]
    return 2 * column - 1

def wa_propensity(X, dim: int = 0):
    """
    Propensity 0.25 * (1 + Beta(2, 4) density) evaluated on a single column of X.

    NOTE: this rebinds the name `wa_propensity`; the only difference from the
    definition in the earlier cell is the default column (0 instead of 2).

    Parameters
    ----------
    X: array-like (2-dimensional, indexed as X[:, dim])
        input data to use
    dim: int, default 0
        column of X the propensity depends on

    Returns
    -------
    One propensity value for each row of X
    """
    density = beta.pdf(X[:, dim], 2, 4)
    return 0.25 * (density + 1)

# GRF run with the nonlinear treatment effect and the wa_baseline /
# wa_propensity versions defined in this cell (default column 0);
# results are saved as CSV.
range_dim = [10]
res_d = eval_range_grf(
    range_dim,
    dimension_range=True,
    propensity_model=wa_propensity,
    repeats=200,
    covariate_model=uniform_covariate_model,
    n_test=1000,
    n_train=1600,
    d=1,
    te_function=nonlinear_treatment_effect_1,
    baseline_model=wa_baseline,
    error_model=normal_error_model,
    n_jobs=6,
    verbose=1,
)

res_d.to_csv('paper_simulations/if_paper/simulations/GRFnew_confoundingTEsupport_train1600.csv')

[Parallel(n_jobs=6)]: Using backend LokyBackend with 6 concurrent workers.


Ambient dimension: 10


[Parallel(n_jobs=6)]: Done  38 tasks      | elapsed: 22.8min
[Parallel(n_jobs=6)]: Done 188 tasks      | elapsed: 103.7min
[Parallel(n_jobs=6)]: Done 200 out of 200 | elapsed: 108.3min finished


In [None]:
# heteroskedasticity
