In [None]:
# Load the autoreload extension and use mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import os

from grr import *

In [None]:
# Monte Carlo study: in every trial a synthetic population is drawn, a sample is
# taken from it, and the ATE is estimated with oracle nuisance functions and
# with several NN / RKHS configurations of GRR_ATE. Results are checkpointed to
# CSV after every trial.

# Simulation parameters
exp_name = "simulation_study"
sample_size = 3000        # Number of units analysed in each trial
cov_dim = 3               # Number of covariates (interactions below use columns 0-2, so >= 3)
treatment_effect = 5.0    # True treatment effect
num_trial = 100           # Number of Monte Carlo repetitions
population_size = 100000  # Number of units generated per trial before sampling

# Estimator settings (identical in every trial, so defined once)
cross_fitting_folds = 2
## Neural-network settings
riesz_hidden_dim = 100
reg_hidden_dim = 100
riesz_max_iter = 100
reg_max_iter = 100
## RKHS settings
folds = 3
num_basis = 100
is_separate = False

# Create the output directory up front so that the first checkpoint cannot fail
# after a full trial of estimation has already been computed.
results_dir = "results"
os.makedirs(results_dir, exist_ok=True)

np.random.seed(0)

true_list = []

nn_linear_sq_list = []
nn_logit_sq_list = []
nn_ukl_list = []
nn_with_D_ukl_list = []
nn_bkl_list = []
nn_bp_list = []      # never filled in this cell; kept so the name stays defined

rkhs_linear_sq_list = []
rkhs_logit_sq_list = []
rkhs_ukl_list = []
rkhs_bkl_list = []
rkhs_bp_list = []    # never filled in this cell; kept so the name stays defined

result_list = []     # never filled in this cell; kept so the name stays defined

# (result collection, riesz_loss, riesz_with_D, riesz_link_name) per NN run
nn_configs = [
    (nn_linear_sq_list, "SQ", True, "Linear"),
    (nn_logit_sq_list, "SQ", True, "Logit"),
    (nn_ukl_list, "UKL", False, "Logit"),
    (nn_with_D_ukl_list, "UKL", True, "Logit"),
    (nn_bkl_list, "BKL", False, "Logit"),
]

# Same layout for the RKHS runs. The second entry uses the "Logit" link so that
# rkhs_logit_sq_list is not a duplicate of the linear-link run.
rkhs_configs = [
    (rkhs_linear_sq_list, "SQ", True, "Linear"),
    (rkhs_logit_sq_list, "SQ", True, "Logit"),
    (rkhs_ukl_list, "UKL", False, "Logit"),
    (rkhs_bkl_list, "BKL", False, "Logit"),
]

# File-name prefix -> collection written to disk after each trial
saved_collections = {
    "true": true_list,
    "nn_linear_sq": nn_linear_sq_list,
    "nn_logit_sq": nn_logit_sq_list,
    "nn_ukl": nn_ukl_list,
    "nn_with_D_ukl": nn_with_D_ukl_list,
    "nn_bkl": nn_bkl_list,
    "rkhs_linear_sq": rkhs_linear_sq_list,
    "rkhs_logit_sq": rkhs_logit_sq_list,
    "rkhs_ukl": rkhs_ukl_list,
    "rkhs_bkl": rkhs_bkl_list,
}


def summarize_score(score, n):
    """Return (estimate, lower, upper) for a vector of per-unit scores.

    The estimate is the mean of `score`; the bounds are the estimate
    -/+ 1.96 * sqrt(var(score) / n).
    """
    estimate = np.mean(score)
    half_width = 1.96 * np.sqrt(np.var(score - estimate) / n)
    return estimate, estimate - half_width, estimate + half_width


for tr in range(num_trial):
    print("tr", tr)

    # Set (population) dataset
    ## Generate covariates
    covariates = np.random.normal(0, 1, (population_size, cov_dim))

    ## Generate treatment assignment
    ### Propensity model: linear, squared and pairwise-interaction terms
    interactions = np.stack([covariates[:, 0] * covariates[:, 1],
                             covariates[:, 1] * covariates[:, 2],
                             covariates[:, 0] * covariates[:, 2]], axis=1)
    covariates_prop = np.concatenate([covariates, covariates**2, interactions], axis=1)
    propensity_coef = np.random.normal(0, 0.5, covariates_prop.shape[1])
    ### Calculate propensity scores (logistic link)
    propensity_scores = 1 / (1 + np.exp(- covariates_prop @ propensity_coef))
    ### Generate treatment assignment based on propensity scores
    treatment = np.random.binomial(1, propensity_scores)

    ## Generate outcomes
    ### Regression model: covariates plus one interaction term
    covariates_reg = np.concatenate(
        [covariates, (covariates[:, 0] * covariates[:, 1])[:, None]], axis=1)
    beta1 = np.random.normal(0, 1, covariates_reg.shape[1])
    gamma1 = np.random.normal(0, 1, covariates_reg.shape[1])
    fx1 = covariates_reg @ beta1
    gx1 = covariates_reg**2 @ gamma1
    ### Conditional means: shared baseline plus the constant treatment effect
    baseline = (fx1)**2 + 1/(1 + np.exp(-gx1)) + 1.0
    EYX_D1 = baseline + treatment_effect
    EYX_D0 = baseline
    EYX = baseline + treatment_effect*treatment
    outcome = EYX + np.random.normal(0, 1, population_size)

    # Obtain a sample: the first `sample_size` units of the population
    treatment_all = treatment.copy()
    covariates = covariates[:sample_size]
    treatment = treatment[:sample_size]
    outcome = outcome[:sample_size]
    propensity_scores = propensity_scores[:sample_size]
    EYX, EYX_D1, EYX_D0 = EYX[:sample_size], EYX_D1[:sample_size], EYX_D0[:sample_size]

    ############################
    # Oracle benchmark: scores built from the true propensity and regression functions
    true_riesz = treatment/propensity_scores - (1 - treatment)/(1 - propensity_scores)

    true_DM_score = EYX_D1 - EYX_D0
    true_IPW_score = true_riesz*outcome
    true_AIPW_score = true_riesz*(outcome - EYX) + true_DM_score

    true_DM_est, true_DM_lower, true_DM_upper = summarize_score(true_DM_score, sample_size)
    true_IPW_est, true_IPW_lower, true_IPW_upper = summarize_score(true_IPW_score, sample_size)
    true_AIPW_est, true_AIPW_lower, true_AIPW_upper = summarize_score(true_AIPW_score, sample_size)

    # Column layout: three estimates, then (lower, upper) for DM, IPW, AIPW
    true_list.append((true_DM_est, true_IPW_est, true_AIPW_est,
                      true_DM_lower, true_DM_upper,
                      true_IPW_lower, true_IPW_upper,
                      true_AIPW_lower, true_AIPW_upper))

    ############################
    # Neural-network estimators: a fresh GRR_ATE per configuration
    method = "NN_GRR"
    for result_store, riesz_loss, riesz_with_D, riesz_link_name in nn_configs:
        dd_ate = GRR_ATE()
        result = dd_ate.estimate(covariates, treatment,
                        outcome, method=method,
                        cross_fitting_folds=cross_fitting_folds,
                        riesz_loss=riesz_loss,
                        riesz_with_D=riesz_with_D,
                        riesz_link_name=riesz_link_name,
                        riesz_hidden_dim=riesz_hidden_dim,
                        reg_hidden_dim=reg_hidden_dim,
                        riesz_max_iter=riesz_max_iter,
                        reg_max_iter=reg_max_iter)
        result_store.append(result)
        print(result)

    ##############################
    # RKHS estimators: also a fresh GRR_ATE per configuration, so no run
    # inherits an instance previously used by another configuration
    method = "RKHS_GRR"
    for result_store, riesz_loss, riesz_with_D, riesz_link_name in rkhs_configs:
        dd_ate = GRR_ATE()
        result = dd_ate.estimate(covariates, treatment, outcome, method=method,
                        cross_fitting_folds=cross_fitting_folds,
                        riesz_loss=riesz_loss,
                        riesz_with_D=riesz_with_D,
                        riesz_link_name=riesz_link_name,
                        is_separate=is_separate,
                        folds=folds,
                        num_basis=num_basis)
        result_store.append(result)

    # Checkpoint everything collected so far; files are overwritten each trial
    for prefix, collected in saved_collections.items():
        np.savetxt(os.path.join(results_dir, "%s_%s.csv" % (prefix, exp_name)), collected)


In [None]:
# Inspect the results collected for the NN run with the "UKL" loss and
# riesz_with_D=False.
nn_ukl_list

In [None]:
# Inspect the results collected for the NN run with the "UKL" loss and
# riesz_with_D=True.
nn_with_D_ukl_list

In [None]:
# Smallest entry of the IPW_score attribute on `dd_ate`, i.e. on whichever
# estimator object the simulation cell used last.
np.min(dd_ate.IPW_score)

In [None]:
# Smallest value of that estimator's riesz_predict output, evaluated on the
# `covariates` / `treatment` left over from the last simulated trial.
# NOTE(review): assumes `dd_ate.model` is still the model fitted in that last
# estimate call - confirm against GRR_ATE.
np.min(dd_ate.model.riesz_predict(covariates, treatment))

In [None]:
import pandas as pd

In [None]:
# Rebind every result collection to a NumPy array (one row per trial) so the
# summary cells below can slice columns.
(
    true_list,
    nn_linear_sq_list,
    nn_logit_sq_list,
    nn_ukl_list,
    nn_with_D_ukl_list,
    nn_bkl_list,
    rkhs_linear_sq_list,
    rkhs_logit_sq_list,
    rkhs_ukl_list,
    rkhs_bkl_list,
) = [
    np.array(collected)
    for collected in (
        true_list,
        nn_linear_sq_list,
        nn_logit_sq_list,
        nn_ukl_list,
        nn_with_D_ukl_list,
        nn_bkl_list,
        rkhs_linear_sq_list,
        rkhs_logit_sq_list,
        rkhs_ukl_list,
        rkhs_bkl_list,
    )
]


In [None]:
# Mean squared error of the first three result columns (the point estimates)
# against the true effect, one row per method. The truth is taken from
# `treatment_effect` rather than a repeated literal, so the table stays correct
# if the simulation parameter is changed.
results = np.array([
    np.mean((np.asarray(estimates)[:, :3] - treatment_effect) ** 2, axis=0)
    for estimates in (
        true_list,
        nn_linear_sq_list,
        nn_logit_sq_list,
        nn_ukl_list,
        nn_with_D_ukl_list,
        nn_bkl_list,
        rkhs_linear_sq_list,
        rkhs_logit_sq_list,
        rkhs_ukl_list,
        rkhs_bkl_list,
    )
])

In [None]:
# Display the MSE table transposed: after `.T` each row is one of the three
# estimate columns kept in `results`, and each column is one method in the
# order the methods were stacked.
pd.DataFrame(results).T

In [None]:
true_conf_list = [np.mean((true_list[:, 3] <= 5) * (5 <= true_list[:, 4])), np.mean((true_list[:, 5] <= 5) * (5 <= true_list[:, 6])), np.mean((true_list[:, 7] <= 5) * (5 <= true_list[:, 8]))]
nn_ls_conf_list = [np.mean((nn_ls_list[:, 3] <= 5) * (5 <= nn_ls_list[:, 4])), np.mean((nn_ls_list[:, 5] <= 5) * (5 <= nn_ls_list[:, 6])), np.mean((nn_ls_list[:, 7] <= 5) * (5 <= nn_ls_list[:, 8]))]
nn_kl_conf_list = [np.mean((nn_kl_list[:, 3] <= 5) * (5 <= nn_kl_list[:, 4])), np.mean((nn_kl_list[:, 5] <= 5) * (5 <= nn_kl_list[:, 6])), np.mean((nn_kl_list[:, 7] <= 5) * (5 <= nn_kl_list[:, 8]))]
nn_tl_conf_list = [np.mean((nn_tl_list[:, 3] <= 5) * (5 <= nn_tl_list[:, 4])), np.mean((nn_tl_list[:, 5] <= 5) * (5 <= nn_tl_list[:, 6])), np.mean((nn_tl_list[:, 7] <= 5) * (5 <= nn_tl_list[:, 8]))]
nn_mle_conf_list = [np.mean((nn_mle_list[:, 3] <= 5) * (5 <= nn_mle_list[:, 4])), np.mean((nn_mle_list[:, 5] <= 5) * (5 <= nn_mle_list[:, 6])), np.mean((nn_mle_list[:, 7] <= 5) * (5 <= nn_mle_list[:, 8]))]
rkhs_ls_conf_list = [np.mean((rkhs_ls_list[:, 3] <= 5) * (5 <= rkhs_ls_list[:, 4])), np.mean((rkhs_ls_list[:, 5] <= 5) * (5 <= rkhs_ls_list[:, 6])), np.mean((rkhs_ls_list[:, 7] <= 5) * (5 <= rkhs_ls_list[:, 8]))]
rkhs_kl_conf_list = [np.mean((rkhs_kl_list[:, 3] <= 5) * (5 <= rkhs_kl_list[:, 4])), np.mean((rkhs_kl_list[:, 5] <= 5) * (5 <= rkhs_kl_list[:, 6])), np.mean((rkhs_kl_list[:, 7] <= 5) * (5 <= rkhs_kl_list[:, 8]))]
rkhs_tl_conf_list = [np.mean((rkhs_tl_list[:, 3] <= 5) * (5 <= rkhs_tl_list[:, 4])), np.mean((rkhs_tl_list[:, 5] <= 5) * (5 <= rkhs_tl_list[:, 6])), np.mean((rkhs_tl_list[:, 7] <= 5) * (5 <= rkhs_tl_list[:, 8]))]
rkhs_mle_conf_list = [np.mean((rkhs_mle_list[:, 3] <= 5) * (5 <= rkhs_mle_list[:, 4])), np.mean((rkhs_mle_list[:, 5] <= 5) * (5 <= rkhs_mle_list[:, 6])), np.mean((rkhs_mle_list[:, 7] <= 5) * (5 <= rkhs_mle_list[:, 8]))]


In [None]:
results2 = np.array([
                  true_conf_list,
                  nn_ls_conf_list,
                  nn_kl_conf_list,
                  nn_tl_conf_list,
                  nn_mle_conf_list,
                  rkhs_ls_conf_list,
                  rkhs_kl_conf_list,
                  rkhs_tl_conf_list,
                  rkhs_mle_conf_list])

In [None]:
# Emit a two-row LaTeX table: the first 15 flattened entries of the MSE table
# on the first row and the first 15 flattened entries of the coverage table on
# the second.
latex_rows = [results.flatten()[:15], results2.flatten()[:15]]
latex_table = pd.DataFrame(latex_rows)
print(latex_table.to_latex(float_format="%.2f"))