In [None]:
import numpy as np
import pandas as pd
import torch
# Use float32 as the default dtype for newly created floating-point tensors.
# torch.set_default_tensor_type is deprecated in recent PyTorch releases;
# torch.set_default_dtype is the supported way to set the default dtype.
torch.set_default_dtype(torch.float32)
import copy

from rct_data_generator import *
from outcome_models import *
from plotting_functions import *
from mcmc_bayes_update import *
from eig_comp_utils import *
from research_exp_utils import *


### 1. Simulating the data

In [None]:
# Seeded generator. The covariates below are drawn from it (not from the
# unseeded global np.random state) so a fresh kernel reproduces the same data.
rng = np.random.RandomState(42)

n_rct_before_split = 10**6  # number of draws for each covariate below
n_host = 200

power_x, power_x_t = 1, 1
causal_param_first_index = 3

# Outcome: linear in X[:, 0] and X[:, 1], plus a treatment effect (10 * T) and
# treatment-covariate interactions (20 * X[:, 0] * T, 20 * X[:, 1] * T), plus noise eps.
outcome_function = lambda X, T, eps: 1 + 1 * X[:,0] - 1 * X[:,1] + 10 * T + 20* X[:,0]*T + 20* X[:,1]*T + eps
std_true_y = 1  # Standard deviation for the true Y

# Covariate draws: X0 ~ Beta(12, 3), X1 ~ Normal(mean=4, sd=1).
X0 = rng.beta(12, 3, size=n_rct_before_split)
X1 = rng.normal(loc=4, scale=1, size=n_rct_before_split)
x_distributions = {0: X0, 1: X1}

# Assignment probabilities. Both currently use the same formula; `sigmoid` is
# expected to come from the project modules star-imported above.
p_assigned_to_host = lambda X_0, X_1, T, eps: sigmoid(1 + 2*X_0 - X_1 + 5*T + eps)
p_assigned_to_cand2 = lambda X_0, X_1, T, eps: sigmoid(1 + 2*X_0 - X_1 + 5*T + eps)
# Alternative assignment rule for cand2 (disabled):
# p_assigned_to_cand2 = lambda X_0, X_1, T, eps: sigmoid(1 - 2*X_0 + eps)

Simulation parameters

In [None]:
# --- Prior used for the Bayesian update on the host ---
sigma_prior = 1
prior_mean = np.array([0, 1, 0, 1, 0, 0])

# --- Hyperparameters for the Bayesian update on the host ---
sigma_rand_error = 1
max_tree_depth = 5
warmup_steps = 50

# --- Sample counts ---
# n: number of samples used to estimate the outer expectation.
# m: ceil(sqrt(n)).
n_samples_for_expectation = 50
m_samples_for_expectation = int(np.ceil(np.sqrt(n_samples_for_expectation)))

# Total MCMC samples, built from the sqrt relation above: n * (m + 1).
n_mcmc = n_samples_for_expectation * (m_samples_for_expectation + 1)

### 2. EIG closed form for varying sample sizes

In [None]:
# Sample sizes to sweep over.
n_both_candidates_list = [200, 500, 1000]
proportion = 1  # n_cand2 = proportion * n_both_candidates_list
std_true_y = 1

# Closed-form EIG for each sample size, using the `generate_host_and_mirror`
# data generator. The call returns two results: observational and causal.
EIG_obs_closed_form, EIG_caus_closed_form = eig_closed_form_varying_sample_size(
    generate_host_and_mirror,
    n_both_candidates_list,
    proportion,
    n_rct_before_split,
    x_distributions,
    p_assigned_to_cand2,
    n_host,
    power_x,
    power_x_t,
    outcome_function,
    std_true_y,
    causal_param_first_index,
    max_tree_depth,
    warmup_steps,
    sigma_rand_error,
    sigma_prior,
)

In [None]:
# Observational EIG (closed form) against sample size.
plot_array(
    n_both_candidates_list,
    EIG_obs_closed_form,
    names=['mirror', 'cand2'],
    title='EIG obs',
)

In [None]:
# Causal EIG (closed form) against sample size.
plot_array(
    n_both_candidates_list,
    EIG_caus_closed_form,
    names=['mirror', 'cand2'],
    title='EIG causal',
)

### Exact experiment

In [None]:
# Sample sizes to sweep over (same grid as the non-exact experiment).
n_both_candidates_list = [200, 500, 1000]
proportion = 1  # n_cand2 = proportion * n_both_candidates_list
std_true_y = 1

# Closed-form EIG using the `generate_host_and_exact_mirror` data generator.
# The argument list mirrors the other eig_closed_form_varying_sample_size call
# in this notebook, which also passes the causal index and the Bayesian-update
# hyperparameters; the original call here stopped at std_true_y.
# NOTE(review): confirm against the function signature in the project module.
EIG_obs_closed_form_exact, EIG_caus_closed_form_exact = eig_closed_form_varying_sample_size(
    generate_host_and_exact_mirror,
    n_both_candidates_list,
    proportion,
    n_rct_before_split,
    x_distributions,
    p_assigned_to_cand2,
    n_host,
    power_x,
    power_x_t,
    outcome_function,
    std_true_y,
    causal_param_first_index,
    max_tree_depth,
    warmup_steps,
    sigma_rand_error,
    sigma_prior,
)

In [None]:
# Observational EIG (closed form, exact-mirror experiment) against sample size.
plot_array(
    n_both_candidates_list,
    EIG_obs_closed_form_exact,
    names=['mirror', 'cand2'],
    title='EIG obs',
)

In [None]:
# Causal EIG (closed form, exact-mirror experiment) against sample size.
# The title previously read 'EIG obs' although the causal result is plotted.
plot_array(
    n_both_candidates_list,
    EIG_caus_closed_form_exact,
    names=['mirror', 'cand2'],
    title='EIG causal',
)

### 3. EIG from samples for varying sample sizes

In [None]:
# NOTE(review): n_non_causal_expectation is assigned here but is not passed to
# the call below — confirm whether eig_from_samples_varying_sample_size should
# receive it.
n_non_causal_expectation = 2

# Sample-based EIG for each sample size; the call returns two results
# (observational and causal).
EIG_obs_samples, EIG_caus_samples = eig_from_samples_varying_sample_size(
    n_both_candidates_list,
    n_rct_before_split,
    x_distributions,
    p_assigned_to_cand2,
    n_host,
    power_x,
    power_x_t,
    outcome_function,
    std_true_y,
)

In [None]:
# Observational EIG (estimated from samples) against sample size.
plot_array(
    n_both_candidates_list,
    EIG_obs_samples,
    names=['mirror', 'cand2'],
    title='EIG obs from sample',
)

In [None]:
# Causal EIG (estimated from samples) against sample size.
# This cell previously repeated the observational plot, so EIG_caus_samples
# was never shown; the other sections plot obs followed by causal.
plot_array(
    n_both_candidates_list,
    EIG_caus_samples,
    names=['mirror', 'cand2'],
    title='EIG causal from sample',
)