In [1]:
import torch
torch.set_default_tensor_type(torch.FloatTensor) 
import copy

from rct_data_generator import *
from outcome_models import *
from plotting_functions import *
from mcmc_bayes_update import *
from eig_comp_utils import *
from research_exp_utils import *
from rct_data_generator import generate_host_and_mirror


  _C._set_default_tensor_type(t)
  from .autonotebook import tqdm as notebook_tqdm


## Set-up

In [2]:
n_host_sample = 100 
sigma_error = 1 
d = 10 
A = torch.randn((d,d))
A = 1/(torch.det(A)) * A

T_allocation_host = torch.randn(d)
T_allocation_host = 100/torch.norm(T_allocation_host)*T_allocation_host

mu_nc = torch.randn(d)
mu_nc = 1/torch.norm(mu_nc)*mu_nc

mu_c = torch.randn(d)
mu_c = 1/torch.norm(mu_c)*mu_c

mu = torch.concat([mu_nc,mu_c])

In [3]:

X_host_no_T = (torch.randn((n_host_sample,d)) @ A ) 
T_host = torch.bernoulli(torch.sigmoid(X_host_no_T@ T_allocation_host))
X_host_times_T = (T_host.unsqueeze(dim=0).T * X_host_no_T)
X_host = torch.concat([X_host_no_T,X_host_times_T],dim=1)

Y_host = X_host @ mu
Y_host = (1/Y_host.norm()) * Y_host + sigma_error * torch.randn_like(Y_host)


In [4]:
prior_mean = torch.zeros(2 * d)
beta_0, sigma_0_sq,inv_cov_0 = prior_mean, sigma_error,torch.eye(2*d)
prior_hyperparameters = {'beta_0': beta_0, 'sigma_0_sq': sigma_0_sq,"inv_cov_0":inv_cov_0}
bayesian_regression = BayesianLinearRegression(prior_hyperparameters)
bayesian_regression.set_causal_index(10)

In [5]:
bayesian_regression.fit(X_host,Y_host)

{'posterior_mean': tensor([-0.0993, -0.4288,  0.1954,  0.1010,  0.1545, -0.2526,  0.0726,  0.3289,
          0.0246,  0.1450,  0.4419,  0.3629,  0.3544, -0.3680, -0.3977, -0.5572,
         -0.2526, -0.1299, -0.0577, -0.2675]),
 'posterior_cov_matrix': tensor([[ 3.5682e-01,  1.2320e-01, -9.3331e-02, -1.2754e-01,  7.8837e-02,
           1.4067e-01, -7.6041e-02,  2.5061e-02, -1.4616e-03, -1.8630e-01,
          -1.0920e-01,  1.4244e-02, -2.2028e-02, -1.2258e-02,  1.5998e-02,
           1.0973e-02,  2.2834e-02,  3.4346e-02, -2.5485e-02,  4.6799e-02],
         [ 1.2320e-01,  2.1868e-01, -1.3813e-02, -5.9499e-02,  2.1279e-01,
           7.9516e-02, -1.6701e-02,  1.3430e-01, -5.6237e-02, -4.9815e-02,
           6.6499e-03, -7.2574e-02, -1.3676e-03, -1.2394e-02,  2.3465e-02,
          -1.1726e-02, -2.0050e-02, -2.5891e-02, -2.2355e-02, -1.1038e-02],
         [-9.3331e-02, -1.3813e-02,  2.4772e-01,  6.0934e-02,  1.2825e-01,
          -1.0334e-01, -1.8265e-02,  5.1199e-04,  3.3621e-02,  9.8286e-0

In [6]:
bayesian_regression.closed_form_obs_EIG(X_host),bayesian_regression.closed_form_causal_EIG(X_host)

(4.686799916529671, 0.5021537110448984)

In [7]:
bayesian_regression.samples_obs_EIG(X_host,100,10),bayesian_regression.samples_causal_EIG(X_host,100,10)

  out = random_state.multivariate_normal(mean, cov, size)


(7.164575598259461, 4.122947296239175)

In [8]:
T_zero = torch.zeros_like(T_host)
T_one = 1 - T_zero

X_host_times_T_zero = (T_zero.unsqueeze(dim=0).T * X_host_no_T)
X_host_times_T_one = (T_one.unsqueeze(dim=0).T * X_host_no_T)

X_host_T_zero = torch.concat([X_host_no_T,X_host_times_T_zero],dim=1)
X_host_T_one = torch.concat([X_host_no_T,X_host_times_T_one],dim=1)

Y_host = X_host @ mu
Y_host = (1/Y_host.norm()) * Y_host + sigma_error * torch.randn_like(Y_host)

In [9]:
bayesian_regression.closed_form_obs_EIG(X_host_T_zero),bayesian_regression.closed_form_causal_EIG(X_host_T_zero)

(3.700399792443208, -0.4842464130415651)

In [10]:
bayesian_regression.samples_obs_EIG(X_host_T_zero,100,500),bayesian_regression.samples_causal_EIG(X_host_T_zero,100,500)

(4.634569417680041, 1.3264991103249884)

In [11]:
bayesian_regression.closed_form_obs_EIG(X_host_T_one),bayesian_regression.closed_form_causal_EIG(X_host_T_one)

(4.354952490316906, 0.17030628483213306)

In [12]:
bayesian_regression.samples_obs_EIG(X_host_T_one,100,150),bayesian_regression.samples_causal_EIG(X_host_T_one,100,150)

(5.315682526527723, 2.840860429568238)

## Experiment

In [13]:
n_host_sample = 200 
sigma_error = 1 
d = 10 
n_repeats = 40


## BART

In [14]:
from outcome_models import BayesianCausalForest

In [15]:
prior_hyperparameters = {'sigma_0_sq':1,'p_categorical_pr':0,'p_categorical_trt':0 }
bcf = BayesianCausalForest(prior_hyperparameters)

In [16]:
X_host_np = np.array(X_host,dtype=np.float32)
T_host_np = np.array(T_host,dtype=np.int32)
Y_host_np = np.array(Y_host,dtype=np.float32)

In [17]:
bcf.store_train_data(X=X_host_np,T=T_host_np,Y=Y_host_np)

In [18]:
n_samples_inner_expectation=50
n_samples_outer_expectation=50
X = X_host_np
T = T_host_np

n_samples = n_samples_outer_expectation*(n_samples_inner_expectation+1)


In [19]:
predicitions = bcf.posterior_sample_predictions(X=X, T=T,  n_samples=n_samples)

In [20]:
from eig_comp_utils import predictions_in_EIG_obs_form,compute_EIG_obs_from_samples

In [21]:
pred_in_form = predictions_in_EIG_obs_form(predicitions.T,n_outer_expectation=n_samples_outer_expectation,m_inner_expectation=n_samples_inner_expectation)

In [30]:
compute_EIG_obs_from_samples(pred_in_form,1)

2.1825973409419532

In [31]:
bcf.samples_obs_EIG(X_host_np,T_host_np,n_samples_inner_expectation=50,n_samples_outer_expectation=50)

1.2830443155226305

In [25]:
pred = pred.T 

In [26]:
(pred- Y_host_np).shape

(150, 100)

In [27]:
T = np.random.randint(0,2,size=100)

In [28]:
T

array([0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0])

In [29]:
pred[:,:,1]

IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed

In [None]:
pred[np.arange((pred.shape[0])),:,T]

array([[-0.27826393, -0.26635271, -0.26018044, ..., -0.24245816,
        -0.23516247, -0.24750389],
       [-0.31057554, -0.1626953 , -0.24562265, ..., -0.24173723,
        -0.19809073, -0.27968158],
       [-0.0212074 , -0.16190154, -0.23623946, ..., -0.23977634,
        -0.21797352, -0.28044146],
       ...,
       [-0.08591532, -0.31005134, -0.23185605, ..., -0.2383347 ,
        -0.25160518, -0.26212767],
       [-0.0473603 , -0.02099823,  0.01353105, ...,  0.00777841,
        -0.16268113, -0.00635422],
       [-0.14408347, -0.07653664, -0.23156936, ..., -0.23383851,
        -0.23836371, -0.2869225 ]])

In [None]:
pred[:,:,0].shape

(100, 150)

In [None]:
bcf.model.fit(
        x_t=bcf.X_train, # Covariates treatment effect
        x=bcf.X_train_prog, # Covariates outcome (including propensity score)
        y=bcf.Y_train,  # Outcome
        z=bcf.T_train, # Treatment group
        )

XBCF(num_sweeps = 100, burnin = 15, max_depth = 250, Nmin = 1, num_cutpoints = 100, no_split_penality = 4.605170185988092, mtry_pr = 20, mtry_trt = 20, p_categorical_pr = 0, p_categorical_trt = 0, num_trees_pr = 30, alpha_pr = 0.95, beta_pr = 1.25, tau_pr = 0.018378095626831056, kap_pr = 16.0, s_pr = 4.0, pr_scale = False, num_trees_trt = 10, alpha_trt = 0.25, beta_trt = 3.0, tau_trt = 0.009189047813415528, kap_trt = 16.0, s_trt = 4.0, trt_scale = False, verbose = False, parallel = False, set_random_seed = False, random_seed = 0, sample_weights_flag = True, a_scaling = True, b_scaling = True)

In [None]:
pred_0.shape

(100, 100)

In [None]:
np.expand_dims(pred_0,axis=2).shape

(100, 100, 1)

In [None]:
np.concatenate([np.expand_dims(pred_1,axis=2),np.expand_dims(pred_1,axis=2)],axis = 2)

array([[[ 0.04913624,  0.04913624],
        [ 0.00871902,  0.00871902],
        [-0.51544197, -0.51544197],
        ...,
        [-0.04524   , -0.04524   ],
        [ 0.14792401,  0.14792401],
        [-0.04184422, -0.04184422]],

       [[ 0.22082462,  0.22082462],
        [-0.0692701 , -0.0692701 ],
        [ 0.00254678,  0.00254678],
        ...,
        [-0.17167079, -0.17167079],
        [ 0.43322948,  0.43322948],
        [-0.15339206, -0.15339206]],

       [[-0.0161834 , -0.0161834 ],
        [-0.14554069, -0.14554069],
        [-0.08925596, -0.08925596],
        ...,
        [-0.14696226, -0.14696226],
        [ 0.22852622,  0.22852622],
        [-0.12987373, -0.12987373]],

       ...,

       [[ 0.1145945 ,  0.1145945 ],
        [ 0.16093073,  0.16093073],
        [ 0.25959919,  0.25959919],
        ...,
        [ 0.35443366,  0.35443366],
        [ 0.23965383,  0.23965383],
        [ 0.42399624,  0.42399624]],

       [[ 0.34570552,  0.34570552],
        [ 0.18532383,  0.18

In [None]:
bcf.samples_obs_EIG(X_host_np,100,100)

IndexError: tuple index out of range