$
\newcommand{\ELBO}{\text{ELBO}}
\newcommand{\EE}{\mathbb{E}}
\newcommand{\bs}[1]{\boldsymbol{#1}}
\newcommand{\ud}{\,\text{d}}
\newcommand{\CN}{\mathcal{N}}
\newcommand{\CB}{\mathcal{B}}
\newcommand{\aux}{\text{aux}}
\newcommand{\do}{do}
\newcommand{\Xmat}{\boldsymbol{X}}
\newcommand{\zv}{\bs{z}}
\newcommand{\xv}{\bs{x}}
\newcommand{\Xv}{\bs{X}}
\newcommand{\Yv}{\bs{Y}}
\newcommand{\BD}{\mathbb{D}}
\newcommand{\KL}{\text{KL}}
$

## T-learner BART baseline for IHDP
* Install `bartpy` before running this notebook (the command below pins the `ReadOneTrees` branch):
```
pip3 install git+https://github.com/JakeColtman/bartpy.git@ReadOneTrees --upgrade
```

In [1]:
import os
import sys

from time import time

import math

import numpy as np

import seaborn as sns

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class flags:
    """Plain namespace of experiment settings, read as class attributes."""

    # --- data dimensions ---
    # (an unused `dim = 2` setting was previously left commented out here)
    x_dim = 25
    y_dim = 1
    t_dim = 2

    # M was 100 in an earlier configuration
    M = 30

    # --- optimization ---
    learning_rate = 0.001   # base learning rate
    lr_decay = 0.999995     # learning-rate decay, applied every optimization step

    batch_size = 128        # batch size during training, per GPU
    hidden_size = 2


# A single shared instance, exposed under both names.
FLAGS = args = flags()


In [3]:
def onehot(t,dim):
    """One-hot encode a vector of integer-valued labels.

    Parameters
    ----------
    t : array-like, shape (m,) or (m, 1)
        Label of each sample. Values are truncated to integers, so
        float-coded labels such as ``0.0`` / ``1.0`` are accepted.
    dim : int
        Number of classes, i.e. the number of output columns.

    Returns
    -------
    numpy.ndarray, shape (m, dim), dtype float64
        Row ``i`` is all zeros except for a 1 in column ``int(t[i])``.

    Raises
    ------
    IndexError
        If a label falls outside ``[-dim, dim)``.
    ValueError
        If ``t`` holds more than one value per sample.
    """
    labels = np.asarray(t)
    m_samples = labels.shape[0]

    # `np.int` was removed in NumPy 1.24; `astype(int)` performs the same
    # truncating conversion for the whole vector at once.
    columns = labels.reshape(m_samples).astype(int)

    tt = np.zeros([m_samples,dim])
    # Set one entry per row with paired (row, column) fancy indexing.
    tt[np.arange(m_samples), columns] = 1

    return tt

In [4]:
def eval_pehe(tau_hat,tau):
    """Root-mean-squared error between estimated and reference effects (PEHE).

    Parameters
    ----------
    tau_hat : array-like
        Estimated treatment effects.
    tau : array-like
        Reference treatment effects.

    Returns
    -------
    numpy.float64
        ``sqrt(mean((tau - tau_hat) ** 2))``.

    Notes
    -----
    If the two inputs hold the same number of elements but differ in shape
    (for example ``(n,)`` versus ``(n, 1)``), both are flattened first.
    Without this, the subtraction would broadcast to an ``(n, n)`` matrix and
    the returned value would not be an element-wise error. Inputs with equal
    shapes, and scalar inputs, follow ordinary NumPy broadcasting.
    """
    est = np.asarray(tau_hat)
    ref = np.asarray(tau)

    # Align e.g. a column vector with a flat vector instead of letting the
    # subtraction build an outer-difference matrix.
    if est.shape != ref.shape and est.size == ref.size:
        est = est.reshape(-1)
        ref = ref.reshape(-1)

    return np.sqrt(np.mean(np.square(ref-est)))

In [5]:
def load_ihdp(trial_id=0,filepath='./data/',istrain=True):
    """Load one trial from an ``ihdp_npci_1-1000`` ``.npz`` archive.

    Parameters
    ----------
    trial_id : int, default 0
        Index along the last axis of every stored array.
    filepath : str, default './data/'
        Prefix prepended verbatim to the archive file name, so a directory
        must include its trailing separator.
    istrain : bool, default True
        Read ``ihdp_npci_1-1000.train.npz`` when true, otherwise
        ``ihdp_npci_1-1000.test.npz``.

    Returns
    -------
    tuple
        ``(x, y, t, ycf, mu0, mu1)``: the ``trial_id`` slice of the archive
        entries ``'x'``, ``'yf'``, ``'t'``, ``'ycf'``, ``'mu0'`` and ``'mu1'``.
        ``x`` is sliced from a 3-D entry, the others from 2-D entries.
        Presumably these are covariates, factual outcome, treatment,
        counterfactual outcome and the two noiseless potential outcomes;
        confirm against the dataset documentation.
    """
    split = 'train' if istrain else 'test'
    data_file = filepath + 'ihdp_npci_1-1000.' + split + '.npz'

    # np.load returns an NpzFile that keeps the archive open; the context
    # manager closes it once the required arrays have been read into memory.
    with np.load(data_file) as data:
        x = data['x'][:,:,trial_id]
        y = data['yf'][:,trial_id]
        t = data['t'][:,trial_id]
        ycf = data['ycf'][:,trial_id]
        mu0 = data['mu0'][:,trial_id]
        mu1 = data['mu1'][:,trial_id]

    return x,y,t,ycf,mu0,mu1

In [6]:
# Load the normalized IHDP example.
# The .npy file holds a single pickled object; `.item()` unwraps it into a
# mapping that is indexed by the string keys used below.
# NOTE(review): allow_pickle=True can execute arbitrary code while loading,
# so only use it on a file from a trusted source.
data = np.load('../ihdp_example.npy',allow_pickle=True).item()
X = data['X'] # Confounder proxies
Y = data['Y'] # Factual outcomes
T = data['T'] # Treatment (presumably one-hot, one column per arm; confirm)
Tau = data['Tau']  # Ground truth ITE

# Number of rows in X, i.e. the number of samples.
n_samples = X.shape[0]

In [7]:
# Boolean row masks: a sample belongs to an arm when that arm's column of T is 1.
t0_ind = T[:, 0] == 1
t1_ind = T[:, 1] == 1   # column 1 marks treatment == 1

# Number of samples in each arm.
n0, n1 = np.sum(t0_ind), np.sum(t1_ind)

# Per-arm inputs and factual outcomes.
X0, Y0 = X[t0_ind], Y[t0_ind]
X1, Y1 = X[t1_ind], Y[t1_ind]

### Training

In [9]:
def show_results():
    """Placeholder with no implementation yet; always returns ``None``."""
    return None

In [10]:
from bartpy.sklearnmodel import SklearnModel

In [11]:
# Flatten the per-arm outcomes to 1-D vectors before fitting.
Y0, Y1 = Y0.reshape(-1), Y1.reshape(-1)

In [16]:
# T-learner: fit one outcome model per treatment arm, each seeing only the
# samples that received that arm.
# NOTE(review): no random seed is set in this notebook; if the sampler is
# stochastic, repeated runs will give slightly different results. Confirm.
n_trees = 10

model0 = SklearnModel(n_trees=n_trees) # Control-arm model; all other parameters left at their defaults
model0.fit(X0, Y0) # Fit on control samples only
model1 = SklearnModel(n_trees=n_trees) # Treated-arm model; all other parameters left at their defaults
model1.fit(X1, Y1) # Fit on treated samples only

SklearnModel(alpha=0.95, beta=2.0, initializer=None, n_burn=200, n_chains=4,
             n_jobs=-1, n_samples=200, n_trees=10, sigma_a=0.001, sigma_b=0.001,
             store_acceptance_trace=False, store_in_sample_predictions=False,
             thin=0.1,
             tree_sampler=<bartpy.samplers.unconstrainedtree.treemutation.UnconstrainedTreeMutationSampler object at 0x7f04d6bde6a0>)

### Evaluation

In [17]:
# Estimated effect per sample: treated-arm prediction minus control-arm
# prediction, evaluated on every row of X (both arms).
tau_hat = model1.predict(X) - model0.predict(X)
# Disabled: the same estimate on validation / test splits (X_val and X_test
# are not defined in this notebook).
# tau_hat_val = model1.predict(X_val) - model0.predict(X_val)
# tau_hat_test = model1.predict(X_test) - model0.predict(X_test)

# NOTE(review): confirm tau_hat and Tau have the same shape; a (n,) versus
# (n, 1) mismatch would broadcast inside the subtraction.
pehe_ls = eval_pehe(tau_hat, Tau)

print(pehe_ls)

0.5037417739801686


In [None]:
# Overlay the estimated and ground-truth effect distributions on shared bins.
# Both histograms are semi-transparent and labelled, so neither hides the
# other and the legend identifies each series.
_ = plt.hist(tau_hat,bins=20,range=[-1,5],alpha=0.5,label='Estimated ITE')
_ = plt.hist(Tau,bins=20,range=[-1,5],alpha=0.5,label='Ground-truth ITE')
_ = plt.xlabel('Treatment effect')
_ = plt.ylabel('Number of samples')
_ = plt.legend()