In [25]:
import numpy as np
from random import choice

def parse():
    ap = argparse.ArgumentParser() 
    ap.add_argument("-size_input",action ='store',dest = 'n', default = 500000, help ="data input size")
    ap.add_argument("-num_dim",action ='store',dest = 'd',default = 20,help ="dimention of each instance")
    ap.add_argument("-condition_sim",action ='store',dest = 'sim',help ="simulation condition")
    arg = ap.parse_args()
    return arg

class simulate_data():

    def __init__(self, n,d,sim):
        super().__init__()
        self.n = n
        self.d = d
        self.sim = sim
        
    def simulate_feature_vec(self, a=2):
        if self.sim == 6:
            self.x = x = np.random.uniform(0,1,(self.n,self.d))
        else:
            # Random states
            def get_states(random_state, low, high, size):
                rs = np.random.RandomState(random_state)
                states = rs.randint(low=low, high=high, size=size)
                return states 
            self.states1 = get_states(random_state=42, low=0, high=100000, size=self.d)
            self.states2 = get_states(random_state=1028, low=0, high=100000, size=self.d)
            # generate one sequence for raw covariance matrice
            def generate_seq(i):
                np.random.seed(self.states1[i])
                a = np.random.randn(self.d)
                np.random.seed(self.states2[i])
                b = np.random.randn(1)
                return a+b 
            # Generate random covariance matrice
            A = np.matrix([generate_seq(i) for i in range(self.d)])
            A = A*np.transpose(A)
            D_half = np.diag(np.diag(A)**(-0.5))
            C = D_half*A*D_half
            # Generate d-dimensional feature vector
            mean = np.zeros(self.d)
            cov = C
            self.x = np.random.multivariate_normal(mean, cov, self.n) # shape (n,d)

    def generate_y_act(self):
        
        def simulate_y1():
            # Generate error term matrice
            e1 = np.random.randn(self.n)
            e0 = np.random.randn(self.n)
            # Generate mu
            beta = np.random.uniform(-5,5,self.d)
            mu0 = np.dot(self.x, beta) +5*(self.x[:,0] > 0.5) 
            mu1 = mu0 + 8*(self.x[:,1] > 0.1)
            # Calculate y
            self.y1 = mu1 + e1 # y1:treatement group #y0:control group  
            self.y0 = mu0 + e0

        def simulate_y2():
            # Generate error term matrice
            e1 = np.random.randn(self.n)
            e0 = np.random.randn(self.n)
            # Generate mu
            beta0 = np.random.uniform(1,30,self.d)
            beta1 = np.random.uniform(1,30,self.d)
            mu0 = np.dot(self.x, beta0)
            mu1 = np.dot(self.x, beta1)
            # Calculate y
            self.y1 = mu1 + e1
            self.y0 = mu0 + e0

        def simulate_y3():
            # Generate error term matrice
            e1 = np.random.randn(self.n)
            e0 = np.random.randn(self.n)
            # Generate mu
            effect = 4 / (1+np.exp(-12 * ((self.x[:,0]-1)/2))) * (1+np.exp(-12 * ((self.x[:,1]-1)/2)))
            mu1 = 0.5 * effect
            mu0 = -mu1
            # Calculate y
            self.y1 = mu1 + e1
            self.y0 = mu0 + e0

        def simulate_y4():
            # Generate error term matrice
            e1 = np.random.randn(self.n)
            e0 = np.random.randn(self.n)
            # Generate mu
            beta = np.random.uniform(1,30,self.d)
            mu0 = np.dot(self.x, beta)
            mu1 = mu0
            # Calculate y
            self.y1 = mu1 + e1
            self.y0 = mu0 + e0

        def simulate_y5():
            # Generate error term matrice
            e1 = np.random.randn(self.n)
            e0 = np.random.randn(self.n)
            # Generate mu
            beta = np.random.uniform(-15,15,self.d)
            beta_m_dim = min(self.d,5)
            beta_m = beta[0:beta_m_dim]
            mu0 = np.zeros(self.n)
            mu0[self.x[:,19] < -0.4] = np.dot(self.x[self.x[:,19] < 0.-0.4][:,0:beta_m_dim],beta_m)
            mu0[(self.x[:,19] < 0.4) & (self.x[:,19] >= -0.4)] = np.dot(self.x[(self.x[:,19] < 0.4) & (self.x[:,19] >= -0.4)][:,beta_m_dim:(2*beta_m_dim)],beta_m)
            mu0[self.x[:,19] >= 0.4] = np.dot(self.x[self.x[:,19] >= 0.4][:,2*beta_m_dim:3*beta_m_dim],beta_m)
            mu0 = np.dot(self.x, beta)
            mu1 = mu0
            # Calculate y
            self.y1 = mu1 + e1
            self.y0 = mu0 + e0
        
        def simulate_y6():
            # Generate error term matrice
            e1 = np.random.randn(self.n)
            e0 = np.random.randn(self.n)
            # Generate mu
            mu0 = 2 * self.x[:,0] - 1
            mu1 = mu0
            # Calculate y
            self.y1 = mu1 + e1
            self.y0 = mu0 + e0
    
        if self.sim == 0:
            simulate_y1()
        elif self.sim == 1:
            simulate_y1()
        elif self.sim == 2:
            simulate_y2()
        elif self.sim == 3:
            simulate_y3()
        elif self.sim == 4:
            simulate_y4()
        elif self.sim == 5:
            simulate_y5()
        elif self.sim == 6:
            simulate_y6()
    
    def simulate_assignment(self):
        if self.sim == 6:
            # Different propensity scores
            prop_list = 1/4 * (1+ np.random.beta(2,4,self.n))
            self.w = np.random.binomial(1,prop_list,self.n)
        if self.sim == 0:
            self.w = np.random.binomial(1,0.01,self.n)
        else:
            self.w = np.random.binomial(1,0.5,self.n)
        
    def generate_assigned_y(self):
        self.y_obs = self.y1*self.w - self.y0*(self.w-1)
        
    def calculate_effect(self):
        self.eff_act = self.y1 - self.y0

In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [28]:
# Split data into training and testing set of assgined groups
def split_data(n_train, n_test, x, y_obs, eff_act,w):
    x_train, x_test = x[:n_train,:], x[n_train:n_train+n_test,:]
    y_obs_train, y_obs_test =  y_obs[:n_train], y_obs[n_train:n_train+n_test]
    eff_actual_train, eff_actual_test = eff_act[:n_train], eff_act[n_train:n_train+n_test]
    w_train, w_test = w[:n_train], w[n_train:n_train+n_test]
    return x_train, x_test, y_obs_train, y_obs_test, eff_actual_train, eff_actual_test, w_train, w_test
# x_trt_train, x_trt_test = x_train[np.where(w == 1)], x_train[np.where(w == 0)]

In [29]:
def evaluate_inf(outputs, target):
    mse = (np.square(outputs - target)).mean(axis=None)
    return mse

In [30]:
# Define forward process
class M(nn.Module):
    def __init__(self, n_feature,n_hidden, n_output = 1):
        super(M, self).__init__() 
        self.hidden = nn.Linear(n_feature, n_hidden)
        self.predict = nn.Linear(n_hidden, n_output)

    def forward(self, x):
        # Here the forward pass is simply a linear function
        x = F.relu(self.hidden(x))
        x = self.predict(x)
        
        return x

In [181]:
a =simulate_data(500000, 20, 6) 
a.simulate_feature_vec()
a.generate_y_act()
a.simulate_assignment()
a.generate_assigned_y()
a.calculate_effect()

In [239]:
n_train = 200000
n_test = 2000
x = a.x
y_obs = a.y_obs
eff_act = a.eff_act
w = a.w

In [240]:
x_train, x_test, y_obs_train, y_obs_test, eff_act_train, eff_act_test,w_train, w_test = split_data(n_train, n_test, x, y_obs, eff_act,w)

In [241]:
x_trt_train, x_trt_test = x_train[np.where(w_train == 1)], x_test[np.where(w_test == 1)]
y_obs_trt_train, y_obs_trt_test = y_obs_train[np.where(w_train == 1)], y_obs_test[np.where(w_test == 1)]

x_con_train, x_con_test = x_train[np.where(w_train == 0)], x_test[np.where(w_test == 0)]
y_obs_con_train, y_obs_con_test = y_obs_train[np.where(w_train == 0)], y_obs_test[np.where(w_test == 0)]

## S-NN

In [242]:
n_feature = 21 # dimentionality of Xi
n_hidden = 40
n_output = 1 # Dimensionality of Yi
l_rate = 0.01
epochs = 300

In [243]:
# Train M1 (treatment group)
# Define model, loss function and optimizer
M_model1 = M(n_feature,n_hidden, n_output)
criterion = torch.nn.MSELoss()
optimizer1 = torch.optim.SGD(M_model1.parameters(), lr = l_rate) #Stochastic Gradient Descent

x = torch.from_numpy(x_train).float()
w = torch.from_numpy(w_train).float().unsqueeze(1)
x_1 = torch.cat((x,w),1)
y_1 = torch.from_numpy(y_obs_train).float().unsqueeze(1)
# Training
for epoch in range(epochs):
    outputs = M_model1(x_1) 
    loss = criterion(outputs, y_1)

    optimizer1.zero_grad() # clear gradients for next train
    loss.backward() # backprops
    optimizer1.step() # update gradients

    if epoch % 100 == 0:
    # plot and show learning process
#        plt.cla()
#        plt.scatter(x.data.numpy(), y.data.numpy())
#        plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
#        plt.text(0.5, 0, 'Loss=%.4f' % loss.data.numpy(), fontdict={'size': 20, 'color':  'red'})
#        plt.pause(0.1)
        print('epoch {}, loss {}'.format(epoch,loss.data[0]))



epoch 0, loss 1.354793667793274
epoch 100, loss 1.3036384582519531
epoch 200, loss 1.26758873462677


In [244]:
# Transform input and target to tensor and reshape
x = torch.from_numpy(x_train).float()
w_1 = np.ones(n_train)
w_1 = torch.from_numpy(w_1).float().unsqueeze(1)
x_1 = torch.cat((x,w_1),1)

x = torch.from_numpy(x_train).float()
w_0 = np.zeros(n_train)
w_0 = torch.from_numpy(w_0).float().unsqueeze(1)
x_0 = torch.cat((x,w_0),1)

In [245]:
mu1 = M_model1(x_1)
mu0 = M_model0(x_0)

In [246]:
outputs = mu1-mu0
outputs = outputs.data.numpy().reshape((n_train,))
target = eff_act_train

mse = evaluate_inf(outputs, target)

In [247]:
mse

66.02627620483156

## S-BART