# Data Simulation 

## Define Functions

In [2]:
import numpy as np
from random import choice

In [3]:
# Generate d-dimensional feature vector
def simulate_feature_vec(n, d, a=2):
    """Draw ``n`` correlated Gaussian feature vectors of dimension ``d``.

    A ``d x d`` raw matrix is filled row by row from fixed seeds, turned into
    a Gram matrix and rescaled to unit diagonal.  Samples are then drawn from
    a zero-mean multivariate normal that uses the rescaled matrix as its
    covariance.

    Parameters
    ----------
    n : int
        Number of samples (rows) to draw.
    d : int
        Dimensionality of each feature vector (columns).
    a : optional
        Not used by the computation; kept so existing callers keep working.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, d)``.

    Notes
    -----
    While building the covariance matrix this function reseeds NumPy's
    *global* random generator (``np.random.seed``).  For ``d >= 1`` the
    returned sample is therefore the same on every call with the same
    ``(n, d)``, and later draws from ``np.random`` continue from that
    seeded state.  This side effect is preserved on purpose so that
    previously generated data sets stay reproducible.
    """
    def seeds_for(master_seed):
        # One integer seed per matrix row, derived from a fixed master seed
        # through a private RandomState (does not touch the global stream).
        return np.random.RandomState(master_seed).randint(
            low=0, high=100000, size=d)

    row_seeds = seeds_for(42)
    shift_seeds = seeds_for(1028)

    def raw_row(i):
        # Seeds the global generator twice: once for the row noise and once
        # for the scalar shift that is broadcast over the whole row.
        np.random.seed(row_seeds[i])
        noise = np.random.randn(d)
        np.random.seed(shift_seeds[i])
        shift = np.random.randn(1)
        return noise + shift

    # Plain ndarrays replace the discouraged np.matrix class; .dot() is the
    # same matrix product np.matrix used for ``*``.
    raw = np.array([raw_row(i) for i in range(d)])
    gram = raw.dot(raw.T)
    # Scale rows and columns by 1/sqrt(diagonal) so the diagonal becomes 1.
    d_half = np.diag(np.diag(gram) ** (-0.5))
    corr = d_half.dot(gram).dot(d_half)

    mean = np.zeros(d)
    return np.random.multivariate_normal(mean, corr, n)  # shape (n, d)

In [4]:
# Generate potential outcomes
def simulate_y1(n, d, features=None):
    """Simulate potential outcomes with a step-shaped treatment effect.

    The control mean is a random linear function of the features plus 5
    where column 0 exceeds 0.5.  The treated mean adds 8 where column 1
    exceeds 0.1.  Independent standard-normal noise is added to each arm.

    Parameters
    ----------
    n : int
        Number of noise draws per arm; should equal the number of feature
        rows.
    d : int
        Length of the coefficient vector; must equal the number of feature
        columns.
    features : array-like of shape (n, d), optional
        Feature matrix.  When omitted, the module-level variable ``x`` is
        used, which is how the function behaved before this parameter
        existed.

    Returns
    -------
    (y1, y0) : tuple of numpy.ndarray
        ``y1`` is the outcome under treatment, ``y0`` under control.
    """
    # Fall back to the module-level ``x`` for callers that still rely on it.
    feats = x if features is None else features
    # Draw order (e1, e0, beta) is kept so seeded runs reproduce old output.
    e1 = np.random.randn(n)
    e0 = np.random.randn(n)
    beta = np.random.uniform(-5, 5, d)
    mu0 = np.dot(feats, beta) + 5 * (feats[:, 0] > 0.5)
    mu1 = mu0 + 8 * (feats[:, 1] > 0.1)
    return mu1 + e1, mu0 + e0  # y1: treatment group, y0: control group

In [5]:
# Generate potential outcomes
def simulate_y2(n, d, features=None):
    """Simulate potential outcomes from two independent linear models.

    Each arm has its own coefficient vector drawn uniformly from [1, 30);
    independent standard-normal noise is added to each arm.

    Parameters
    ----------
    n : int
        Number of noise draws per arm; should equal the number of feature
        rows.
    d : int
        Length of each coefficient vector; must equal the number of feature
        columns.
    features : array-like of shape (n, d), optional
        Feature matrix.  When omitted, the module-level variable ``x`` is
        used, which is how the function behaved before this parameter
        existed.

    Returns
    -------
    (y1, y0) : tuple of numpy.ndarray
        ``y1`` is the outcome under treatment, ``y0`` under control.
    """
    # Fall back to the module-level ``x`` for callers that still rely on it.
    feats = x if features is None else features
    # Draw order (e1, e0, beta0, beta1) is kept for reproducibility.
    e1 = np.random.randn(n)
    e0 = np.random.randn(n)
    beta0 = np.random.uniform(1, 30, d)
    beta1 = np.random.uniform(1, 30, d)
    mu0 = np.dot(feats, beta0)
    mu1 = np.dot(feats, beta1)
    return mu1 + e1, mu0 + e0  # y1: treatment group, y0: control group

In [17]:
# Generate potential outcomes
def simulate_y3(n, d, features=None):
    """Simulate potential outcomes with a smooth, bounded treatment effect.

    The effect is the product of two logistic curves, one in feature
    column 0 and one in column 1, scaled so it lies in (0, 4).  The treated
    mean is ``+effect / 2`` and the control mean is ``-effect / 2``;
    independent standard-normal noise is added to each arm.

    Parameters
    ----------
    n : int
        Number of noise draws per arm; should equal the number of feature
        rows.
    d : int
        Not used by the computation; kept for a signature parallel to the
        other ``simulate_y*`` functions.
    features : array-like with at least 2 columns, optional
        Feature matrix.  When omitted, the module-level variable ``x`` is
        used, which is how the function behaved before this parameter
        existed.

    Returns
    -------
    (y1, y0) : tuple of numpy.ndarray
        ``y1`` is the outcome under treatment, ``y0`` under control.
    """
    # Fall back to the module-level ``x`` for callers that still rely on it.
    feats = x if features is None else features
    e1 = np.random.randn(n)
    e0 = np.random.randn(n)

    # NOTE(review): the curves are centred at 1 via (x - 1) / 2; if the
    # intended centre was 1/2 (i.e. x - 1/2) this needs changing - confirm.
    denom0 = 1 + np.exp(-12 * ((feats[:, 0] - 1) / 2))
    denom1 = 1 + np.exp(-12 * ((feats[:, 1] - 1) / 2))
    # Both logistic terms belong in the denominator.  The previous
    # ``4 / denom0 * denom1`` multiplied by the second term because ``/`` and
    # ``*`` associate left to right, which made the effect unbounded.
    effect = 4 / (denom0 * denom1)

    mu1 = 0.5 * effect
    mu0 = -mu1
    return mu1 + e1, mu0 + e0  # y1: treatment group, y0: control group

In [7]:
# Generate potential outcomes
def simulate_y4(n, d, features=None):
    """Simulate potential outcomes with a linear mean and no treatment effect.

    Both arms share the same mean, a linear function of the features with
    coefficients drawn uniformly from [1, 30).  The arms differ only through
    their independent standard-normal noise.

    Parameters
    ----------
    n : int
        Number of noise draws per arm; should equal the number of feature
        rows.
    d : int
        Length of the coefficient vector; must equal the number of feature
        columns.
    features : array-like of shape (n, d), optional
        Feature matrix.  When omitted, the module-level variable ``x`` is
        used, which is how the function behaved before this parameter
        existed.

    Returns
    -------
    (y1, y0) : tuple of numpy.ndarray
        ``y1`` is the outcome under treatment, ``y0`` under control.
    """
    # Fall back to the module-level ``x`` for callers that still rely on it.
    feats = x if features is None else features
    # Draw order (e1, e0, beta) is kept so seeded runs reproduce old output.
    e1 = np.random.randn(n)
    e0 = np.random.randn(n)
    beta = np.random.uniform(1, 30, d)
    shared_mean = np.dot(feats, beta)
    return shared_mean + e1, shared_mean + e0  # y1: treatment, y0: control

In [31]:
# Generate potential outcomes
def simulate_y5(n, d, features=None):
    """Simulate potential outcomes with a piecewise-linear mean, no effect.

    Feature column 19 selects one of three regimes.  In each regime the mean
    is a linear function of a different block of feature columns:

    * column 19 < -0.4          -> columns ``[0, k)``
    * -0.4 <= column 19 < 0.4   -> columns ``[k, 2k)``
    * column 19 >= 0.4          -> columns ``[2k, 3k)``

    with ``k = min(d, 5)``.  All three regimes use the same coefficients,
    the first ``k`` entries of a vector drawn uniformly from [-15, 15).
    Both arms share this mean and differ only through their independent
    standard-normal noise.

    Previously the piecewise mean was computed and then immediately
    overwritten by a plain ``np.dot(x, beta)``, so it never reached the
    output.  That overwrite has been removed.

    Parameters
    ----------
    n : int
        Number of samples; must equal the number of feature rows.
    d : int
        Length of the drawn coefficient vector.
    features : array-like with at least 20 columns, optional
        Feature matrix.  When omitted, the module-level variable ``x`` is
        used, which is how the function behaved before this parameter
        existed.

    Returns
    -------
    (y1, y0) : tuple of numpy.ndarray
        ``y1`` is the outcome under treatment, ``y0`` under control.
    """
    # Fall back to the module-level ``x`` for callers that still rely on it.
    feats = x if features is None else features
    # Draw order (e1, e0, beta) is kept so seeded noise stays reproducible.
    e1 = np.random.randn(n)
    e0 = np.random.randn(n)
    beta = np.random.uniform(-15, 15, d)

    width = min(d, 5)
    coef = beta[:width]
    # NOTE(review): every regime reuses beta[:width]; if each regime was
    # meant to have its own coefficient block, adjust here - confirm.
    regime = feats[:, 19]
    regimes = (
        (regime < -0.4, 0),
        ((regime >= -0.4) & (regime < 0.4), width),
        (regime >= 0.4, 2 * width),
    )
    mu0 = np.zeros(n)
    for mask, start in regimes:
        mu0[mask] = np.dot(feats[mask][:, start:start + width], coef)
    mu1 = mu0
    return mu1 + e1, mu0 + e0  # y1: treatment group, y0: control group

In [36]:
# Generate potential outcomes
def simulate_y6(n, d, features=None):
    """Simulate potential outcomes that depend on one feature, no effect.

    Both arms share the mean ``2 * column_0 - 1`` and differ only through
    their independent standard-normal noise.

    Parameters
    ----------
    n : int
        Number of noise draws per arm; should equal the number of feature
        rows.
    d : int
        Not used by the computation; kept for a signature parallel to the
        other ``simulate_y*`` functions.
    features : array-like with at least 1 column, optional
        Feature matrix.  When omitted, the module-level variable ``x`` is
        used, which is how the function behaved before this parameter
        existed.

    Returns
    -------
    (y1, y0) : tuple of numpy.ndarray
        ``y1`` is the outcome under treatment, ``y0`` under control.
    """
    # Fall back to the module-level ``x`` for callers that still rely on it.
    feats = x if features is None else features
    e1 = np.random.randn(n)
    e0 = np.random.randn(n)
    shared_mean = 2 * feats[:, 0] - 1
    return shared_mean + e1, shared_mean + e0  # y1: treatment, y0: control

In [89]:
# Generate treatment assignment
def simulate_assignment(propensity, size=None):
    """Draw Bernoulli treatment indicators with a common propensity.

    Parameters
    ----------
    propensity : float
        Probability that a unit is assigned to treatment (indicator 1).
    size : int, optional
        Number of indicators to draw.  When omitted, the module-level
        variable ``n`` is used, which is how the function behaved before
        this parameter existed.

    Returns
    -------
    numpy.ndarray
        Integer array of 0/1 treatment indicators.
    """
    # Fall back to the module-level ``n`` for callers that still rely on it.
    count = n if size is None else size
    return np.random.binomial(1, propensity, count)

# Different propensity scores
def simulate_assignment_y6(n):
    """Draw treatment indicators with a unit-specific propensity.

    Each of the ``n`` units gets its own propensity ``(1 + B) / 4`` with
    ``B`` drawn from a Beta(2, 4) distribution, then a Bernoulli indicator
    is drawn with that propensity.

    Returns an integer array of ``n`` 0/1 treatment indicators.
    """
    beta_draws = np.random.beta(2, 4, n)
    propensities = (beta_draws + 1) / 4
    return np.random.binomial(1, propensities, n)

In [10]:
# Generate assigned y
def generate_assigned_y(y1, y0, w):
    """Combine potential outcomes into the observed outcome.

    Where the indicator ``w`` is 1 the result is ``y1``; where it is 0 the
    result is ``y0``.
    """
    treated_part = y1 * w
    # (w - 1) is -1 for controls and 0 for treated, hence the subtraction.
    control_part = y0 * (w - 1)
    return treated_part - control_part

In [11]:
# Calculate effect
def calculate_effect(y1, y0):
    """Return the difference between treated and control outcomes."""
    return y1 - y0

## Give parameters and generate data

In [45]:
# Simulation 1a: unbalanced design (treatment probability 0.01)
# --- Parameters ---
n, d = 500000, 20  # number of samples, dimensionality of each feature vector
a = 2  # concentration of the covariance matrix (1, -1, 0)
propensity = 0.01  # Bernoulli probability used to draw the assignment w

# --- Generate data ---
x = simulate_feature_vec(n, d)
y1, y0 = simulate_y1(n, d)
w = simulate_assignment(propensity)
# The true effect depends only on the potential outcomes, not on w.
eff_act = calculate_effect(y1, y0)
y_obs = generate_assigned_y(y1, y0, w)

In [14]:
# Simulation 1b: outcome model of simulation 1 with treatment probability 0.5
# --- Parameters ---
n, d = 500000, 20  # number of samples, dimensionality of each feature vector
a = 2  # concentration of the covariance matrix (1, -1, 0)
propensity = 0.5  # Bernoulli probability used to draw the assignment w

# --- Generate data ---
x = simulate_feature_vec(n, d)
y1, y0 = simulate_y1(n, d)
w = simulate_assignment(propensity)
# The true effect depends only on the potential outcomes, not on w.
eff_act = calculate_effect(y1, y0)
y_obs = generate_assigned_y(y1, y0, w)

In [15]:
# Simulation 2: separate linear outcome model per arm
# --- Parameters ---
n, d = 500000, 20  # number of samples, dimensionality of each feature vector
a = 2  # concentration of the covariance matrix (1, -1, 0)
propensity = 0.5  # Bernoulli probability used to draw the assignment w

# --- Generate data ---
x = simulate_feature_vec(n, d)
y1, y0 = simulate_y2(n, d)
w = simulate_assignment(propensity)
# The true effect depends only on the potential outcomes, not on w.
eff_act = calculate_effect(y1, y0)
y_obs = generate_assigned_y(y1, y0, w)

In [18]:
# Simulation 3: outcomes from simulate_y3
# --- Parameters ---
n, d = 500000, 20  # number of samples, dimensionality of each feature vector
a = 2  # concentration of the covariance matrix (1, -1, 0)
propensity = 0.5  # Bernoulli probability used to draw the assignment w

# --- Generate data ---
x = simulate_feature_vec(n, d)
y1, y0 = simulate_y3(n, d)
w = simulate_assignment(propensity)
# The true effect depends only on the potential outcomes, not on w.
eff_act = calculate_effect(y1, y0)
y_obs = generate_assigned_y(y1, y0, w)

In [19]:
# Simulation 4: outcomes from simulate_y4
# --- Parameters ---
n, d = 500000, 20  # number of samples, dimensionality of each feature vector
a = 2  # concentration of the covariance matrix (1, -1, 0)
propensity = 0.5  # Bernoulli probability used to draw the assignment w

# --- Generate data ---
x = simulate_feature_vec(n, d)
y1, y0 = simulate_y4(n, d)
w = simulate_assignment(propensity)
# The true effect depends only on the potential outcomes, not on w.
eff_act = calculate_effect(y1, y0)
y_obs = generate_assigned_y(y1, y0, w)

In [32]:
# Simulation 5: outcomes from simulate_y5
# --- Parameters ---
n, d = 500000, 20  # number of samples, dimensionality of each feature vector
a = 2  # concentration of the covariance matrix (1, -1, 0)
propensity = 0.5  # Bernoulli probability used to draw the assignment w

# --- Generate data ---
x = simulate_feature_vec(n, d)
y1, y0 = simulate_y5(n, d)
w = simulate_assignment(propensity)
# The true effect depends only on the potential outcomes, not on w.
eff_act = calculate_effect(y1, y0)
y_obs = generate_assigned_y(y1, y0, w)

In [90]:
# Simulation 6: uniform features with unit-specific propensities
# --- Parameters ---
n, d = 500000, 20  # number of samples, dimensionality of each feature vector
a = 2  # concentration of the covariance matrix (1, -1, 0)

# --- Generate data ---
# Features are i.i.d. uniform on [0, 1) here instead of correlated Gaussians.
x = np.random.uniform(low=0, high=1, size=(n, d))
y1, y0 = simulate_y6(n, d)
w = simulate_assignment_y6(n)
# The true effect depends only on the potential outcomes, not on w.
eff_act = calculate_effect(y1, y0)
y_obs = generate_assigned_y(y1, y0, w)