In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
import random 
import numpy.random as rand
from random import randrange
from scipy.stats import bernoulli, binom
from sklearn.linear_model import LinearRegression
import scipy.linalg as spla

# 3. Generate simulation study 

## 3.1. RCT simulation

In [6]:
# Reproducibility and number of observations

# Seed BOTH generators: `random.seed` only affects Python's stdlib `random`
# module, while the draws in this notebook go through `np.random`, which keeps
# its own independent global state.
random.seed(10)
np.random.seed(10)
n = 100  # number of simulated observations

In [7]:
def mutlivariate_normal_sampler(mean,covariance,n_samples):
  """Draw `n_samples` rows from a multivariate normal distribution.

  Parameters
  ----------
  mean : value added to every sampled row (broadcast by NumPy).
  covariance : square matrix exposing `.shape`; it is Cholesky-factorised,
      so the factorisation raises if the matrix is not positive definite.
  n_samples : number of rows to draw.

  Returns
  -------
  Array with `n_samples` rows and `covariance.shape[0]` columns.
  """
  # With scipy's default arguments this is the upper-triangular factor U
  # satisfying U^T U = covariance.
  upper_factor = spla.cholesky(covariance)
  dim = covariance.shape[0]
  # Independent standard-normal draws, one row per sample.
  white_noise = np.random.normal(size=(n_samples, dim))
  # Right-multiplying by U gives the rows covariance U^T U; then shift by the mean.
  coloured = np.dot(white_noise, upper_factor)
  return coloured + mean


In [8]:
# Generate variables for the simulated RCT: covariates X1..X7, binary treatment W and outcome Y.

d_rct1 = pd.DataFrame()  # empty frame; not used in this cell

# Lalonde dataset
# NOTE(review): machine-specific absolute path -- this cell only runs where the file exists at this location.
df = pd.read_stata("/Users/mawuliagamah/gitprojects/causal_inference/causal_inference/nsw.dta")
# Drop data_id, treat and re78; the remaining columns are used as pre-treatment covariates.
df = df.drop(columns=['data_id', 'treat', 're78'])

# Get covariance matrix from data
# NOTE(review): the covariance is not standardised, so a covariate with a much larger
# variance than the others dominates Y (the stored correlation output has corr(X7, Y) ~ 1).
cov_matrix = df.cov()

# Generate pre-treatment covariates

mu = 0  # mean
sigma = 1  # standard deviation (the `scale` of np.random.normal); equals the variance only because it is 1

# n draws of the covariates, simulated with mean `mu` and the covariance estimated above
cov_array = mutlivariate_normal_sampler(mu,cov_matrix,n_samples=n)
row_index = pd.RangeIndex(range(cov_array.shape[0]))
col_index = pd.RangeIndex(range(cov_array.shape[1]))
df_rct = pd.DataFrame(data=cov_array, index = row_index,columns= col_index)
covariate_cols = ['X1', 'X2', 'X3', 'X4', 'X5','X6','X7']
df_rct.columns = covariate_cols

# Binary treatment assignment W ~ Bernoulli(p = 0.5), obtained by thresholding uniform draws
p_bernoulli = 0.5
W_uniform = np.random.rand(n)
df_rct['W'] = (W_uniform <= p_bernoulli).astype(np.int32)

# Error term
epsilon_rct = np.random.normal(mu,sigma , size = (n,))

# Heterogeneous treatment effect: the difference in Y between W = 1 and W = 0
het = 1 + df_rct['X1'] + df_rct['X1']**2 + df_rct['X2'] + df_rct['X2']**2
# Baseline outcome: sum over the covariates ONLY. Summing the whole frame would
# also add W (already a column at this point), making the true effect het + 1.
baseline = df_rct[covariate_cols].sum(axis=1)
df_rct['Y'] = df_rct['W']*het + baseline + epsilon_rct




In [9]:
# Pairwise correlation matrix of every column in the simulated RCT,
# rendered as a colour-graded table (the styled frame is the cell's output).
corr = df_rct.corr(method='pearson')
corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,W,Y
X1,1.0,-0.159494,0.107157,-0.141343,0.316716,-0.074348,-0.023223,0.132173,-0.021362
X2,-0.159494,1.0,0.017295,-0.112525,-0.109765,-0.594434,0.19063,-0.199879,0.190339
X3,0.107157,0.017295,1.0,-0.70245,-0.124924,0.033305,-0.220869,-0.146272,-0.220279
X4,-0.141343,-0.112525,-0.70245,1.0,0.145256,0.018101,0.0083,0.181113,0.007505
X5,0.316716,-0.109765,-0.124924,0.145256,1.0,-0.03524,0.201292,0.048719,0.20195
X6,-0.074348,-0.594434,0.033305,0.018101,-0.03524,1.0,-0.019249,0.205234,-0.018571
X7,-0.023223,0.19063,-0.220869,0.0083,0.201292,-0.019249,1.0,0.001062,0.999957
W,0.132173,-0.199879,-0.146272,0.181113,0.048719,0.205234,0.001062,1.0,0.005724
Y,-0.021362,0.190339,-0.220279,0.007505,0.20195,-0.018571,0.999957,0.005724,1.0


## 3.2 Simulate ODB