# Step 0: Import Required Packages 

#### If the cell below doesn't run then do 'pip install rpy2' or 'conda install -c r rpy2' and 'conda install tzlocal' in Anaconda Prompt
#### Also, run pip install CausalInference or conda install -c conda-forge dowhy if you're using Anaconda Prompt
#### Change the paths for os.environ below to match your R folder directory and version

In [1]:
import os
import rpy2

# Import rpy2's high-level interface. If the import fails (presumably because
# rpy2 cannot locate the R installation), point R_HOME / PATH at a local R
# install and retry once.
try:
    import rpy2.robjects as robjects
except Exception:
    # `except Exception` instead of a bare `except` so that KeyboardInterrupt
    # and SystemExit are not swallowed by the fallback.
    # NOTE: Windows-specific locations; edit them to match your R folder/version.
    os.environ["R_HOME"] = r"C:\Program Files\R\R-4.0.2"
    # .get() avoids a KeyError in the (unusual) case that PATH is not set.
    os.environ["PATH"]   = r"C:\Program Files\R\R-4.0.2\bin\x64" + ";" + os.environ.get("PATH", "")
    import rpy2.robjects as robjects
    
import rpy2.robjects.packages as rpackages
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.vectors import StrVector
from rpy2.robjects import FloatVector, Formula

#### Run pip install tabulate 
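#### Run 'from causalinference import CausalModel' below if you used pip install, or 'from dowhy import CausalModel' if you used conda install above. Note that Step 5 calls `est_propensity_s`, `stratify_s` and `est_via_blocking`, which are methods of the `causalinference` CausalModel; the `dowhy` CausalModel has a different interface, so Step 5 must be adapted if you switch.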

In [2]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import mahalanobis
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
import math
from sklearn.metrics import pairwise_distances
from sklearn.model_selection import GridSearchCV
import time

from causalinference import CausalModel 
#from dowhy import CausalModel
from IPython.display import HTML, display
import tabulate

# Step 1: Read the files

In [3]:
# Known true average treatment effects, used as the benchmark for each dataset.
lowDim_true_ATE, highDim_true_ATE = 2.5, -3

# Load both datasets from the shared data folder.
lowDim_dataset, highDim_dataset = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/lowDim_dataset.csv', '../data/highDim_dataset.csv')
)

# Step 2: Calculate Propensity and Linear Propensity Scores

In [4]:
#low dim grid search (commented out since it takes a few minutes to run)

#X=lowDim_dataset.iloc[:,2:].values
#A=lowDim_dataset['A'].values
#Y=lowDim_dataset['Y'].values

#params = {'learning_rate':[0.01,0.05,0.1,0.5], 'max_depth': [1,2,3,4], 'n_estimators':[50,100,150],
#          'min_samples_leaf':[1,3,5],'min_samples_split':[2,4,6]}
#gscv = GridSearchCV(GradientBoostingClassifier(),params,cv=5).fit(X,A)
#gscv.best_params_

#output: {'learning_rate': 0.01,
# 'max_depth': 2,
# 'min_samples_leaf': 1,
# 'min_samples_split': 2,
# 'n_estimators': 150}

In [5]:
#high dim grid search (commented out since it takes a few minutes to run)

#X=highDim_dataset.iloc[:,2:].values
#A=highDim_dataset['A'].values
#Y=highDim_dataset['Y'].values

#params = {'learning_rate':[0.01,0.05,0.1,0.5], 'max_depth': [1,2,3,4], 'n_estimators':[50,100,150],
#          'min_samples_leaf':[1,3,5],'min_samples_split':[2,4,6]}
#gscv = GridSearchCV(GradientBoostingClassifier(),params,cv=5).fit(X,A)
#gscv.best_params_


#output: {'learning_rate': 0.05,
# 'max_depth': 1,
# 'min_samples_leaf': 5,
# 'min_samples_split': 2,
# 'n_estimators': 100}

In [6]:
def logit(x, eps=1e-12):
    """Return the log-odds ``log(x / (1 - x))`` of a probability.

    Parameters
    ----------
    x : float
        Probability in the closed interval [0, 1].
    eps : float, optional
        Values closer than ``eps`` to 0 or 1 are clamped to ``eps`` /
        ``1 - eps`` so that a saturated score yields a large finite value
        instead of raising ZeroDivisionError (x == 1) or ValueError (x == 0).

    Returns
    -------
    float
        The log-odds. Values strictly inside ``(eps, 1 - eps)`` are not altered
        by the clamp.

    Raises
    ------
    ValueError
        If ``x`` lies outside [0, 1].
    """
    if x < 0 or x > 1:
        raise ValueError("logit is only defined for probabilities in [0, 1], got {!r}".format(x))
    # Clamp only the boundaries; NaN falls through unchanged and yields NaN.
    clipped = min(max(x, eps), 1 - eps)
    return math.log(clipped / (1 - clipped))

### Low Dimensional Dataset

In [7]:
# Low-dim data: treatment, outcome and covariates. Covariates are every column
# from the third onward (presumably the first two are Y and A — verify against the CSV).
A = lowDim_dataset['A'].values
Y = lowDim_dataset['Y'].values
X = lowDim_dataset.iloc[:, 2:].values

# Boosted propensity model using the hyper-parameters reported by the low-dim
# grid search above.
gbm = GradientBoostingClassifier(
    n_estimators=150,
    learning_rate=0.01,
    max_depth=2,
    min_samples_split=2,
    min_samples_leaf=1,
)
gbm.fit(X, A)

# Second column of predict_proba = probability of the second class
# (A == 1 when A is coded 0/1), i.e. the propensity score.
low_dim_propensity_scores = list(gbm.predict_proba(X)[:, 1])
# Linear propensity score = logit of the propensity score.
low_dim_linear_propensity_scores = list(map(logit, low_dim_propensity_scores))

In [8]:
# Copy of the low-dim data with the propensity score appended as a new column;
# the original frame is left untouched.
lowDim_dataset_propensity = lowDim_dataset.assign(propensity_score=low_dim_propensity_scores)

In [9]:
# Copy of the low-dim data with the linear (logit) propensity score appended;
# the original frame is left untouched.
lowDim_dataset_linear_propensity = lowDim_dataset.assign(
    linear_propensity_score=low_dim_linear_propensity_scores
)

In [10]:
# Write each low-dim score vector to its own single-column CSV (the default
# integer index is written as well).
pd.Series(low_dim_propensity_scores, name='propensity_scores').to_frame().to_csv(
    '../output/low_dim_propensity_scores.csv'
)
pd.Series(low_dim_linear_propensity_scores, name='linear_propensity_scores').to_frame().to_csv(
    '../output/low_dim_linear_propensity_scores.csv'
)

### High Dimensional Dataset

In [11]:
# High-dim data: treatment, outcome and covariates. Covariates are every column
# from the third onward (presumably the first two are Y and A — verify against the CSV).
A = highDim_dataset['A'].values
Y = highDim_dataset['Y'].values
X = highDim_dataset.iloc[:, 2:].values

# Boosted propensity model using the hyper-parameters reported by the high-dim
# grid search above.
gbm = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.05,
    max_depth=1,
    min_samples_split=2,
    min_samples_leaf=5,
)
gbm.fit(X, A)

# Second column of predict_proba = probability of the second class
# (A == 1 when A is coded 0/1), i.e. the propensity score.
high_dim_propensity_scores = list(gbm.predict_proba(X)[:, 1])
# Linear propensity score = logit of the propensity score.
high_dim_linear_propensity_scores = list(map(logit, high_dim_propensity_scores))

In [12]:
# Copy of the high-dim data with the propensity score appended as a new column;
# the original frame is left untouched.
highDim_dataset_propensity = highDim_dataset.assign(propensity_score=high_dim_propensity_scores)

In [13]:
# Copy of the high-dim data with the linear (logit) propensity score appended;
# the original frame is left untouched.
highDim_dataset_linear_propensity = highDim_dataset.assign(
    linear_propensity_score=high_dim_linear_propensity_scores
)

In [14]:
# Write each high-dim score vector to its own single-column CSV (the default
# integer index is written as well).
pd.Series(high_dim_propensity_scores, name='propensity_scores').to_frame().to_csv(
    '../output/high_dim_propensity_scores.csv'
)
pd.Series(high_dim_linear_propensity_scores, name='linear_propensity_scores').to_frame().to_csv(
    '../output/high_dim_linear_propensity_scores.csv'
)

# Step 3: Perform Full Matching

### True ATE: 2.5 for low dim and -3 for high dim

### Set Up rpy2 (Python Interface to R)

In [15]:
%%capture 
utils = importr('utils')
utils.chooseCRANmirror(ind=1)
packnames = ('optmatch')


In [16]:
# Install only the R packages that are not present yet.
# `packnames` must be a collection of names; a bare string would be iterated
# character by character, so normalise it to a one-element list first.
requested_packages = [packnames] if isinstance(packnames, str) else list(packnames)
names_to_install = [x for x in requested_packages if not rpackages.isinstalled(x)]
if len(names_to_install) > 0:
    utils.install_packages(StrVector(names_to_install))

In [17]:
%%capture 
utils.chooseCRANmirror(ind=1)
robjects.r(f'install.packages("{"optmatch"}")')

In [18]:
# Expose the R package `optmatch` as a Python object (provides match_on / fullmatch).
optmatch = importr('optmatch')

### Convert Python (pandas) DataFrames to R Data Frames

In [19]:
# Convert the three low-dim pandas DataFrames to R data frames, timing each
# conversion separately (the timings are added to the matching runtimes later).
with localconverter(robjects.default_converter + pandas2ri.converter):
    try:
        lowDim_R_runtime = time.time()
        lowDim_dataset_R = robjects.conversion.py2rpy(lowDim_dataset)
        lowDim_R_runtime = time.time()-lowDim_R_runtime

        lowDim_propensity_R_runtime = time.time()
        lowDim_dataset_propensity_R = robjects.conversion.py2rpy(lowDim_dataset_propensity)
        lowDim_propensity_R_runtime = time.time()-lowDim_propensity_R_runtime

        lowDim_linear_propensity_R_runtime = time.time()
        lowDim_dataset_linear_propensity_R = robjects.conversion.py2rpy(lowDim_dataset_linear_propensity)
        lowDim_linear_propensity_R_runtime = time.time()-lowDim_linear_propensity_R_runtime

    except AttributeError:
        # Fall back only when `py2rpy` is missing (presumably an older rpy2 that
        # exposes `pandas2ri.py2ri` instead). A bare `except` here would also
        # hide genuine conversion errors and KeyboardInterrupt.
        lowDim_R_runtime = time.time()
        lowDim_dataset_R = pandas2ri.py2ri(lowDim_dataset)
        lowDim_R_runtime = time.time()-lowDim_R_runtime

        lowDim_propensity_R_runtime = time.time()
        lowDim_dataset_propensity_R = pandas2ri.py2ri(lowDim_dataset_propensity)
        lowDim_propensity_R_runtime = time.time()-lowDim_propensity_R_runtime

        lowDim_linear_propensity_R_runtime = time.time()
        lowDim_dataset_linear_propensity_R = pandas2ri.py2ri(lowDim_dataset_linear_propensity)
        lowDim_linear_propensity_R_runtime = time.time()-lowDim_linear_propensity_R_runtime

In [20]:
# Convert the three high-dim pandas DataFrames to R data frames, timing each
# conversion separately (the timings are added to the matching runtimes later).
with localconverter(robjects.default_converter + pandas2ri.converter):
    try:
        highDim_R_runtime = time.time()
        highDim_dataset_R = robjects.conversion.py2rpy(highDim_dataset)
        highDim_R_runtime = time.time()-highDim_R_runtime

        highDim_propensity_R_runtime = time.time()
        highDim_dataset_propensity_R = robjects.conversion.py2rpy(highDim_dataset_propensity)
        highDim_propensity_R_runtime = time.time()-highDim_propensity_R_runtime

        highDim_linear_propensity_R_runtime = time.time()
        highDim_dataset_linear_propensity_R = robjects.conversion.py2rpy(highDim_dataset_linear_propensity)
        highDim_linear_propensity_R_runtime = time.time()-highDim_linear_propensity_R_runtime

    except AttributeError:
        # Fall back only when `py2rpy` is missing (presumably an older rpy2 that
        # exposes `pandas2ri.py2ri` instead). A bare `except` here would also
        # hide genuine conversion errors and KeyboardInterrupt.
        highDim_R_runtime = time.time()
        highDim_dataset_R = pandas2ri.py2ri(highDim_dataset)
        highDim_R_runtime = time.time()-highDim_R_runtime

        highDim_propensity_R_runtime = time.time()
        highDim_dataset_propensity_R = pandas2ri.py2ri(highDim_dataset_propensity)
        highDim_propensity_R_runtime = time.time()-highDim_propensity_R_runtime

        highDim_linear_propensity_R_runtime = time.time()
        highDim_dataset_linear_propensity_R = pandas2ri.py2ri(highDim_dataset_linear_propensity)
        highDim_linear_propensity_R_runtime = time.time()-highDim_linear_propensity_R_runtime

### Method 1: Mahalanobis

Mahalanobis distance is 
$$D_{ij} = (X_i-X_j)^T\Sigma^{-1}(X_i-X_j)$$
where $\Sigma$ is the covariance matrix of $X$ in the pooled treatment and full control groups.

#### a. Low Dim Data

In [21]:
# Start the clock; the ATE cell below stops it.
start = time.time()
# Mahalanobis distances between treated and control units over every column
# except the outcome Y (formula 'A~.-Y'), then optimal full matching on them.
mahalanobis_distances = optmatch.match_on(
    Formula('A~.-Y'), data=lowDim_dataset_R, method='mahalanobis'
)
full_match_Mahalanobis_factor = optmatch.fullmatch(mahalanobis_distances, data=lowDim_dataset_R)
# Record each unit's matched-set label next to the data.
lowDim_dataset['assign'] = list(full_match_Mahalanobis_factor)

In [22]:
# Estimate the ATE: within every matched set take (mean treated Y - mean
# control Y), then average those differences weighted by the set size.
n_matched_sets = max(list(full_match_Mahalanobis_factor))
ATE_vec, weights = [], []

for set_label in range(1, n_matched_sets + 1):
    members = lowDim_dataset.loc[lowDim_dataset['assign'] == set_label]
    treated_outcomes = members.loc[members['A'] == 1, 'Y'].values
    control_outcomes = members.loc[members['A'] == 0, 'Y'].values

    ATE_vec.append(np.mean(treated_outcomes) - np.mean(control_outcomes))
    weights.append(len(treated_outcomes) + len(control_outcomes))

Mahalanobis_lowDim_est_ATE = np.average(ATE_vec, weights=weights)

# `start` was set in the matching cell, so this spans matching + estimation.
end = time.time()
lowDim_mahalanobis_match_runtime = end - start

In [23]:
# Total runtime = R data-frame conversion time + matching/estimation time,
# rendered as a string with three decimals.
lowDim_mahalanobis_runtime = f"{lowDim_R_runtime + lowDim_mahalanobis_match_runtime:,.3f}"
lowDim_mahalanobis_runtime

'0.409'

In [24]:
# Rebind the estimate to its three-decimal string form (used by the comparison table).
# NOTE: the name holds a str afterwards, so this cell cannot be re-run on its own.
Mahalanobis_lowDim_est_ATE = f"{Mahalanobis_lowDim_est_ATE:,.3f}"
print(Mahalanobis_lowDim_est_ATE)

2.906


#### b. High Dim Data

In [25]:
# Start the clock; the ATE cell below stops it.
start = time.time()
# Mahalanobis distances between treated and control units over every column
# except the outcome Y (formula 'A~.-Y'), then optimal full matching on them.
mahalanobis_distances = optmatch.match_on(
    Formula('A~.-Y'), data=highDim_dataset_R, method='mahalanobis'
)
full_match_Mahalanobis_factor = optmatch.fullmatch(mahalanobis_distances, data=highDim_dataset_R)
# Record each unit's matched-set label next to the data.
highDim_dataset['assign'] = list(full_match_Mahalanobis_factor)

In [26]:
# Estimate the ATE: within every matched set take (mean treated Y - mean
# control Y), then average those differences weighted by the set size.
n_matched_sets = max(list(full_match_Mahalanobis_factor))
ATE_vec, weights = [], []

for set_label in range(1, n_matched_sets + 1):
    members = highDim_dataset.loc[highDim_dataset['assign'] == set_label]
    treated_outcomes = members.loc[members['A'] == 1, 'Y'].values
    control_outcomes = members.loc[members['A'] == 0, 'Y'].values

    ATE_vec.append(np.mean(treated_outcomes) - np.mean(control_outcomes))
    weights.append(len(treated_outcomes) + len(control_outcomes))

highDim_Mahalanobis_est_ATE = np.average(ATE_vec, weights=weights)

# `start` was set in the matching cell, so this spans matching + estimation.
end = time.time()
highDim_mahalanobis_match_runtime = end - start

In [27]:
# Total runtime = R data-frame conversion time + matching/estimation time,
# rendered as a string with three decimals.
highDim_mahalanobis_runtime = f"{highDim_R_runtime + highDim_mahalanobis_match_runtime:,.3f}"
highDim_mahalanobis_runtime

'48.466'

In [28]:
# Rebind the estimate to its three-decimal string form (used by the comparison table).
# NOTE: the name holds a str afterwards, so this cell cannot be re-run on its own.
highDim_Mahalanobis_est_ATE = f"{highDim_Mahalanobis_est_ATE:,.3f}"
print(highDim_Mahalanobis_est_ATE)

-1.553


### Method 2: Propensity Score

#### a. Low Dim Data

In [29]:
# Start the clock; the ATE cell below stops it.
start = time.time()
# Euclidean distance on the single propensity_score column, then optimal full
# matching on those distances.
propensity_distances = optmatch.match_on(
    Formula('A~propensity_score'), data=lowDim_dataset_propensity_R, method='euclidean'
)
full_match_propensity_factor = optmatch.fullmatch(propensity_distances, data=lowDim_dataset_propensity_R)
# Record each unit's matched-set label next to the data.
lowDim_dataset_propensity['assign'] = list(full_match_propensity_factor)

In [30]:
# Estimate the ATE: within every matched set take (mean treated Y - mean
# control Y), then average those differences weighted by the set size.
n_matched_sets = max(list(full_match_propensity_factor))
ATE_vec, weights = [], []

for set_label in range(1, n_matched_sets + 1):
    members = lowDim_dataset_propensity.loc[lowDim_dataset_propensity['assign'] == set_label]
    treated_outcomes = members.loc[members['A'] == 1, 'Y'].values
    control_outcomes = members.loc[members['A'] == 0, 'Y'].values

    ATE_vec.append(np.mean(treated_outcomes) - np.mean(control_outcomes))
    weights.append(len(treated_outcomes) + len(control_outcomes))

lowDim_propensity_est_ATE = np.average(ATE_vec, weights=weights)

# `start` was set in the matching cell, so this spans matching + estimation.
end = time.time()
lowDim_propensity_match_runtime = end - start
    

In [31]:
# Total runtime = R data-frame conversion time + matching/estimation time,
# rendered as a string with three decimals.
lowDim_propensity_runtime = f"{lowDim_propensity_R_runtime + lowDim_propensity_match_runtime:,.3f}"
lowDim_propensity_runtime

'0.336'

In [32]:
# Rebind the estimate to its three-decimal string form (used by the comparison table).
# NOTE: the name holds a str afterwards, so this cell cannot be re-run on its own.
lowDim_propensity_est_ATE = f"{lowDim_propensity_est_ATE:,.3f}"
print(lowDim_propensity_est_ATE)

3.388


#### b. High Dim Data

In [33]:
# Start the clock; the ATE cell below stops it.
start = time.time()
# Euclidean distance on the single propensity_score column, then optimal full
# matching on those distances.
propensity_distances = optmatch.match_on(
    Formula('A~propensity_score'), data=highDim_dataset_propensity_R, method='euclidean'
)
full_match_propensity_factor = optmatch.fullmatch(propensity_distances, data=highDim_dataset_propensity_R)
# Record each unit's matched-set label next to the data.
highDim_dataset_propensity['assign'] = list(full_match_propensity_factor)

In [34]:
# Estimate the ATE: within every matched set take (mean treated Y - mean
# control Y), then average those differences weighted by the set size.
n_matched_sets = max(list(full_match_propensity_factor))
ATE_vec, weights = [], []

for set_label in range(1, n_matched_sets + 1):
    members = highDim_dataset_propensity.loc[highDim_dataset_propensity['assign'] == set_label]
    treated_outcomes = members.loc[members['A'] == 1, 'Y'].values
    control_outcomes = members.loc[members['A'] == 0, 'Y'].values

    ATE_vec.append(np.mean(treated_outcomes) - np.mean(control_outcomes))
    weights.append(len(treated_outcomes) + len(control_outcomes))

highDim_propensity_est_ATE = np.average(ATE_vec, weights=weights)

# `start` was set in the matching cell, so this spans matching + estimation.
end = time.time()
highDim_propensity_match_runtime = end - start

In [35]:
# Total runtime = R data-frame conversion time + matching/estimation time,
# rendered as a string with three decimals.
highDim_propensity_runtime = f"{highDim_propensity_R_runtime + highDim_propensity_match_runtime:,.3f}"
highDim_propensity_runtime

'5.467'

In [36]:
# Rebind the estimate to its three-decimal string form (used by the comparison table).
# NOTE: the name holds a str afterwards, so this cell cannot be re-run on its own.
highDim_propensity_est_ATE = f"{highDim_propensity_est_ATE:,.3f}"
print(highDim_propensity_est_ATE)

-3.292


### Method 3: Linear Propensity Score

#### a. Low Dim Data

In [37]:
# Start the clock; the ATE cell below stops it.
start = time.time()
# Euclidean distance on the single linear_propensity_score column, then optimal
# full matching on those distances.
linear_propensity_distances = optmatch.match_on(
    Formula('A~linear_propensity_score'), data=lowDim_dataset_linear_propensity_R, method='euclidean'
)
full_match_linear_propensity_factor = optmatch.fullmatch(
    linear_propensity_distances, data=lowDim_dataset_linear_propensity_R
)
# Record each unit's matched-set label next to the data.
lowDim_dataset_linear_propensity['assign'] = list(full_match_linear_propensity_factor)

In [38]:
# Estimate the ATE: within every matched set take (mean treated Y - mean
# control Y), then average those differences weighted by the set size.
n_matched_sets = max(list(full_match_linear_propensity_factor))
ATE_vec, weights = [], []

for set_label in range(1, n_matched_sets + 1):
    members = lowDim_dataset_linear_propensity.loc[
        lowDim_dataset_linear_propensity['assign'] == set_label
    ]
    treated_outcomes = members.loc[members['A'] == 1, 'Y'].values
    control_outcomes = members.loc[members['A'] == 0, 'Y'].values

    ATE_vec.append(np.mean(treated_outcomes) - np.mean(control_outcomes))
    weights.append(len(treated_outcomes) + len(control_outcomes))

lowDim_linear_propensity_est_ATE = np.average(ATE_vec, weights=weights)

# `start` was set in the matching cell, so this spans matching + estimation.
end = time.time()
lowDim_linear_propensity_match_runtime = end - start

In [39]:
# Total runtime = R data-frame conversion time + matching/estimation time,
# rendered as a string with three decimals.
lowDim_linear_propensity_runtime = (
    f"{lowDim_linear_propensity_R_runtime + lowDim_linear_propensity_match_runtime:,.3f}"
)
lowDim_linear_propensity_runtime

'0.306'

In [40]:
# Rebind the estimate to its three-decimal string form (used by the comparison table).
# NOTE: the name holds a str afterwards, so this cell cannot be re-run on its own.
lowDim_linear_propensity_est_ATE = f"{lowDim_linear_propensity_est_ATE:,.3f}"
print(lowDim_linear_propensity_est_ATE)

3.476


#### b. High Dim Data

In [41]:
# Start the clock; the ATE cell below stops it.
start = time.time()
# Euclidean distance on the single linear_propensity_score column, then optimal
# full matching on those distances.
linear_propensity_distances = optmatch.match_on(
    Formula('A~linear_propensity_score'), data=highDim_dataset_linear_propensity_R, method='euclidean'
)
full_match_linear_propensity_factor = optmatch.fullmatch(
    linear_propensity_distances, data=highDim_dataset_linear_propensity_R
)
# Record each unit's matched-set label next to the data.
highDim_dataset_linear_propensity['assign'] = list(full_match_linear_propensity_factor)

In [42]:
# Estimate the ATE: within every matched set take (mean treated Y - mean
# control Y), then average those differences weighted by the set size.
n_matched_sets = max(list(full_match_linear_propensity_factor))
ATE_vec, weights = [], []

for set_label in range(1, n_matched_sets + 1):
    members = highDim_dataset_linear_propensity.loc[
        highDim_dataset_linear_propensity['assign'] == set_label
    ]
    treated_outcomes = members.loc[members['A'] == 1, 'Y'].values
    control_outcomes = members.loc[members['A'] == 0, 'Y'].values

    ATE_vec.append(np.mean(treated_outcomes) - np.mean(control_outcomes))
    weights.append(len(treated_outcomes) + len(control_outcomes))

highDim_linear_propensity_est_ATE = np.average(ATE_vec, weights=weights)

# `start` was set in the matching cell, so this spans matching + estimation.
end = time.time()
highDim_linear_propensity_match_runtime = end - start

In [43]:
# Total runtime = R data-frame conversion time + matching/estimation time,
# rendered as a string with three decimals.
highDim_linear_propensity_runtime = (
    f"{highDim_linear_propensity_R_runtime + highDim_linear_propensity_match_runtime:,.3f}"
)
highDim_linear_propensity_runtime

'5.301'

In [44]:
# Rebind the estimate to its three-decimal string form (used by the comparison table).
# NOTE: the name holds a str afterwards, so this cell cannot be re-run on its own.
highDim_linear_propensity_est_ATE = f"{highDim_linear_propensity_est_ATE:,.3f}"
print(highDim_linear_propensity_est_ATE)

-3.232


## Step 4: Inverse Propensity Weighting Algorithm

### 1. Reset data & Define Functions

In [45]:
# Re-read both CSVs so the frames no longer carry the 'assign' column added
# during full matching.
lowDim_dataset, highDim_dataset = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/lowDim_dataset.csv', '../data/highDim_dataset.csv')
)

In [46]:
def ipw_ate(dataset):
    """Inverse-propensity-weighted estimate of the average treatment effect.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the columns 'A' (treatment indicator; rows with A == 1 are
        treated, all others count as controls), 'Y' (outcome) and 'weight'
        (inverse-propensity weight of each row).

    Returns
    -------
    float
        (sum of weight*Y over treated rows - sum of weight*Y over control rows)
        divided by the number of rows.

    Raises
    ------
    ZeroDivisionError
        If ``dataset`` has no rows.

    Notes
    -----
    Columns are read positionally via ``to_numpy()``, so the function works for
    any DataFrame index. The previous ``dataset['A'][i]`` lookup was label-based
    and raised KeyError unless the index was exactly 0..n-1.
    """
    is_treated = dataset['A'].to_numpy() == 1
    weighted_outcome = dataset['Y'].to_numpy() * dataset['weight'].to_numpy()

    treated = weighted_outcome[is_treated].sum()
    controlled = weighted_outcome[~is_treated].sum()

    # Un-normalised difference, printed as a diagnostic exactly as before.
    difference = float(treated - controlled)
    print(difference)
    ate = difference / dataset.shape[0]
    return ate

In [47]:
def train_params(X, A, params, dataset):
    """Grid-search a boosted propensity model and attach IPW weights.

    Parameters
    ----------
    X : array-like
        Covariate matrix used to predict treatment.
    A : array-like
        Treatment indicator (0/1), aligned row-by-row with ``X`` and ``dataset``.
    params : dict
        Parameter grid passed to ``GridSearchCV`` for ``GradientBoostingClassifier``.
    dataset : pandas.DataFrame
        Frame containing at least the column 'A'. It is NOT modified; the
        previous version aliased it and silently added 'score'/'weight'
        columns to the caller's frame.

    Returns
    -------
    (pandas.DataFrame, str)
        A copy of ``dataset`` with two extra columns — 'score' (estimated
        propensity) and 'weight' (A/score + (1-A)/(1-score)) — and the elapsed
        time in seconds formatted with three decimals.
    """
    runtime = time.time()
    gscv = GridSearchCV(GradientBoostingClassifier(),params,cv=5).fit(X, A)
    print('best_param: ', gscv.best_params_)
    print('best_score: ', gscv.best_score_)
    # GridSearchCV (refit=True by default) has already refitted the best
    # configuration on all of (X, A), so no second fit is needed.
    gbm_best = gscv.best_estimator_
    # Second column of predict_proba = probability of the second class
    # (A == 1 when A is coded 0/1).
    propensity_new = gbm_best.predict_proba(X)[:, 1]
    # Work on a copy so the caller's DataFrame keeps its original columns.
    dataset_temp = dataset.copy()
    dataset_temp['score'] = propensity_new
    # NOTE(review): a score of exactly 0 or 1 makes the weight infinite —
    # consider clipping the scores if that can occur.
    dataset_temp['weight'] = dataset_temp['A']/dataset_temp['score'] + (1 - dataset_temp['A'])/(1 - dataset_temp['score'])
    runtime = time.time()-runtime
    runtime_str = "{:,.3f}".format(runtime)
    print('runtime: ', runtime_str )
    return dataset_temp, runtime_str

### a. Low Dim Data

In [48]:
# Hyper-parameter grid for the low-dim propensity model.
params = {
    'learning_rate': [0.001, 0.01, 0.05, 0.1],
    'max_depth': [1, 2, 3],
    'n_estimators': [50, 100, 200],
    'min_samples_leaf': [1, 2],
    'min_samples_split': [2, 4],
}
# Treatment indicator and covariates (every column from the third onward).
A = lowDim_dataset['A'].values
X = lowDim_dataset.iloc[:, 2:].values
dataset_test, runtime = train_params(X, A, params, lowDim_dataset)

best_param:  {'learning_rate': 0.05, 'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
best_score:  0.7642105263157896
runtime:  53.239


In [49]:
# IPW estimate for the low-dim data as a three-decimal string
# (ipw_ate also prints its un-normalised difference).
ate_low = f"{ipw_ate(dataset_test):,.3f}"
ate_low

852.5985560478684


'1.795'

### b. High Dim Data

In [50]:
# Hyper-parameter grid for the high-dim propensity model.
params = {
    'learning_rate': [0.05, 0.1, 0.5, 1],
    'max_depth': [1, 2, 3],
    'n_estimators': [50, 100, 150],
    'min_samples_leaf': [1, 2],
    'min_samples_split': [2],
}
# Treatment indicator and covariates (every column from the third onward).
A = highDim_dataset['A'].values
X = highDim_dataset.iloc[:, 2:].values
dataset_test_high, runtime_high = train_params(X, A, params, highDim_dataset)

best_param:  {'learning_rate': 0.1, 'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
best_score:  0.595
runtime:  345.671


In [51]:
# IPW estimate for the high-dim data as a three-decimal string
# (ipw_ate also prints its un-normalised difference).
ate_high = f"{ipw_ate(dataset_test_high):,.3f}"
ate_high

-4105.192745053388


'-2.053'

## Step 5: Stratification

### 1. Reload data

In [52]:
# Re-read both CSVs so stratification starts from the files as stored on disk.
lowDim_dataset, highDim_dataset = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/lowDim_dataset.csv', '../data/highDim_dataset.csv')
)

### a. Low Dim data

In [53]:
# Inputs for the stratification estimator on the low-dim data.
N = len(lowDim_dataset)                   # number of rows
X = lowDim_dataset.iloc[:, 2:].values     # covariates: columns from the third onward
D = lowDim_dataset['A']                   # treatment indicator
Y = lowDim_dataset['Y']                   # outcome


In [54]:
# Stratification (blocking) estimate for the low-dim data. The whole pipeline
# is timed; the order of the CausalModel calls matters because each step uses
# the state produced by the previous one.
start = time.time()
# Y = outcome, D = treatment indicator, X = covariates (set in the previous cell).
cm = CausalModel(Y=Y, D=D, X=X)
    
# Estimate propensity scores, then split the sample into propensity strata.
cm.est_propensity_s()
cm.stratify_s()
# Prints the "Stratification Summary" table.
print(cm.strata)
# Estimate treatment effects via blocking; prints ATE / ATC / ATT.
cm.est_via_blocking()
print(cm.estimates)
# Keep the numeric ATE for the comparison table.
lowDim_stratification_ATE = cm.estimates['blocking']['ate']
end = time.time()
lowdim_strat_runtime = end - start


Stratification Summary

              Propensity Score         Sample Size     Ave. Propensity   Outcome
   Stratum      Min.      Max.  Controls   Treated  Controls   Treated  Raw-diff
--------------------------------------------------------------------------------
         1     0.001     0.995       363       112     0.184     0.404     4.994


Treatment Effect Estimates: Blocking

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE      2.467      0.113     21.793      0.000      2.246      2.689
           ATC      2.467      0.113     21.793      0.000      2.246      2.689
           ATT      2.467      0.113     21.793      0.000      2.246      2.689



  olscoef = np.linalg.lstsq(Z, Y)[0]


In [66]:
# Rebind runtime and ATE to three-decimal strings for the comparison table.
# NOTE: both names hold str afterwards, so this cell cannot be re-run on its own.
lowdim_strat_runtime = f"{lowdim_strat_runtime:,.3f}"
lowDim_stratification_ATE = f"{lowDim_stratification_ATE:,.3f}"
print(lowDim_stratification_ATE, lowdim_strat_runtime, sep='\n')

2.467
3.707


### b. High Dim Data

In [56]:
# Inputs for the stratification estimator on the high-dim data.
N = len(highDim_dataset)                  # number of rows
X = highDim_dataset.iloc[:, 2:].values    # covariates: columns from the third onward
D = highDim_dataset['A']                  # treatment indicator
Y = highDim_dataset['Y']                  # outcome

In [57]:
# Stratification (blocking) estimate for the high-dim data. `start` and `end`
# bracket the whole pipeline; the elapsed time is formatted in the next cell.
start = time.time()
# Y = outcome, D = treatment indicator, X = covariates (set in the previous cell).
cm = CausalModel(Y=Y, D=D, X=X)
# Estimate propensity scores, then split the sample into propensity strata.
cm.est_propensity_s()
cm.stratify_s()
# Prints the "Stratification Summary" table.
print(cm.strata)
# Estimate treatment effects via blocking; prints ATE / ATC / ATT.
cm.est_via_blocking()
print(cm.estimates)
# Keep the numeric ATE for the comparison table.
highDim_stratification_ATE = cm.estimates['blocking']['ate']
end = time.time()


Stratification Summary

              Propensity Score         Sample Size     Ave. Propensity   Outcome
   Stratum      Min.      Max.  Controls   Treated  Controls   Treated  Raw-diff
--------------------------------------------------------------------------------
         1     0.045     0.431       707       294     0.276     0.326    -2.362
         2     0.431     0.996       396       603     0.570     0.630    -1.843


Treatment Effect Estimates: Blocking

                     Est.       S.e.          z      P>|z|      [95% Conf. int.]
--------------------------------------------------------------------------------
           ATE     -2.948      0.047    -63.044      0.000     -3.039     -2.856
           ATC     -2.939      0.050    -58.573      0.000     -3.037     -2.840
           ATT     -2.959      0.047    -62.347      0.000     -3.052     -2.866



  olscoef = np.linalg.lstsq(Z, Y)[0]


In [65]:
# Elapsed time of the high-dim stratification cell (`start` / `end` are set
# there) and its ATE, both as three-decimal strings for the comparison table.
# NOTE: the ATE name holds a str afterwards, so this cell cannot be re-run on its own.
highdim_strat_runtime = f"{end - start:,.3f}"
highDim_stratification_ATE = f"{highDim_stratification_ATE:,.3f}"
print(highDim_stratification_ATE, highdim_strat_runtime, sep='\n')

-2.948
636.758


## Step 6: Comparison

In [67]:
# Column titles: one column per estimation method.
_table_headers = [
    "Metric",
    "Dimension",
    "Full Matching-\nMahalanobis",
    "Full Matching-\nPropensity score",
    "Full Matching-\nLinear Propensity Score",
    "Inverse Propensity\nWeighting",
    'Stratification',
]

# Rows: formatted ATE estimates, then runtimes, then static annotations.
table = [
    ["Best ATE score", 'Low Dim', Mahalanobis_lowDim_est_ATE, lowDim_propensity_est_ATE,
     lowDim_linear_propensity_est_ATE, ate_low, lowDim_stratification_ATE],
    ["", 'High Dim', highDim_Mahalanobis_est_ATE, highDim_propensity_est_ATE,
     highDim_linear_propensity_est_ATE, ate_high, highDim_stratification_ATE],
    ["Run Time (sec)", 'Low Dim', lowDim_mahalanobis_runtime, lowDim_propensity_runtime,
     lowDim_linear_propensity_runtime, runtime, lowdim_strat_runtime],
    ["", 'High Dim', highDim_mahalanobis_runtime, highDim_propensity_runtime,
     highDim_linear_propensity_runtime, runtime_high, highdim_strat_runtime],
    ["Computer Used", '', 'PC', 'PC', 'PC', 'PC', 'PC'],
    ["Stable/Nonstable", '', 'Stable', 'Stable', 'Stable', 'Stable', 'Stable'],
]

# Render the table as HTML in the notebook output.
display(HTML(tabulate.tabulate(table, headers=_table_headers, tablefmt='html')))

Metric,Dimension,Full Matching- Mahalanobis,Full Matching- Propensity score,Full Matching- Linear Propensity Score,Inverse Propensity Weighting,Stratification
Best ATE score,Low Dim,2.906,3.388,3.476,1.795,2.467
,High Dim,-1.553,-3.292,-3.232,-2.053,-2.948
Run Time (sec),Low Dim,0.409,0.336,0.306,53.239,3.707
,High Dim,48.466,5.467,5.301,345.671,636.758
Computer Used,,PC,PC,PC,PC,PC
Stable/Nonstable,,Stable,Stable,Stable,Stable,Stable


## Reference Papers

https://projecteuclid.org/download/pdfview_1/euclid.ss/1280841730
https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36552.pdf
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5784842/
https://www.researchgate.net/publication/8132035_Propensity_Score_Estimation_With_Boosted_Regression_for_Evaluating_Causal_Effects_in_Observational_Studies