In [1]:
import numpy as np
import rpy2
import rpy2.robjects as robjects
import pickle
from time import time
import os

from rpy2.robjects import IntVector, FloatVector, Formula
from rpy2.robjects.packages import importr
from rpy2.robjects import numpy2ri
numpy2ri.activate()

stats = importr('stats')
matching = importr('Matching')
snow = importr('snow')
set_seed = robjects.r['set.seed']

set_seed(1234); None

### Data Generation

#### Raw Data

In [2]:
# GLOBAL CONFIG

# Var count
n_vars = 10

# Data types (default is standard normal)
binary_indeces = [1, 3, 6, 8, 9]
binarize = True

# Associations between vars an treat/outcome
treat_vars = [0,1,2,3,4,5,6,7]
outcome_vars = [0,1,2,3,4,8,9,10]

# Treat/outcome generation weights
assignment_weights = np.array([0, 0.8, -0.25, 0.6, -0.4, -0.8, -0.5, 0.7])
outcome_weights = np.array([-3.85, 0.3, -0.36, -0.73, -0.2, 0.71, -0.19, 0.26])
true_treat_effect = -0.4

def generate_data(n_samples=1000):
    # Generate 10 Random Vars
    # 1-4 are confounders: associated with outcome + treatment
    # 5-7 are exposure predictors
    # 8-10 are outcome predictors
    X = np.random.normal(loc=0.0, scale=1.0, size=(n_samples, n_vars))

    # Binarize specified vars if requested.
    if binarize:
        for var in binary_indeces:
            X[:, var-1] = (X[:, var -1] > 0).astype(int)

    # Add dummy for bias param     
    X = np.hstack([np.ones((n_samples, 1)), X])
    return X

In [3]:
# DEBUG
# X = generate_data(2000)
# X.shape

#### Assignment

In [4]:
# Create the models

assignment_models={}

def nonlinear_transform(X, B, quad_indeces):
    for quad_index in quad_indeces:
        quad = X[:, quad_index]**2
        X = np.hstack([X, quad.reshape(-1, 1)])
        B = np.append(B, B[quad_index])
    
    return X, B

def nonadditive_transform(X, B, interaction_indeces, interaction_weights=None):
    for interaction_index, var_indeces in enumerate(interaction_indeces):
        int_1, int_2 = var_indeces
        interaction_val = X[:, int_1]*X[:, int_2]
        
        if not interaction_weights:
            interaction_val = interaction_val*0.5
        else:
            interaction_val = interaction_val*interaction_weights[interaction_index]
            
        X = np.hstack([X, interaction_val.reshape(-1, 1)])
        B = np.append(B, B[int_1])
    
    return X, B

# Scenario 1
assignment_models["A_add_lin"] = lambda B, X: np.dot(X, B)

# Scenario 2:     
assignment_models["B_add_mild_nlin"] = lambda B, X: np.dot(*nonlinear_transform(X, B,
                                                       quad_indeces=[2]))
# Scenario 3:
assignment_models["C_add_mod_nlin"] = lambda B, X: np.dot(*nonlinear_transform(X, B,
                                                       quad_indeces=[2, 4, 7]))
# Scenario 4:
assignment_models["D_mild_nadd_lin"] = lambda B, X: np.dot(*nonadditive_transform(X, B,
                                                       interaction_indeces=[(1,3), (2, 4), (4,5), (5,6)]))

# Scenario 5:
assignment_models["E_mild_nadd_mild_nlin"] = lambda B, X: np.dot(*nonlinear_transform(*nonadditive_transform(X, B,
                                                       interaction_indeces=[(1,3), (2, 4), (4,5), (5,6)]), quad_indeces=[2]))
# Scenario 6
assignment_models["F_mod_nadd_lin"] = lambda B, X: np.dot(*nonadditive_transform(X, B,
                                                       interaction_indeces=[(1,3), (2, 4), (3,5), (4,6), (5,7), (1,6), (2,3),
                                                                            (3,4), (4,5), (5,6)],
                                                       interaction_weights=[0.5, 0.7, 0.5, 0.7, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5]))
# Scenario 7
assignment_models["G_mod_nadd_mod_nlin"] = lambda B, X: np.dot(*nonlinear_transform(*nonadditive_transform(X, B,
                                                       interaction_indeces=[(1,3), (2, 4), (3,5), (4,6), (5,7), (1,6), (2,3),
                                                                            (3,4), (4,5), (5,6)],
                                                       interaction_weights=[0.5, 0.7, 0.5, 0.7, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5]), 
                                                                            quad_indeces=[2, 4, 7]))

assignment_model_names = ['A_add_lin', 'B_add_mild_nlin', 'C_add_mod_nlin', 'D_mild_nadd_lin',
                     'E_mild_nadd_mild_nlin', 'F_mod_nadd_lin', 'G_mod_nadd_mod_nlin']

In [5]:
# Tests 
assert(set(assignment_models["A_add_lin"](np.array([2, 0.5, 1.5]),
                                                np.array([[1, 2,4], [1, 10, 20]]))) == set([9, 37]))

assert(set(assignment_models["B_add_mild_nlin"](np.array([2, 0.5, 1.5]),
                                                np.array([[1, 2,4], [1, 10, 20]]))) == set([33, 637]))

assert(set(assignment_models["C_add_mod_nlin"](np.array([2, 0.5, 1.5, 1, 1, 1, 2, 3]),
                                                np.array([[1, 2,4,5,6,7,8,9], [1, 10, 20, 30, 40, 50, 60, 60]]))) == set([373, 13457]))

assert(set(assignment_models["D_mild_nadd_lin"](np.array([2, 0.5, 1.5, 1, 1, 1, 2, 3]),
                                                np.array([[1, 2,4,5,6,7,8,9], [1, 10, 20, 30, 40, 50, 60, 60]]))) == set([139.5, 3632]))

assert(set(assignment_models["E_mild_nadd_mild_nlin"](np.array([2, 0.5, 1.5, 1, 1, 1, 2, 3]),
                                                np.array([[1, 2,4,5,6,7,8,9], [1, 10, 20, 30, 40, 50, 60, 60]]))) == set([163.5, 4232]))


In [6]:
def get_assignments(B, X, n_samples, scenario="A_add_lin"):
    X_usable = X[:, treat_vars]
    
    # Calculate the probabilities of assignment
    linear_assignment_data = assignment_models[scenario](B, X_usable)
    p_treat = 1.0/(1+np.exp(-1*linear_assignment_data))

    # Assign
    rand = np.random.random(n_samples)
    assignments = (rand < p_treat).astype(int)
    
    return assignments

#### Outcome

In [7]:
def get_outcomes(B, X, assignments, effect=true_treat_effect):
    X_usable = X[:, outcome_vars]
    return effect*assignments + np.dot(X_usable, B)

In [8]:
# DEBUG
# assignments = get_assignments(assignment_weights, X, "mild_nonaddititive_mild_nonlinear")
# outcomes = get_outcomes(outcome_weights, X, assignments)

### Cluster Compute

Some code is going to >48 hours to ran. Lucky it's highly parellalisable so we can use a compute cluster. The one option is local to split across CPU cores. The better option is to go remote and explote 32 cores on multiple AWS machines.

On AWS, this is straightforward. Manually you need to port forward!. This allows the remote machine to connect to ports on the master via it's localhost loopback. 

```
# ~/.bash_profile
# Allow remote host to connect to local machine
# usage: $ remote_pfwd hostname {6000..6009}
function remote_pfwd {
  for i in ${@:2}
  do
    ssh -N -R $i:localhost:$i $1 &
  done
}
```
`remote_pfwd ubuntu@52.90.20.45 {11305..11307}`

In [9]:
AWS_MASTER_DNS="ip-172-31-42-147.ec2.internal"
AWS_SLAVE_1 = "ubuntu@ip-172-31-43-193.ec2.internal"
AWS_SLAVE_2 = "ubuntu@ip-172-31-81-244.ec2.internal"
AWS_MASTER_PORT_RANGE = list(range(11305, 11340))

class ClusterProvider(object):
    def __init__(self, n_nodes=8, remote_hosts=None, ports=None):
        if remote_hosts is None:
            self.cl = snow.makeSOCKcluster(["localhost"]*n_nodes)
        else:
            # Set the acceptable ports for connection
            # from the slaves
            if not ports:
                ports = AWS_MASTER_PORT_RANGE
            
            # Construct the connection string
            addresses = []
            for remote_host, n_nodes in remote_hosts:
                addresses+=[remote_host]*n_nodes
                
            self.cl = snow.makeSOCKcluster(addresses, rscript="Rscript", manual=False, snowlib="/usr/local/lib/R/site-library",
                                           port=IntVector(ports), master=AWS_MASTER_DNS, outfile="/dev/stdout", timeout=10)
    
    def get_cluster(self):
        return self.cl
    
    def kill_cluster(self):
        snow.stopCluster(self.cl)

In [10]:
# Local cluster
cluster_provider = ClusterProvider(n_nodes=8)

In [11]:
# Remote cluster
# cluster_provider = ClusterProvider(remote_hosts=[(AWS_SLAVE_1, 8)],
#                                     ports = list(range(11305, 11314)))

In [12]:
# Run this with True to kill the cluster
kill = False
if kill:
    cluster_provider.kill_cluster()

### Analysis Helpers

Estimators and data persistence code

#### Estimators

Define methods which can process outcomes, assignments and covariate data into a treatment effect estimate. 

1. Logistic Regression
2. GenMatch
3. VAE

In [13]:
def get_propensity_scores(assignments, covariate_data):
    # Setup
    y = IntVector(assignments)
    fmla = Formula('y ~ X')
    env = fmla.environment
    
    # Run propensiy regression
    env['X'] = covariate_data
    env['y'] = y
    fit = stats.glm(fmla, family="binomial")
    
    # DEBUG: fit.rx("coefficients")
    return fit.rx2("fitted.values")

In [14]:
# 1. Logisic Regression Propensity Matching
def logistic_prop_matching_est(outcomes, assignments, covariate_data, *args, **kwargs):
    
    propensity_scores = get_propensity_scores(assignments, covariate_data)
    
    # Run matching
    match_out = matching.Match(
        Y=FloatVector(outcomes),
        Tr=IntVector(assignments),
        X=propensity_scores,
        replace=True)
    
    return np.array(match_out.rx2("est").rx(1,1))[0]

In [58]:
# 2. GenMatch Matching
def genmatch_est(outcomes, assignments, covariate_data, *args, **kwargs):
    global gm_warnings
    
    # Get the singleton cluster
    cl = cluster_provider.get_cluster()
    
    if kwargs.get("genmatch_with_prop_scores", True):
        
        propensity_vars = kwargs.get("propensity_vars", None)
        if propensity_vars is None:
            propensity_vars = covariate_data
        else:
            if gm_warnings:
                print("Finding propensity scores with custom vars")
            
        propensity_scores = np.array(get_propensity_scores(assignments, propensity_vars))
        
        # Add prop scores to covar data
        matching_data = np.hstack([covariate_data, propensity_scores.reshape(-1, 1)])
    else:
        if gm_warnings:
            print("Not using prop scores")
        matching_data = covariate_data
        
    balance_vars = kwargs.get("balance_vars", None)
    if balance_vars is None:
        balance_vars = covariate_data
    else:
        if gm_warnings:
            print("Evaluating balance on custom vars")
    
    start = time()
    gen_out = matching.GenMatch(
        Tr=IntVector(assignments),
        X=matching_data,
        BalanceMatrix=balance_vars,
        print_level=0,
        cluster=cl)
    if gm_warnings:
        print("GenMatch Time: ", time() - start)
    
    match_out = matching.Match(
        Y=FloatVector(outcomes),
        Tr=IntVector(assignments),
        X=matching_data,
        replace=True,
        Weight_matrix=gen_out)
    
    gm_warnings = False # only warn once
    return np.array(match_out.rx2("est").rx(1,1))[0]

In [16]:
# DEBUG
# est = logistic_prop_matching_est(assignments, X[:, 1:]) # exclude the bias term
# np.array(est)

In [17]:
# DEBUG
# est = genmatch_est(assignments, X[:, 1:]) # exclude the bias term
# np.array(est)

In [59]:
def distributional_distance(mu1, var1, mu2, var2, metric="md"):
    # Formulas from:
    # https://en.wikipedia.org/wiki/Bhattacharyya_distance
    # https://en.wikipedia.org/wiki/Mahalanobis_distance
    
    if metric not in ["md", "bhat"]:
        raise Exception("Invalid Metric")
        
    var1 = np.exp(var1)
    var2 = np.exp(var2)
    
    V1 = np.diag(var1)
    V2 = np.diag(var2)
    
    if metric =="bhat":
        V = (V1 + V2)/2.0
    else:
        V = V1
        
    VI = np.linalg.inv(V)
    
    md = np.sqrt(np.dot(np.dot((mu1-mu2),VI),(mu1-mu2).T))
    
    if metric =="md":
        return md
    
    bhat_additive = 0.5*np.log(float(np.linalg.det(V))/np.sqrt(np.linalg.det(V1) + np.linalg.det(V2)))
    
    return ((1/8.0)*md) + bhat_additive

# def bhattacharyya_distance(mu1, var1, mu2, var2):
#     # Formula from: https://en.wikipedia.org/wiki/Bhattacharyya_distance
    
#     var1 = np.exp(var1)
#     var2 = np.exp(var2)
    
#     V1 = np.diag(var1)
#     V2 = np.diag(var2)
    
#     V = (V1 + V2)/2.0
    
#     md = (1/8.0)*mahalanobis_distance(mu1, V, mu2)
#     bat_additive = 0.5*np.log(np.linalg.det(V)/np.sqrt(np.linalg.det(V1) + np.linalg.det(V2)))
    
#     return md + bat_additive

def mahalanobis_matching(outcomes, assignments, covariate_data, covariate_covariance, *args, **kwargs):
    global gm_warnings #warn once mechanism
    
    if kwargs.get("md_with_prop_scores", True):
        
        propensity_vars = kwargs.get("propensity_vars", None)
        if propensity_vars is None:
            propensity_vars = covariate_data
        else:
            if gm_warnings:
                print("Finding propensity scores with custom vars")
            
        propensity_scores = np.array(get_propensity_scores(assignments, propensity_vars))
        prop_var = np.var(propensity_scores)
        propensity_variance = np.full((propensity_scores.shape[0], 1), prop_var)
        
        # Add prop scores to covar data
        covariate_data = np.hstack([covariate_data, propensity_scores.reshape(-1, 1)])
        covariate_covariance = np.hstack([covariate_covariance, propensity_variance])
    else:
        if gm_warnings:
            print("Not using prop scores")
    
    treated = covariate_data[assignments==1]
    treated_var = covariate_covariance[assignments==1]
    
    control = covariate_data[assignments==0]
    control_var = covariate_covariance[assignments==0]
    
    num_treated = treated.shape[0]
    num_control = control.shape[0]
    
    m_distances = np.zeros((num_treated, num_control))
    
    start = time()
    for treated_index, treat_mu in enumerate(treated):
        treat_variance = treated_var[treated_index]
        
        for control_index, control_mu in enumerate(control): 
            metric = kwargs.get("distance_metric", "md")
            control_variance = control_var[control_index]
                
            m_distances[treated_index, control_index] = \
                distributional_distance(treat_mu, treat_variance, control_mu,
                                        control_variance, metric=metric)
                
    if gm_warnings:
        elapsed = np.round(time() - start, 2)
        print("Mahalanobis D. time: ", elapsed, "seconds")
    
    # Match
    md_minimum_matches = np.argmin(m_distances, axis=1) 
    
    # Find treatment effects for the treated
    effects = outcomes[assignments==1] - outcomes[assignments==0][md_minimum_matches]
    
    gm_warnings = False # only warn once
    return np.mean(effects) #ATT

#### Data Interconnect

Code to generate experimental data and to read save and read data to/from files. Files are used to pass data from this process to DL models and back. 

In [19]:
def get_data(n_samples, assignment_model):
    X = generate_data(n_samples)
    assignments = get_assignments(assignment_weights, X,
                                  n_samples, assignment_model)

    outcomes = get_outcomes(outcome_weights, X, assignments)
    
    return assignments, outcomes, X

def get_estimate(outcomes, assignments, covar_data, method, *args, **kwargs):
    return method(outcomes, assignments, covar_data, *args, **kwargs)

In [20]:
RAW_DATA_DIR = "./Data/Raw"
PROCESSED_DATA_DIR = "./Data/Processed"

# if not os.path.exists(RAW_DATA_DIR):
#     os.mkdir("./Data")
#     os.mkdir("./Data/Raw")
    
# if not os.path.exists(PROCESSED_DATA_DIR):
#     os.mkdir("./Data")
#     os.mkdir("./Data/Processed")

In [21]:
def get_data_file_name(n_samples, model, file_num, data_suffix, data_folder="", processed=False):
    file_name = "/{}n_{}_model_{}_v_{}_{}.csv".format(
        data_folder,
        n_samples,
        model,
        file_num, 
        data_suffix)
    if not processed:
        return RAW_DATA_DIR + file_name
    
    return PROCESSED_DATA_DIR + file_name

def write_data_files(n_files, n_samples, model="A_add_lin"):

    for file_num in range(n_files): 
        assignments, outcomes, covariates = get_data(n_samples, model)
        file_prefix = get_data_file_name(n_samples, model, file_num)
        
        np.savetxt(file_prefix + "covar_data.csv", covariates, delimiter=",")
        np.savetxt(file_prefix + "outcome_data.csv", outcomes, delimiter=",")
        np.savetxt(file_prefix + "assignment_data.csv", assignments, delimiter=",")

def get_data_from_file(n_samples, model, file_num, loss_type):
    extra_data = {}
    if loss_type in ["reconstruction", "sparsity"]:
        covariate_suffix = "covar_data_{}".format(loss_type)
        covariate_file = get_data_file_name(n_samples, model, file_num, covariate_suffix, processed=True)
        covariates = np.loadtxt(covariate_file, delimiter=",")
    elif loss_type in ["vae"]:
        covariate_suffix = "covar_data"
        covariate_file = get_data_file_name(n_samples, model, file_num, 
                                            covariate_suffix, data_folder="VAE/", processed=True)
        covariates_with_std = np.loadtxt(covariate_file, delimiter=",")
        covariates = covariates_with_std[:, :4]
        extra_data["covariate_covariance"] = covariates_with_std[:, 4:]
    else:
        raise Exception("Invalid loss type")

    original_covariate_suffix = "covar_data"
    original_covariate_file = get_data_file_name(n_samples, model, file_num,
                                                 original_covariate_suffix, processed=False)

    outcome_file = get_data_file_name(n_samples, model, file_num, "outcome_data",processed=False)
    assignment_file = get_data_file_name(n_samples, model, file_num, "assignment_data",processed=False)

    
    original_covariates = np.loadtxt(original_covariate_file, delimiter=",")
    outcomes = np.loadtxt(outcome_file, delimiter=",")
    assignments = np.loadtxt(assignment_file, delimiter=",")

    return assignments, outcomes, covariates, original_covariates, extra_data

In [22]:
# Write data files for 1000 runs all models
# Careful with this, it writes ~3GB of data. 
write_files = False
if write_files:
    for model in assignment_model_names:
        write_data_files(n_files=1000, n_samples=1000, model=model)

In [23]:
def store_results_dict(results, name):
    pickle.dump(results, open("./Results/{}.p".format(name), "wb" ))
    
def retrieve_results_dict(name):
    try:
        return pickle.load(open( "./Results/{}.p".format(name), "rb" ))
    except Exception as e:
        return None

### Monte Carlo Runner Code

#### Single Simulation

Run a single model for n runs

In [64]:
def run_simulation(runs=1000, n_samples=1000,
                   assignment_model="additive_linear",
                   estimator=logistic_prop_matching_est,
                   from_files=False,
                   file_numbers=None,
                   verbose=True,
                   *args, **kwargs):
    
    global gm_warnings
    gm_warnings = True
    
    progress_tick = max(1, int(runs/10))
    results = np.zeros(runs)

    print("Simulation running. Config:")
    print("n_samples:", n_samples)
    print("assignment_model:", assignment_model)
    print("from_files:", from_files)
    if from_files:
        print("loss_type:", kwargs["loss_type"])
    
    if file_numbers is None:
        file_numbers = range(runs)
    else:
        if runs != len(file_numbers):
            raise exception("Invalid number of file numbers supplied")
        
    for i, file_number in enumerate(file_numbers):
        if from_files:
            if not "loss_type" in kwargs:
                raise Exception("Must supply loss type to read from files")
                
            assignments, outcomes, covar_data, original_covars, extra_data = get_data_from_file(n_samples,
                                                                   model=assignment_model,
                                                                   file_num=file_number,
                                                                   loss_type=kwargs["loss_type"])
            if kwargs.get("evaluate_on_original_covars", False):
                balance_vars=original_covars
            else:
                balance_vars = None
                
            
            if kwargs.get("propensity_on_original_covars", False):
                propensity_vars=original_covars
            else:
                propensity_vars = None
                
        else:
            assignments, outcomes, covar_data = get_data(n_samples, assignment_model)
            covar_data = covar_data[:, 1:] #exclude bias term
        
        results[i] = get_estimate(outcomes,
                                  assignments,
                                  covar_data,
                                  estimator,
                                  balance_vars=balance_vars,
                                  propensity_vars=propensity_vars,
                                  *args,
                                  **extra_data,
                                  **kwargs)
        
        if i%progress_tick == progress_tick-1 and verbose:
            print("Done {} of {}".format(i+1, runs))
    
    biases = (true_treat_effect-results)/true_treat_effect * 100
    errors = (true_treat_effect-results)**2
    
    bias = np.abs(np.mean(biases))
    rmse = np.mean(errors)**0.5
    
    if verbose:
        print("\nRMSE", rmse)
        print("Bias", bias)
        print("===============\n\n")
    
    return {"RMSE": rmse, "Bias": bias, "biases": biases, "errors": errors}

In [25]:
# run_simulation(runs=1, n_samples=1000, assignment_model="A_add_lin",
#               estimator=mahalanobis_matching, verbose=True, from_files=True, loss_type="vae")

In [26]:
# sim_results["biases"]

#### Simulation Battery

Run a simulation for all assignment models

In [27]:
def get_store_name(subfolder, models_being_run, est, runs, n_samples):
    # Storage
    
    if set(models_being_run) == set(assignment_model_names):
        store_name = "{}/est_{}_runs_{}_n_{}".format(
            subfolder,
            est.__name__,
            runs,
            n_samples)
    else:
        store_name = "{}/est_{}_runs_{}_n_{}_models_{}".format(
            subfolder,
            est.__name__,
            runs,
            n_samples,
            "_".join(models_being_run))
    
    return store_name

def run_test_battery(est,
                     store_name=None, 
                     runs=1000,
                     n_samples=1000,
                     models=assignment_models,
                     overwrite=False, verbosity=1,
                     *args, **kwargs):
    # Logging
    def printer(level, *args):
        if level <= verbosity:
            print(*args)
    
    # Storage config
    if store_name is None:
        if "results_subfolder" in kwargs:
            subfolder = kwargs["results_subfolder"]
        else:
            subfolder = "Original"
        store_name = get_store_name(subfolder, models, est, runs, n_samples)
        print("Results File:", store_name)
            
    results = retrieve_results_dict(store_name)

    if overwrite or (not results):
        printer(1, "No valid, existant results found. Beggining battery.\n")
        results = {}
        for model in models:
            printer(1, "Running: ", model)
            results[model] = run_simulation(
                                runs=runs,
                                n_samples=n_samples,
                                assignment_model=model,
                                estimator=est,
                                verbose=(verbosity==2),
                                *args, **kwargs)
            store_results_dict(results[model], store_name+"_checkpoint_"+model)
            printer(1, "Done.\n")

        store_results_dict(results, store_name)
    else:
        printer(1, "Displaying cached results.\n")
    
    printer(1, "Results")
    for model, results in results.items():
        printer(1, "Model: ", model)
        print(1, "Bias: ", results["Bias"])
        print(1, "RMSE: ", results["RMSE"], "\n")

### Run the Logistic Regression Battery

This one is easy. So we run on one machine.

In [28]:
run_test_battery(
    est=logistic_prop_matching_est,
    runs=1000,
    n_samples=1000)

Results File: Original/est_logistic_prop_matching_est_runs_1000_n_1000
Displaying cached results.

Results
Model:  A_add_lin
1 Bias:  0.045874914703647685
1 RMSE:  0.07310500057973227 

Model:  B_add_mild_nlin
1 Bias:  3.1844355433209786
1 RMSE:  0.06588422028138122 

Model:  C_add_mod_nlin
1 Bias:  10.094350684204597
1 RMSE:  0.07650839711310455 

Model:  D_mild_nadd_lin
1 Bias:  6.720731771408928
1 RMSE:  0.08531717119502563 

Model:  E_mild_nadd_mild_nlin
1 Bias:  10.36168716658826
1 RMSE:  0.09094245826533698 

Model:  F_mod_nadd_lin
1 Bias:  3.1228082403965436
1 RMSE:  0.07605107262377982 

Model:  G_mod_nadd_mod_nlin
1 Bias:  11.830178367664905
1 RMSE:  0.07798212919046259 



### Run the GenMatch Battery

We split this across three machines using remote clusters.

In [100]:
gm_est = genmatch_est
gm_runs = 1000
gm_n_samples = 1000
gm_models_sets = [assignment_model_names[:3], assignment_model_names[3:5], assignment_model_names[5:]]
gm_files_to_be_produced = []

for model_set in gm_models_sets:
    gm_files_to_be_produced.append(get_store_name("Original", model_set, gm_est, gm_runs, gm_n_samples))

gm_files_to_be_produced

['Original/est_genmatch_est_runs_1000_n_1000_models_A_add_lin_B_add_mild_nlin_C_add_mod_nlin',
 'Original/est_genmatch_est_runs_1000_n_1000_models_D_mild_nadd_lin_E_mild_nadd_mild_nlin',
 'Original/est_genmatch_est_runs_1000_n_1000_models_F_mod_nadd_lin_G_mod_nadd_mod_nlin']

In [101]:
run_test_battery(
    est=gm_est,
    runs=gm_runs,
    n_samples=gm_n_samples,
    models=gm_models_sets[0],
    verbosity=2)

Results File: Original/est_genmatch_est_runs_1000_n_1000_models_A_add_lin_B_add_mild_nlin_C_add_mod_nlin
Displaying cached results.

Results
Model:  A_add_lin
1 Bias:  5.5585826624331105
1 RMSE:  0.041571354846349606 

Model:  B_add_mild_nlin
1 Bias:  4.309919000663494
1 RMSE:  0.03799524129548324 

Model:  C_add_mod_nlin
1 Bias:  3.715796982487495
1 RMSE:  0.043206587873791204 



In [102]:
run_test_battery(
    est=gm_est,
    runs=gm_runs,
    n_samples=gm_n_samples,
    models=gm_models_sets[1],
    verbosity=2)

Results File: Original/est_genmatch_est_runs_1000_n_1000_models_D_mild_nadd_lin_E_mild_nadd_mild_nlin
Displaying cached results.

Results
Model:  D_mild_nadd_lin
1 Bias:  2.30672600038697
1 RMSE:  0.040751955269481575 

Model:  E_mild_nadd_mild_nlin
1 Bias:  1.6356097465092616
1 RMSE:  0.0388547493767899 



In [103]:
run_test_battery(
    est=gm_est,
    runs=gm_runs,
    n_samples=gm_n_samples,
    models=gm_models_sets[2],
    verbosity=2)

Results File: Original/est_genmatch_est_runs_1000_n_1000_models_F_mod_nadd_lin_G_mod_nadd_mod_nlin
Displaying cached results.

Results
Model:  F_mod_nadd_lin
1 Bias:  5.058677376450292
1 RMSE:  0.04404046330729526 

Model:  G_mod_nadd_mod_nlin
1 Bias:  3.185538423779952
1 RMSE:  0.04439998296725508 



#### Combine results

In [104]:
gm_combined_name = get_store_name("Original", assignment_model_names, gm_est, gm_runs, gm_n_samples)
linear_combined_name = get_store_name("Original", assignment_model_names, logistic_prop_matching_est, 1000, 1000)

In [105]:
results = {}
for file in gm_files_to_be_produced:
    results.update(retrieve_results_dict(file))

store_results_dict(results, gm_combined_name)
results

{'A_add_lin': {'RMSE': 0.041571354846349606, 'Bias': 5.5585826624331105},
 'B_add_mild_nlin': {'RMSE': 0.03799524129548324, 'Bias': 4.309919000663494},
 'C_add_mod_nlin': {'RMSE': 0.043206587873791204, 'Bias': 3.715796982487495},
 'D_mild_nadd_lin': {'RMSE': 0.040751955269481575, 'Bias': 2.30672600038697},
 'E_mild_nadd_mild_nlin': {'RMSE': 0.0388547493767899,
  'Bias': 1.6356097465092616},
 'F_mod_nadd_lin': {'RMSE': 0.04404046330729526, 'Bias': 5.058677376450292},
 'G_mod_nadd_mod_nlin': {'RMSE': 0.04439998296725508,
  'Bias': 3.185538423779952}}

### GenMatch Vs Logistic Propensity Score Matching

In [106]:
gm_results = retrieve_results_dict(gm_combined_name)
lin_results = retrieve_results_dict(linear_combined_name)

results = {
    "Linear": lin_results,
    "GenMatch": gm_results
}

for model in assignment_model_names:
    print(model, "\n")
    for matching in results.keys():
        print(matching)
        print("RMSE:", results[matching][model]["RMSE"], "Bias:", results[matching][model]["Bias"])
        
    print("==============")
    print()
    

A_add_lin 

Linear
RMSE: 0.07310500057973227 Bias: 0.045874914703647685
GenMatch
RMSE: 0.041571354846349606 Bias: 5.5585826624331105

B_add_mild_nlin 

Linear
RMSE: 0.06588422028138122 Bias: 3.1844355433209786
GenMatch
RMSE: 0.03799524129548324 Bias: 4.309919000663494

C_add_mod_nlin 

Linear
RMSE: 0.07650839711310455 Bias: 10.094350684204597
GenMatch
RMSE: 0.043206587873791204 Bias: 3.715796982487495

D_mild_nadd_lin 

Linear
RMSE: 0.08531717119502563 Bias: 6.720731771408928
GenMatch
RMSE: 0.040751955269481575 Bias: 2.30672600038697

E_mild_nadd_mild_nlin 

Linear
RMSE: 0.09094245826533698 Bias: 10.36168716658826
GenMatch
RMSE: 0.0388547493767899 Bias: 1.6356097465092616

F_mod_nadd_lin 

Linear
RMSE: 0.07605107262377982 Bias: 3.1228082403965436
GenMatch
RMSE: 0.04404046330729526 Bias: 5.058677376450292

G_mod_nadd_mod_nlin 

Linear
RMSE: 0.07798212919046259 Bias: 11.830178367664905
GenMatch
RMSE: 0.04439998296725508 Bias: 3.185538423779952



### AutoEncoder: Deep Dimensionality Reducation

First, we try a model which compresses down to 4 dimensions.

The hope is that a) smaller representation means for equal population size, we get a wider search of the space with an effect large enough to justify the loss of information from compression and that b) useful information is learnt with some noise stripped - which depends on the regularization.

Downside we lose convexity. So we may get better results than PCA reduction but no guarantees. We also have no idea a prior which the best compression is and no real way to find this. We rely on a general approx. No knowledge of the data was used to customize the encoder in this case.

There are a number of step we can take:

Architecture
* Network needs to be deep and wide enough to create representations. To shallow/small saturates. 
* Too large also leads to bad performance on smaller datasets. 
    * Often only one hidden layer. This still gives us universal approximation. But we cannot guarantee the satisfaction of constraints. 
    * More layers are acceptable and beneficial with constraints. Experimentally, deep autoencoders yield much better compression than corresponding shallow or linear autoencoders (Hinton and Salakhutdinov, 2006). 
    * Depth can also reduce both amount of data and the computational cost of training. 
* **To try: Overcomplete autoencodeer with more dimensions?**
    * Consider using a softmax activation on this layer to get a soft matching model.

LR rate
* Use annealing to rapidly descend in correct direction then slow down to converge. 
* Consider using GSD with restarts. 
* Stability in Stoch Grad Desc is helped by large batches size.

Regularization
* ADAM built in weight decay
* Try:
    * Sparsity encoder to get useful latent H
    * **Denoising AutoEncoder to learn to remove noise (is there noise in our data?**
    
#### Inital Run
The network is 16/8/4. The initial results do not look promising.

Thinking: same population size, with smaller dimension means searching more of the space. This may have an advantage large enough to justify the loss of information from compression given the non-linearity of the compression results in less info.

Autoencoder test run shows potential. Faster times, result is not off by much. But the model at this point is poor.

#### First Autoencoder run

In [50]:
sim_results = run_simulation(runs=50, n_samples=1000, assignment_model="A_add_lin",
              estimator=genmatch_est, verbose=True, from_files=True)

GenMatch Time:  14.542949914932251
GenMatch Time:  12.573060274124146
GenMatch Time:  4.643509149551392
GenMatch Time:  10.866560935974121
GenMatch Time:  9.31788420677185
Done 5 of 50
GenMatch Time:  7.009507894515991
GenMatch Time:  17.980069160461426
GenMatch Time:  10.20099687576294
GenMatch Time:  10.495553016662598
GenMatch Time:  8.953205823898315
Done 10 of 50
GenMatch Time:  7.9619269371032715
GenMatch Time:  6.404529809951782
GenMatch Time:  7.565530300140381
GenMatch Time:  10.25570797920227
Increasing memory because of ties: allocating a matrix of size 3 times 200000 doubles.

I would be faster with the ties=FALSE option.

Increasing memory because of ties: allocating a matrix of size 3 times 300000 doubles.

I would be faster with the ties=FALSE option.

GenMatch Time:  20.35411787033081
Increasing memory because of ties: allocating a matrix of size 3 times 200000 doubles.

I would be faster with the ties=FALSE option.

Increasing memory because of ties: allocating a matri

In [53]:
print("Bias", sim_results["Bias"])
print("RMSE", sim_results["RMSE"])
print("Bias", np.std(sim_results["biases"]))

Bias 7.702298102804893
RMSE 0.09401208388296421
Bias 22.20510298752568


#### Improved DL Autoencoder

In [65]:
sim_results = run_simulation(runs=50, n_samples=1000, assignment_model="A_add_lin",
              estimator=genmatch_est, verbose=True, from_files=True, loss_type="reconstruction")

GenMatch Time:  12.774646997451782
GenMatch Time:  10.59249472618103
GenMatch Time:  7.4202821254730225
GenMatch Time:  7.195416688919067
GenMatch Time:  9.110630989074707
Done 5 of 50
GenMatch Time:  13.191488981246948
GenMatch Time:  7.4685118198394775
GenMatch Time:  12.925118923187256
GenMatch Time:  11.985275268554688
GenMatch Time:  12.163190841674805
Done 10 of 50
GenMatch Time:  16.372169017791748
GenMatch Time:  11.672435998916626
GenMatch Time:  13.543527126312256
GenMatch Time:  9.46323299407959
GenMatch Time:  8.263458251953125
Done 15 of 50
GenMatch Time:  12.385226964950562
GenMatch Time:  6.942894697189331
GenMatch Time:  12.462768077850342
GenMatch Time:  10.957695960998535
GenMatch Time:  19.400758028030396
Done 20 of 50
GenMatch Time:  12.536571979522705
GenMatch Time:  11.522716999053955
GenMatch Time:  10.43092393875122
GenMatch Time:  14.181069135665894
GenMatch Time:  8.839200973510742
Done 25 of 50
GenMatch Time:  9.25986909866333
GenMatch Time:  17.5240621566772

In [67]:
print("Bias", sim_results["Bias"])
print("RMSE", sim_results["RMSE"])
print("Bias", np.std(sim_results["biases"]))

Bias 0.10453957073077
RMSE 0.06900712191994966
Bias 17.251463741022853


### Autoencoder Test Battery

In [None]:
ae_runs = 200

#### Pure Recon

#### Config 1
Pure reconstruction with propensity score estimates

In [47]:
run_test_battery(
    est=genmatch_est,
    runs=ae_runs,
    n_samples=1000,
    from_files=True,
    loss_type="reconstruction",
    results_subfolder="AE/Reconstruction",
    verbosity=2)

Results File: AE/Reconstruction/est_genmatch_est_runs_200_n_1000
No valid, existant results found. Beggining battery.

Running:  A_add_lin
Simulation running. Config:
n_samples: 1000
assignment_model: A_add_lin
from_files: True
loss_type: reconstruction
GenMatch Time:  9.870866775512695
Done 20 of 200
Done 40 of 200
Done 60 of 200
Done 80 of 200
Done 100 of 200
Done 120 of 200
Done 140 of 200
Done 160 of 200
Done 180 of 200
Done 200 of 200

RMSE 0.07940586362085975
Bias 0.4982566498005895


Done.

Running:  B_add_mild_nlin
Simulation running. Config:
n_samples: 1000
assignment_model: B_add_mild_nlin
from_files: True
loss_type: reconstruction
GenMatch Time:  10.684785842895508
Done 20 of 200
Done 40 of 200
Done 60 of 200
Done 80 of 200
Done 100 of 200
Done 120 of 200
Done 140 of 200
Done 160 of 200
Done 180 of 200
Done 200 of 200

RMSE 0.08253689905638634
Bias 5.619851855780028


Done.

Running:  C_add_mod_nlin
Simulation running. Config:
n_samples: 1000
assignment_model: C_add_mod_nlin

#### Config 2
Pure reconstruction *without* propensity score estimates

In [None]:
run_test_battery(
    est=genmatch_est,
    runs=ae_runs,
    n_samples=1000,
    from_files=True,
    loss_type="reconstruction",
    results_subfolder="AE/Reconstruction/nopropscores",
    genmatch_with_prop_scores=False,
    verbosity=2)

Results File: AE/Reconstruction/nopropscores/est_genmatch_est_runs_200_n_1000
No valid, existant results found. Beggining battery.

Running:  A_add_lin
Simulation running. Config:
n_samples: 1000
assignment_model: A_add_lin
from_files: True
loss_type: reconstruction
Not using prop scores
GenMatch Time:  7.852561950683594
Done 20 of 200
Done 40 of 200
Done 60 of 200
Done 80 of 200
Done 100 of 200
Done 120 of 200
Done 140 of 200
Done 160 of 200
Done 180 of 200
Done 200 of 200

RMSE 0.07368715901276338
Bias 0.5388832593158581


Done.

Running:  B_add_mild_nlin
Simulation running. Config:
n_samples: 1000
assignment_model: B_add_mild_nlin
from_files: True
loss_type: reconstruction
Not using prop scores
GenMatch Time:  12.392183065414429
Done 20 of 200
Done 40 of 200
Done 60 of 200
Done 80 of 200
Done 100 of 200
Done 120 of 200
Done 140 of 200
Done 160 of 200
Done 180 of 200
Done 200 of 200

RMSE 0.0816027177764639
Bias 3.276210486566693


Done.

Running:  C_add_mod_nlin
Simulation running. 

#### Config 3
Pure reconstruction, evaluating balance on uncompressed data, *without* propensity score.

In [None]:
run_test_battery(
    est=genmatch_est,
    runs=ae_runs,
    n_samples=1000,
    from_files=True,
    loss_type="reconstruction",
    results_subfolder="AE/Reconstruction/evalonoriginal",
    evaluate_on_original_covars=True,
    genmatch_with_prop_scores=False,
    verbosity=2)

#### Config 4
Pure reconstruction *with* propensity score derived from uncompressed data. Evaluating on uncompressed.

In [None]:
run_test_battery(
    est=genmatch_est,
    runs=ae_runs,
    n_samples=1000,
    from_files=True,
    loss_type="reconstruction",
    results_subfolder="AE/Reconstruction/evalonoriginal_withp",
    evaluate_on_original_covars=True,
    propensity_on_original_covars=True,
    verbosity=2)

#### Config 5
Pure reconstruction *with* propensity score derived from uncompressed data. Evaluating on compressed.

In [None]:
run_test_battery(
    est=genmatch_est,
    runs=ae_runs,
    n_samples=1000,
    from_files=True,
    loss_type="reconstruction",
    results_subfolder="AE/Reconstruction/withp",
    evaluate_on_original_covars=False,
    propensity_on_original_covars=True,
    verbosity=2)

#### Sparse 

#### Config 1

Sparse reconstruction *without* propensity score.

In [None]:
run_test_battery(
    est=genmatch_est,
    runs=ae_runs,
    n_samples=1000,
    from_files=True,
    loss_type="sparsity",
    results_subfolder="AE/Sparsity",
    genmatch_with_prop_scores=False,
    verbosity=2)

#### Config 2

Sparse with prop scores

In [None]:
run_test_battery(
    est=genmatch_est,
    runs=ae_runs,
    n_samples=1000,
    from_files=True,
    loss_type="sparsity",
    results_subfolder="AE/Sparsity/withp",
    genmatch_with_prop_scores=True,
    propensity_on_original_covars=True,
    verbosity=2)

#### Config 3
Sparse with prop scores, eval on original

In [None]:
run_test_battery(
    est=genmatch_est,
    runs=ae_runs,
    n_samples=1000,
    from_files=True,
    loss_type="sparsity",
    results_subfolder="AE/Sparsity/evalonoriginal_withp",
    genmatch_with_prop_scores=True,
    propensity_on_original_covars=True,
    evaluate_on_original_covars=True,
    verbosity=2)

### Results

In [None]:
gm_results = retrieve_results_dict(gm_combined_name)
lin_results = retrieve_results_dict(linear_combined_name)

gm_ae_recon_results = retrieve_results_dict(
    get_store_name("AE/Reconstruction", assignment_model_names, genmatch_est, ae_runs, 1000))

gm_ae_recon_no_prop_score_results = retrieve_results_dict(
    get_store_name("AE/Reconstruction/nopropscores", assignment_model_names, genmatch_est, ae_runs, 1000))

gm_ae_recon_original_fitness_results = retrieve_results_dict(
    get_store_name("AE/Reconstruction/evalonoriginal", assignment_model_names, genmatch_est, ae_runs, 1000))

gm_ae_recon_original_fitness_with_prop_score_results = retrieve_results_dict(
    get_store_name("AE/Reconstruction/evalonoriginal_withp", assignment_model_names, genmatch_est, ae_runs, 1000))

gm_ae_recon_with_prop_score_results = retrieve_results_dict(
    get_store_name("AE/Reconstruction/withp", assignment_model_names, genmatch_est, ae_runs, 1000))


###

gm_ae_sparse_results = retrieve_results_dict(
    get_store_name("AE/Sparsity", assignment_model_names, genmatch_est, ae_runs, 1000))

gm_ae_sparse_original_fitness_with_prop_score_results = retrieve_results_dict(
    get_store_name("AE/Sparsity/evalonoriginal_withp", assignment_model_names, genmatch_est, ae_runs, 1000))

gm_ae_sparse_with_prop_score_results = retrieve_results_dict(
    get_store_name("AE/Sparsity/withp", assignment_model_names, genmatch_est, ae_runs, 1000))



results = {
    "Linear": lin_results,
    "GenMatch": gm_results,
    "GenMatch AE Recon": gm_ae_recon_results,
    "GenMatch AE Recon, No P Score": gm_ae_recon_no_prop_score_results,
    "GenMatch AE Recon, Org. Fitness": gm_ae_recon_original_fitness_results,
    "GenMatch AE Recon, Org. Fitness, With P": gm_ae_recon_original_fitness_with_prop_score_results,
    "GenMatch AE Recon, With P": gm_ae_recon_with_prop_score_results,
    ###
    "GenMatch AE Sparse": gm_ae_sparse_results,
    "GenMatch AE Sparse, Org. Fitness, With P": gm_ae_sparse_original_fitness_with_prop_score_results,
    "GenMatch AE Sparse, With P": gm_ae_sparse_with_prop_score_results,
}

for model in assignment_model_names:
    print(model, "\n")
    for matching in results.keys():
        print(matching)
        print("RMSE:", np.round(results[matching][model]["RMSE"], 4), "Bias:",
              np.round(results[matching][model]["Bias"], 4))
        
    print("==============")
    print()
    

### VAE

Learning a latent variable representation under an assumption of a generative process. Highly non-linear clustering. 

- Mean field vs amortized inference for the latent variables? We get a different latent distribution for each data point. We hope that distributions near to each other are similar in terms of underlying generation process.

- Train on which data?
  - Only treated? Then encode for similarity?
  - On both?
  
  
- Semi-supervised?
  - Conceptually similar to training on one class. 
  - We give it information to separate the classes such that similar ones after this have greater weight.

Matching Metric?
- Mahalanobis - distance from distro to point or two points in same distro
- Bhattacharyya distance - distance between distributions (overlaps)
- KL Divergence
- Direct matching on the latent vars (with GenMatch)


Todo:
 - Try training on only the treated data
 - Try semi-supervised


In [112]:
run_test_battery(
    est=genmatch_est,
    runs=80,
    n_samples=1000,
    from_files=True,
    loss_type="vae",
    results_subfolder="VAE/",
    genmatch_with_prop_scores=False,
    verbosity=2)

Results File: VAE//est_genmatch_est_runs_80_n_1000
No valid, existant results found. Beggining battery.

Running:  A_add_lin
Simulation running. Config:
n_samples: 1000
assignment_model: A_add_lin
from_files: True
loss_type: vae
Not using prop scores
GenMatch Time:  5.832606554031372
Done 8 of 80
Done 16 of 80
Done 24 of 80
Done 32 of 80
Done 40 of 80
Done 48 of 80
Done 56 of 80
Done 64 of 80
Done 72 of 80
Done 80 of 80

RMSE 0.0723205572405488
Bias 9.106490903045117


Done.

Running:  B_add_mild_nlin
Simulation running. Config:
n_samples: 1000
assignment_model: B_add_mild_nlin
from_files: True
loss_type: vae
Not using prop scores
GenMatch Time:  7.0213463306427
Done 8 of 80
Done 16 of 80
Done 24 of 80
Done 32 of 80
Done 40 of 80
Done 48 of 80
Done 56 of 80
Done 64 of 80
Done 72 of 80
Done 80 of 80

RMSE 0.07910459036883763
Bias 10.795659318918672


Done.

Running:  C_add_mod_nlin
Simulation running. Config:
n_samples: 1000
assignment_model: C_add_mod_nlin
from_files: True
loss_type: v

In [113]:
run_test_battery(
    est=genmatch_est,
    runs=80,
    n_samples=1000,
    from_files=True,
    loss_type="vae",
    results_subfolder="VAE/withp",
    genmatch_with_prop_scores=True,
    propensity_on_original_covars=True,
    verbosity=2)

Results File: VAE/withp/est_genmatch_est_runs_80_n_1000
No valid, existant results found. Beggining battery.

Running:  A_add_lin
Simulation running. Config:
n_samples: 1000
assignment_model: A_add_lin
from_files: True
loss_type: vae
Finding propensity scores with custom vars
GenMatch Time:  8.643538475036621
Done 8 of 80
Done 16 of 80
Done 24 of 80
Done 32 of 80
Done 40 of 80
Done 48 of 80
Done 56 of 80
Done 64 of 80
Done 72 of 80
Done 80 of 80

RMSE 0.05328860725062369
Bias 3.212292740875502


Done.

Running:  B_add_mild_nlin
Simulation running. Config:
n_samples: 1000
assignment_model: B_add_mild_nlin
from_files: True
loss_type: vae
Finding propensity scores with custom vars
GenMatch Time:  7.035321235656738
Done 8 of 80
Done 16 of 80
Done 24 of 80
Done 32 of 80
Done 40 of 80
Done 48 of 80
Done 56 of 80
Done 64 of 80
Done 72 of 80
Done 80 of 80

RMSE 0.05548976643655791
Bias 3.056263112295894


Done.

Running:  C_add_mod_nlin
Simulation running. Config:
n_samples: 1000
assignment_mo

In [35]:
run_test_battery(
    est=genmatch_est,
    runs=80,
    n_samples=1000,
    from_files=True,
    loss_type="vae",
    results_subfolder="VAE/evalonoriginal_withp",
    genmatch_with_prop_scores=True,
    propensity_on_original_covars=True,
    evaluate_on_original_covars=True,
    verbosity=2)

Results File: VAE/evalonoriginal_withp/est_genmatch_est_runs_80_n_1000
Displaying cached results.

Results
Model:  A_add_lin
1 Bias:  2.423558103697892
1 RMSE:  0.04828732501317596 

Model:  B_add_mild_nlin
1 Bias:  3.2575472113723536
1 RMSE:  0.05197959450036149 

Model:  C_add_mod_nlin
1 Bias:  1.1675734397168027
1 RMSE:  0.053871266715644614 

Model:  D_mild_nadd_lin
1 Bias:  2.638277840091008
1 RMSE:  0.04792808356505064 

Model:  E_mild_nadd_mild_nlin
1 Bias:  1.6225481932649424
1 RMSE:  0.05501783586090174 

Model:  F_mod_nadd_lin
1 Bias:  2.9046458491052567
1 RMSE:  0.057462759930764475 

Model:  G_mod_nadd_mod_nlin
1 Bias:  1.0888323352423446
1 RMSE:  0.05458601539759988 



#### MD Distance

#### Config 1

MD distance plain

In [36]:
run_test_battery(
    est=mahalanobis_matching,
    runs=80,
    n_samples=1000,
    from_files=True,
    loss_type="vae",
    results_subfolder="VAE/md",
    verbosity=2)

Results File: VAE/md/est_mahalanobis_matching_runs_80_n_1000
No valid, existant results found. Beggining battery.

Running:  A_add_lin
Simulation running. Config:
n_samples: 1000
assignment_model: A_add_lin
from_files: True
loss_type: vae
Mahalanobis D. time:  5.35 seconds
Mahalanobis D. time:  5.45 seconds
Mahalanobis D. time:  5.56 seconds
Mahalanobis D. time:  5.09 seconds
Mahalanobis D. time:  5.62 seconds
Mahalanobis D. time:  6.9 seconds
Mahalanobis D. time:  6.61 seconds
Mahalanobis D. time:  5.62 seconds
Done 8 of 80
Mahalanobis D. time:  5.18 seconds
Mahalanobis D. time:  4.88 seconds
Mahalanobis D. time:  5.07 seconds
Mahalanobis D. time:  5.22 seconds
Mahalanobis D. time:  5.13 seconds
Mahalanobis D. time:  5.26 seconds
Mahalanobis D. time:  5.05 seconds
Mahalanobis D. time:  4.91 seconds
Done 16 of 80
Mahalanobis D. time:  5.08 seconds
Mahalanobis D. time:  4.93 seconds
Mahalanobis D. time:  5.09 seconds
Mahalanobis D. time:  4.95 seconds
Mahalanobis D. time:  5.08 seconds


Mahalanobis D. time:  5.21 seconds
Done 48 of 80
Mahalanobis D. time:  5.3 seconds
Mahalanobis D. time:  5.43 seconds
Mahalanobis D. time:  4.92 seconds
Mahalanobis D. time:  4.96 seconds
Mahalanobis D. time:  5.32 seconds
Mahalanobis D. time:  5.15 seconds
Mahalanobis D. time:  5.56 seconds
Mahalanobis D. time:  5.08 seconds
Done 56 of 80
Mahalanobis D. time:  4.91 seconds
Mahalanobis D. time:  4.98 seconds
Mahalanobis D. time:  5.23 seconds
Mahalanobis D. time:  5.25 seconds
Mahalanobis D. time:  5.42 seconds
Mahalanobis D. time:  5.05 seconds
Mahalanobis D. time:  5.07 seconds
Mahalanobis D. time:  5.0 seconds
Done 64 of 80
Mahalanobis D. time:  5.54 seconds
Mahalanobis D. time:  5.27 seconds
Mahalanobis D. time:  5.27 seconds
Mahalanobis D. time:  5.25 seconds
Mahalanobis D. time:  5.12 seconds
Mahalanobis D. time:  5.12 seconds
Mahalanobis D. time:  5.29 seconds
Mahalanobis D. time:  5.17 seconds
Done 72 of 80
Mahalanobis D. time:  5.23 seconds
Mahalanobis D. time:  5.54 seconds
M

Mahalanobis D. time:  5.2 seconds
Mahalanobis D. time:  5.26 seconds
Done 16 of 80
Mahalanobis D. time:  5.14 seconds
Mahalanobis D. time:  6.13 seconds
Mahalanobis D. time:  5.22 seconds
Mahalanobis D. time:  5.3 seconds
Mahalanobis D. time:  5.35 seconds
Mahalanobis D. time:  5.69 seconds
Mahalanobis D. time:  5.79 seconds
Mahalanobis D. time:  5.21 seconds
Done 24 of 80
Mahalanobis D. time:  5.36 seconds
Mahalanobis D. time:  5.4 seconds
Mahalanobis D. time:  5.55 seconds
Mahalanobis D. time:  5.98 seconds
Mahalanobis D. time:  6.07 seconds
Mahalanobis D. time:  6.24 seconds
Mahalanobis D. time:  6.51 seconds
Mahalanobis D. time:  6.03 seconds
Done 32 of 80
Mahalanobis D. time:  7.03 seconds
Mahalanobis D. time:  5.95 seconds
Mahalanobis D. time:  4.87 seconds
Mahalanobis D. time:  5.09 seconds
Mahalanobis D. time:  5.1 seconds
Mahalanobis D. time:  6.23 seconds
Mahalanobis D. time:  6.16 seconds
Mahalanobis D. time:  5.27 seconds
Done 40 of 80
Mahalanobis D. time:  5.68 seconds
Mah

#### Config 2
MD distance with propensity scores, eval on original

In [45]:
run_test_battery(
    est=mahalanobis_matching,
    runs=80,
    n_samples=1000,
    from_files=True,
    loss_type="vae",
    results_subfolder="VAE/md_withp",
    md_with_prop_scores=True,
    propensity_on_original_covars=True,
    verbosity=2)

Results File: VAE/md_withp/est_mahalanobis_matching_runs_80_n_1000
No valid, existant results found. Beggining battery.

Running:  A_add_lin
Simulation running. Config:
n_samples: 1000
assignment_model: A_add_lin
from_files: True
loss_type: vae
Finding propensity scores with custom vars
Mahalanobis D. time:  4.12 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.18 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.2 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.13 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.23 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.15 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.31 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.19 seconds
Done 8 of 80
Finding propensity scores with custom vars
Mahalanobis D. time:  4.33 seconds
Finding propensity scores with custom var

Mahalanobis D. time:  5.22 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  5.19 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  5.73 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  5.82 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  6.0 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  5.37 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  5.47 seconds
Done 24 of 80
Finding propensity scores with custom vars
Mahalanobis D. time:  5.17 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  5.11 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  5.18 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  5.17 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  5.18 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  5.15 seconds
Finding propensi

Mahalanobis D. time:  4.14 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.06 seconds
Done 40 of 80
Finding propensity scores with custom vars
Mahalanobis D. time:  4.11 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.2 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.03 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.15 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.12 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.16 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.18 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.19 seconds
Done 48 of 80
Finding propensity scores with custom vars
Mahalanobis D. time:  4.3 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.19 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.09 seconds
Fin

Mahalanobis D. time:  4.25 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.12 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.08 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.24 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.15 seconds
Done 64 of 80
Finding propensity scores with custom vars
Mahalanobis D. time:  4.29 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.25 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.09 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.16 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.12 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.26 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.09 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.19 seconds
Done 72 of 80
F

Mahalanobis D. time:  4.13 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.02 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  3.99 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.1 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.04 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.15 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.07 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  3.96 seconds
Done 8 of 80
Finding propensity scores with custom vars
Mahalanobis D. time:  4.59 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.16 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.1 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.12 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.04 seconds
Finding propensity

Mahalanobis D. time:  4.12 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.1 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.14 seconds
Done 24 of 80
Finding propensity scores with custom vars
Mahalanobis D. time:  4.08 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.09 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  3.97 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.13 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.05 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.11 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.1 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.13 seconds
Done 32 of 80
Finding propensity scores with custom vars
Mahalanobis D. time:  4.12 seconds
Finding propensity scores with custom vars
Mahalanobis D. time:  4.12 seconds
Fin

#### Config 3

**Bhattacharyya Distance** distance

In [66]:
bhat_runs = 100
# files = np.random.choice(list(range(100)), bhat_runs, replace=False)

run_test_battery(
    est=mahalanobis_matching,
    runs=bhat_runs,
    n_samples=1000,
    from_files=True,
    loss_type="vae",
    results_subfolder="VAE/bhat",
    distance_metric="bhat",
    verbosity=2)

Results File: VAE/bhat/est_mahalanobis_matching_runs_100_n_1000
No valid, existant results found. Beggining battery.

Running:  A_add_lin
Simulation running. Config:
n_samples: 1000
assignment_model: A_add_lin
from_files: True
loss_type: vae
Mahalanobis D. time:  11.44 seconds
Done 10 of 100
Done 20 of 100
Done 30 of 100
Done 40 of 100
Done 50 of 100
Done 60 of 100
Done 70 of 100
Done 80 of 100
Done 90 of 100
Done 100 of 100

RMSE 0.07975029888054605
Bias 12.037355157225393


Done.

Running:  B_add_mild_nlin
Simulation running. Config:
n_samples: 1000
assignment_model: B_add_mild_nlin
from_files: True
loss_type: vae
Mahalanobis D. time:  11.94 seconds
Done 10 of 100
Done 20 of 100
Done 30 of 100
Done 40 of 100
Done 50 of 100
Done 60 of 100
Done 70 of 100
Done 80 of 100
Done 90 of 100
Done 100 of 100

RMSE 0.09079294477803893
Bias 13.685622183383138


Done.

Running:  C_add_mod_nlin
Simulation running. Config:
n_samples: 1000
assignment_model: C_add_mod_nlin
from_files: True
loss_type: 