In [1]:
#!pip install -e ../Maccabee > /dev/null

In [2]:
# %load_ext autoreload
# %autoreload 2

In [3]:
from maccabee.data_analysis.data_metrics import AXES_AND_METRIC_NAMES
from maccabee.constants import Constants
import maccabee.data_sources as data_sources
from maccabee.parameters import build_default_parameters

from maccabee.data_generation import DataGeneratingProcess, SampledDataGeneratingProcess, data_generating_method

# benchmarks
from maccabee.benchmarking import benchmark_model_using_concrete_dgp
from maccabee.benchmarking import benchmark_model_using_sampled_dgp
from maccabee.benchmarking import benchmark_model_using_sampled_dgp_grid

# genmatch
from maccabee.examples.genmatch import GenmatchDataGeneratingProcess, build_genmatch_datasource, GENMATCH_SPECS
from maccabee.examples.genmatch import LogisticPropensityMatchingCausalModel

In [4]:
import numpy as np
from collections import defaultdict
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Parameter overrides shared by every config below: a purely linear outcome
# mechanism, no treatment-effect heterogeneity, no confounder alignment,
# propensity scores bounded to [0.05, 0.95] and tail-thickness values of 1000.
BASE_GENMATCH_DGP_PARAMS = {
    "OUTCOME_MECHANISM_COVARIATE_SELECTION_PROBABILITY": {
        "LINEAR": 0.7,
        "POLY_QUAD": 0,
        "POLY_CUBIC": 0,
        "STEP_JUMP": 0,
        "STEP_KINK": 0,
        "INTERACTION_TWO_WAY": 0,
        "INTERACTION_THREE_WAY": 0,
    },
    "TREATMENT_EFFECT_HETEROGENEITY": 0,
    "ACTUAL_CONFOUNDER_ALIGNMENT": 0,
    "MIN_PROPENSITY_SCORE": 0.05,
    "MAX_PROPENSITY_SCORE": 0.95,
    "OUTCOME_NOISE_TAIL_THICKNESS": 1000,
    "SUBFUNCTION_CONSTANT_TAIL_THICKNESS": 1000,
    "TREATMENT_EFFECT_TAIL_THICKNESS": 1000,
}


def _build_genmatch_overrides(poly_quad=0, poly_cubic=0, interaction_two_way=0.0):
    """Return the base overrides plus a treatment-mechanism selection table.

    The base parameters are copied one nesting level deep, so the returned
    dict does not share its nested outcome-mechanism dict with
    ``BASE_GENMATCH_DGP_PARAMS`` or with any other config. (A plain
    ``dict.copy()`` is shallow and would alias that nested dict.)

    Args:
        poly_quad: selection probability for the "POLY_QUAD" term type.
        poly_cubic: selection probability for the "POLY_CUBIC" term type.
        interaction_two_way: selection probability for "INTERACTION_TWO_WAY".

    Returns:
        A new dict holding every base parameter followed by the
        "TREAT_MECHANISM_COVARIATE_SELECTION_PROBABILITY" entry. "LINEAR" is
        fixed at 0.7; the step and three-way interaction terms are fixed at 0.
    """
    overrides = {
        name: dict(value) if isinstance(value, dict) else value
        for name, value in BASE_GENMATCH_DGP_PARAMS.items()
    }
    overrides["TREAT_MECHANISM_COVARIATE_SELECTION_PROBABILITY"] = {
        "LINEAR": 0.7,
        "POLY_QUAD": poly_quad,
        "POLY_CUBIC": poly_cubic,
        "STEP_JUMP": 0,
        "STEP_KINK": 0,
        "INTERACTION_TWO_WAY": interaction_two_way,
        "INTERACTION_THREE_WAY": 0,
    }
    return overrides


# NOTE(review): the A/C/G/X labels presumably mirror the keys of
# GENMATCH_SPECS -- confirm against maccabee.examples.genmatch.
# The treatment mechanism moves from linear-only (A) to progressively more
# quadratic, cubic and two-way interaction terms (C, G, X).
A_param_overrides = _build_genmatch_overrides()
C_param_overrides = _build_genmatch_overrides(poly_quad=0.3)
G_param_overrides = _build_genmatch_overrides(
    poly_quad=0.75, interaction_two_way=0.1)
X_param_overrides = _build_genmatch_overrides(
    poly_quad=0.99, poly_cubic=0.99, interaction_two_way=0.99)

In [6]:
# Data metrics to collect during the benchmarks, grouped by the data axis
# they belong to. Each value lists the metric names requested for that axis.
_axis = Constants.AxisNames

DATA_METRICS_SPEC = {
    _axis.OUTCOME_NONLINEARITY: ["Lin r2(X_obs, Y)", "Lin r2(X_true, Y)"],
    _axis.TREATMENT_NONLINEARITY: [
        "Lin r2(X_obs, Treat Logit)",
        "Lin r2(X_true, Treat Logit)",
        "Log r2(X_obs, T)",
    ],
    _axis.PERCENT_TREATED: ["Percent(T==1)"],
    _axis.BALANCE: ["Wass dist X_true: T=1<->T=0"],
    _axis.ALIGNMENT: ["Lin r2(Y, Treat Logit)", "Lin r2(Y0, Treat Logit)"],
    _axis.TE_HETEROGENEITY: ["std(TE)/std(Y)"],
}

In [7]:
# Parallelism settings shared by both benchmark cells below.
N_THREADS = 1  # forwarded as `n_threads`
N_CORES = 7    # forwarded as `n_jobs`

## Run MC Benchmarks

In [8]:
 %%time

# When True, data metrics are collected alongside the performance metrics.
CONCRETE_IN_ANALYSIS_MODE = True

# Concrete Genmatch benchmark: a fixed DGP built from the "G" spec.
dgp = GenmatchDataGeneratingProcess(
    *GENMATCH_SPECS["G"],
    n_observations=1000,
    data_analysis_mode=CONCRETE_IN_ANALYSIS_MODE,
)

# The benchmark returns aggregated and raw performance metrics followed by
# aggregated and raw data metrics, in that order.
(concrete_perf_agg,
 concrete_perf_raw,
 concrete_data_agg,
 concrete_data_raw) = benchmark_model_using_concrete_dgp(
    dgp=dgp,
    model_class=LogisticPropensityMatchingCausalModel,
    estimand=Constants.Model.ATT_ESTIMAND,
    num_sampling_runs_per_dgp=20,
    num_samples_from_dgp=63,
    data_analysis_mode=CONCRETE_IN_ANALYSIS_MODE,
    data_metrics_spec=DATA_METRICS_SPEC,
    n_jobs=N_CORES,
    n_threads=N_THREADS,
)

Process ForkPoolWorker-1010:
From cffi callback <function _processevents at 0x7f922dc3b620>:
Process ForkPoolWorker-1012:
Process ForkPoolWorker-1006:
Process ForkPoolWorker-1013:
Process ForkPoolWorker-1005:
  File "/home/jovyan/work/Maccabee/maccabee/data_generation/data_generating_process.py", line 177, in generate_dataset
    self._generate_transformed_covars()
Process ForkPoolWorker-1011:
Process ForkPoolWorker-1009:
Process ForkPoolWorker-1007:
Process ForkPoolWorker-1008:
Traceback (most recent call last):
From cffi callback <function _processevents at 0x7f922dc3b620>:
  File "/opt/conda/lib/python3.7/site-packages/rpy2/rinterface_lib/callbacks.py", line 262, in _processevents
Traceback (most recent call last):
    @ffi_proxy.callback(ffi_proxy._processevents_def,
  File "/opt/conda/lib/python3.7/site-packages/rpy2/rinterface_lib/callbacks.py", line 262, in _processevents
    @ffi_proxy.callback(ffi_proxy._processevents_def,
Traceback (most recent call last):
Traceback (most rec

  File "/home/jovyan/work/Maccabee/maccabee/data_generation/data_generating_process.py", line 84, in call
    val = wrapper.func(dgp, required_var_vals, *args, **kwargs)
  File "/home/jovyan/work/Maccabee/maccabee/data_generation/data_generating_process.py", line 84, in call
    val = wrapper.func(dgp, required_var_vals, *args, **kwargs)
  File "/home/jovyan/work/Maccabee/maccabee/examples/genmatch.py", line 133, in _generate_observed_covars
    return self.data_source.get_covar_df()
  File "/home/jovyan/work/Maccabee/maccabee/benchmarking/benchmarking.py", line 45, in _gen_data_and_apply_model
    dataset = dgp.generate_dataset()
  File "/home/jovyan/work/Maccabee/maccabee/examples/genmatch.py", line 162, in _generate_true_propensity_scores
    observed_covariate_data)
  File "/home/jovyan/work/Maccabee/maccabee/examples/genmatch.py", line 162, in _generate_true_propensity_scores
    observed_covariate_data)
  File "/home/jovyan/work/Maccabee/maccabee/examples/genmatch.py", line 154, 

  File "/opt/conda/lib/python3.7/site-packages/pandas/core/computation/check.py", line 3, in <module>
    ne = import_optional_dependency("numexpr", raise_on_missing=False, on_version="warn")
  File "/opt/conda/lib/python3.7/site-packages/sympy/core/basic.py", line 1182, in xreplace
    value, _ = self._xreplace(rule)
  File "/opt/conda/lib/python3.7/site-packages/numpy/core/_ufunc_config.py", line 186, in geterr
    res['invalid'] = _errdict_rev[val]
  File "<__array_function__ internals>", line 6, in amin
  File "/opt/conda/lib/python3.7/site-packages/sympy/printing/printer.py", line 287, in _print
    return getattr(self, printmethod)(expr, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/sympy/core/basic.py", line 1204, in _xreplace
    return self.func(*args), True
  File "/opt/conda/lib/python3.7/site-packages/pandas/compat/_optional.py", line 90, in import_optional_dependency
    module = importlib.import_module(name)
  File "/opt/conda/lib/python3.7/site-packages/numpy/

Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/multiprocessing/pool.py", line 733, in next
    item = self._items.popleft()
IndexError: pop from an empty deque

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/magics/execution.py", line 1310, in time
    exec(code, glob, local_ns)
  File "<timed exec>", line 19, in <module>
  File "/home/jovyan/work/Maccabee/maccabee/benchmarking/benchmarking.py", line 175, in benchmark_model_using_concrete_dgp
    run_model_on_dgp, sample_indeces):
  File "/opt/conda/lib/python3.7/multiprocessing/pool.py", line 737, in next
    self._cond.wait(timeout)
  File "/opt/conda/lib/python3.7/threading.py", line 296, in wait
    waiter.acquire()
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/IPython/core/

TypeError: can only concatenate str (not "list") to str

In [None]:
# TRUE_TREAT_LOGIT = dgp.treatment_logit_expression

In [None]:
%%time

# Passed as `data_analysis_mode` to both the sampled DGPs (via `dgp_kwargs`)
# and the benchmark call below; also gates the data-metric comparison cell.
SAMPLED_IN_ANALYSIS_MODE = True

# NOTE(review): `evaluate_expression` is only referenced by the commented-out
# propensity-score override further down; consider moving this import to the
# imports cell at the top of the notebook.
from maccabee.data_generation.utils import evaluate_expression
# Sampled Genmatch benchmark
class ModifiedNoiseSampledDataGeneratingProcess(SampledDataGeneratingProcess):
    """Sampled DGP whose outcome noise and treatment effect are constants.

    The two decorated methods below return fixed values (0 and -0.4) for the
    outcome-noise and treatment-effect DGP variables respectively.
    NOTE(review): presumably these replace the randomly sampled versions in
    ``SampledDataGeneratingProcess`` -- confirm against the base class.
    """

    # NOTE(review): the empty list is presumably the set of DGP variables
    # this step depends on (none) -- confirm against `data_generating_method`.
    @data_generating_method(Constants.DGPVariables.OUTCOME_NOISE_NAME, [])
    def _generate_outcome_noise_samples(self, input_vars):
        """Return a constant outcome noise of 0; ``input_vars`` is unused."""
        return 0

    @data_generating_method(Constants.DGPVariables.TREATMENT_EFFECT_NAME, [])
    def _generate_treatment_effects(self, input_vars):
        """Return a constant treatment effect of -0.4; ``input_vars`` is unused."""
        return -0.4
    
#     @data_generating_method(Constants.DGPVariables.PROPENSITY_SCORE_NAME,
#                             [Constants.DGPVariables.COVARIATES_NAME])
#     def _generate_true_propensity_scores(self, input_vars):
#         observed_covariate_data = input_vars[Constants.DGPVariables.COVARIATES_NAME]

#         logits = evaluate_expression(
#             TRUE_TREAT_LOGIT,
#             observed_covariate_data)

#         return 1/(1 + np.exp(-1*logits))
    
# Start from the library's default sampling parameters and apply the
# "G" overrides on top.
dgp_params = build_default_parameters()
dgp_params.set_parameters(G_param_overrides)

# The benchmark returns aggregated, raw and run-level raw performance
# metrics, followed by aggregated and raw data metrics, in that order.
(sampled_perf_agg,
 sampled_perf_raw,
 sampled_perf_run_raw,
 sampled_data_agg,
 sampled_data_raw) = benchmark_model_using_sampled_dgp(
    dgp_params,
    build_genmatch_datasource(),
    dgp_kwargs={"data_analysis_mode": SAMPLED_IN_ANALYSIS_MODE},
    dgp_class=ModifiedNoiseSampledDataGeneratingProcess,
    model_class=LogisticPropensityMatchingCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    num_dgp_samples=49,
    num_sampling_runs_per_dgp=20,
    num_samples_from_dgp=63,
    data_analysis_mode=SAMPLED_IN_ANALYSIS_MODE,
    data_metrics_spec=DATA_METRICS_SPEC,
    n_jobs=N_CORES,
    n_threads=N_THREADS,
)

## Data Metric Comparison

In [None]:
# Data metrics are only compared when both benchmarks ran in analysis mode.
if SAMPLED_IN_ANALYSIS_MODE and CONCRETE_IN_ANALYSIS_MODE:
    # Aggregated metrics, concrete vs. sampled, grouped by axis.
    for axis_name, metric_names in DATA_METRICS_SPEC.items():
        print(axis_name)
        for metric_name in metric_names:
            print("\t", metric_name)

            # Aggregated results are keyed by "<axis> <metric>".
            result_key = f"{axis_name} {metric_name}"
            concrete_value = np.round(concrete_data_agg[result_key], 3)
            print("\t\tConcrete:", concrete_value)
            sampled_value = np.round(sampled_data_agg[result_key], 3)
            print("\t\tSampled:", sampled_value)

    # Raw sampled values for every treatment-nonlinearity metric.
    treatment_nonlinearity_items = (
        entry for entry in sampled_data_raw.items()
        if entry[0].startswith("TREATMENT_NONLINEARITY")
    )
    for raw_key, raw_values in treatment_nonlinearity_items:
        print(raw_key, raw_values)

## Performance Comparison

Notes

At config A, 20-50 matches across runs. The distribution is wide. Should investigate the stability of the concrete distribution when n=1000.

At config G, 20-50 has much lower bias in the sampled benchmark. The concrete function appears to be a worst-case scenario, given that very similar functions experience much lower typical bias. The distribution is right-tailed, so it is possible that the concrete function chosen is just an (un)lucky sample from the right tail.

I will test this by permuting the variable order in the exact same functional form.

### Concrete Performance

In [None]:
# Aggregated performance metrics returned by the concrete-DGP benchmark.
concrete_perf_agg

In [None]:
# Density histogram of the raw AMBP values from the concrete-DGP benchmark.
# Title and axis labels distinguish it from the sampled-benchmark plots below.
fig, ax = plt.subplots()
ax.hist(concrete_perf_raw["AMBP"], density=True)
ax.set(title="Concrete DGP benchmark: AMBP", xlabel="AMBP", ylabel="Density")
plt.show()

### Sampled Performance

In [None]:
# Aggregated performance metrics returned by the sampled-DGP benchmark.
sampled_perf_agg

In [None]:
# Density histogram of the raw AMBP values from the sampled-DGP benchmark.
# Title and axis labels distinguish it from the concrete-benchmark plot above.
fig, ax = plt.subplots()
ax.hist(sampled_perf_raw["AMBP"], density=True)
ax.set(title="Sampled DGP benchmark: AMBP", xlabel="AMBP", ylabel="Density")
plt.show()

In [None]:
# Flatten the nested run-level AMBP values into one list and plot their
# (un-normalised) histogram, labelled so the figure stands on its own.
flat_list = [
    value
    for run_values in sampled_perf_run_raw["AMBP"]
    for value in run_values
]

fig, ax = plt.subplots()
ax.hist(flat_list)
ax.set(
    title="Sampled DGP benchmark: run-level AMBP (flattened)",
    xlabel="AMBP",
    ylabel="Count",
)
plt.show()

In [None]:
# Run-level raw AMBP values from the sampled-DGP benchmark (nested sequences;
# flattened for the histogram in the previous cell).
sampled_perf_run_raw["AMBP"]

### Exploratory Analysis

In [None]:
# data = np.array(sampled_perf_raw["absolute mean bias %"])
# clean_data = data[data < 20]
# plt.hist(clean_data)
# plt.show()
# np.mean(clean_data)

In [None]:
# plt.scatter(
#     np.array(sampled_data_raw["TREATMENT_NONLINEARITY Lin r2(X_obs, Treat Logit)"])[data < 8],
#     np.array(sampled_perf_raw["absolute mean bias %"])[data < 8])

In [None]:
# plt.scatter(
#     np.array(sampled_data_raw["TREATMENT_NONLINEARITY Lin r2(X_obs, Treat Logit)"])[data < 8],
#     np.array(sampled_perf_raw["absolute mean bias %"])[data < 8],
#     c="b")

# plt.scatter(
#     np.array(concrete_data_raw["TREATMENT_NONLINEARITY Lin r2(X_obs, Treat Logit)"]),
#     np.array(concrete_perf_raw["absolute mean bias %"]),
#     c="r")