In [1]:
#!pip install -e ../Maccabee > /dev/null

In [2]:
# %load_ext autoreload
# %autoreload 2

In [3]:
from maccabee.data_analysis.data_metrics import AXES_AND_METRIC_NAMES
from maccabee.constants import Constants
import maccabee.data_sources as data_sources
from maccabee.parameters import build_default_parameters

from maccabee.data_generation import DataGeneratingProcess, SampledDataGeneratingProcess, data_generating_method

# benchmarks
from maccabee.benchmarking import benchmark_model_using_concrete_dgp
from maccabee.benchmarking import benchmark_model_using_sampled_dgp
from maccabee.benchmarking import benchmark_model_using_sampled_dgp_grid

# genmatch
from maccabee.examples.genmatch import GenmatchDataGeneratingProcess, build_genmatch_datasource, GENMATCH_SPECS
from maccabee.examples.genmatch import LogisticPropensityMatchingCausalModel

In [4]:
import numpy as np
from collections import defaultdict
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Parameter overrides shared by every configuration defined in this cell.
# Only the LINEAR form has a non-zero selection probability for the outcome
# mechanism; all other forms are switched off.
BASE_GENMATCH_DGP_PARAMS = {
    "OUTCOME_MECHANISM_COVARIATE_SELECTION_PROBABILITY": {
        "LINEAR": 0.7,
        "POLY_QUAD": 0,
        "POLY_CUBIC": 0,
        "STEP_JUMP": 0,
        "STEP_KINK": 0,
        "INTERACTION_TWO_WAY": 0,
        "INTERACTION_THREE_WAY": 0,
    },
    "TREATMENT_EFFECT_HETEROGENEITY": 0,
    "ACTUAL_CONFOUNDER_ALIGNMENT": 0,
    "MIN_PROPENSITY_SCORE": 0.05,
    "MAX_PROPENSITY_SCORE": 0.95,
    "OUTCOME_NOISE_TAIL_THICKNESS": 1000,
    "SUBFUNCTION_CONSTANT_TAIL_THICKNESS": 1000,
    "TREATMENT_EFFECT_TAIL_THICKNESS": 1000,
}


def _build_genmatch_overrides(poly_quad=0, poly_cubic=0, interaction_two_way=0.0):
    """Return the base overrides plus a treatment-mechanism selection dict.

    The configurations in this notebook differ only in three treatment-mechanism
    selection probabilities; everything else is fixed.

    Args:
        poly_quad: selection probability stored under "POLY_QUAD".
        poly_cubic: selection probability stored under "POLY_CUBIC".
        interaction_two_way: selection probability stored under
            "INTERACTION_TWO_WAY".

    Returns:
        A new dict holding every entry of BASE_GENMATCH_DGP_PARAMS followed by
        the "TREAT_MECHANISM_COVARIATE_SELECTION_PROBABILITY" entry.
    """
    # Copy nested dicts as well: a plain dict.copy() is shallow, so editing the
    # outcome-mechanism dict of one configuration would silently change the base
    # and every other configuration.
    overrides = {
        name: dict(value) if isinstance(value, dict) else value
        for name, value in BASE_GENMATCH_DGP_PARAMS.items()
    }
    overrides["TREAT_MECHANISM_COVARIATE_SELECTION_PROBABILITY"] = {
        "LINEAR": 0.7,
        "POLY_QUAD": poly_quad,
        "POLY_CUBIC": poly_cubic,
        "STEP_JUMP": 0,
        "STEP_KINK": 0,
        "INTERACTION_TWO_WAY": interaction_two_way,
        "INTERACTION_THREE_WAY": 0,
    }
    return overrides


# Config A: linear treatment mechanism only.
A_param_overrides = _build_genmatch_overrides()

# Config C: adds quadratic terms.
C_param_overrides = _build_genmatch_overrides(poly_quad=0.3)

# Config G: more quadratic terms plus two-way interactions.
G_param_overrides = _build_genmatch_overrides(
    poly_quad=0.75, interaction_two_way=0.1)

# Config X: quadratic, cubic and two-way interaction terms at 0.99.
X_param_overrides = _build_genmatch_overrides(
    poly_quad=0.99, poly_cubic=0.99, interaction_two_way=0.99)

In [6]:
# Data metrics to compute, grouped by axis. Keys are axis names taken from
# Constants.AxisNames; each value lists the metric names analysed on that axis.
_axis_names = Constants.AxisNames
DATA_METRICS_SPEC = {
    _axis_names.OUTCOME_NONLINEARITY: ["Lin r2(X_obs, Y)", "Lin r2(X_true, Y)"],
    _axis_names.TREATMENT_NONLINEARITY: [
        "Lin r2(X_obs, Treat Logit)",
        "Lin r2(X_true, Treat Logit)",
        "Log r2(X_obs, T)",
    ],
    _axis_names.PERCENT_TREATED: ["Percent(T==1)"],
    _axis_names.BALANCE: ["Wass dist X_true: T=1<->T=0"],
    _axis_names.ALIGNMENT: ["Lin r2(Y, Treat Logit)", "Lin r2(Y0, Treat Logit)"],
    _axis_names.TE_HETEROGENEITY: ["std(TE)/std(Y)"],
}

In [7]:
# Parallelism knobs (presumably process and thread counts for the benchmark
# runners; the active sampled benchmark cell hard-codes its own values).
N_CORES, N_THREADS = 16, 3

## Run MC Benchmarks

In [8]:
#  %%time

# CONCRETE_IN_ANALYSIS_MODE = True

# # Concrete Genmatch Benchmark
# dgp = GenmatchDataGeneratingProcess(
#     *GENMATCH_SPECS["G"],
#     n_observations=1000,
#     data_analysis_mode=CONCRETE_IN_ANALYSIS_MODE)

# concrete_perf_agg, concrete_perf_raw, concrete_data_agg, concrete_data_raw = \
#     benchmark_model_using_concrete_dgp(
#         dgp=dgp,
#         model_class=LogisticPropensityMatchingCausalModel,
#         estimand=Constants.Model.ATT_ESTIMAND,
#         num_sampling_runs_per_dgp=10,
#         num_samples_from_dgp=20,
#         data_analysis_mode=CONCRETE_IN_ANALYSIS_MODE,
#         data_metrics_spec=DATA_METRICS_SPEC,
#         n_jobs=N_CORES,
#         n_threads=1)

In [9]:
# TRUE_TREAT_LOGIT = dgp.treatment_logit_expression

In [None]:
%%time

# Passed as `data_analysis_mode` both to the DGP (through dgp_kwargs) and to the
# benchmark call in this cell, and checked again by the data-metric comparison cell.
SAMPLED_IN_ANALYSIS_MODE = True

from maccabee.data_generation.utils import evaluate_expression
# Sampled Genmatch Benchmark
class ModifiedNoiseSampledDataGeneratingProcess(SampledDataGeneratingProcess):
    """SampledDataGeneratingProcess variant with constant noise and treatment effect.

    Two methods are registered through ``data_generating_method`` and return
    fixed scalars: the outcome noise is 0 and the treatment effect is -0.4.

    NOTE(review): presumably these replace the parent's sampled values so that
    estimator bias is measured against a known, noise-free effect - confirm
    against the Maccabee DGP implementation.
    """
    @data_generating_method(Constants.DGPVariables.OUTCOME_NOISE_NAME, [])
    def _generate_outcome_noise_samples(self, input_vars):
        """Return a constant outcome noise of 0; ``input_vars`` is not used."""
        return 0
    
    @data_generating_method(Constants.DGPVariables.TREATMENT_EFFECT_NAME, [])
    def _generate_treatment_effects(self, input_vars):
        """Return a constant treatment effect of -0.4; ``input_vars`` is not used."""
        return -0.4
    
#     @data_generating_method(Constants.DGPVariables.PROPENSITY_SCORE_NAME,
#                             [Constants.DGPVariables.COVARIATES_NAME])
#     def _generate_true_propensity_scores(self, input_vars):
#         observed_covariate_data = input_vars[Constants.DGPVariables.COVARIATES_NAME]

#         logits = evaluate_expression(
#             TRUE_TREAT_LOGIT,
#             observed_covariate_data)

#         return 1/(1 + np.exp(-1*logits))
    
# Start from the default parameter set and layer the config-G overrides on top.
dgp_params = build_default_parameters()
dgp_params.set_parameters(G_param_overrides)

# Keyword settings for the sampled-DGP benchmark, collected in one place.
sampled_benchmark_settings = dict(
    dgp_kwargs={"data_analysis_mode": SAMPLED_IN_ANALYSIS_MODE},
    dgp_class=ModifiedNoiseSampledDataGeneratingProcess,
    model_class=LogisticPropensityMatchingCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    num_dgp_samples=1,
    num_sampling_runs_per_dgp=10,
    num_samples_from_dgp=28,
    data_analysis_mode=SAMPLED_IN_ANALYSIS_MODE,
    data_metrics_spec=DATA_METRICS_SPEC,
    n_jobs=7,
    n_threads=1,
)

# The benchmark returns five objects, unpacked here in the order returned.
(
    sampled_perf_agg,
    sampled_perf_raw,
    sampled_perf_run_raw,
    sampled_data_agg,
    sampled_data_raw,
) = benchmark_model_using_sampled_dgp(
    dgp_params,
    build_genmatch_datasource(),
    **sampled_benchmark_settings,
)

Compiling
1/(exp(-0.510057150755267*X0 - 0.360078406363953*X1 + 0.641470479318533*X10**2 - 0.539957443998051*X10*X2 - 0.61792603844522*X10*X8 - 0.746686302806717*X10 - 0.364746464672264*X2**2 - 0.0641805654395704*X2 - 0.608488473559081*X4**2 - 0.277075933248411*X4*X6 + 0.113661495923376*X4 - 0.51011913179712*X5*X7 + 0.675149916823819*X5 + 0.551812043003241*X7**2 + 0.129513333454906*X8) + 1)
Done compiling
Compiling
0.515075327636745*X1 - 0.750650360922654*X10 + 0.310578842690503*X2 + 0.579017550273551*X3 - 0.00441198750731474*X4 - 1.05081201500435*X5 - 0.105769607604711*X7 - 0.0563909861306566*X8 + 0.656034529580096
Done compiling
Done sampling DGP 1
Starting DGP 1/1
0.510057150755267*X0 + 0.360078406363953*X1 - 0.641470479318533*X10**2 + 0.539957443998051*X10*X2 + 0.61792603844522*X10*X8 + 0.746686302806717*X10 + 0.364746464672264*X2**2 + 0.0641805654395704*X2 + 0.608488473559081*X4**2 + 0.277075933248411*X4*X6 - 0.113661495923376*X4 + 0.51011913179712*X5*X7 - 0.675149916823819*X5 - 0

Generating data
Importing compiled module.
Importing compiled module.
Generating data
Importing compiled module.
Executing compiled code
Done executing compiled code
Executing compiled code
Done executing compiled code
Executing compiled code
1000 1000
Done executing compiled code
1000 1000
Importing compiled module.
Generating data
1000 1000
Generating data
Importing compiled module.
Executing compiled code
Done executing compiled code
Importing compiled module.
Fitting model to data
Executing compiled code
Done executing compiled code
Executing compiled code
Fitting model to data
Importing compiled module.
Done executing compiled code
Executing compiled code
Importing compiled module.
Fitting model to data
Done executing compiled code
Collecting estimand from model
1000 1000
Executing compiled code
Importing compiled module.
Importing compiled module.
Collecting estimand from model
Done executing compiled code
1000 1000
Executing compiled code
Done executing compiled code
Executing c

Collecting estimand from model
Collecting estimand from model
Executing compiled code
Executing compiled code
Done executing compiled code
Done executing compiled code
Executing compiled code
Done executing compiled code
Fitting model to data
1000 1000
Fitting model to data
Importing compiled module.
Sample: 0
Sample: 2
Sample: 1
Sample: 3
Executing compiled code
Done executing compiled code
Fitting model to data
Generating data
Collecting estimand from model
Collecting estimand from model
Generating data
Collecting estimand from model
Sample: 4
Generating data

Generating data
Importing compiled module.
Executing compiled code
Importing compiled module.
Done executing compiled code
1000 1000
Executing compiled code
Done executing compiled code
1000 1000
Importing compiled module.
Sample: 5
Sample: 6
Importing compiled module.
Executing compiled code
Done executing compiled code
Fitting model to data
Importing compiled module.
Executing compiled code
Importing compiled module.
Done exe

Executing compiled code
Done executing compiled code
Fitting model to data
Generating data
Collecting estimand from model
Collecting estimand from model
Generating data
Collecting estimand from model
Generating data
Importing compiled module.
Executing compiled code
Done executing compiled code
Sample: 8
Sample: 7
Sample: 9
1000 1000
Importing compiled module.
Importing compiled module.
Importing compiled module.
Executing compiled code
Executing compiled code
Done executing compiled code
Done executing compiled code
Generating data
1000 1000
Fitting model to data
Importing compiled module.
Executing compiled code
Importing compiled module.
Executing compiled code
Done executing compiled code
Done executing compiled code
Generating data
1000 1000
1000 1000
Executing compiled code
Importing compiled module.
Generating data
Importing compiled module.
Done executing compiled code
Executing compiled code
Done executing compiled code
Collecting estimand from model
Fitting model to data
Fitt

Importing compiled module.
Importing compiled module.
Executing compiled code
Executing compiled code
Done executing compiled code
Done executing compiled code
1000 1000
Executing compiled code
Executing compiled code
Done executing compiled code
1000 1000
Importing compiled module.
Fitting model to data
Generating data
Done executing compiled code
1000 1000
Importing compiled module.
Importing compiled module.
Executing compiled code
Executing compiled code
Done executing compiled code
Done executing compiled code
Fitting model to data
Sample: 12
Fitting model to data
Executing compiled code
Done executing compiled code
Collecting estimand from model
Generating data
Importing compiled module.
Fitting model to data
Collecting estimand from model
Executing compiled code
Importing compiled module.
Done executing compiled code
Done executing compiled code
1000 1000
Sample: 13
Sample: 14
Executing compiled code
1000 1000
Collecting estimand from model
Importing compiled module.
Importing c

## Data Metric Comparison

In [None]:
# The concrete benchmark cell is currently commented out, so neither
# CONCRETE_IN_ANALYSIS_MODE nor concrete_data_agg exists after Restart & Run All.
# Look both up defensively so this cell is skipped instead of raising NameError.
concrete_analysis_enabled = globals().get("CONCRETE_IN_ANALYSIS_MODE", False)
concrete_metrics_agg = globals().get("concrete_data_agg")

if SAMPLED_IN_ANALYSIS_MODE and concrete_analysis_enabled and concrete_metrics_agg is not None:
    # Side-by-side view of each aggregated data metric, grouped by axis.
    for axes, metrics in DATA_METRICS_SPEC.items():
        print(axes)
        for metric in metrics:
            print("\t", metric)

            # Aggregated results are keyed as "<axis name> <metric name>".
            key = f"{axes} {metric}"
            print("\t\tConcrete:", np.round(concrete_metrics_agg[key], 3))
            print("\t\tSampled:", np.round(sampled_data_agg[key], 3))

    # Raw (non-aggregated) treatment nonlinearity values from the sampled run.
    for name, vals in sampled_data_raw.items():
        if name.startswith("TREATMENT_NONLINEARITY"):
            print(name, vals)
else:
    print("Skipping data metric comparison: it needs both the concrete and the "
          "sampled benchmark to have been run in analysis mode.")

## Performance Comparison

Notes

At config A, 20-50 matches across runs. The distribution is wide. We should investigate the stability of the concrete distribution when n=1000.

At config G, 20-50 has much lower bias in the sampled benchmark. The concrete function appears to be a worst-case scenario, given that very similar functions typically experience much lower bias. The distribution is right-tailed, so it is possible that the concrete function chosen is just an (un)lucky sample from the right tail.

I will test this by permuting the variable order in the exact same functional form.

### Concrete Performance

In [None]:
# Display the concrete-benchmark performance results when they exist. The
# concrete benchmark cell is commented out, so on a fresh kernel the name is
# undefined; show a short note instead of raising NameError.
globals().get("concrete_perf_agg", "Concrete benchmark has not been run in this session.")

In [None]:
# Histogram (as a density) of the raw "AMBP" values from the concrete benchmark.
# The concrete benchmark cell is commented out, so guard against the results
# being undefined on a fresh kernel rather than raising NameError.
if "concrete_perf_raw" in globals():
    plt.hist(concrete_perf_raw["AMBP"], density=True)
    plt.show()
else:
    print("Concrete benchmark has not been run in this session; nothing to plot.")

### Sampled Performance

In [None]:
# Display the first object returned by the sampled benchmark
# (presumably the aggregated performance metrics - confirm against Maccabee docs).
sampled_perf_agg

In [None]:
# Histogram (as a density) of the raw "AMBP" values from the sampled benchmark.
sampled_ambp_values = sampled_perf_raw["AMBP"]
plt.hist(sampled_ambp_values, density=True)
plt.show()

In [None]:
# Flatten the per-run "AMBP" sequences into a single list, then plot a
# histogram of all values together.
flat_list = []
for run_values in sampled_perf_run_raw["AMBP"]:
    flat_list.extend(run_values)

plt.hist(flat_list)
plt.show()

In [None]:
# Display the un-flattened "AMBP" entry of the per-run results (a sequence of
# sequences, as iterated by the flattening cell).
sampled_perf_run_raw["AMBP"]

### Exploratory Analysis

In [None]:
# data = np.array(sampled_perf_raw["absolute mean bias %"])
# clean_data = data[data < 20]
# plt.hist(clean_data)
# plt.show()
# np.mean(clean_data)

In [None]:
# plt.scatter(
#     np.array(sampled_data_raw["TREATMENT_NONLINEARITY Lin r2(X_obs, Treat Logit)"])[data < 8],
#     np.array(sampled_perf_raw["absolute mean bias %"])[data < 8])

In [None]:
# plt.scatter(
#     np.array(sampled_data_raw["TREATMENT_NONLINEARITY Lin r2(X_obs, Treat Logit)"])[data < 8],
#     np.array(sampled_perf_raw["absolute mean bias %"])[data < 8],
#     c="b")

# plt.scatter(
#     np.array(concrete_data_raw["TREATMENT_NONLINEARITY Lin r2(X_obs, Treat Logit)"]),
#     np.array(concrete_perf_raw["absolute mean bias %"]),
#     c="r")