In [1]:
# Hidden Config Cell

# from maccabee.constants import Constants
# Constants.DGPSampling.NORMALIZE_SAMPLED_OUTCOME_FUNCTION = True
# Constants.DGPSampling.CENTER_SAMPLED_OUTCOME_FUNCTION = True
# Constants.DGPSampling.NORMALIZE_SAMPLED_TREATMENT_FUNCTION = True
# Constants.DGPSampling.ADJUST_ALIGNMENT = True

from maccabee.parameters import ParameterStore
import numpy as np

def coeff_sampler(self, size=1):
    """Draw coefficients with magnitude uniform on [1, 10) and a random sign.

    Args:
        self: Receiver slot (the function is attached to ParameterStore);
            not used in the body.
        size: Output shape forwarded to the NumPy samplers. Defaults to 1.

    Returns:
        Array of ``size`` floats. Each magnitude is negated wherever an
        independent uniform [0, 1) draw is below 0.5.
    """
    # Draw order is significant for seeded reproducibility: magnitudes
    # first, then the sign coin flips.
    magnitudes = np.random.uniform(low=1, high=10, size=size)
    flip_sign = np.random.random(size=size) < 0.5

    # Integer +1/-1 multipliers, -1 where the coin flip said "negative".
    signs = np.where(flip_sign, -1, 1)
    return magnitudes * signs

def te_sampler(self, size=1):
    """Draw treatment-effect values from a normal distribution N(0, 3^2).

    Args:
        self: Receiver slot (the function is attached to ParameterStore);
            not used in the body.
        size: Output shape forwarded to ``np.random.normal``. Defaults to 1.

    Returns:
        Array of ``size`` normal draws with mean 0 and standard deviation 3.
    """
    effect_draws = np.random.normal(0, 3, size=size)
    return effect_draws
    
def noise_sampler(self, size=1):
    """Draw outcome-noise values from a zero-mean normal with std 0.25.

    Args:
        self: Receiver slot (the function is attached to ParameterStore);
            not used in the body.
        size: Output shape forwarded to ``np.random.normal``. Defaults to 1.

    Returns:
        Array of ``size`` normal draws with mean 0 and standard deviation 0.25.
    """
    # loc is spelled out here; 0.0 is np.random.normal's default mean.
    noise_draws = np.random.normal(loc=0.0, scale=0.25, size=size)
    return noise_draws
    
# Attach the three samplers defined in this cell to ParameterStore under the
# attribute names sample_subfunction_constants, sample_treatment_effect and
# sample_outcome_noise. Each sampler accepts a leading `self` it never uses,
# so it can be called as a method once bound to the class.
# NOTE(review): presumably these replace ParameterStore's default samplers
# for every DGP sampled later in this kernel -- confirm against
# maccabee.parameters.
ParameterStore.sample_subfunction_constants = coeff_sampler
ParameterStore.sample_treatment_effect = te_sampler
ParameterStore.sample_outcome_noise = noise_sampler

#!python -m pip install -e ../../../../Maccabee > /dev/null

In [19]:
from maccabee.modeling.models import CausalModelR
from sklearn.linear_model import LogisticRegression

class LogisticPropensityMatchingCausalModel(CausalModelR):
    """Propensity-score matching estimator for the ATT.

    Propensity scores come from a scikit-learn logistic regression of the
    treatment on the covariates; matching on those scores is delegated to
    the ``Match`` function of the R ``Matching`` package.
    """

    def fit(self):
        """Fit the propensity model and run the R matching routine.

        Reads ``self.dataset.X``, ``self.dataset.T`` and ``self.dataset.Y``
        (each converted with ``to_numpy()``) and stores the object returned
        by ``Match`` on ``self.match_out``.
        """
        # Load the R Matching package through the helper available on self
        # (expected to be provided by CausalModelR).
        r_matching = self._import_r_package("Matching")

        # Logistic propensity model: treatment regressed on the covariates.
        # LogisticRegression.fit returns the fitted estimator itself.
        propensity_model = LogisticRegression(solver='lbfgs', n_jobs=1).fit(
            self.dataset.X.to_numpy(), self.dataset.T.to_numpy())

        # Per-class probabilities; keep only the column whose class label
        # equals 1 and flatten it into a 1-D score vector.
        proba_by_class = propensity_model.predict_proba(
            self.dataset.X.to_numpy())
        is_label_one = propensity_model.classes_ == 1
        propensity_scores = proba_by_class[:, is_label_one].flatten()

        # Match on the propensity scores: ATT estimand, matching with
        # replacement, using the "fast" version of Match.
        match_options = dict(estimand="ATT", replace=True, version="fast")
        self.match_out = r_matching.Match(
            Y=self.dataset.Y.to_numpy(),
            Tr=self.dataset.T.to_numpy(),
            X=propensity_scores,
            **match_options)

    def estimate_ATT(self):
        """Return the ATT estimate held in the stored match result.

        Takes the ``est`` component of ``self.match_out``, selects its
        (1, 1) entry and returns the first element of the resulting array.
        Requires ``fit`` to have been called first.
        """
        est_component = self.match_out.rx2("est")
        first_entry = est_component.rx(1, 1)
        return np.array(first_entry)[0]

In [14]:
from maccabee.constants import Constants
from maccabee.data_sources.data_source_builders import build_random_normal_datasource
from maccabee.benchmarking import benchmark_model_using_sampled_dgp_grid
import pandas as pd

# Unpack the three axis levels from Constants.AxisLevels.LEVELS.
# MEDIUM is unpacked but not used anywhere in this cell.
LOW, MEDIUM, HIGH = Constants.AxisLevels.LEVELS

# Grid of DGP parameter settings: only the treatment- and outcome-
# nonlinearity axes are listed, each with the two levels HIGH and LOW.
# NOTE(review): presumably the benchmark evaluates every combination of the
# listed levels (the results table shows four rows) -- confirm.
param_grid = {
    Constants.AxisNames.TREATMENT_NONLINEARITY: [HIGH, LOW],
    Constants.AxisNames.OUTCOME_NONLINEARITY: [HIGH, LOW]
}

# Data source built with 5 covariates and 1000 observations.
# NOTE(review): per the builder's name these are presumably random normal
# covariates -- confirm against maccabee.data_sources.
normal_data_source = build_random_normal_datasource(
    n_covars=5,
    n_observations=1000)
    
# Benchmark the matching model class on the ATT estimand over the grid.
# The three num_* arguments set the sampling sizes (5, 1 and 32); see the
# maccabee.benchmarking documentation for their exact meaning.
# NOTE(review): no random seed is set in this notebook, so a re-run is not
# expected to reproduce these numbers exactly.
results = benchmark_model_using_sampled_dgp_grid(
    model_class=LogisticPropensityMatchingCausalModel,
    estimand=Constants.Model.ATT_ESTIMAND,
    data_source=normal_data_source,
    dgp_param_grid=param_grid,
    num_dgp_samples=5,
    num_sampling_runs_per_dgp=1,
    num_samples_from_dgp=32)

In [15]:
# Show the benchmark results as a DataFrame (last expression => rich display).
pd.DataFrame(results)

Unnamed: 0,param_outcome_nonlinearity,param_treatment_nonlinearity,RMSE,RMSE (std),AMBP,AMBP (std),MABP,MABP (std)
0,HIGH,HIGH,0.051,0.01,6.008,6.61,8.834,6.048
1,HIGH,LOW,0.05,0.011,2.604,1.752,9.059,4.469
2,LOW,HIGH,0.039,0.011,4.107,4.169,11.299,8.213
3,LOW,LOW,0.042,0.015,1.085,0.571,10.111,11.049


In [20]:
# Source text of a small R program. It loads "utils" and "Matching" (the
# library() call for Matching is wrapped in capture.output) and defines
# p_score_match(Y, Tr, X), which calls Match with estimand="ATT",
# replace=TRUE, version="fast" and returns out[["est"]][1][1].
r_prog = """# Custom R program
library("utils")
capture.output(library("Matching"))

p_score_match <- function(Y, Tr, X){
    out <- Match(
        Y=Y,
        Tr=Tr,
        X=X,
        estimand="ATT",
        replace=TRUE,
        version="fast")
        
    return(out[["est"]][1][1])
}
"""

# Write the R source to "r_prog.R" (relative path, so it lands in the current
# working directory), overwriting any existing file of that name. The model
# cell that calls _import_r_file_as_package("r_prog.R", ...) reads this file.
with open("r_prog.R", "w") as file:
    file.write(r_prog)

In [18]:
from maccabee.modeling.models import CausalModelR
from sklearn.linear_model import LogisticRegression

class LogisticPropensityMatchingCausalModel(CausalModelR):
    """Propensity-score matching estimator for the ATT, driven by a custom R file.

    Propensity scores come from a scikit-learn logistic regression of the
    treatment on the covariates; the matching step is delegated to the
    ``p_score_match`` function loaded from ``r_prog.R``.
    """

    def fit(self):
        """Fit the propensity model and compute the ATT via the R helper.

        Reads ``self.dataset.X``, ``self.dataset.T`` and ``self.dataset.Y``
        (each converted with ``to_numpy()``) and stores the value returned
        by ``p_score_match`` on ``self.att``.
        """
        # Load the custom R file as a package named "MatchingCode" through
        # the helper available on self (expected to come from CausalModelR).
        r_code = self._import_r_file_as_package("r_prog.R", "MatchingCode")

        # Logistic propensity model: treatment regressed on the covariates.
        propensity_model = LogisticRegression(solver='lbfgs', n_jobs=1)
        propensity_model.fit(
            self.dataset.X.to_numpy(), self.dataset.T.to_numpy())

        # Per-class probabilities; keep only the column whose class label
        # equals 1 and flatten it into a 1-D score vector.
        proba_by_class = propensity_model.predict_proba(
            self.dataset.X.to_numpy())
        is_label_one = propensity_model.classes_ == 1
        propensity_scores = proba_by_class[:, is_label_one].flatten()

        # Hand outcome, treatment and scores to the R matching helper.
        r_inputs = dict(
            Y=self.dataset.Y.to_numpy(),
            Tr=self.dataset.T.to_numpy(),
            X=propensity_scores)
        self.att = r_code.p_score_match(**r_inputs)

    def estimate_ATT(self):
        """Return the stored ATT value wrapped in a NumPy array.

        Requires ``fit`` to have been called first.
        """
        # NOTE(review): this returns whatever array np.array builds from
        # self.att (no element is indexed out) -- confirm callers accept an
        # array rather than a scalar.
        att_as_array = np.array(self.att)
        return att_as_array