In [19]:
# Hidden Config Cell

from maccabee.constants import Constants
# Override four switches on maccabee's Constants.DGPSampling, setting each
# one to True for the rest of this kernel session.
# NOTE(review): judging by the names, these enable normalization/centering of
# the sampled outcome and treatment functions plus alignment adjustment —
# confirm the exact semantics against the maccabee documentation.
Constants.DGPSampling.NORMALIZE_SAMPLED_OUTCOME_FUNCTION = True
Constants.DGPSampling.CENTER_SAMPLED_OUTCOME_FUNCTION = True
Constants.DGPSampling.NORMALIZE_SAMPLED_TREATMENT_FUNCTION = True
Constants.DGPSampling.ADJUST_ALIGNMENT = True

from maccabee.parameters import ParameterStore
import numpy as np

def coeff_sampler(self, size=1):
    """Draw coefficients with magnitude uniform on [1, 10) and a random sign.

    Args:
        self: Unused; present so the function can be bound as a method.
        size: Output shape handed to the NumPy samplers (default 1).

    Returns:
        Float array of shape ``size`` whose entries are each negated
        independently with probability 0.5.
    """
    # Magnitudes are drawn before the sign flips so that a seeded global
    # NumPy RNG is consumed in the same order.
    magnitudes = np.random.uniform(low=1, high=10, size=size)
    flip = np.random.random(size=size) < 0.5

    # Start from all +1 and switch the selected positions to -1.
    signs = np.ones(size, dtype=int)
    signs[flip] = -1
    return magnitudes * signs

def te_sampler(self, size=1):
    """Sample treatment-effect values from a normal with mean 0 and std 3.

    Args:
        self: Unused; present so the function can be bound as a method.
        size: Output shape handed to ``np.random.normal`` (default 1).

    Returns:
        Array of shape ``size`` of normal draws.
    """
    te_mean, te_std = 0, 3
    return np.random.normal(te_mean, te_std, size)
    
def noise_sampler(self, size=1):
    """Sample outcome noise from a normal with mean 0 and std 0.25.

    Args:
        self: Unused; present so the function can be bound as a method.
        size: Output shape handed to ``np.random.normal`` (default 1).

    Returns:
        Array of shape ``size`` of normal draws.
    """
    noise_std = 0.25
    # The mean is spelled out; 0.0 is also NumPy's default for ``loc``.
    return np.random.normal(loc=0.0, scale=noise_std, size=size)
    
# Monkey-patch ParameterStore at class level: its three sampling attributes
# are rebound to the samplers defined earlier in this cell. Because the
# functions are assigned on the class, they receive the instance as `self`
# when called as methods.
# NOTE(review): this replaces whatever ParameterStore defined under these
# names for the rest of the kernel session — confirm that is intended.
ParameterStore.sample_subfunction_constants = coeff_sampler
ParameterStore.sample_treatment_effect = te_sampler
ParameterStore.sample_outcome_noise = noise_sampler

from maccabee.examples.genmatch import LogisticPropensityMatchingCausalModel

#!python -m pip install -e ../../../../Maccabee > /dev/null

In [22]:
from maccabee.constants import Constants
from maccabee.data_sources.data_source_builders import build_random_normal_datasource
from maccabee.benchmarking import benchmark_model_using_sampled_dgp_grid
from maccabee.modeling.models import LinearRegressionCausalModel

import pandas as pd

# Unpack the three axis levels in the order LEVELS provides them.
LOW, MEDIUM, HIGH = Constants.AxisLevels.LEVELS

# Sweep outcome nonlinearity over all three levels while treatment
# nonlinearity stays at LOW.
param_grid = {
    Constants.AxisNames.TREATMENT_NONLINEARITY: [LOW],
    Constants.AxisNames.OUTCOME_NONLINEARITY: [HIGH, MEDIUM, LOW],
}

# Random-normal data source built with 5 covariates and 1000 observations.
normal_data_source = build_random_normal_datasource(n_covars=5, n_observations=1000)

# Benchmark the linear-regression model on the ATE estimand for every
# combination in the grid, using the sampling counts given below.
results = benchmark_model_using_sampled_dgp_grid(
    model_class=LinearRegressionCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    data_source=normal_data_source,
    dgp_param_grid=param_grid,
    num_dgp_samples=10,
    num_sampling_runs_per_dgp=5,
    num_samples_from_dgp=96,
)

In [45]:
# Tabulate the benchmark results; as the cell's last expression, the frame
# is what the notebook displays.
pd.DataFrame(results)

Unnamed: 0,param_outcome_nonlinearity,param_treatment_nonlinearity,RMSE,RMSE (std),AMBP,AMBP (std),MABP,MABP (std)
0,HIGH,LOW,0.081,0.03,17.892,25.949,21.771,25.519
1,MEDIUM,LOW,0.043,0.02,2.644,3.316,3.654,3.986
2,LOW,LOW,0.017,0.001,0.1,0.067,0.995,0.445


In [5]:
from sklearn.linear_model import LinearRegression
from maccabee.modeling.models import CausalModel


class LinearRegressionCausalModel(CausalModel):
    """Causal model that reads the ATE off a fitted linear regression.

    The dataset's ``Y`` is regressed on every column of its observed data
    other than ``"Y"``, and the last fitted coefficient is reported as the
    ATE.
    """

    def __init__(self, dataset):
        """Store the dataset, create the regressor and prepare its inputs.

        Args:
            dataset: Object exposing ``observed_data`` (containing a ``"Y"``
                column) and ``Y``.
        """
        self.dataset = dataset
        self.model = LinearRegression()

        # Everything except the outcome column is used as a regressor.
        regressors = dataset.observed_data.drop("Y", axis=1)
        self.data = regressors

    def fit(self):
        """Fit the regression of ``dataset.Y`` on the prepared regressors."""
        outcome = self.dataset.Y
        self.model.fit(self.data, outcome)

    def estimate_ATE(self):
        """Return the final regression coefficient as the ATE estimate.

        NOTE(review): this relies on the treatment-status variable being the
        last column of ``self.data`` — confirm against the dataset's column
        order.
        """
        fitted_coefficients = self.model.coef_
        return fitted_coefficients[-1]


In [26]:
from maccabee.data_sources import build_lalonde_datasource
# Build the Lalonde data source with maccabee's builder.
# NOTE(review): `data_source` is not referenced by any other cell visible in
# this notebook — confirm it is still needed.
data_source = build_lalonde_datasource()

In [27]:
from maccabee.constants import Constants
# Display every axis name exposed by Constants.AxisNames; these names are
# used as the keys of the parameter grids in this notebook.
Constants.AxisNames.all()

{'OUTCOME_NONLINEARITY': 'OUTCOME_NONLINEARITY',
 'TREATMENT_NONLINEARITY': 'TREATMENT_NONLINEARITY',
 'PERCENT_TREATED': 'PERCENT_TREATED',
 'OVERLAP': 'OVERLAP',
 'BALANCE': 'BALANCE',
 'ALIGNMENT': 'ALIGNMENT',
 'TE_HETEROGENEITY': 'TE_HETEROGENEITY'}

In [37]:
# Unpack the three axis levels in the order LEVELS provides them.
LOW, MEDIUM, HIGH = Constants.AxisLevels.LEVELS

# Full grid: each axis listed below is swept over all three levels. The
# comprehension builds a fresh [HIGH, MEDIUM, LOW] list per axis and keeps
# the axes in the listed order.
complete_param_grid = {
    axis_name: [HIGH, MEDIUM, LOW]
    for axis_name in (
        Constants.AxisNames.OUTCOME_NONLINEARITY,
        Constants.AxisNames.TE_HETEROGENEITY,
        Constants.AxisNames.TREATMENT_NONLINEARITY,
        Constants.AxisNames.PERCENT_TREATED,
        Constants.AxisNames.OVERLAP,
        Constants.AxisNames.BALANCE,
        Constants.AxisNames.ALIGNMENT,
    )
}

In [40]:
# Keep the benchmark results in a named DataFrame so later cells can filter
# it, then display it as the cell's output.
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,param_outcome_nonlinearity,param_treatment_nonlinearity,RMSE,RMSE (std),AMBP,AMBP (std),MABP,MABP (std)
0,HIGH,LOW,0.081,0.03,17.892,25.949,21.771,25.519
1,MEDIUM,LOW,0.043,0.02,2.644,3.316,3.654,3.986
2,LOW,LOW,0.017,0.001,0.1,0.067,0.995,0.445


In [44]:
# Show only the rows whose outcome nonlinearity is not HIGH, with the two
# MABP columns left out of the displayed table. results_df itself is not
# modified.
results_df.loc[
    results_df["param_outcome_nonlinearity"].ne(HIGH)
].drop(columns=["MABP", "MABP (std)"])

Unnamed: 0,param_outcome_nonlinearity,param_treatment_nonlinearity,RMSE,RMSE (std),AMBP,AMBP (std)
1,MEDIUM,LOW,0.043,0.02,2.644,3.316
2,LOW,LOW,0.017,0.001,0.1,0.067
