In [1]:
# Hidden Config Cell
#
# The install command below is commented out, so this cell does nothing when run.
# Uncommented, it would pip-install the Maccabee checkout found at the relative
# path in editable mode (-e) and discard the command's stdout.

#!python -m pip install -e ../../../../Maccabee > /dev/null

In [29]:
from maccabee.constants import Constants
from maccabee.parameters import build_default_parameters

# Build the parameters: build_default_parameters() is called with no arguments
# and its result is bound to dgp_params.
dgp_params = build_default_parameters()

# Bare last expression so the notebook displays the object's repr
# (a ParameterStore instance, per the recorded output).
dgp_params

<maccabee.parameters.parameter_store.ParameterStore at 0x7f7b247bc650>

In [6]:
# Override two named parameters on the existing store (dgp_params is assigned
# in an earlier cell); the call's return value is not used.
dgp_params.set_parameters(
    {
        "ACTUAL_CONFOUNDER_ALIGNMENT": 0.25,
        "POTENTIAL_CONFOUNDER_SELECTION_PROBABILITY": 0.7,
    }
)

# Read one of the overridden values back as an attribute so the notebook shows it.
dgp_params.ACTUAL_CONFOUNDER_ALIGNMENT

0.25

In [9]:
from maccabee.constants import Constants
from maccabee.parameters import build_parameters_from_axis_levels

# Rebind dgp_params to a store built from axis levels: both the outcome and
# the treatment nonlinearity axes are mapped to the LOW level.
dgp_params = build_parameters_from_axis_levels({
    axis_name: Constants.AxisLevels.LOW
    for axis_name in (
        Constants.AxisNames.OUTCOME_NONLINEARITY,
        Constants.AxisNames.TREATMENT_NONLINEARITY,
    )
})

# Echo the result so the notebook displays its repr.
dgp_params

<maccabee.parameters.parameter_store.ParameterStore at 0x7f7b054172d0>

In [17]:
from maccabee.constants import Constants
from maccabee.data_sources.data_source_builders import build_random_normal_datasource
from maccabee.benchmarking import benchmark_model_using_sampled_dgp
from maccabee.modeling.models import LinearRegressionCausalModel
import pandas as pd

# Covariate data source handed to the benchmark below
# (n_covars=5, n_observations=1000).
normal_data_source = build_random_normal_datasource(
    n_covars=5,
    n_observations=1000,
)

# Run the benchmark. dgp_params is whatever an earlier cell last assigned;
# the three num_* arguments control how much sampling is performed.
results = benchmark_model_using_sampled_dgp(
    dgp_sampling_params=dgp_params,
    model_class=LinearRegressionCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    data_source=normal_data_source,
    num_dgp_samples=2,
    num_sampling_runs_per_dgp=5,
    num_samples_from_dgp=10,
)

# Display only the first element of the returned value; the recorded output
# shows it as a dict mapping metric names to numbers.
results[0]

{'RMSE': 0.022,
 'RMSE (std)': 0.003,
 'AMBP': 0.61,
 'AMBP (std)': 0.4,
 'MABP': 1.832,
 'MABP (std)': 0.321}

In [24]:
from maccabee.data_sources import build_random_normal_datasource
from maccabee.data_generation import DataGeneratingProcessSampler

# Covariate data source for the sampler (n_covars=5, n_observations=1000).
covar_data_source = build_random_normal_datasource(
    n_covars=5,
    n_observations=1000,
)

# The sampler is constructed from the parameter store (dgp_params, assigned in
# an earlier cell) and the data source built above.
dgp_sampler = DataGeneratingProcessSampler(
    parameters=dgp_params,
    data_source=covar_data_source,
)

# Draw a single DGP from the sampler.
dgp = dgp_sampler.sample_dgp()

# Echo the sampled object so the notebook displays its repr.
dgp

<maccabee.data_generation.data_generating_process.SampledDataGeneratingProcess at 0x7f7b248db4d0>

In [25]:
# Generate a data set from the sampled DGP (`dgp` is assigned in an earlier cell).
dataset = dgp.generate_dataset()
# Preview the observed data; the recorded output shows columns X0..X4, T and Y.
dataset.observed_data.head()

Unnamed: 0,X0,X1,X2,X3,X4,T,Y
0,0.042946,-0.244024,0.352224,-0.385605,-0.297958,0,0.065378
1,-0.035489,-0.130587,0.168029,-0.508966,0.172846,1,-1.160852
2,-0.100376,-0.000438,-0.134698,0.158655,0.133745,1,-1.293867
3,-0.198569,-0.354449,0.093455,-0.113408,-0.307699,1,-0.491382
4,0.250105,-0.005793,-0.202563,-0.027322,0.524938,0,-0.318331


In [26]:
from maccabee.modeling.models import LinearRegressionCausalModel

# Fit the model: construct the estimator around `dataset` (assigned in an
# earlier cell), then call fit() on it.
model = LinearRegressionCausalModel(dataset)
model.fit()

In [27]:
# Ground Truth: the ATE attribute exposed by the generated dataset
# (presumably the true average treatment effect; confirm against the Maccabee docs).
dataset.ATE

-0.6129999999999998

In [28]:
# Estimate: ask the fitted model (`model`, from an earlier cell) for the ATE estimand.
# NOTE: `Constants` is not imported in this cell; it relies on an earlier cell's import.
model.estimate(estimand=Constants.Model.ATE_ESTIMAND)

-0.6095040438593615