In [7]:
!pip install -e ../CauseML

Obtaining file:///home/jovyan/work/CauseML
Installing collected packages: cause-ml
  Found existing installation: cause-ml 0.0.11
    Uninstalling cause-ml-0.0.11:
      Successfully uninstalled cause-ml-0.0.11
  Running setup.py develop for cause-ml
Successfully installed cause-ml


In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before
# each cell is executed, so edits to the locally installed cause_ml package
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [79]:
from cause_ml.parameters import build_parameters_from_axis_levels
from cause_ml.constants import Constants
from cause_ml.data_generation import DataGeneratingProcessSampler, DataGeneratingProcess
import cause_ml.data_sources as data_sources
from cause_ml.modeling.models import LinearRegressionCausalModel
from cause_ml.benchmarking import run_sampled_dgp_benchmark, run_concrete_dgp_benchmark

[autoreload of cause_ml.data_generation.data_generating_process failed: Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/opt/conda/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/opt/conda/lib/python3.7/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/opt/conda/lib/python3.7/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 630, in _exec
  File "<frozen importlib._bootstrap_external>", line 728, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/home/jovyan/work/CauseML/cause_ml/data_generation/data_generating_process.py", line 138, in <module>
    class SampledDataGeneratingProcess(DataGeneratingProcess):
  File "/home/jov

In [4]:
import pandas as pd

## Model Demo

In [7]:
# Covariate source for the demo, built with n_covars=10 and n_observations=1000.
covar_data_source = data_sources.load_random_normal_covariates(
    n_observations=1000,
    n_covars=10,
)

# DGP sampling parameters: both nonlinearity axes are set to their LOW level.
dgp_params = build_parameters_from_axis_levels(
    {
        Constants.AxisNames.OUTCOME_NONLINEARITY: Constants.AxisLevels.LOW,
        Constants.AxisNames.TREATMENT_NONLINEARITY: Constants.AxisLevels.LOW,
    }
)

# Sample one DGP from the sampler, then generate one dataset from that DGP.
# `dgp` and `dataset` are read by the cells that follow.
dgp_sampler = DataGeneratingProcessSampler(
    data_source=covar_data_source,
    parameters=dgp_params,
)
dgp = dgp_sampler.sample_dgp()
dataset = dgp.generate_dataset()

In [8]:
# Display the ATE attribute of the generated dataset (presumably the
# ground-truth average treatment effect for this sample — TODO confirm).
dataset.ATE

-1.2200000000000002

In [9]:
# Build the linear-regression causal model on the generated dataset, fit it,
# and request its ATE estimate; the estimate is the cell's displayed output.
model = LinearRegressionCausalModel(dataset)
model.fit()
model.estimate(estimand=Constants.Model.ATE_ESTIMAND)

-1.245133038486609

## Benchmarking Demo

In [11]:
%%time
# Sample Benchmark


HIGH, MEDIUM, LOW = Constants.AxisLevels.HIGH, Constants.AxisLevels.MEDIUM, Constants.AxisLevels.LOW
param_grid = dgp_params = {
    Constants.AxisNames.TREATMENT_NONLINEARITY: [MEDIUM, LOW],
    Constants.AxisNames.OUTCOME_NONLINEARITY: [LOW]
#     Constants.AxisNames.TREATMENT_NONLINEARITY: [HIGH, MEDIUM, LOW],
#     Constants.AxisNames.OUTCOME_NONLINEARITY: [HIGH, MEDIUM, LOW]
}

covar_data_source = data_sources.load_random_normal_covariates(
    n_covars=10,
    n_observations=500)

result = run_sampled_dgp_benchmark(
    model_class=LinearRegressionCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    data_source=covar_data_source,
    param_grid=param_grid,
    num_dgp_samples=1,
    num_data_samples_per_dgp=1,
    enable_ray_multiprocessing=True)

2020-01-20 11:32:53,111	INFO resource_spec.py:216 -- Starting Ray with 3.96 GiB memory available for workers and up to 1.99 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).


CPU times: user 430 ms, sys: 300 ms, total: 730 ms
Wall time: 7.42 s


In [12]:
# Wrap the benchmark result in a DataFrame so it renders as a table.
pd.DataFrame(result)

Unnamed: 0,param_outcome_nonlinearity,param_treatment_nonlinearity,absolute mean bias,root mean squared error
0,LOW,MEDIUM,0.079315,0.079315
1,LOW,LOW,0.056143,0.056143


In [13]:
# Concrete Benchmark
# Benchmarks the model against one specific, already-sampled DGP instead of a
# grid of sampled DGPs.

covar_data_source = data_sources.load_random_normal_covariates(
    n_covars=10,
    n_observations=1000)
dgp_params = build_parameters_from_axis_levels({
    Constants.AxisNames.OUTCOME_NONLINEARITY: Constants.AxisLevels.LOW,
    Constants.AxisNames.TREATMENT_NONLINEARITY: Constants.AxisLevels.LOW,
})

dgp_sampler = DataGeneratingProcessSampler(
    parameters=dgp_params, data_source=covar_data_source)

dgp = dgp_sampler.sample_dgp()

# The benchmark is handed the DGP itself, so no dataset is generated in this
# cell and the `dataset` bound in the Model Demo section is left untouched.
result = run_concrete_dgp_benchmark(
    dgp=dgp,
    model_class=LinearRegressionCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    num_samples_from_dgp=1,
    enable_ray_multiprocessing=True)

In [14]:
# Display `result` as bound by the most recently executed benchmark cell.
result

{'absolute mean bias': 0.014085615675211383,
 'root mean squared error': 0.014085615675211383}

## Sandbox DGP

In [74]:
# Sandbox check: construct the base DataGeneratingProcess with no arguments.
DataGeneratingProcess()

<cause_ml.data_generation.data_generating_process.DataGeneratingProcess at 0x7f8fae8c93c8>

In [70]:
class ConcreteDGP(DataGeneratingProcess):
    """Sandbox subclass of DataGeneratingProcess for experimenting with dgp_method.

    NOTE(review): `dgp_method` is not imported by any cell visible in this
    notebook, so this cell presumably relies on a name left in the kernel by
    a removed cell — add the import before relying on Restart & Run All.
    """

    # Class attribute built by concatenating the base class's TEST with [4, 2, 3].
    TEST = DataGeneratingProcess.TEST + [4, 2, 3]

    # Decorated with dgp_method(Constants.COVARIATES_NAME, []); the empty list
    # is presumably the method's required inputs — TODO confirm.
    @dgp_method(Constants.COVARIATES_NAME, [])
    def _generate_observed_covars(self, input_vars):
        # Placeholder value; `input_vars` is not used.
        return 3

<class '__main__.ConcreteDGP'> ConcreteDGP (<class '__main__.DataGeneratingProcess'>,) {'__module__': '__main__', '__qualname__': 'ConcreteDGP', 'TEST': [1, 2, 3, 4, 2, 3, 4, 2, 3, 4, 2, 3], '_generate_observed_covars': <__main__.DGPGeneratorMethodWrapper object at 0x7f8fae8d50f0>}


In [57]:
# Instantiate the sandbox DGP and call the decorated method directly.
# NOTE(review): the call passes no `input_vars` although the method declares
# it — presumably the dgp_method wrapper supplies it; confirm.
# This rebinds `dgp`, replacing the DGP sampled in the benchmark section.
dgp = ConcreteDGP()
dgp._generate_observed_covars()

3