In [1]:
!pip install -e ../CauseML

Obtaining file:///home/jovyan/work/CauseML
Installing collected packages: cause-ml
  Found existing installation: cause-ml 0.0.11
    Uninstalling cause-ml-0.0.11:
      Successfully uninstalled cause-ml-0.0.11
  Running setup.py develop for cause-ml
Successfully installed cause-ml


In [1]:
from cause_ml.data_generation import DataGeneratingProcess

In [2]:
# Enable IPython's autoreload extension in mode 2: modules are re-imported
# before each cell executes, so edits to the editable cause_ml install are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from cause_ml.parameters import build_parameters_from_axis_levels
from cause_ml.constants import Constants
import cause_ml.data_sources as data_sources
from cause_ml.data_generation import DataGeneratingProcessSampler
from cause_ml.modeling.models import LinearRegressionCausalModel
from cause_ml.benchmarking import run_sampled_dgp_benchmark, run_concrete_dgp_benchmark



In [4]:
import pandas as pd

## Model Demo

In [7]:
# Covariate source: 2000 observations of 10 covariates.
covar_data_source = data_sources.load_random_normal_covariates(
    n_observations=2000,
    n_covars=10,
)

# DGP parameters with both nonlinearity axes set to MEDIUM.
dgp_params = build_parameters_from_axis_levels(
    {
        Constants.AxisNames.OUTCOME_NONLINEARITY: Constants.AxisLevels.MEDIUM,
        Constants.AxisNames.TREATMENT_NONLINEARITY: Constants.AxisLevels.MEDIUM,
    }
)

# Sampler that combines the parameters with the covariate source.
dgp_sampler = DataGeneratingProcessSampler(
    data_source=covar_data_source,
    parameters=dgp_params,
    dgp_kwargs={"analysis_mode": False},
)

# Draw one DGP from the sampler, then one dataset from that DGP.
dgp = dgp_sampler.sample_dgp()
dataset = dgp.generate_dataset()

In [20]:
# Display the observed data of the generated dataset (columns X0..X9, T and Y).
dataset.observed_data

Unnamed: 0,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,T,Y
1568,-0.196141,0.205069,-0.624708,0.227752,0.109602,-0.013306,0.413004,0.608288,0.246142,-0.299590,1,2.264143
614,-0.603249,0.235596,-0.613809,0.382893,0.029270,0.447050,0.327484,0.337328,-0.256597,-0.294624,0,0.962745
1622,-0.268824,-0.230482,-0.090645,0.182829,0.032529,0.055267,0.324694,-0.001857,-0.415156,-0.151814,1,1.423383
1054,-0.259423,0.039329,-0.103771,-0.043999,-0.153523,-0.192860,-0.042508,0.293374,0.299389,-0.246721,0,1.136054
1051,-0.159908,0.456943,0.161281,0.226828,0.248419,0.458495,0.094569,-0.543445,0.088730,0.177025,0,-1.179464
...,...,...,...,...,...,...,...,...,...,...,...,...
1305,-0.239222,0.260964,0.032593,-0.015654,0.180464,0.151719,-0.018002,-0.157380,0.069499,-0.027314,1,0.569022
452,-0.499559,0.258799,-0.357254,0.390726,-0.089519,0.215259,0.288217,-0.018392,-0.211642,-0.221028,1,1.047652
840,0.179884,0.323715,0.297043,0.414579,-0.668454,-0.428524,-0.130695,0.296353,-0.017930,-0.442738,0,-1.362328
119,0.073725,-0.168594,0.689835,-0.114071,0.007010,-0.146272,0.077329,-0.293848,0.206724,0.246238,0,-0.588730


In [7]:
# ATE recorded on the generated dataset; the reference value for the model's
# ATE estimate computed in the next cell.
dataset.ATE

0.7279999999999999

In [8]:
# Fit the linear regression causal model on the generated dataset, then
# display its estimate for the ATE estimand.
model = LinearRegressionCausalModel(dataset)
model.fit()
model.estimate(estimand=Constants.Model.ATE_ESTIMAND)

0.7492414562127729

## Benchmarking Demo

In [22]:
%%time
# Sample Benchmark


HIGH, MEDIUM, LOW = Constants.AxisLevels.HIGH, Constants.AxisLevels.MEDIUM, Constants.AxisLevels.LOW
param_grid = dgp_params = {
    Constants.AxisNames.TREATMENT_NONLINEARITY: [MEDIUM, LOW],
    Constants.AxisNames.OUTCOME_NONLINEARITY: [LOW]
#     Constants.AxisNames.TREATMENT_NONLINEARITY: [HIGH, MEDIUM, LOW],
#     Constants.AxisNames.OUTCOME_NONLINEARITY: [HIGH, MEDIUM, LOW]
}

covar_data_source = data_sources.load_random_normal_covariates(
    n_covars=10,
    n_observations=750)

result = run_sampled_dgp_benchmark(
    model_class=LinearRegressionCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    data_source=covar_data_source,
    param_grid=param_grid,
    num_dgp_samples=1,
    num_data_samples_per_dgp=50,
    dgp_kwargs={"analysis_mode": False},
    enable_ray_multiprocessing=True)

CPU times: user 1.34 s, sys: 850 ms, total: 2.19 s
Wall time: 27.4 s


In [18]:
# Tabulate the sampled-DGP benchmark result as a DataFrame for display.
pd.DataFrame(result)

Unnamed: 0,param_outcome_nonlinearity,param_treatment_nonlinearity,absolute mean bias,root mean squared error
0,LOW,MEDIUM,0.075148,0.098322
1,LOW,LOW,0.005661,0.025789


In [None]:
# Concrete Benchmark

# Covariate source: 1000 observations of 10 covariates.
covar_data_source = data_sources.load_random_normal_covariates(
    n_observations=1000,
    n_covars=10,
)

# DGP parameters with both nonlinearity axes set to LOW.
dgp_params = build_parameters_from_axis_levels(
    {
        Constants.AxisNames.OUTCOME_NONLINEARITY: Constants.AxisLevels.LOW,
        Constants.AxisNames.TREATMENT_NONLINEARITY: Constants.AxisLevels.LOW,
    }
)

# NOTE(review): unlike the other cells, no dgp_kwargs are passed here, so the
# sampler's default is used - confirm this is intended.
dgp_sampler = DataGeneratingProcessSampler(
    data_source=covar_data_source,
    parameters=dgp_params,
)

dgp = dgp_sampler.sample_dgp()
# NOTE(review): this rebinds `dataset`, but the benchmark call below receives
# the DGP itself rather than this dataset - confirm the line is still needed.
dataset = dgp.generate_dataset()

# Benchmark the linear regression model against this single, fixed DGP.
result = run_concrete_dgp_benchmark(
    model_class=LinearRegressionCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    dgp=dgp,
    num_samples_from_dgp=1,
    enable_ray_multiprocessing=True,
)

In [None]:
# Display the result returned by the concrete-DGP benchmark.
result