In [1]:
!pip install -e ../CauseML

Obtaining file:///home/jovyan/work/CauseML
Installing collected packages: cause-ml
  Found existing installation: cause-ml 0.0.11
    Uninstalling cause-ml-0.0.11:
      Successfully uninstalled cause-ml-0.0.11
  Running setup.py develop for cause-ml
Successfully installed cause-ml


In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the
# (editable-installed) package source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [8]:
from cause_ml.parameters import build_parameters_from_axis_levels
from cause_ml.constants import Constants
from cause_ml.data_generation import DataGeneratingProcessSampler, DataGeneratingProcess
import cause_ml.data_sources as data_sources
from cause_ml.modeling.models import LinearRegressionCausalModel
from cause_ml.benchmarking import run_sampled_dgp_benchmark, run_concrete_dgp_benchmark

In [3]:
import pandas as pd

## Model Demo

In [9]:
# Covariate source: 10 random-normal covariates over 2000 observations.
covar_data_source = data_sources.load_random_normal_covariates(
    n_covars=10,
    n_observations=2000,
)

# Request the LOW level on both nonlinearity axes and build the DGP parameters.
low_nonlinearity_levels = {
    Constants.AxisNames.OUTCOME_NONLINEARITY: Constants.AxisLevels.LOW,
    Constants.AxisNames.TREATMENT_NONLINEARITY: Constants.AxisLevels.LOW,
}
dgp_params = build_parameters_from_axis_levels(low_nonlinearity_levels)

# Sample one data generating process from the sampler, then generate a dataset
# from that process.
dgp_sampler = DataGeneratingProcessSampler(
    parameters=dgp_params,
    data_source=covar_data_source,
)
dgp = dgp_sampler.sample_dgp()
dataset = dgp.generate_dataset()

AttributeError: 'NoneType' object has no attribute '_generated_data'

In [6]:
# Display the observed-data table of the generated dataset
# (covariate columns X0..X9 plus T and Y in the saved output).
dataset.observed_data

Unnamed: 0,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,T,Y
1436,-0.133430,0.312300,0.064449,-0.428943,-0.049579,0.154230,-0.020638,0.232540,0.028152,0.288982,0,-0.661855
512,-0.265310,0.107664,-0.216597,-0.274784,0.537381,0.207100,-0.179762,0.375493,-0.280309,-0.076720,0,1.923871
1586,0.096998,0.406151,-0.064930,0.293781,0.161240,-0.325511,-0.438070,0.335768,-0.335649,0.000397,1,1.259728
765,0.024792,0.309959,0.066594,-0.021045,0.247439,-0.101765,-0.227208,0.111930,0.096067,0.179526,0,-0.557728
849,0.002324,0.113300,0.142672,0.148820,0.313056,0.333502,-0.182936,0.557433,-0.420675,0.199617,1,1.415550
...,...,...,...,...,...,...,...,...,...,...,...,...
405,0.329482,-0.254272,-0.108463,0.149866,-0.501884,-0.207926,0.108103,-0.340175,0.068467,-0.180080,1,0.773322
638,0.076888,-0.035820,0.511463,-0.355263,0.560716,0.487174,-0.376144,0.259740,-0.379125,0.193632,0,-0.958696
954,-0.012756,-0.310287,0.277102,0.441400,0.233678,0.230396,-0.125061,-0.317551,-0.130323,0.466648,1,0.018679
1359,0.579912,-0.579258,0.915461,0.218281,0.225897,0.229701,-0.609679,-0.338018,-0.435324,0.499625,0,-2.335373


In [81]:
# Show the dataset's ATE attribute as a reference point for the model estimate.
# NOTE(review): presumably the true average treatment effect of the DGP — confirm.
dataset.ATE

-1.9600000000000004

In [82]:
# Fit the linear-regression causal model on the generated dataset, then display
# its estimate for the ATE estimand.
model = LinearRegressionCausalModel(dataset)
model.fit()
ate_estimate = model.estimate(estimand=Constants.Model.ATE_ESTIMAND)
ate_estimate

-1.6436335485712636

## Benchmarking Demo

In [85]:
%%time
# Sample Benchmark


HIGH, MEDIUM, LOW = Constants.AxisLevels.HIGH, Constants.AxisLevels.MEDIUM, Constants.AxisLevels.LOW
param_grid = dgp_params = {
#     Constants.AxisNames.TREATMENT_NONLINEARITY: [MEDIUM, LOW],
#     Constants.AxisNames.OUTCOME_NONLINEARITY: [LOW]
    Constants.AxisNames.TREATMENT_NONLINEARITY: [HIGH, MEDIUM, LOW],
    Constants.AxisNames.OUTCOME_NONLINEARITY: [HIGH, MEDIUM, LOW]
}

covar_data_source = data_sources.load_random_normal_covariates(
    n_covars=7,
    n_observations=250)

result = run_sampled_dgp_benchmark(
    model_class=LinearRegressionCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    data_source=covar_data_source,
    param_grid=param_grid,
    num_dgp_samples=5,
    num_data_samples_per_dgp=25,
    enable_ray_multiprocessing=True)

CPU times: user 8.96 s, sys: 6.48 s, total: 15.4 s
Wall time: 2min 50s


In [86]:
# Render the sampled-benchmark result as a DataFrame
# (one row per parameter combination in the saved output).
pd.DataFrame(result)

Unnamed: 0,param_outcome_nonlinearity,param_treatment_nonlinearity,absolute mean bias,root mean squared error
0,HIGH,HIGH,0.337211,0.361943
1,HIGH,MEDIUM,0.327949,0.396993
2,HIGH,LOW,0.135947,0.232954
3,MEDIUM,HIGH,0.186938,0.231396
4,MEDIUM,MEDIUM,0.177274,0.224901
5,MEDIUM,LOW,0.064819,0.125216
6,LOW,HIGH,0.227031,0.284352
7,LOW,MEDIUM,0.070123,0.126957
8,LOW,LOW,0.001396,0.052522


In [None]:
# Concrete Benchmark
#
# Samples a single DGP with LOW nonlinearity on both axes and benchmarks the
# linear-regression model against that specific DGP.

covar_data_source = data_sources.load_random_normal_covariates(
    n_covars=10,
    n_observations=1000,
)

low_nonlinearity_levels = {
    Constants.AxisNames.OUTCOME_NONLINEARITY: Constants.AxisLevels.LOW,
    Constants.AxisNames.TREATMENT_NONLINEARITY: Constants.AxisLevels.LOW,
}
dgp_params = build_parameters_from_axis_levels(low_nonlinearity_levels)

dgp_sampler = DataGeneratingProcessSampler(
    parameters=dgp_params,
    data_source=covar_data_source,
)
dgp = dgp_sampler.sample_dgp()
# A dataset is generated here as well; the benchmark call below receives the
# DGP itself, not this dataset.
dataset = dgp.generate_dataset()

# Benchmark settings collected in one place, then expanded as keyword arguments.
concrete_benchmark_kwargs = dict(
    dgp=dgp,
    model_class=LinearRegressionCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    num_samples_from_dgp=1,
    enable_ray_multiprocessing=True,
)
result = run_concrete_dgp_benchmark(**concrete_benchmark_kwargs)

In [None]:
# Display the value returned by the concrete-DGP benchmark run.
result