In [14]:
!pip install -e ../CauseML

Obtaining file:///home/jovyan/work/CauseML
Installing collected packages: cause-ml
  Found existing installation: cause-ml 0.0.11
    Uninstalling cause-ml-0.0.11:
      Successfully uninstalled cause-ml-0.0.11
  Running setup.py develop for cause-ml
Successfully installed cause-ml


In [1]:
from cause_ml.parameters import build_parameters_from_axis_levels
from cause_ml.constants import Constants
from cause_ml.data_generation import DataGeneratingProcessSampler, DataGeneratingProcess
import cause_ml.data_sources as data_sources
from cause_ml.modeling.models import LinearRegressionCausalModel
from cause_ml.benchmarking import run_sampled_dgp_benchmark, run_concrete_dgp_benchmark



In [2]:
import pandas as pd

## Model Demo

In [10]:
# Model-demo setup: covariate source -> DGP parameters -> sampled DGP -> dataset.
# NOTE(review): no random seed is set in this cell, so the sampled DGP and the
# generated data presumably differ between runs — confirm how cause_ml seeds
# its sampling.

# Covariates from the random-normal loader: 10 covariates, 1000 observations.
covar_data_source = data_sources.load_random_normal_covariates(
    n_covars=10,
    n_observations=1000,
)

# Both nonlinearity axes are set to the LOW level.
dgp_params = build_parameters_from_axis_levels(
    {
        Constants.AxisNames.OUTCOME_NONLINEARITY: Constants.AxisLevels.LOW,
        Constants.AxisNames.TREATMENT_NONLINEARITY: Constants.AxisLevels.LOW,
    }
)

# The sampler combines the parameters with the covariate source.
dgp_sampler = DataGeneratingProcessSampler(
    data_source=covar_data_source,
    parameters=dgp_params,
)

# Sample one DGP, then generate one dataset from it.
dgp = dgp_sampler.sample_dgp()
dataset = dgp.generate_dataset()

In [11]:
# Display the observed data of the generated dataset (covariate columns plus
# the T and Y columns, per the recorded output).
dataset.observed_data

Unnamed: 0,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,T,Y
0,-0.032705,0.379023,-0.052096,-0.174033,-0.130430,0.021379,-0.277230,-0.202362,0.204592,0.022504,1.0,0.721517
1,0.518542,-0.614966,0.237754,-0.245709,0.009041,0.223363,-0.506292,0.205042,-0.129543,-0.142800,1.0,0.650597
2,-0.119060,-0.175836,-0.037312,0.360897,-0.150054,0.101863,-0.309814,-0.187791,-0.246574,-0.090366,1.0,0.827244
3,0.223553,0.014960,0.549030,-0.007186,0.176155,-0.507930,-0.092807,0.273350,-0.340771,0.281374,0.0,1.981864
4,-0.395687,0.671763,0.091527,0.221473,-0.533656,-0.376643,0.235341,-0.297709,-0.200014,0.105650,1.0,1.745069
...,...,...,...,...,...,...,...,...,...,...,...,...
995,-0.268724,0.942336,0.155024,-0.069956,-0.468402,-0.773120,0.145647,-0.053227,-0.180862,0.076523,0.0,1.285377
996,-0.421429,-0.004975,-0.475427,0.563067,-0.264242,0.079006,-0.026643,-0.012472,-0.064174,-0.290809,0.0,-1.651300
997,-0.317455,-0.194330,-0.077840,0.041476,-0.079446,0.651287,-0.587402,-0.295974,-0.195516,0.161256,0.0,-0.702514
998,-0.090477,0.816784,0.177120,0.075610,-0.334477,-0.149501,0.182300,0.113051,-0.074797,0.090159,1.0,2.629175


In [16]:
# Shape check: reshape the dataset's Y values into a single column and show
# the resulting shape.
dataset.Y.reshape(-1, 1).shape

(1000, 1)

In [12]:
# Display the dataset's ATE attribute.
# NOTE(review): presumably the ground-truth average treatment effect of the
# sampled DGP — confirm against the cause_ml dataset definition.
dataset.ATE

1.293

In [13]:
# Fit the linear-regression causal model on the generated dataset.
model = LinearRegressionCausalModel(dataset)
model.fit()

# Capture the ATE estimate and print it so it can be compared with
# `dataset.ATE`; only the cell's last expression is displayed automatically,
# so a bare call here would never be shown.
ate_estimate = model.estimate(estimand=Constants.Model.ATE_ESTIMAND)
print(f"Estimated ATE: {ate_estimate}")

# Coefficients of the wrapped estimator, displayed as the cell output.
model.model.coef_

array([[ 0.00648296,  0.83314399,  3.0997415 ,  0.02700677,  0.20417691,
        -0.06301565,  0.82440101, -0.26077052, -0.62801124,  0.11468437,
         1.29180402]])

## Benchmarking Demo

In [None]:
%%time
# Sample Benchmark


HIGH, MEDIUM, LOW = Constants.AxisLevels.HIGH, Constants.AxisLevels.MEDIUM, Constants.AxisLevels.LOW
param_grid = dgp_params = {
    Constants.AxisNames.TREATMENT_NONLINEARITY: [MEDIUM, LOW],
    Constants.AxisNames.OUTCOME_NONLINEARITY: [LOW]
#     Constants.AxisNames.TREATMENT_NONLINEARITY: [HIGH, MEDIUM, LOW],
#     Constants.AxisNames.OUTCOME_NONLINEARITY: [HIGH, MEDIUM, LOW]
}

covar_data_source = data_sources.load_random_normal_covariates(
    n_covars=10,
    n_observations=500)

result = run_sampled_dgp_benchmark(
    model_class=LinearRegressionCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    data_source=covar_data_source,
    param_grid=param_grid,
    num_dgp_samples=1,
    num_data_samples_per_dgp=1,
    enable_ray_multiprocessing=True)

In [None]:
# Tabulate `result` as a DataFrame for display.
pd.DataFrame(result)

In [None]:
# Concrete benchmark: sample a single DGP, then benchmark the model against it.

# Covariates from the random-normal loader: 10 covariates, 1000 observations.
covar_data_source = data_sources.load_random_normal_covariates(
    n_covars=10,
    n_observations=1000,
)

# Both nonlinearity axes are set to the LOW level.
dgp_params = build_parameters_from_axis_levels(
    {
        Constants.AxisNames.OUTCOME_NONLINEARITY: Constants.AxisLevels.LOW,
        Constants.AxisNames.TREATMENT_NONLINEARITY: Constants.AxisLevels.LOW,
    }
)

dgp_sampler = DataGeneratingProcessSampler(
    data_source=covar_data_source,
    parameters=dgp_params,
)

dgp = dgp_sampler.sample_dgp()
# NOTE(review): `dataset` is not passed to the benchmark call below — confirm
# whether this draw is still needed in this cell.
dataset = dgp.generate_dataset()

# Benchmark the model class directly against the sampled DGP.
result = run_concrete_dgp_benchmark(
    dgp=dgp,
    model_class=LinearRegressionCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    num_samples_from_dgp=1,
    enable_ray_multiprocessing=True,
)

In [None]:
# Display `result`; the preceding cell binds it to the return value of
# run_concrete_dgp_benchmark.
result

## Sandbox DGP

In [None]:
# Sandbox probe: instantiate the base DataGeneratingProcess class directly
# with no arguments.
# NOTE(review): elsewhere in this notebook DGPs are obtained from a sampler;
# whether a bare instantiation is supported is not visible here — confirm.
DataGeneratingProcess()

In [None]:
class ConcreteDGP(DataGeneratingProcess):
    """Sandbox subclass of DataGeneratingProcess with one decorated generator method."""
    # Class-level TEST: the parent's TEST value with [4, 2, 3] added via `+`.
    TEST = DataGeneratingProcess.TEST + [4, 2, 3]
    # NOTE(review): `dgp_method` is not imported in any visible cell, so this
    # class definition presumably raises NameError on a fresh kernel — confirm
    # which cause_ml module provides it and import it in the imports cell.
    @dgp_method(Constants.COVARIATES_NAME, [])
    def _generate_observed_covars(self, input_vars):
        """Placeholder generator: ignores `input_vars` and returns the constant 3."""
        return 3

In [None]:
# Instantiate the sandbox subclass and call its decorated generator method.
# NOTE(review): the method is declared with an `input_vars` parameter but is
# called here with no arguments — presumably the `dgp_method` decorator
# supplies it; confirm.
dgp = ConcreteDGP()
dgp._generate_observed_covars()