In [1]:
# Hidden Config Cell

#!python -m pip install -e ../../../../Maccabee > /dev/null

In [2]:
from maccabee.data_sources.data_source_builders import build_lalonde_datasource
# Build the data source via Maccabee's Lalonde builder (no arguments needed).
# Presumably this wraps the Lalonde covariate data — confirm against the
# builder's documentation.
lalonde_data_source = build_lalonde_datasource()

# Bare trailing expression: the notebook shows the object's repr as cell output
# (a StaticDataSource in the recorded run).
lalonde_data_source

<maccabee.data_sources.data_sources.StaticDataSource at 0x7f9119fdb790>

In [3]:
from maccabee.constants import Constants
from maccabee.benchmarking import benchmark_model_using_sampled_dgp_grid
from maccabee.modeling.models import LinearRegressionCausalModel
import pandas as pd

# LEVELS is unpacked positionally into the three level names used below.
LOW, MEDIUM, HIGH = Constants.AxisLevels.LEVELS

# Grid of DGP sampling parameters: treatment nonlinearity is pinned at LOW
# while outcome nonlinearity is swept over HIGH, MEDIUM and LOW.
param_grid = {}
param_grid[Constants.AxisNames.TREATMENT_NONLINEARITY] = [LOW]
param_grid[Constants.AxisNames.OUTCOME_NONLINEARITY] = [HIGH, MEDIUM, LOW]

# Benchmark the linear-regression causal model on the ATE estimand using the
# Lalonde data source. The recorded output shows one result row per grid
# combination.
results = benchmark_model_using_sampled_dgp_grid(
    data_source=lalonde_data_source,
    dgp_param_grid=param_grid,
    model_class=LinearRegressionCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    num_dgp_samples=10,
    num_sampling_runs_per_dgp=1,
    num_samples_from_dgp=10,
)

In [4]:
# Render the benchmark results as a DataFrame so the notebook displays a table.
# In the recorded output the columns are the two grid parameters followed by
# each metric and its "(std)" companion.
pd.DataFrame(results)

Unnamed: 0,param_outcome_nonlinearity,param_treatment_nonlinearity,RMSE,RMSE (std),AMBP,AMBP (std),MABP,MABP (std)
0,HIGH,LOW,0.096,0.077,20.528,30.686,21.523,30.321
1,MEDIUM,LOW,0.03,0.015,159.336,464.794,160.574,464.391
2,LOW,LOW,0.015,0.002,1.224,1.726,4.551,6.341


In [5]:
from maccabee.data_sources.data_source_builders import build_csv_datasource
import numpy as np

# Toy covariate table: one row per observation, columns are (Age, Gender).
# The CSV target is named first because the next cell reuses `file_name`.
file_name = "data.csv"
data = np.array([[1.07, 0], [3.5, 1], [5.17, 0]])

# NOTE(review): per the NumPy docs, savetxt prepends its `comments` prefix
# ("# " by default) to the header, so the first line written is
# "# Age, Gender" — confirm the CSV loader copes with that prefix.
np.savetxt(fname=file_name, X=data, header="Age, Gender", delimiter=',')

In [6]:
# Load the CSV written above as a Maccabee data source.
# The second argument lists "Gender"; presumably these are the names of the
# discrete covariates (the stochastic builder further down takes the same kind
# of list as `discrete_covar_names`) — TODO confirm against the builder's docs.
static_datasource = build_csv_datasource(file_name, ["Gender"])

# Bare trailing expression: shows the object's repr (a StaticDataSource in the
# recorded run).
static_datasource

<maccabee.data_sources.data_sources.StaticDataSource at 0x7f90ee705f50>

In [7]:
# Show the covariate data held by the data source.
# NOTE(review): in the recorded output the Age values differ from the raw
# inputs (they span -1.0 to 1.0) while Gender is still 0/1 — it looks like
# continuous columns are rescaled on load; confirm in the library docs.
static_datasource.get_covar_df()

Unnamed: 0,Age,Gender
0,-1.0,0.0
1,0.185366,1.0
2,1.0,0.0


In [16]:
from maccabee.data_sources.data_source_builders import build_stochastic_datasource

# Dimensions of the synthetic covariate matrix.
N_obs = 1000
N_covars = 10

# Positions of the columns that generate_data turns into 0/1 indicators.
binary_col_indeces = [0, 3]

# Column labels X0 ... X9, one per covariate.
covar_names = [f"X{i}" for i in range(N_covars)]


def generate_data():
    """Draw a fresh random covariate matrix with some binarised columns.

    Every call samples an (N_obs, N_covars) array of independent standard
    normal values from NumPy's global random state, then replaces each column
    listed in ``binary_col_indeces`` with an indicator of whether the drawn
    value was strictly positive.

    Returns:
        numpy.ndarray: float array of shape (N_obs, N_covars); the binarised
        columns contain only 0.0 and 1.0.
    """
    draws = np.random.normal(loc=0.0, scale=1.0, size=(N_obs, N_covars))

    # Threshold all binary columns at zero in one assignment; the boolean mask
    # is stored as 0.0 / 1.0 because the target array is float.
    draws[:, binary_col_indeces] = draws[:, binary_col_indeces] > 0

    return draws

# Wrap the generator function in a Maccabee data source.
# The discrete covariate names are derived from binary_col_indeces rather than
# hard-coded, so they cannot drift from the columns generate_data binarises
# (with the current settings this evaluates to ["X0", "X3"]).
stochastic_datasource = build_stochastic_datasource(
        generate_data,
        covar_names=covar_names,
        discrete_covar_names=[covar_names[i] for i in binary_col_indeces])

# Bare trailing expression: shows the object's repr (a StochasticDataSource in
# the recorded run).
stochastic_datasource

<maccabee.data_sources.data_sources.StochasticDataSource at 0x7f91192f2d10>

In [18]:
# Preview the first rows of the covariate frame (head() defaults to 5 rows).
# In the recorded output X0 and X3 hold only 0.0 / 1.0, matching the columns
# binarised by generate_data.
stochastic_datasource.get_covar_df().head()

Unnamed: 0,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9
0,1.0,0.397143,-0.286716,1.0,-0.104719,0.009483,0.348603,-0.158236,-0.021799,-0.909613
1,0.0,-0.166077,-0.54677,1.0,-0.05803,-0.382312,-0.222217,0.299738,0.028531,-0.665068
2,1.0,0.452277,-0.666157,0.0,-0.130254,-0.167185,0.463603,-0.100672,0.155738,-0.480558
3,0.0,0.500995,0.070425,0.0,0.074098,0.646198,0.510876,0.392154,0.309803,-0.076971
4,1.0,-0.534493,-0.425811,0.0,-0.290319,-0.010616,0.072021,0.02328,0.055812,-0.296402
