In [1]:
# Hidden Config Cell

#!python -m pip install -e ../../../../Maccabee > /dev/null

In [2]:
from maccabee.modeling.performance_metrics import add_performance_metric
from maccabee.constants import Constants


# Name under which the custom performance metric is registered below and
# later looked up in the aggregated benchmark results.
perf_metric_name = "MAE"

def mean_absolute_error(avg_effect_estimate_vals, avg_effect_true_vals):
    """Return the mean absolute error between estimated and true average effects.

    Args:
        avg_effect_estimate_vals: Estimated average effect value(s). May be a
            scalar or any array-like (list, tuple, NumPy array).
        avg_effect_true_vals: True average effect value(s), broadcastable
            against ``avg_effect_estimate_vals``.

    Returns:
        The mean of the element-wise absolute differences.
    """
    # Imported inside the function, as in the original, so the callable does
    # not depend on a module-level ``np`` binding.
    import numpy as np

    # Coerce to arrays so plain Python sequences are accepted too; subtracting
    # two lists directly would raise a TypeError.
    estimates = np.asarray(avg_effect_estimate_vals)
    truths = np.asarray(avg_effect_true_vals)
    return np.mean(np.abs(estimates - truths))
    
# Register the callable as a performance metric at the average-estimand
# aggregation level, under the name stored in perf_metric_name.
add_performance_metric(
    metric_callable=mean_absolute_error,
    metric_name=perf_metric_name,
    aggregation_level=Constants.Model.AVERAGE_ESTIMANDS,
)

In [4]:
# Define axes and metrics to analyze
from maccabee.data_analysis.data_metrics import add_data_metric
from maccabee.constants import Constants
from sklearn.linear_model import LinearRegression

# Data axis under which the custom data metric is registered and later
# looked up: outcome nonlinearity.
data_axis = Constants.AxisNames.OUTCOME_NONLINEARITY

# Define the callable function
def noisy_biased_outcome_linearity_metric(X, Y0, outcome_noise, bias):
    """Score how well a linear model in ``X`` explains the noisy, shifted outcome.

    The regression target is ``Y0 + outcome_noise + bias``. A scikit-learn
    ``LinearRegression`` is fitted on ``(X, target)`` and scored on the same
    data.

    Args:
        X: Covariates used as regression features.
        Y0: Potential outcome without treatment.
        outcome_noise: Noise term added to ``Y0``.
        bias: Constant offset added to the target.

    Returns:
        The value of ``LinearRegression.score(X, target)`` (the R^2 of the fit).
    """
    shifted_outcome = Y0 + outcome_noise + bias
    linear_model = LinearRegression()
    linear_model.fit(X, shifted_outcome)
    return linear_model.score(X, shifted_outcome)
    
# Define the metric dict.
# The display name is also the key used to select this metric in
# DATA_METRICS_SPEC and to read its aggregated value from the results.
data_metric_name = "lin (X, Noisy, Biased Y)"
metric_dict = {
    "name": data_metric_name,
    # Keys are parameter names of the callable; values are the DGP variable
    # name constants supplied for them.
    # NOTE(review): presumably maccabee resolves these names to the sampled
    # data when the metric is evaluated - confirm against add_data_metric.
    "args": {
        "X": Constants.DGPVariables.COVARIATES_NAME,
        "Y0": Constants.DGPVariables.POTENTIAL_OUTCOME_WITHOUT_TREATMENT_NAME,
        "outcome_noise": Constants.DGPVariables.OUTCOME_NOISE_NAME   
    },
    # Parameters of the callable that receive a fixed literal value.
    "constant_args": {
        "bias": 42
    },
    # The callable that computes the metric value.
    "function": noisy_biased_outcome_linearity_metric
}

# Register the metric under the axis stored in data_axis.
add_data_metric(data_axis, metric_dict)

In [6]:
from maccabee.data_sources.data_source_builders import build_random_normal_datasource
from maccabee.benchmarking import benchmark_model_using_sampled_dgp
from maccabee.modeling.models import LinearRegressionCausalModel
from maccabee.parameters import build_default_parameters

# Default parameter set, passed to the benchmark as the DGP sampling parameters.
params = build_default_parameters()

# Random normal data source with 5 covariates and 1000 observations.
normal_data_source = build_random_normal_datasource(n_covars=5, n_observations=1000)

# Select the data metrics to compute, keyed by data axis: the newly
# registered custom metric plus "Lin r2(X_obs, Y0)", which is not defined in
# this notebook.
# NOTE(review): presumably a metric that ships with maccabee - confirm.
DATA_METRICS_SPEC = {
    Constants.AxisNames.OUTCOME_NONLINEARITY: [
        data_metric_name,
        "Lin r2(X_obs, Y0)",
    ]
}

# Benchmark the linear-regression causal model on sampled DGPs. Of the six
# returned values, only the aggregated performance metrics (1st) and the
# aggregated data metrics (4th) are kept.
perf_agg_metrics, _, _, data_agg_metrics, _, _ = benchmark_model_using_sampled_dgp(
    # What is benchmarked, and on which data
    model_class=LinearRegressionCausalModel,
    estimand=Constants.Model.ATE_ESTIMAND,
    data_source=normal_data_source,
    dgp_sampling_params=params,
    # Sampling budget
    num_dgp_samples=10,
    num_sampling_runs_per_dgp=1,
    num_samples_from_dgp=16,
    # Data metric collection
    data_analysis_mode=True,  # SET DATA ANALYSIS MODE
    data_metrics_spec=DATA_METRICS_SPEC,  # PROVIDE SPEC
    n_jobs=8,
)

In [7]:
# New performance metric: aggregated value stored under the custom metric's
# name (perf_metric_name).
perf_agg_metrics[perf_metric_name]

0.023

In [8]:
# New data metric: the result key is the axis name and the metric name joined
# by a single space.
data_agg_metrics[f"{data_axis} {data_metric_name}"]

0.476