In [3]:
import numpy as np
import pandas as pd
import scipy
from CATE_utils import data_generator, regressor, clf, generate_EBM_CATE_learners, generate_CATE_learners, run_and_report_mse

In [1]:
from sklearn.model_selection import train_test_split

def doubly_robust_learner(propensity_model, treatment_model, control_model, cate_model,
                          X, T, y, lambda_11, lambda_12, lambda_2, random_state=42):
    """Fit a doubly robust (AIPW) CATE learner on three disjoint folds of the data.

    The sample is split, stratified on ``T``, into three folds:

    * fold 11 (fraction ``lambda_11``) fits ``propensity_model`` on ``(X, T)``;
    * fold 12 (fraction ``lambda_12``) fits ``treatment_model`` on the rows with
      ``T == 1`` and ``control_model`` on the rows with ``T == 0``;
    * fold 2 (fraction ``lambda_2``) is used to build the doubly robust
      pseudo-outcome and to fit ``cate_model`` on it.

    The pseudo-outcome regressed on ``X`` in the last stage is::

        phi = mu1(X) - mu0(X)
              + T * (y - mu1(X)) / e(X)
              - (1 - T) * (y - mu0(X)) / (1 - e(X))

    where ``mu1``/``mu0`` are the fitted outcome models and ``e`` is the fitted
    propensity score.

    Parameters
    ----------
    propensity_model : classifier exposing ``fit(X, T)`` and ``predict_proba(X)``.
        Column 1 of ``predict_proba`` is used as P(T = 1 | X); this assumes the
        classes are ordered ``[0, 1]`` -- TODO confirm for non-sklearn models.
    treatment_model, control_model : regressors exposing ``fit`` and ``predict``.
    cate_model : regressor exposing ``fit``; trained on the pseudo-outcome.
    X, T, y : covariates, binary treatment indicator (0/1) and outcome. They must
        support boolean-mask indexing (e.g. numpy arrays).
    lambda_11, lambda_12, lambda_2 : fold fractions; must sum to 1.
    random_state : seed forwarded to both ``train_test_split`` calls.

    Returns
    -------
    tuple
        ``(propensity_model, treatment_model, control_model, cate_model)``, the
        same objects that were passed in, after fitting.

    Raises
    ------
    AssertionError
        If the three fractions do not sum to 1 (within floating-point tolerance).
    """
    # Compare with a tolerance: sums of decimal fractions are generally not
    # exactly representable, so an exact `== 1` can reject valid input.
    assert abs(lambda_11 + lambda_12 + lambda_2 - 1) <= 1e-9, \
        "lambda_11 + lambda_12 + lambda_2 must sum to 1"

    # First split off the CATE-fitting fold (fold 2) ...
    X1, X2, T1, T2, y1, y2 = train_test_split(
        X, T, y, test_size=lambda_2, random_state=random_state, stratify=T)
    # ... then divide the remainder between the propensity fold (11) and the
    # outcome-model fold (12); the fraction is relative to the remainder.
    X11, X12, T11, T12, y11, y12 = train_test_split(
        X1, T1, y1, test_size=lambda_12 / (lambda_11 + lambda_12),
        random_state=random_state, stratify=T1)

    # Nuisance models are fitted on folds that are disjoint from fold 2.
    propensity_model.fit(X11, T11)
    treated_rows = T12 == 1
    control_rows = T12 == 0
    treatment_model.fit(X12[treated_rows], y12[treated_rows])
    control_model.fit(X12[control_rows], y12[control_rows])

    # Evaluate each nuisance model once on the held-out fold.
    mu1 = treatment_model.predict(X2)
    mu0 = control_model.predict(X2)
    p_treated = propensity_model.predict_proba(X2)[:, 1]

    # Augmented inverse-propensity-weighted estimates of the two potential
    # outcomes. The control residual is weighted by P(T = 0 | X) = 1 - e(X).
    y_treatment_dr = mu1 + (y2 - mu1) / p_treated * T2
    y_control_dr = mu0 + (y2 - mu0) / (1 - p_treated) * (1 - T2)

    phi_dr = y_treatment_dr - y_control_dr

    cate_model.fit(X2, phi_dr)

    return propensity_model, treatment_model, control_model, cate_model

In [None]:
# Experiment Parameters

DATA_SIZES = [100, 1_000]
EPSILON = [1, 2]
SEED = 42
N_EXPERIMENTS = 2

METRICS = ['avg_cate_mse', 'std_cate_mse', 'avg_ate_abs', 'std_ate_abs']

# Output Frame
#
# Long-format results table: one row per recorded metric value.
# Requires pandas to be imported as `pd` in the notebook's import cell.

RESULT_COLUMNS = ["dataset", "learner", "epsilon", "stage", "eval_data", "metric_name", "metric_value"]

results = pd.DataFrame(columns=RESULT_COLUMNS)

# Example rows, written in the column order of RESULT_COLUMNS
# (dataset, learner, epsilon, stage, eval_data, metric_name, metric_value):
# ['synthetic', 'DR_ebm', 'inf', 'propensity', 'train', 'avg_mean_squared_error', '0.25']
# ['synthetic', 'DR_ebm', '1', 'propensity', 'train', 'avg_mean_squared_error', '0.25']
# ['synthetic', 'DR_ebm', '1', 'propensity', 'train', 'std_mean_squared_error', '0.05']
# ['synthetic', 'DR_ebm', '1', 'propensity', 'test', 'mean_squared_error', '0.55']
# ['synthetic', 'DR_ebm', '2', 'propensity', 'train', 'mean_squared_error', '0.2']
# ['synthetic', 'DR_ebm', '2', 'propensity', 'test', 'mean_squared_error', '0.51']
# NOTE(review): the metric names in these examples differ from the entries of
# METRICS above -- confirm which naming scheme is intended.


# Things to log:
# NOTE(review): the original list said "test set" for every CATE/outcome-model
# entry in both groups; the first group presumably refers to the train set -- confirm.
# - propensity model on train set
# - full process/CATE model on train set
# - treatment/control effect models on train set
# - propensity model on test set
# - full process/CATE model on test set
# - treatment/control effect models on test set

In [4]:
# Synthetic data-generating process: sample size and number of covariates.
n = 10_000
d = 4


def covariates_model(d):
    """Draw `d` binary covariates, each a single Bernoulli(0.5) trial."""
    return np.random.binomial(1, .5, size=d)


def propensity_model(x):
    """Treatment probability: logistic sigmoid of the first covariate."""
    return scipy.special.expit(x[0])


def control_outcome_model(x):
    """Control outcome: x[0] plus standard-normal noise scaled by (x[0] + 1).

    Returns a length-1 array because the noise is drawn with ``size=1``.
    """
    return x[0] + (x[0] + 1)*np.random.normal(0, 1, size=1)


def treatment_effect_model(x):
    """Treatment effect: -1 + 2 * x[0], a function of the first covariate only."""
    return -1 + 2 * x[0]


# Two reference covariate vectors and their associated values; the values equal
# treatment_effect_model evaluated at each point (x[0] = 1 -> 1, x[0] = 0 -> -1).
special_test_point = np.array([
    [1, 0, 0, 0],
    [0, 0, 0, 0],
])
special_test_value = np.array([1, -1])


data1 = data_generator(
    n=n,
    d=d,
    covariates_model=covariates_model,
    propensity_model=propensity_model,
    control_outcome_model=control_outcome_model,
    treatment_effect_model=treatment_effect_model,
    special_test_point=special_test_point,
    special_test_value=special_test_value,
    seed=1234,
)