# Notebook description

This is an exemplary notebook that includes the generation of a half-synthetic dataset based on IHDP data. The individual
treatment effect is linear to one of the covariates in this example. There is no noise in this example either.
The basic structure is taken from: 
https://justcause.readthedocs.io/en/latest/usage.html#quick-overview

In [37]:
from sklearn.utils import check_random_state  # ensures usable random state
from justcause.data.utils import generate_data
import numpy as np
from numpy.random import RandomState
from scipy.special import expit
from sklearn.utils import check_random_state
from justcause.data.sets.ihdp import get_ihdp_covariates

covariates = get_ihdp_covariates()

In [38]:
def linear_outcome(covariates, *, random_state: RandomState, **kwargs):
    random_state = check_random_state(random_state)

    # define tau as linear function of covariate x_8
    tau = 2 * covariates[:, 7]

    y_0 = 1
    y_1 = y_0 + tau
    mu_0, mu_1 = y_0, y_1  # no noise for this example
    return mu_0, mu_1, y_0, y_1

In [48]:
def treatment(covariates, *, random_state: RandomState, **kwargs):
    random_state = check_random_state(random_state)
    return random_state.binomial(1, p=0.5)

In [49]:
replications = generate_data(
    covariates,
    treatment,
    linear_outcome,
    n_samples=747,  # Optional but 747 is the maximum available with IHDP covariates
    n_replications=100,
    random_state=0  # Fix random_state for replicability
)

AssertionError: Treatment function must return vector with dimension `n_samples`