# Notebook description

This is an example notebook that generates a half-synthetic dataset based on the IHDP covariates. In this example, the individual
treatment effect is a weighted sum of pairwise products of the covariates x_7, x_8 and x_9. No noise is added to the treatment effect or to the outcomes.
The basic structure is taken from: 
https://justcause.readthedocs.io/en/latest/usage.html#quick-overview

In [60]:
from sklearn.utils import check_random_state  # ensures usable random state
from justcause.data.utils import generate_data
import numpy as np
from numpy.random import RandomState
from scipy.special import expit
from sklearn.utils import check_random_state
from justcause.data.sets.ihdp import get_ihdp_covariates

# Load the IHDP covariates; they are the real part of the half-synthetic dataset
# and are passed to generate_data further down.
covs = get_ihdp_covariates()

In [61]:
def outcome(covariates, *, random_state: RandomState, **kwargs):
    """Generate potential outcomes for every row of ``covariates``.

    The treatment effect is centred on
    ``2*x_8*x_7 + x_8*x_9 + 3*x_7*x_9`` and drawn with a standard deviation
    of 0. The control outcome is drawn from a normal distribution with mean 0
    and standard deviation 0.2, and the treated outcome is the control outcome
    plus the treatment effect.

    Args:
        covariates: Container indexable by the column names ``"x_7"``,
            ``"x_8"`` and ``"x_9"`` and supporting ``len()``.
        random_state: Seed or random state, normalised through
            ``check_random_state``.
        **kwargs: Accepted and ignored.

    Returns:
        The tuple ``(mu_0, mu_1, y_0, y_1)``. The noiseless and the observed
        outcomes are the same arrays in this example.
    """
    rng = check_random_state(random_state)
    n_units = len(covariates)
    x_8, x_7, x_9 = covariates["x_8"], covariates["x_7"], covariates["x_9"]

    # Mean of the individual treatment effect: pairwise covariate products.
    effect_mean = 2 * x_8 * x_7 + x_8 * x_9 + 3 * x_7 * x_9

    # The zero-scale draw is kept as a call on the random state on purpose:
    # replacing it with the plain mean might change the random stream that
    # the following draw consumes.
    tau = rng.normal(effect_mean, 0, size=n_units)

    control = rng.normal(0, 0.2, size=n_units)
    treated = control + tau

    # No noise for this example: expected and observed outcomes coincide.
    return control, treated, control, treated

In [62]:
def treatment(covariates, *, random_state: RandomState, **kwargs):
    """Draw a binary treatment indicator for every row of ``covariates``.

    Each indicator is a single Bernoulli trial with success probability 0.5;
    the covariate values themselves are not used, only their count.

    Args:
        covariates: Container supporting ``len()``.
        random_state: Seed or random state, normalised through
            ``check_random_state``.
        **kwargs: Accepted and ignored.

    Returns:
        Array of 0/1 draws with one entry per row of ``covariates``.
    """
    rng = check_random_state(random_state)
    n_units = len(covariates)
    return rng.binomial(1, 0.5, size=n_units)

In [63]:
# Generate the replications of the half-synthetic dataset from the IHDP
# covariates, using the `treatment` and `outcome` functions of this notebook.
replications = generate_data(
    covs,
    treatment,
    outcome,
    n_samples=747,  # Optional but 747 is the maximum available with IHDP covariates
    n_replications=1000,
    random_state=0  # Fix random_state for replicability
)

In [64]:
from sklearn.linear_model import LinearRegression

from sklearn.preprocessing import PolynomialFeatures

from justcause.learners import SLearner, TLearner

class PolyLearner:
    """Linear regression fitted on a polynomial expansion of the features.

    The inputs are expanded with ``PolynomialFeatures(degree)`` and the
    expanded matrix is passed to a ``LinearRegression``. The class exposes
    the ``fit`` / ``predict`` pair so it can be used as the base regressor of
    the learners in this notebook.

    Attributes are kept under their original names (``degree``, ``poly``,
    ``linReg``) so existing code that reads them keeps working.
    """

    def __init__(self, degree):
        """Create the feature expander and the regressor.

        Args:
            degree: Degree handed to ``PolynomialFeatures``.
        """
        self.degree = degree
        self.poly = PolynomialFeatures(degree)
        self.linReg = LinearRegression()

    def fit(self, X, y, sample_weight=None):
        """Fit the feature expansion and the regression on the training data.

        Args:
            X: Training features.
            y: Training targets.
            sample_weight: Optional per-sample weights forwarded to the
                regression.

        Returns:
            ``self``, so calls can be chained.
        """
        X_poly = self.poly.fit_transform(X)
        self.linReg.fit(X_poly, y, sample_weight)
        return self

    def predict(self, X):
        """Predict targets for ``X`` using the fitted expansion and regression.

        Args:
            X: Features to predict for.

        Returns:
            The predictions of the underlying linear regression.
        """
        # Only transform here: the expansion was fitted in ``fit`` and must
        # not be re-fitted on prediction data.
        X_poly = self.poly.transform(X)
        return self.linReg.predict(X_poly)

In [71]:
# Learners to compare: an S-learner and a T-learner, each once around a plain
# LinearRegression and once around the degree-2 PolyLearner.
learners = [
 
    SLearner(LinearRegression()),
 
    TLearner(LinearRegression()),
    
    SLearner(PolyLearner(2)),
 
    TLearner(PolyLearner(2))
]

In [None]:
from justcause.evaluation import evaluate_ite
 
from justcause.metrics import pehe_score, mean_absolute, bias
 
# Evaluate all learners on the generated replications with the three imported
# metrics; random_state is fixed to 0.
results = evaluate_ite(replications, learners, metrics=[pehe_score, mean_absolute, bias], random_state=0)

In [None]:
import pandas as pd
# Put the evaluation results into a table ordered by the 'pehe_score-mean' column.
df = pd.DataFrame(results).sort_values(by='pehe_score-mean')
# Display only the rows whose 'train' flag is False (presumably the held-out
# results - confirm), limited to the method, the flag and the mean PEHE score.
# The element-wise `== False` comparison is intentional for a pandas column.
df.loc[df['train'] == False, ['method', 'train', 'pehe_score-mean']]