In [1]:
import pandas as pd
import numpy as np
from estimator_model.meta_learner import XLearner, TLearner, SLearner

from exp_data import coupon_dataset, meaningless_discrete_dataset

# Seed NumPy's legacy global random state so repeated runs start from the same state.
# NOTE(review): presumably the dataset generators imported above draw from this
# global state — confirm, otherwise this seed does not make the data reproducible.
np.random.seed(2022)

## Try the trivial dataset. The true treatment effect is 3.

In [2]:
# Generate the trivial dataset: 1000 rows with two confounder columns.
# NOTE(review): the section heading states the true effect is 3 while
# `treatment_effct=[2, 3]` is passed here — confirm how the generator maps
# this argument (and `prob`) onto the effect of the treatment.
data, coef = meaningless_discrete_dataset(
    num=1000,
    confounder_num=2,
    treatment_effct=[2, 3],
    prob=[0.3, 0.7],
)

# Preview the first rows as the cell output.
data.head()

Unnamed: 0,treatment,w_0,w_1,outcome
0,0,-0.603348,-0.337215,0.795487
1,1,-0.228919,0.941748,4.82866
2,1,1.250625,-0.67559,2.070163
3,0,-0.0049,-0.835539,-0.4092
4,1,0.632894,0.058976,5.112075


In [14]:
# Column roles for the trivial dataset. The T- and X-learner cells that follow
# read these same three names.
treatment, outcome = 'treatment', 'outcome'
w = ['w_0', 'w_1']

# S-learner built with the 'LR' model option (presumably linear regression — confirm).
s = SLearner(ml_model='LR')
s_ate = s.estimate_ate(
    data,
    outcome,
    treatment,
    w,
)

# Relative deviation of the estimate from the reference effect of 3.
print(f'The estimation error is {np.abs(s_ate - 3) / 3}')

The estimation error is 0.02627183841896885


In [15]:
# T-learner on the trivial dataset; relies on `data`, `outcome`, `treatment`
# and `w` already being bound by earlier cells.
t = TLearner(ml_model='LR')
t_ate = t.estimate_ate(
    data,
    outcome,
    treatment,
    w,
)

# Relative deviation of the estimate from the reference effect of 3.
print(f'The estimation error is {np.abs(t_ate - 3) / 3}')

The estimation error is 0.02667637145442973


In [16]:
# X-learner on the trivial dataset; relies on `data`, `outcome`, `treatment`
# and `w` already being bound by earlier cells.
x = XLearner(ml_model='LR')
x_ate = x.estimate_ate(
    data,
    outcome,
    treatment,
    w,
)

# Relative deviation of the estimate from the reference effect of 3.
print(f'The estimation error is {np.abs(x_ate - 3) / 3}')

The estimation error is 0.02667637145442973


## Try the coupon-amount dataset. The true treatment effect is 150.

In [6]:
# Number of rows requested from the coupon data generator.
n_users = 10_000

# Build the coupon dataset with the income column included.
df = coupon_dataset(n_users, with_income=True)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,gender,coupon,amount,income,time_spent
0,1,1,504.695267,517.259674,60.469527
1,1,1,503.833114,517.168181,60.383311
2,1,1,497.694587,494.259992,59.769459
3,0,0,245.259153,477.792542,24.525915
4,0,1,400.853434,506.449489,50.085343


## SLearner

In [18]:
# Column roles for the coupon dataset.
# Note: `treatment`, `outcome` and `w` were bound earlier for the trivial dataset;
# this cell rebinds them, so cells run after it see the coupon column names.
treatment, outcome = 'coupon', 'amount'
w = ['gender', 'income']

In [19]:
# S-learner on the coupon dataset, using the column roles bound in the previous cell.
s_ = SLearner(ml_model='LR')
s__ate = s_.estimate_ate(
    df,
    outcome,
    treatment,
    w,
)

# Relative deviation of the estimate from the reference effect of 150.
print(f'The estimation error is {np.abs(s__ate - 150) / 150}')

The estimation error is 0.0003193211687494113


In [20]:
# Conditional effect from the S-learner, passing a boolean mask on `time_spent`.
# NOTE(review): presumably the estimate is restricted to rows where the mask is
# True — confirm against `estimate_cate`. The 25.06 threshold is unexplained here.
s_.estimate_cate(
    data=df,
    outcome='amount',
    treatment='coupon',
    adjustment=['gender', 'income'],
    condition_set={'time_spent'},
    condition=(df['time_spent'] >= 25.06),
)

149.8049552157292

## TLearner

In [21]:
# T-learner on the coupon dataset, using the column roles bound in an earlier cell.
t_ = TLearner(ml_model='LR')
t__ate = t_.estimate_ate(
    data=df,
    outcome=outcome,
    treatment=treatment,
    adjustment=w,
)

# Relative deviation of the estimate from the reference effect of 150.
print(f'The estimation error is {np.abs(t__ate - 150) / 150}')

The estimation error is 0.000837753491799352


In [11]:
# Conditional effect from the T-learner, passing a boolean mask on `time_spent`.
# NOTE(review): presumably the estimate is restricted to rows where the mask is
# True — confirm against `estimate_cate`. The 25.06 threshold is unexplained here.
t_.estimate_cate(
    data=df,
    outcome='amount',
    treatment='coupon',
    adjustment=['gender', 'income'],
    condition_set={'time_spent'},
    condition=(df['time_spent'] >= 25.06),
)

149.4368733690326

## XLearner

In [22]:
# X-learner on the coupon dataset, with the column roles spelled out literally.
x_ = XLearner(ml_model='LR')
x__ate = x_.estimate_ate(
    data=df,
    outcome='amount',
    treatment='coupon',
    adjustment=['gender', 'income'],
)

# Relative deviation of the estimate from the reference effect of 150.
print(f'The estimation error is {np.abs(x__ate - 150) / 150}')

The estimation error is 0.0008377534917991625


In [13]:
# Conditional effect from the X-learner, passing a boolean mask on `time_spent`.
# NOTE(review): presumably the estimate is restricted to rows where the mask is
# True — confirm against `estimate_cate`. The 25.06 threshold is unexplained here.
x_.estimate_cate(
    data=df,
    outcome='amount',
    treatment='coupon',
    adjustment=['gender', 'income'],
    condition_set={'time_spent'},
    condition=(df['time_spent'] >= 25.06),
)

149.4368733690326