In [11]:
import pandas as pd
import torch
import numpy as np

from estimator_model.propensity_score import InversePorbWeighting, PropensityScore
from exp_data import coupon_dataset, meaningless_discrete_dataset, meaningless_discrete_dataset_

# Seed NumPy's legacy global random number generator so that NumPy-based
# randomness in this notebook is repeatable across runs.
# NOTE(review): only NumPy is seeded here; whether the exp_data generators
# draw from this global generator cannot be seen from this notebook - confirm.
np.random.seed(2022)

## Try the trivial dataset. The true treatment effect is 3.

In [12]:
# Build the trivial synthetic dataset (the displayed frame has a `treatment`
# column, confounders `w_0`/`w_1` and an `outcome` column).
# NOTE(review): the exact meaning of `treatment_effct` and `prob` is defined
# in exp_data - confirm there. `coef` is not used by any cell visible here.
data, coef = meaningless_discrete_dataset(
    num=1000,
    confounder_n=2,
    treatment_effct=[2, 3],
    prob=[0.3, 0.7],
)
# Preview the first rows.
data.head()

Unnamed: 0,treatment,w_0,w_1,outcome
0,0,-0.603348,-0.337215,0.795487
1,1,-0.228919,0.941748,4.82866
2,1,1.250625,-0.67559,2.070163
3,0,-0.0049,-0.835539,-0.4092
4,1,0.632894,0.058976,5.112075


In [13]:
# Column names used when estimating on the trivial dataset:
# treatment column, outcome column, and the list of confounder columns.
treatment, outcome = 'treatment', 'outcome'
w = ['w_0', 'w_1']

In [14]:
# Estimate the treatment effect on the trivial dataset with inverse
# probability weighting.
# NOTE(review): 'LogisticR' presumably selects a logistic-regression
# propensity model - confirm in estimator_model.propensity_score.
ipw = InversePorbWeighting('LogisticR')
# Pass the column-name variables from the previous cell instead of repeating
# the string literals: `w` already comes from that cell, and the coupon
# dataset cell further down calls estimate_ate the same way.
ate = ipw.estimate_ate(data, outcome, treatment, w)

In [15]:
# Relative error of the estimate against the true treatment effect of 3
# stated in this section's heading.
relative_error = np.abs(ate - 3) / 3
print(f'The estimation error is {relative_error}')

The estimation error is 0.05566537069934988


## Try the coupon-amount dataset. The true treatment effect is 150.

In [16]:
# Number of rows requested from the coupon dataset generator.
n_users = 10_000
# NOTE(review): `with_income=True` coincides with an `income` column in the
# displayed frame; the generator itself lives in exp_data - confirm there.
df = coupon_dataset(n_users, with_income=True)
# Preview the first rows.
df.head()

Unnamed: 0,gender,coupon,amount,income,time_spent
0,1,1,504.695267,517.259674,60.469527
1,1,1,503.833114,517.168181,60.383311
2,1,1,497.694587,494.259992,59.769459
3,0,0,245.259153,477.792542,24.525915
4,0,1,400.853434,506.449489,50.085343


In [17]:
# Column names used when estimating on the coupon dataset. These rebind the
# `treatment`, `outcome` and `w` names used for the trivial dataset earlier.
treatment, outcome = 'coupon', 'amount'
w = ['gender', 'income']

In [18]:
# Run inverse probability weighting on the coupon dataset, using the column
# names bound in the previous cell.
ipw_ = InversePorbWeighting('LogisticR')
ipw_ate = ipw_.estimate_ate(df, outcome, treatment, w)
# Relative error against the true treatment effect of 150 stated in this
# section's heading.
coupon_relative_error = np.abs(ipw_ate - 150) / 150
print(f'The estimation error is {coupon_relative_error}')

The estimation error is 1.0095933850395633


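**TODO:** Why did this fail? A relative error of about 1.01 means the estimate misses the true effect (150) by roughly the size of the effect itself, so the IPW result on this dataset should not be trusted until the cause is understood.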

## Test a new dataset.

In [19]:
# Build a dataset with the second generator variant; the counts printed below
# show four treatment levels (0-3).
# NOTE(review): the meaning of `treatment_effct` and `random_seed` is defined
# in exp_data - confirm there.
d = meaningless_discrete_dataset_(
    num=10_000,
    confounder_n=2,
    treatment_effct=[1, 3, 4, 6],
    random_seed=12,
)
# Number of rows per treatment level.
d['treatment'].value_counts()

1    3891
2    2480
3    2284
0    1345
Name: treatment, dtype: int64

Unnamed: 0,treatment,w_0,w_1,outcome
0,3,-0.679570,2.344229,1.608472
1,0,0.689231,-0.040814,1.348316
2,1,-0.253517,-0.460669,3.707772
3,1,0.017621,-0.435487,3.772071
4,3,-0.324275,1.608774,3.043107
...,...,...,...,...
9995,1,-0.881862,-2.214418,6.537205
9996,2,1.060763,1.001066,2.666631
9997,3,-1.143148,3.858135,-1.237494
9998,3,-0.778809,3.707048,-0.825910


In [43]:
# Scratch check: collapse the treatment to an indicator (level >= 1), shift it
# so the labels are 1 and 2, and view the result as a single column.
(d['treatment'] >= 1).astype(int).values.reshape(-1, 1) + 1

array([[2],
       [1],
       [2],
       ...,
       [2],
       [2],
       [2]])

In [32]:
# Scratch array of ones with 3 rows and 4 columns, used to inspect reshape.
a = np.ones(shape=(3, 4))

In [33]:
# Display `a` reshaped to one column; -1 lets NumPy infer the row count.
a.reshape((-1, 1))

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.]])

In [34]:
# Display `a` again: the output still has its original 3x4 layout, so the
# reshape call in the previous cell did not change `a` itself.
a

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])