In [46]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from econml.metalearners import XLearner
import math

In [55]:
def get_ps_weights(clf, x, t):
    ti = np.squeeze(t)
    clf.fit(x, ti)
    ptx = clf.predict_proba(x).T[1].T + 0.0001 # add a small value to avoid dividing by 0
    # Given ti and ptx values, compute the weights wi
    wi = (ti / ptx) + ((1.0 - ti) / (1.0 - ptx))
    return wi

def individual_effect(y0_array, y1_array):
    ## Your code goes here
    return y1_array - y0_array

def average_effect(y0_array, y1_array):
    ## Your code goes here
    return np.mean(y1_array - y0_array)

def abs_ate(effect_true, effect_pred):
    # 1. Obtain true ATE by getting the mean of true ITEs.
    # 2. Obtain predicted ATE by getting the mean of predicted ITEs.
    # 3. Calculate the difference between the true and predicted ATEs.
    # 4. Calculate the absolute value of the difference obtained in the previous step.
    # Hint: Keep your code concise (this can be done in one line of code).
    ## Your code goes here
    return abs(np.mean(effect_true)-np.mean(effect_pred))

def pehe(effect_true, effect_pred):
    # 1. Obtain the difference between the true and predicted ITEs.
    # 2. Square the result (power of 2).
    # 3. Take the mean.
    # 4. Take the square root.
    # Hint: Keep your code concise (this can be done in one line of code).
    ## Your code goes here
    return np.sqrt(np.mean((effect_pred-effect_true)**2))

In [48]:
df = pd.read_csv('https://raw.githubusercontent.com/dmachlanski/CE888_2023/main/lab5/ihdp.csv')
df = df.drop('ycf', axis = 1)
X = df.iloc[:,0:25]
Y = df.iloc[:,-2]
T = df.iloc[:,-3]
ITE = df.iloc[:,-1]

In [49]:
x_train, x_test, y_train, y_test, t_train, t_test = train_test_split(X, Y, T, test_size=0.20, shuffle=True)

In [50]:
lr = LinearRegression()
x_concat = np.c_[x_train, t_train]

In [56]:
lr.fit(x_concat, y_train)
y0_pred = lr.predict(np.c_[x_test, np.zeros_like(t_test)])
y1_pred = lr.predict(np.c_[x_test, np.ones_like(t_test)])
s_pred = y1_pred - y0_pred

In [57]:
clf = LogisticRegression()
weights = get_ps_weights(clf, x_train, t_train)
lr.fit(x_concat, y_train, sample_weight=weights)
y0_wpred = lr.predict(np.c_[x_test, np.zeros_like(t_test)])
y1_wpred = lr.predict(np.c_[x_test, np.ones_like(t_test)])
w_pred = y1_wpred - y0_wpred

In [62]:
est = XLearner(models=lr)
est.fit(y_train, t_train, X=x_train)
y0_xpred = lr.predict(np.c_[x_test, np.zeros_like(t_test)])
y1_xpred = lr.predict(np.c_[x_test, np.ones_like(t_test)])
#est.effect(X[:3])
xl_pred = est.effect(x_test)

In [70]:
s_val=0
w_val=0
xl_val=0
for i in range(len(s_pred)):
    s_val += s_pred[i]
    w_val += w_pred[i]
    xl_val += xl_pred[i]
ate_s = w_val/len(w_pred)
ate_w = w_val/len(w_pred)
ate_xl = xl_val/len(xl_pred)

pehe_s = np.sqrt(np.mean((s_pred)**2))
pehe_w = np.sqrt(np.mean((w_pred)**2))
pehe_xl = np.sqrt(np.mean((xl_pred)**2))

In [74]:
if (pehe_s >= pehe_w) and (pehe_s >= pehe_xl):
    better = 'S'
elif (pehe_w >= pehe_s) and (pehe_w >= pehe_xl):
    better = 'W'
elif (pehe_xl >= pehe_s) and (pehe_xl >= pehe_w):
    better = 'XL'

print(better)
print(pehe_s,pehe_w,pehe_xl)

if (ate_s >= ate_w) and (ate_s >= ate_xl):
    better = 'S'
elif (ate_w >= ate_s) and (ate_w >= ate_xl):
    better = 'W'
elif (ate_xl >= ate_s) and (ate_xl >= ate_w):
    better = 'XL'

print(better)
print(ate_s,ate_w,ate_xl)

XL
3.806943618572935 3.869883504385941 3.9778997373427125
XL
3.86988350438594 3.86988350438594 3.8911677516890464
