In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import MinMaxScaler
from sklift.metrics import uplift_at_k, uplift_auc_score, qini_auc_score, weighted_average_uplift

In [2]:
# Read the imputed data set from the CSV next to the notebook and
# preview its first ten rows
mimic_complete = pd.read_csv(filepath_or_buffer="./impute_mimic.csv")
mimic_complete.head(n=10)

In [3]:
# Column names used by every learner below:
#   y - outcome, T - treatment, L - confounders, X - features
y = "mort_28"
T = "peep_regime"

L = [
    "age", "weight", "pf_ratio", "po2", "driving_pressure",
    "fio2", "hco3", "plateau_pressure", "respiratory_rate",
]

X = [
    "age", "sex", "weight", "height",
    "pf_ratio", "po2", "pco2", "ph",
    "driving_pressure", "lung_compliance", "map",
    "bilirubin", "creatinine", "platelets", "urea",
    "fio2", "hco3", "heart_rate", "minute_volume", "peep",
    "plateau_pressure", "respiratory_rate",
    "syst_blood_pressure", "diastolic_blood_pressure",
]

In [None]:
# Swap the coding of the mort_28 column (1 -> 0 and 0 -> 1).
# The result is assigned back to the column: calling replace(..., inplace=True)
# on a selected column is chained assignment, which pandas warns about and
# which leaves the DataFrame unchanged when Copy-on-Write is enabled.
# NOTE: not idempotent - running this cell a second time restores the
# original coding, so run it exactly once after loading the data.
mimic_complete['mort_28'] = mimic_complete['mort_28'].replace([1, 0], [0, 1])

### Evaluating CATE estimation LGBM 

In [7]:
from Simulation.X_learner_confounder import X_learner_lgbm
from Simulation.T_learner import T_learner_lgbm
from Simulation.S_learner import S_learner_lgbm

# Get CATE estimates for S-, T-, and X-learner using LGBM, averaged over
# 100 random splits with 30% of the rows held out each time.
# Every split holds out a different set of rows, so the predictions are stored
# as Series keyed by the row label of mimic_complete and averaged per row.
# A positional mean over the per-split lists would mix different rows and
# could not be paired with any single split's outcome/treatment columns.
s_cate_test_list = []
t_cate_test_list = []
x_cate_test_list = []
x_cate_train_list = []

for split_seed in range(100):
    # Train and test set, seeded per iteration so a re-run draws the same splits.
    # Copies are taken so the scaled columns are written to independent frames.
    train, test = train_test_split(mimic_complete, test_size=0.3, random_state=split_seed)
    train, test = train.copy(), test.copy()

    # Normalizing Data: min/max are learned on the training rows only and
    # reused for the test rows, so both frames share one scale
    normalizer = MinMaxScaler()
    train[X] = normalizer.fit_transform(train[X])
    test[X] = normalizer.transform(test[X])

    # Get CATE estimates
    s_cate_train, s_cate_test = S_learner_lgbm(train, test, L, T, y)
    t_cate_train, t_cate_test = T_learner_lgbm(train, test, L, T, y)
    x_cate_train, x_cate_test = X_learner_lgbm(train, test, L, X, T, y)

    # NOTE(review): assumes each learner returns its rows in the order of the
    # frame it was given - confirm against the Simulation package
    s_cate_test_list.append(pd.Series(np.asarray(s_cate_test['pred_cate']), index=test.index))
    t_cate_test_list.append(pd.Series(np.asarray(t_cate_test['pred_cate']), index=test.index))
    x_cate_test_list.append(pd.Series(np.asarray(x_cate_test['pred_cate']), index=test.index))
    x_cate_train_list.append(pd.Series(np.asarray(x_cate_train['pred_cate']), index=train.index))

# Per-row mean over the splits in which the row was in the test (or train) set;
# groupby(level=0) returns the rows sorted by label, identically for all learners
s_cate_test = pd.concat(s_cate_test_list).groupby(level=0).mean()
t_cate_test = pd.concat(t_cate_test_list).groupby(level=0).mean()
x_cate_test = pd.concat(x_cate_test_list).groupby(level=0).mean()
x_cate_train = pd.concat(x_cate_train_list).groupby(level=0).mean()

# Rebind train/test to the rows the averaged estimates refer to, so that
# test[y] / test[T] (and the train counterparts) line up with the predictions
test = mimic_complete.loc[x_cate_test.index]
train = mimic_complete.loc[x_cate_train.index]
y_test, t_test = test[y].tolist(), test[T].tolist()
y_train, t_train = train[y].tolist(), train[T].tolist()

# Plain arrays, as the plotting cells expect
s_cate_test = s_cate_test.to_numpy()
t_cate_test = t_cate_test.to_numpy()
x_cate_test = x_cate_test.to_numpy()
x_cate_train = x_cate_train.to_numpy()

In [8]:
# Preparing data for visualization: outcome and treatment columns of the
# current test and train frames, converted to plain Python lists
y_test, t_test = test[y].tolist(), test[T].tolist()
y_train, t_train = train[y].tolist(), train[T].tolist()

In [9]:
import matplotlib.pyplot as plt
from sklift.viz import plot_qini_curve

# Qini curves of the LGBM X-learner on the train rows and on the test rows,
# drawn on a single shared axis
fig, ax = plt.subplots(1, 1)
ax.set_title('Qini curves LGBM')

xlearner_curves = [
    (y_train, x_cate_train, t_train, 'Xlearner train'),
    (y_test, x_cate_test, t_test, 'Xlearner test'),
]
for curve_y, curve_cate, curve_t, curve_name in xlearner_curves:
    plot_qini_curve(curve_y, curve_cate, curve_t, perfect=True, name=curve_name, ax=ax)

In [10]:
# Qini curves of the three LGBM learners against the test outcome and
# treatment, overlaid on one axis
fig, ax = plt.subplots(1, 1)
ax.set_title('Qini curves LGBM')

lgbm_curves = [
    (s_cate_test, 'Slearner'),
    (t_cate_test, 'Tlearner'),
    (x_cate_test, 'Xlearner'),
]
for curve_cate, curve_name in lgbm_curves:
    plot_qini_curve(y_test, curve_cate, t_test, perfect=True, name=curve_name, ax=ax)

### Linear Regression

In [22]:
from Simulation.X_learner_confounder import X_learner_linear
from Simulation.T_learner import T_learner_linear
from Simulation.S_learner import S_learner_linear

# Get CATE estimates for S-, T-, and X-learner using Linear Regression,
# averaged over 100 random splits with 30% of the rows held out each time.
# Every split holds out a different set of rows, so the predictions are stored
# as Series keyed by the row label of mimic_complete and averaged per row.
# A positional mean over the per-split lists would mix different rows.
s_cate_test_list = []
t_cate_test_list = []
x_cate_test_list = []

for split_seed in range(100):
    # Train and test set, seeded per iteration so a re-run draws the same splits.
    # Copies are taken so the scaled columns are written to independent frames.
    train, test = train_test_split(mimic_complete, test_size=0.3, random_state=split_seed)
    train, test = train.copy(), test.copy()

    # Normalizing Data: min/max are learned on the training rows only and
    # reused for the test rows, so both frames share one scale
    normalizer = MinMaxScaler()
    train[X] = normalizer.fit_transform(train[X])
    test[X] = normalizer.transform(test[X])

    # Get CATE estimates
    s_cate_train, s_cate_test = S_learner_linear(train, test, L, T, y)
    t_cate_train, t_cate_test = T_learner_linear(train, test, L, T, y)
    x_cate_train, x_cate_test = X_learner_linear(train, test, L, X, T, y)

    # NOTE(review): assumes each learner returns its rows in the order of the
    # frame it was given - confirm against the Simulation package
    s_cate_test_list.append(pd.Series(np.asarray(s_cate_test['pred_cate']), index=test.index))
    t_cate_test_list.append(pd.Series(np.asarray(t_cate_test['pred_cate']), index=test.index))
    x_cate_test_list.append(pd.Series(np.asarray(x_cate_test['pred_cate']), index=test.index))

# Per-row mean over the splits in which the row was held out;
# groupby(level=0) returns the rows sorted by label, identically for all learners
s_cate_test = pd.concat(s_cate_test_list).groupby(level=0).mean()
t_cate_test = pd.concat(t_cate_test_list).groupby(level=0).mean()
x_cate_test = pd.concat(x_cate_test_list).groupby(level=0).mean()

# Outcome and treatment of exactly the rows the averaged estimates refer to;
# set here so the plot below does not reuse values left over from another section
test = mimic_complete.loc[x_cate_test.index]
y_test, t_test = test[y].tolist(), test[T].tolist()

# Plain arrays, as the plotting cell expects
s_cate_test = s_cate_test.to_numpy()
t_cate_test = t_cate_test.to_numpy()
x_cate_test = x_cate_test.to_numpy()

In [23]:
import matplotlib.pyplot as plt
from sklift.viz import plot_qini_curve

# Qini curves of the three Linear Regression learners against the test
# outcome and treatment, overlaid on one axis
fig, ax = plt.subplots(1, 1)
ax.set_title('Qini curves Linear Regression')

linear_curves = [
    (s_cate_test, 'Slearner'),
    (t_cate_test, 'Tlearner'),
    (x_cate_test, 'Xlearner'),
]
for curve_cate, curve_name in linear_curves:
    plot_qini_curve(y_test, curve_cate, t_test, perfect=True, name=curve_name, ax=ax)

### Random Forest

In [10]:
from Simulation.X_learner_confounder import X_learner_rf
from Simulation.T_learner import T_learner_rf
from Simulation.S_learner import S_learner_rf


# Get CATE estimates for S-, T-, and X-learner using Random Forest,
# averaged over 20 random splits with 30% of the rows held out each time.
# Every split holds out a different set of rows, so the predictions are stored
# as Series keyed by the row label of mimic_complete and averaged per row.
# A positional mean over the per-split lists would mix different rows.
s_cate_test_list = []
t_cate_test_list = []
x_cate_test_list = []

for split_seed in range(20):
    # Train and test set, seeded per iteration so a re-run draws the same splits.
    # Copies are taken so the scaled columns are written to independent frames.
    train, test = train_test_split(mimic_complete, test_size=0.3, random_state=split_seed)
    train, test = train.copy(), test.copy()

    # Normalizing Data: min/max are learned on the training rows only and
    # reused for the test rows, so both frames share one scale
    normalizer = MinMaxScaler()
    train[X] = normalizer.fit_transform(train[X])
    test[X] = normalizer.transform(test[X])

    # Get CATE estimates
    # NOTE(review): the S- and T-learner receive X here, whereas the LGBM and
    # Linear Regression sections pass L - confirm this difference is intended
    s_cate_train, s_cate_test = S_learner_rf(train, test, X, T, y)
    t_cate_train, t_cate_test = T_learner_rf(train, test, X, T, y)
    x_cate_train, x_cate_test = X_learner_rf(train, test, L, X, T, y)

    # NOTE(review): assumes each learner returns its rows in the order of the
    # frame it was given - confirm against the Simulation package
    s_cate_test_list.append(pd.Series(np.asarray(s_cate_test['pred_cate']), index=test.index))
    t_cate_test_list.append(pd.Series(np.asarray(t_cate_test['pred_cate']), index=test.index))
    x_cate_test_list.append(pd.Series(np.asarray(x_cate_test['pred_cate']), index=test.index))

# Per-row mean over the splits in which the row was held out;
# groupby(level=0) returns the rows sorted by label, identically for all learners
s_cate_test = pd.concat(s_cate_test_list).groupby(level=0).mean()
t_cate_test = pd.concat(t_cate_test_list).groupby(level=0).mean()
x_cate_test = pd.concat(x_cate_test_list).groupby(level=0).mean()

# Outcome and treatment of exactly the rows the averaged estimates refer to;
# set here so the plot below does not reuse values left over from another section
test = mimic_complete.loc[x_cate_test.index]
y_test, t_test = test[y].tolist(), test[T].tolist()

# Plain arrays, as the plotting cell expects
s_cate_test = s_cate_test.to_numpy()
t_cate_test = t_cate_test.to_numpy()
x_cate_test = x_cate_test.to_numpy()

In [11]:
import matplotlib.pyplot as plt
from sklift.viz import plot_qini_curve

# Qini curves of the three Random Forest learners against the test outcome
# and treatment, overlaid on one axis
fig, ax = plt.subplots(1, 1)
ax.set_title('Qini curves RF')

rf_curves = [
    (s_cate_test, 'Slearner'),
    (t_cate_test, 'Tlearner'),
    (x_cate_test, 'Xlearner'),
]
for curve_cate, curve_name in rf_curves:
    plot_qini_curve(y_test, curve_cate, t_test, perfect=True, name=curve_name, ax=ax)

### Support Vector Regression   

In [12]:
from Simulation.X_learner_confounder import X_learner_svm
from Simulation.T_learner import T_learner_svm
from Simulation.S_learner import S_learner_svm

# Get CATE estimates for S-, T-, and X-learner using the SVM-based learners,
# averaged over 20 random splits with 30% of the rows held out each time.
# Every split holds out a different set of rows, so the predictions are stored
# as Series keyed by the row label of mimic_complete and averaged per row.
# A positional mean over the per-split lists would mix different rows.
s_cate_test_list = []
t_cate_test_list = []
x_cate_test_list = []

for split_seed in range(20):
    # Train and test set, seeded per iteration so a re-run draws the same splits.
    # Copies are taken so the scaled columns are written to independent frames.
    train, test = train_test_split(mimic_complete, test_size=0.3, random_state=split_seed)
    train, test = train.copy(), test.copy()

    # Normalizing Data: min/max are learned on the training rows only and
    # reused for the test rows, so both frames share one scale
    normalizer = MinMaxScaler()
    train[X] = normalizer.fit_transform(train[X])
    test[X] = normalizer.transform(test[X])

    # Get CATE estimates
    # NOTE(review): the S- and T-learner receive X here, whereas the LGBM and
    # Linear Regression sections pass L - confirm this difference is intended
    s_cate_train, s_cate_test = S_learner_svm(train, test, X, T, y)
    t_cate_train, t_cate_test = T_learner_svm(train, test, X, T, y)
    x_cate_train, x_cate_test = X_learner_svm(train, test, L, X, T, y)

    # NOTE(review): assumes each learner returns its rows in the order of the
    # frame it was given - confirm against the Simulation package
    s_cate_test_list.append(pd.Series(np.asarray(s_cate_test['pred_cate']), index=test.index))
    t_cate_test_list.append(pd.Series(np.asarray(t_cate_test['pred_cate']), index=test.index))
    x_cate_test_list.append(pd.Series(np.asarray(x_cate_test['pred_cate']), index=test.index))

# Per-row mean over the splits in which the row was held out;
# groupby(level=0) returns the rows sorted by label, identically for all learners
s_cate_test = pd.concat(s_cate_test_list).groupby(level=0).mean()
t_cate_test = pd.concat(t_cate_test_list).groupby(level=0).mean()
x_cate_test = pd.concat(x_cate_test_list).groupby(level=0).mean()

# Outcome and treatment of exactly the rows the averaged estimates refer to;
# set here so the plot below does not reuse values left over from another section
test = mimic_complete.loc[x_cate_test.index]
y_test, t_test = test[y].tolist(), test[T].tolist()

# Plain arrays, as the plotting cell expects
s_cate_test = s_cate_test.to_numpy()
t_cate_test = t_cate_test.to_numpy()
x_cate_test = x_cate_test.to_numpy()

In [13]:
import matplotlib.pyplot as plt
from sklift.viz import plot_qini_curve

# Qini curves of the three SVM-based learners against the test outcome and
# treatment, overlaid on one axis
fig, ax = plt.subplots(1, 1)
ax.set_title('Qini curves SVR')

svr_curves = [
    (s_cate_test, 'Slearner'),
    (t_cate_test, 'Tlearner'),
    (x_cate_test, 'Xlearner'),
]
for curve_cate, curve_name in svr_curves:
    plot_qini_curve(y_test, curve_cate, t_test, perfect=True, name=curve_name, ax=ax)