In [70]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from toolz import curry
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import MinMaxScaler

In [71]:
# Get imputed data
# Reads impute_mimic.csv from the notebook's working directory into a DataFrame.
mimic_complete = pd.read_csv("./impute_mimic.csv")
# Preview the first 10 rows as the cell output.
mimic_complete.head(10)

In [65]:
# Train and test set
# 70/30 split. The seed is fixed so that the split -- and every model fit and
# gain curve derived from it -- is identical after a kernel restart.
train, test = train_test_split(mimic_complete, test_size=0.3, random_state=42)
# Sanity check: (rows, columns) of each split.
print(train.shape, test.shape)

In [66]:
# Column roles passed to the learner helpers below:
#   y -> outcome column, T -> treatment column, X -> feature columns
y = "mort_28"
T = "peep_regime"
X = [
    "age",
    "sex",
    "weight",
    "height",
    "pf_ratio",
    "po2",
    "pco2",
    "ph",
    "driving_pressure",
    "lung_compliance",
    "map",
    "bilirubin",
    "creatinine",
    "platelets",
    "urea",
    "fio2",
    "hco3",
    "heart_rate",
    "minute_volume",
    "peep",
    "plateau_pressure",
    "respiratory_rate",
    "syst_blood_pressure",
    "diastolic_blood_pressure",
]

In [67]:
# Normalizing Data
# Work on explicit copies: assigning columns into the frames returned by
# train_test_split can trigger pandas' SettingWithCopyWarning.
train = train.copy()
test = test.copy()

normalizer = MinMaxScaler()
# Learn the per-feature min/max from the training split only ...
train[X] = normalizer.fit_transform(train[X])
# ... and apply those same parameters to the test split. Re-fitting on the test
# split would scale the two splits differently and leak test-set statistics
# into the preprocessing.
test[X] = normalizer.transform(test[X])

In [68]:
# Show the first 10 rows of the full frame again (same call as right after loading).
mimic_complete.head(10)

In [69]:
# Inspect the normalized training features. A bare last-line expression gives
# the rich table display, and head(10) keeps the output bounded, matching the
# previews used elsewhere in this notebook.
train[X].head(10)

In [29]:
# Elasticity of the outcome with respect to the treatment
@curry
def elast(data, y, t):
    """Return the elasticity of column ``y`` with respect to column ``t``.

    The value is the sum of cross-deviations of ``data[t]`` and ``data[y]``
    from their means, divided by the sum of squared deviations of ``data[t]``.
    A constant 1e-10 is added to the denominator so that a treatment column
    with no variation yields 0 instead of a division by zero.

    Args:
        data: Frame (or mapping of columns) that supports ``data[name]``.
        y: Name of the outcome column.
        t: Name of the treatment column.

    Returns:
        The elasticity as a scalar.
    """
    t_centered = data[t] - data[t].mean()
    y_centered = data[y] - data[y].mean()
    cross_deviation = np.sum(t_centered * y_centered)
    squared_deviation = np.sum(t_centered ** 2)
    return cross_deviation / (squared_deviation + 1e-10)

# Function to calculate the cumulative gain
def cumulative_gain(dataset, prediction, y, t, min_periods=30, steps=100):
    """Cumulative elasticity gain curve for rows ranked by a model score.

    Rows are sorted by ``prediction`` in descending order. For a growing number
    of top rows, the elasticity of ``y`` with respect to ``t`` on those rows is
    computed with ``elast`` and scaled by the fraction of rows used.

    Args:
        dataset: DataFrame holding the ``prediction``, ``y`` and ``t`` columns.
        prediction: Name of the column to rank by.
        y: Name of the outcome column.
        t: Name of the treatment column.
        min_periods: Number of top rows used for the first point of the curve.
        steps: Approximate number of points on the curve.

    Returns:
        1-D ``numpy`` array with one gain value per evaluated prefix; the last
        entry always uses all rows. Empty when ``dataset`` has no rows.
    """
    size = dataset.shape[0]
    if size == 0:
        # Nothing to rank; also avoids the 0 / 0 in ``rows / size`` below.
        return np.array([])
    ordered_df = dataset.sort_values(prediction, ascending=False).reset_index(drop=True)
    # ``size // steps`` is 0 when there are fewer rows than steps, and range()
    # rejects a zero step, so fall back to evaluating every row count.
    stride = max(1, size // steps)
    n_rows = list(range(min_periods, size, stride)) + [size]
    return np.array([elast(ordered_df.head(rows), y, t) * (rows / size) for rows in n_rows])

## 1. LGBM Regressor

In [30]:
# Project-local meta-learner helpers (Simulation package), LGBM variants.
from Simulation.X_learner import X_learner_lgbm
from Simulation.T_learner import T_learner_lgbm
from Simulation.S_learner import S_learner_lgbm

# Get CATE estimates for S-, T-, and X-learner using LGBM
# Each helper receives the train/test frames, the feature list X, the treatment
# column T and the outcome column y, and its result is unpacked into a
# (train, test) pair.
# NOTE(review): presumably the returned frames are the inputs plus a "pred_cate"
# column (the plotting cells rank by that column) -- confirm in Simulation/*.
# NOTE: the *_cate_train / *_cate_test names are reassigned by the later model
# sections, so the LGBM plots must be drawn before those cells run.
s_cate_train, s_cate_test = S_learner_lgbm(train, test, X, T, y)
t_cate_train, t_cate_test = T_learner_lgbm(train, test, X, T, y)
x_cate_train, x_cate_test = X_learner_lgbm(train, test, X, T, y)

In [31]:
# Plot cumulative gain curve for S-learner (LGBM)
gain_curve_test_s_lgbm = cumulative_gain(s_cate_test, "pred_cate", y="mort_28", t="peep_regime")
gain_curve_train_s_lgbm = cumulative_gain(s_cate_train, "pred_cate", y="mort_28", t="peep_regime")
plt.plot(gain_curve_test_s_lgbm, color="C0", label="Test")
plt.plot(gain_curve_train_s_lgbm, color="C1", label="Train")
# Dashed reference line from 0 up to the elasticity computed on the whole test frame.
plt.plot([0, 100], [0, elast(test, "mort_28", "peep_regime")], linestyle="--", color="black", label="Baseline")
plt.legend()
# Same axis labels as the T- and X-learner LGBM figures, so the plot stands alone.
plt.xlabel("% of Top Sensitivity")
plt.ylabel("Cumulative Gain")
plt.title("S-Learner, using LGBM");

In [32]:
# T-Learner, LGBM: Evaluating the model by looking at the cumulative gain curve
# Gain curves are computed on the T-learner outputs, ranking rows by "pred_cate".
gain_curve_test_t_lgbm = cumulative_gain(t_cate_test, "pred_cate", y="mort_28", t="peep_regime")
gain_curve_train_t_lgbm = cumulative_gain(t_cate_train, "pred_cate", y="mort_28", t="peep_regime")
plt.plot(gain_curve_test_t_lgbm, color="C0", label="Test")
plt.plot(gain_curve_train_t_lgbm, color="C1", label="Train")
# Dashed reference line from 0 up to the elasticity computed on the whole test frame.
plt.plot([0, 100], [0, elast(test, "mort_28", "peep_regime")], linestyle="--", color="black", label="Baseline")
# legend() is called after all three labelled lines have been drawn.
plt.legend()
plt.xlabel("% of Top Sensitivity")
plt.ylabel("Cumulative Gain")
plt.title("T-Learner, using LGBM");

In [33]:
# X-Learner, LGBM: Evaluating the model by looking at the cumulative gain curve
# Gain curves are computed on the X-learner outputs, ranking rows by "pred_cate".
gain_curve_test_x_lgbm = cumulative_gain(x_cate_test, "pred_cate", y="mort_28", t="peep_regime")
gain_curve_train_x_lgbm = cumulative_gain(x_cate_train, "pred_cate", y="mort_28", t="peep_regime")
plt.plot(gain_curve_test_x_lgbm, color="C0", label="Test")
plt.plot(gain_curve_train_x_lgbm, color="C1", label="Train")
# Dashed reference line from 0 up to the elasticity computed on the whole test frame.
plt.plot([0, 100], [0, elast(test, "mort_28", "peep_regime")], linestyle="--", color="black", label="Baseline")
# legend() is called after all three labelled lines have been drawn.
plt.legend()
plt.xlabel("% of Top Sensitivity")
plt.ylabel("Cumulative Gain")
plt.title("X-Learner, using LGBM");

## 2. Linear Regression 

In [34]:
# Project-local meta-learner helpers (Simulation package), linear-regression variants.
from Simulation.X_learner import X_learner_linear
from Simulation.T_learner import T_learner_linear
from Simulation.S_learner import S_learner_linear

# Get CATE estimates for S-, T-, and X-learner using Linear Regression
# NOTE: this rebinds the *_cate_train / *_cate_test names that the LGBM section
# assigned earlier; from here on they hold the linear-regression results.
# NOTE(review): presumably each returned frame carries a "pred_cate" column
# (the plotting cells rank by it) -- confirm in Simulation/*.
s_cate_train, s_cate_test = S_learner_linear(train, test, X, T, y)
t_cate_train, t_cate_test = T_learner_linear(train, test, X, T, y)
x_cate_train, x_cate_test = X_learner_linear(train, test, X, T, y)

In [35]:
# S-Learner, Linear: Evaluating the model by looking at the cumulative gain curve
gain_curve_test_s_linear = cumulative_gain(s_cate_test, "pred_cate", y="mort_28", t="peep_regime")
gain_curve_train_s_linear = cumulative_gain(s_cate_train, "pred_cate", y="mort_28", t="peep_regime")
plt.plot(gain_curve_test_s_linear, color="C0", label="Test")
plt.plot(gain_curve_train_s_linear, color="C1", label="Train")
# Dashed reference line from 0 up to the elasticity computed on the whole test frame.
plt.plot([0, 100], [0, elast(test, "mort_28", "peep_regime")], linestyle="--", color="black", label="Baseline")
plt.legend()
# Same axis labels as the LGBM T-/X-learner figures, so the plot stands alone.
plt.xlabel("% of Top Sensitivity")
plt.ylabel("Cumulative Gain")
plt.title("S-Learner, using Linear Regression");

In [36]:
# T-Learner, Linear: Evaluating the model by looking at the cumulative gain curve
gain_curve_test_t_linear = cumulative_gain(t_cate_test, "pred_cate", y="mort_28", t="peep_regime")
gain_curve_train_t_linear = cumulative_gain(t_cate_train, "pred_cate", y="mort_28", t="peep_regime")
plt.plot(gain_curve_test_t_linear, color="C0", label="Test")
plt.plot(gain_curve_train_t_linear, color="C1", label="Train")
# Dashed reference line from 0 up to the elasticity computed on the whole test frame.
plt.plot([0, 100], [0, elast(test, "mort_28", "peep_regime")], linestyle="--", color="black", label="Baseline")
plt.legend()
# Same axis labels as the LGBM T-/X-learner figures, so the plot stands alone.
plt.xlabel("% of Top Sensitivity")
plt.ylabel("Cumulative Gain")
plt.title("T-Learner, using Linear Regression");

In [37]:
# X-Learner, Linear: Evaluating the model by looking at the cumulative gain curve
gain_curve_test_x_linear = cumulative_gain(x_cate_test, "pred_cate", y="mort_28", t="peep_regime")
gain_curve_train_x_linear = cumulative_gain(x_cate_train, "pred_cate", y="mort_28", t="peep_regime")
plt.plot(gain_curve_test_x_linear, color="C0", label="Test")
plt.plot(gain_curve_train_x_linear, color="C1", label="Train")
# Dashed reference line from 0 up to the elasticity computed on the whole test frame.
plt.plot([0, 100], [0, elast(test, "mort_28", "peep_regime")], linestyle="--", color="black", label="Baseline")
plt.legend()
# Same axis labels as the LGBM T-/X-learner figures, so the plot stands alone.
plt.xlabel("% of Top Sensitivity")
plt.ylabel("Cumulative Gain")
plt.title("X-Learner, using Linear Regression");

## 4. Random Forest

In [38]:
# Project-local meta-learner helpers (Simulation package), random-forest variants.
from Simulation.X_learner import X_learner_rf
from Simulation.T_learner import T_learner_rf
from Simulation.S_learner import S_learner_rf

# Get CATE estimates for S-, T-, and X-learner using Random Forest
# NOTE: this rebinds the *_cate_train / *_cate_test names used by the earlier
# model sections; from here on they hold the random-forest results.
# NOTE(review): presumably each returned frame carries a "pred_cate" column
# (the plotting cells rank by it) -- confirm in Simulation/*.
s_cate_train, s_cate_test = S_learner_rf(train, test, X, T, y)
t_cate_train, t_cate_test = T_learner_rf(train, test, X, T, y)
x_cate_train, x_cate_test = X_learner_rf(train, test, X, T, y)

In [39]:
# S-Learner, RF: Evaluating the model by looking at the cumulative gain curve
gain_curve_test_s_rf = cumulative_gain(s_cate_test, "pred_cate", y="mort_28", t="peep_regime")
gain_curve_train_s_rf = cumulative_gain(s_cate_train, "pred_cate", y="mort_28", t="peep_regime")
plt.plot(gain_curve_test_s_rf, color="C0", label="Test")
plt.plot(gain_curve_train_s_rf, color="C1", label="Train")
# Dashed reference line from 0 up to the elasticity computed on the whole test frame.
plt.plot([0, 100], [0, elast(test, "mort_28", "peep_regime")], linestyle="--", color="black", label="Baseline")
plt.legend()
# Same axis labels as the LGBM T-/X-learner figures, so the plot stands alone.
plt.xlabel("% of Top Sensitivity")
plt.ylabel("Cumulative Gain")
plt.title("S-Learner, using RF");

In [40]:
# T-Learner, RF: Evaluating the model by looking at the cumulative gain curve
gain_curve_test_t_rf = cumulative_gain(t_cate_test, "pred_cate", y="mort_28", t="peep_regime")
gain_curve_train_t_rf = cumulative_gain(t_cate_train, "pred_cate", y="mort_28", t="peep_regime")
plt.plot(gain_curve_test_t_rf, color="C0", label="Test")
plt.plot(gain_curve_train_t_rf, color="C1", label="Train")
# Dashed reference line from 0 up to the elasticity computed on the whole test frame.
plt.plot([0, 100], [0, elast(test, "mort_28", "peep_regime")], linestyle="--", color="black", label="Baseline")
plt.legend()
# Same axis labels as the LGBM T-/X-learner figures, so the plot stands alone.
plt.xlabel("% of Top Sensitivity")
plt.ylabel("Cumulative Gain")
plt.title("T-Learner, using RF");

In [41]:
# X-Learner, RF: Evaluating the model by looking at the cumulative gain curve
gain_curve_test_x_rf = cumulative_gain(x_cate_test, "pred_cate", y="mort_28", t="peep_regime")
gain_curve_train_x_rf = cumulative_gain(x_cate_train, "pred_cate", y="mort_28", t="peep_regime")
plt.plot(gain_curve_test_x_rf, color="C0", label="Test")
plt.plot(gain_curve_train_x_rf, color="C1", label="Train")
# Dashed reference line from 0 up to the elasticity computed on the whole test frame.
plt.plot([0, 100], [0, elast(test, "mort_28", "peep_regime")], linestyle="--", color="black", label="Baseline")
plt.legend()
# Same axis labels as the LGBM T-/X-learner figures, so the plot stands alone.
plt.xlabel("% of Top Sensitivity")
plt.ylabel("Cumulative Gain")
plt.title("X-Learner, using RF");

## 5. Support Vector Regressor

In [42]:
# Project-local meta-learner helpers (Simulation package), support-vector variants.
from Simulation.X_learner import X_learner_svm
from Simulation.T_learner import T_learner_svm
from Simulation.S_learner import S_learner_svm

# Get CATE estimates for S-, T-, and X-learner using Support Vector Regressor
# NOTE: this rebinds the *_cate_train / *_cate_test names used by the earlier
# model sections; from here on they hold the support-vector results.
# NOTE(review): presumably each returned frame carries a "pred_cate" column
# (the plotting cells rank by it) -- confirm in Simulation/*.
s_cate_train, s_cate_test = S_learner_svm(train, test, X, T, y)
t_cate_train, t_cate_test = T_learner_svm(train, test, X, T, y)
x_cate_train, x_cate_test = X_learner_svm(train, test, X, T, y)

In [43]:
# S-Learner, SVR: Evaluating the model by looking at the cumulative gain curve
gain_curve_test_s_svr = cumulative_gain(s_cate_test, "pred_cate", y="mort_28", t="peep_regime")
gain_curve_train_s_svr = cumulative_gain(s_cate_train, "pred_cate", y="mort_28", t="peep_regime")
plt.plot(gain_curve_test_s_svr, color="C0", label="Test")
plt.plot(gain_curve_train_s_svr, color="C1", label="Train")
# Dashed reference line from 0 up to the elasticity computed on the whole test frame.
plt.plot([0, 100], [0, elast(test, "mort_28", "peep_regime")], linestyle="--", color="black", label="Baseline")
plt.legend()
# Same axis labels as the LGBM T-/X-learner figures, so the plot stands alone.
plt.xlabel("% of Top Sensitivity")
plt.ylabel("Cumulative Gain")
plt.title("S-Learner, using Support Vector Regression");

In [44]:
# T-Learner, SVR: Evaluating the model by looking at the cumulative gain curve
gain_curve_test_t_svr = cumulative_gain(t_cate_test, "pred_cate", y="mort_28", t="peep_regime")
gain_curve_train_t_svr = cumulative_gain(t_cate_train, "pred_cate", y="mort_28", t="peep_regime")
plt.plot(gain_curve_test_t_svr, color="C0", label="Test")
plt.plot(gain_curve_train_t_svr, color="C1", label="Train")
# Dashed reference line from 0 up to the elasticity computed on the whole test frame.
plt.plot([0, 100], [0, elast(test, "mort_28", "peep_regime")], linestyle="--", color="black", label="Baseline")
plt.legend()
# Same axis labels as the LGBM T-/X-learner figures, so the plot stands alone.
plt.xlabel("% of Top Sensitivity")
plt.ylabel("Cumulative Gain")
plt.title("T-Learner, using Support Vector Regression");

In [45]:
# X-Learner, SVR: Evaluating the model by looking at the cumulative gain curve
gain_curve_test_x_svr = cumulative_gain(x_cate_test, "pred_cate", y="mort_28", t="peep_regime")
gain_curve_train_x_svr = cumulative_gain(x_cate_train, "pred_cate", y="mort_28", t="peep_regime")
plt.plot(gain_curve_test_x_svr, color="C0", label="Test")
plt.plot(gain_curve_train_x_svr, color="C1", label="Train")
# Dashed reference line from 0 up to the elasticity computed on the whole test frame.
plt.plot([0, 100], [0, elast(test, "mort_28", "peep_regime")], linestyle="--", color="black", label="Baseline")
plt.legend()
# Same axis labels as the LGBM T-/X-learner figures, so the plot stands alone.
plt.xlabel("% of Top Sensitivity")
plt.ylabel("Cumulative Gain")
plt.title("X-Learner, using Support Vector Regression");