In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score, recall_score, f1_score

In [16]:
# Get imputed data
mimic_complete = pd.read_csv("./impute_mimic.csv")
mimic_complete['mort_28'].replace([False, True],[0, 1], inplace=True)

In [17]:
# Create variables to store outcome Y, treatment T, and features X
y = "mort_28"
T = "peep_regime"
P = ["age", "weight", "pf_ratio", "po2", "driving_pressure", "fio2", "hco3", "plateau_pressure", "respiratory_rate"]

In [18]:
train, test = train_test_split(mimic_complete, test_size=0.3)
train[P].shape

(2758, 9)

In [19]:
from sklearn.preprocessing import MinMaxScaler

# Get AUC-ROC, Precision, Recall, F1-score for different models (regression, rf, decision tree)
auc_roc_log_list = []
auc_roc_rf_list = []
auc_roc_dt_list = []

precision_log_list = []
precision_rf_list = []
precision_dt_list = []

recall_log_list = []
recall_rf_list = []
recall_dt_list =[]

f1_log_list = []
f1_rf_list = []
f1_dt_list = []

for i in range(50):
     train, test = train_test_split(mimic_complete, test_size=0.3)

    # Normalizing Data
     normalizer = MinMaxScaler()
     train[P] = normalizer.fit_transform(train[P])
     test[P] = normalizer.fit_transform(test[P])
          
     # Different propensity score models (already did hyperparameter tuning)
     propensity_logistic = LogisticRegression(solver="lbfgs", max_iter=200, C=1.0, random_state=123)
     propensity_rf = RandomForestClassifier(n_estimators=50, max_depth=None, min_samples_leaf=2, random_state=123)
     propensity_decision_tree = DecisionTreeClassifier(max_depth=10, min_samples_leaf=1, random_state=123)     
     
     propensity_logistic.fit(train[P], train[T])
     propensity_rf.fit(train[P], train[T])
     propensity_decision_tree.fit(train[P], train[T])
     
     # Predict probabilities
     y_probs_logistic = propensity_logistic.predict_proba(test[P])[:, 1]
     y_probs_rf = propensity_rf.predict_proba(test[P])[:, 1]
     y_probs_dt = propensity_decision_tree.predict_proba(test[P])[:, 1]
     y_test = test[T]
     
     # Accuracy
     # AUC-ROC: higher value indicates better discrimination performance
     auc_roc_logistic = roc_auc_score(y_test, y_probs_logistic)
     auc_roc_rf = roc_auc_score(y_test, y_probs_rf)
     auc_roc_dt = roc_auc_score(y_test, y_probs_dt)    
     auc_roc_log_list.append(auc_roc_logistic)
     auc_roc_rf_list.append(auc_roc_rf)
     auc_roc_dt_list.append(auc_roc_dt)
     
     # Precision: Measures the proportion of true positive predictions among all positive predictions made by the model
     precision_log = precision_score(y_test, (y_probs_logistic > 0.5))
     precision_rf = precision_score(y_test, (y_probs_rf > 0.5))
     precision_dt = precision_score(y_test, (y_probs_dt > 0.5))
     precision_log_list.append(precision_log)
     precision_rf_list.append(precision_rf)
     precision_dt_list.append(precision_dt)
     
     # Recall: Measures the model's ability to capture all positive instances     
     recall_log = recall_score(y_test, (y_probs_logistic > 0.5))
     recall_rf = recall_score(y_test, (y_probs_rf > 0.5))
     recall_dt = recall_score(y_test, (y_probs_dt > 0.5))
     recall_log_list.append(recall_log)
     recall_rf_list.append(recall_rf)
     recall_dt_list.append(recall_dt)
     
     # F1-score: Provides balance between precision and recall and is useful when the class distribution is uneven (!)     
     f1_log = f1_score(y_test, (y_probs_logistic > 0.5))
     f1_rf = f1_score(y_test, (y_probs_rf > 0.5))
     f1_dt = f1_score(y_test, (y_probs_dt > 0.5))
     f1_log_list.append(f1_log)
     f1_rf_list.append(f1_rf)
     f1_dt_list.append(f1_dt)


In [20]:
# Performance of the Propensity Models
print(f"AUC-ROC logistic: {sum(auc_roc_log_list)/len(auc_roc_log_list):.4f}")
print(f"AUC-ROC rf: {sum(auc_roc_rf_list)/len(auc_roc_rf_list) :.4f}")
print(f"AUC-ROC dt: {sum(auc_roc_dt_list)/len(auc_roc_dt_list):.4f}")

print(f"Precision-score logistic: {sum(precision_log_list)/len(precision_log_list):.4f}")
print(f"Precision-score rf: {sum(precision_rf_list)/len(precision_rf_list) :.4f}")
print(f"Precision-score dt: {sum(precision_dt_list)/len(precision_dt_list):.4f}")

print(f"Recall-score logistic: {sum(recall_log_list)/len(recall_log_list):.4f}")
print(f"Recall-score rf: {sum(recall_rf_list)/len(recall_rf_list) :.4f}")
print(f"Recall-score dt: {sum(recall_dt_list)/len(recall_dt_list):.4f}")

print(f"f1-score logistic: {sum(f1_log_list)/len(f1_log_list):.4f}")
print(f"f1-score rf: {sum(f1_rf_list )/len(f1_rf_list ) :.4f}")
print(f"f1-score dt: {sum(f1_dt_list)/len(f1_dt_list):.4f}")

AUC-ROC logistic: 0.8272
AUC-ROC rf: 0.7210
AUC-ROC dt: 0.5278
Precision-score logistic: 0.4729
Precision-score rf: 0.4374
Precision-score dt: 0.1854
Recall-score logistic: 0.4296
Recall-score rf: 0.1944
Recall-score dt: 0.3265
f1-score logistic: 0.3894
f1-score rf: 0.2148
f1-score dt: 0.1908
