# Ablation Studies for Risk Score Generation

In [None]:
# Maps raw dataset column names to the human-readable feature labels used in
# this notebook. It is passed to `DataFrame.rename(columns=...)`, which by
# default silently ignores keys that are not present in the frame, so extra
# keys here are harmless.
REPLACE_DICT = {
    'preiculos': 'Pre-ICU LOS',
    'age': 'Age',
    'gcs_min': 'Min GCS',
    'mechvent': 'Ventilation',
    'urineoutput': 'Urine Output',

    'heartrate_min': 'Min Heart Rate',
    'heartrate_max': 'Max Heart Rate',

    'meanbp_min': 'Min MBP',
    'meanbp_max': 'Max MBP',

    'resprate_min': 'Min Respiratory Rate',
    'resprate_max': 'Max Respiratory Rate',

    'tempc_min': 'Min Temperature',
    'tempc_max': 'Max Temperature',

    'sysbp_min': 'Min SBP',
    'sysbp_max': 'Max SBP',

    'bun_min': 'Min BUN',
    'bun_max': 'Max BUN',

    'wbc_min': 'Min WBC',
    'wbc_max': 'Max WBC',

    'potassium_min': 'Min Potassium',
    'potassium_max': 'Max Potassium',

    'sodium_min': 'Min Sodium',
    'sodium_max': 'Max Sodium',

    'bicarbonate_min': 'Min Bicarbonate',
    'bicarbonate_max': 'Max Bicarbonate',

    'bilirubin_min': 'Min Bilirubin',
    'bilirubin_max': 'Max Bilirubin',

    'hematocrit_min': 'Min Hematocrit',
    'hematocrit_max': 'Max Hematocrit',

    'creatinine_min': 'Min Creatinine',
    'creatinine_max': 'Max Creatinine',

    'albumin_min': 'Min Albumin',
    'albumin_max': 'Max Albumin',

    'glucose_max': 'Max Glucose',
    'glucose_min': 'Min Glucose',

    'aids': 'AIDS/HIV',
    'hem': 'Hematologic Cancer',
    'mets': 'Metastatic Cancer',

    'electivesurgery': 'Elective Surgery',
    'pao2fio2_vent_min': 'Min P/F Ratio',
    'admissiontype': 'Admission Type',

    'pao2_max': 'Max PaO2',
    'pao2_min': 'Min PaO2',

    'paco2_max': 'Max PaCO2',
    'paco2_min': 'Min PaCO2',

    'ph_min': 'Min pH',
    'ph_max': 'Max pH',

    'aado2_min': 'Min A-aO2',
    # Spelling consistent with 'aado2_min'; without this key a column named
    # 'aado2_max' would never be renamed.
    'aado2_max': 'Max A-aO2',
    # Original (apparently misspelled) key, kept so any data that really uses
    # this column name is still renamed. NOTE(review): confirm against the CSV
    # headers and drop whichever spelling is unused.
    'addo2_max': 'Max A-aO2',
}

## group sparsity = 10

In [None]:
from mimic_pipeline.feature import BinBinarizer
import pandas as pd

# Read the full MIMIC table and switch to the display-friendly column names.
entire = pd.read_csv("data/MIMIC-WHOLE.csv").rename(columns=REPLACE_DICT)

# Split the label column from the feature columns.
y_train = entire['hospital_expire_flag']
X_train = entire.drop(columns='hospital_expire_flag')

# Fit the binarizer on the training features. fit_transform returns the
# binarized frame together with a second value kept as `group_idx`
# (the commented-out constructors in this file pass it as
# `featureIndex_to_groupIndex`).
binarizer = BinBinarizer(
    interval_width=1,
    whether_interval=False,
    group_sparsity=True,
)
X_train, group_idx = binarizer.fit_transform(X_train)

print(X_train.shape)
X_train.head()

In [None]:
# Per-column (lower, upper) bound overrides, keyed by binarized column name.
# A column is matched by substring against the readable feature label; the
# first matching rule wins.
pairs = {}
for col in X_train.columns:
    if 'Max Bilirubin' in col:
        pairs[col] = (-100, 0)
    elif 'Min GCS' in col or 'Min SBP' in col:
        pairs[col] = (0, 100)
    # elif 'Max BUN' in col:
    #     pairs[col] = (-100, 0)

In [None]:
# Echo the per-column bound overrides so they can be inspected in the cell output.
pairs

In [None]:
from mimic_pipeline.model import FasterRisk

# Expand the per-column overrides in `pairs` into full lower/upper bound lists,
# using -100 / 100 for every column without an override.
lb_list, ub_list = FasterRisk.define_bounds(
    X_train, feature_bound_pairs=pairs, lb_else=-100, ub_else=100
)

# NOTE(review): lb_list / ub_list / group_idx are only consumed by the
# commented-out constructor below; the active model uses scalar bounds and no
# group-sparsity arguments.
# fasterrisk_10_fix = FasterRisk(gap_tolerance=0.3, group_sparsity=10, k=34, lb=lb_list, select_top_m=1, ub=ub_list, featureIndex_to_groupIndex=group_idx)
fasterrisk_10_fix = FasterRisk(
    gap_tolerance=0.3,
    k=34,
    lb=-100,
    ub=100,
    select_top_m=1,
)
fasterrisk_10_fix.fit(X_train, y_train)

In [None]:
# Print the risk card of the fitted model, labelling features with the
# binarized column names.
fasterrisk_10_fix.print_risk_card(
    list(X_train.columns),
    X_train,
    y_train,
)

In [None]:
# Render the risk card graphically, using the binarized column names as labels.
fasterrisk_10_fix.visualize_risk_card(
    list(X_train.columns),
    X_train,
)

In [None]:
import pandas as pd
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, roc_curve
import joblib

# Load the eICU table and separate the label from the feature columns.
test = pd.read_csv("data/eICU-union.csv")
y_test = test['hospital_expire_flag']
X_test = test.drop(
    [
        'uniquepid',
        'patientunitstayid',
        'oasis_prob',
        'apache_iv_prob',
        'apache_iva_prob',
        'sapsii_prob',
        'hospital_expire_flag',
    ],
    axis=1,
)

# Apply the same renaming and the binarizer already fitted on the training data.
X_test = X_test.rename(columns=REPLACE_DICT)
X_test, _ = binarizer.transform(X_test)
y_prob = fasterrisk_10_fix.predict_proba(X_test)

# ROC curve and its area.
fpr, tpr, _ = roc_curve(y_test, y_prob)
print(f"AUC: {auc(fpr, tpr)}")

# Precision-recall curve; bundle both curves and their areas for later use.
precision, recall, _ = precision_recall_curve(y_test, y_prob)
stats = {
    "precision": precision,
    "recall": recall,
    "auroc": auc(fpr, tpr),
    "auprc": auc(recall, precision),
    "fpr": fpr,
    "tpr": tpr,
}
joblib.dump(stats, "results/fasterrisk/fasterrisk-10-ood-stats-corrected")
print(f"AUPRC: {stats['auprc']}")

## group sparsity = 15

In [None]:
import joblib
from mimic_pipeline.feature import BinBinarizer
import pandas as pd

# Previously saved model. NOTE(review): `fasterrisk_15` is not referenced by
# any other cell visible in this notebook. joblib.load unpickles the file, so
# only load artifacts from a trusted source.
fasterrisk_15 = joblib.load("models/fasterrisk/fasterrisk-15")

# Read the full MIMIC table and switch to the display-friendly column names.
entire = pd.read_csv("data/MIMIC-WHOLE.csv").rename(columns=REPLACE_DICT)

# Split the label column from the feature columns.
y_train = entire['hospital_expire_flag']
X_train = entire.drop(columns='hospital_expire_flag')

# Fit the binarizer on the training features; the second return value is
# kept as `group_idx`.
binarizer = BinBinarizer(
    interval_width=1,
    whether_interval=False,
    group_sparsity=True,
)
X_train, group_idx = binarizer.fit_transform(X_train)
X_train.head()

In [None]:
# Per-column (lower, upper) bound overrides, keyed by binarized column name.
# Columns are matched by substring; rules are checked in the order written
# and the first match wins.
pairs = {}
for col in X_train.columns:
    if 'Max Bilirubin' in col:
        pairs[col] = (-100, 0)
    elif 'Min GCS' in col or 'Min SBP' in col:
        pairs[col] = (0, 100)
    elif 'Max BUN' in col:
        pairs[col] = (-100, 0)

In [None]:
from mimic_pipeline.model import FasterRisk

# Expand the per-column overrides in `pairs` into full lower/upper bound lists,
# using -50 / 70 for every column without an override.
lb_list, ub_list = FasterRisk.define_bounds(
    X_train, feature_bound_pairs=pairs, lb_else=-50, ub_else=70
)

# NOTE(review): lb_list / ub_list / group_idx are only consumed by the
# commented-out constructor below; the active model uses scalar bounds and no
# group-sparsity arguments.
# fasterrisk_15_fix = FasterRisk(gap_tolerance=0.3, group_sparsity=15, k=50, lb=lb_list, select_top_m=1, ub=ub_list, featureIndex_to_groupIndex=group_idx)
fasterrisk_15_fix = FasterRisk(
    gap_tolerance=0.3,
    k=50,
    lb=-50,
    ub=70,
    select_top_m=1,
)
fasterrisk_15_fix.fit(X_train, y_train)

In [None]:
# Print the risk card of the fitted model using the binarized column names.
# NOTE(review): unlike the other print_risk_card calls in this notebook, no
# labels (y_train) are passed here; a later cell in this section repeats the
# call with NumPy inputs and labels. Confirm whether this call is still needed.
fasterrisk_15_fix.print_risk_card(list(X_train.columns), X_train)

In [None]:
# Render the risk card graphically, using the binarized column names as labels.
fasterrisk_15_fix.visualize_risk_card(
    list(X_train.columns),
    X_train,
)

In [None]:
# Print the risk card again, this time passing the features and labels as
# NumPy arrays rather than pandas objects.
fasterrisk_15_fix.print_risk_card(
    list(X_train.columns),
    X_train.to_numpy(),
    y_train.to_numpy(),
)

In [None]:
import pandas as pd
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, roc_curve

# Load the eICU table and separate the label from the feature columns.
test = pd.read_csv("data/eICU-union.csv")
y_test = test['hospital_expire_flag']
X_test = test.drop(
    [
        'uniquepid',
        'patientunitstayid',
        'oasis_prob',
        'apache_iv_prob',
        'apache_iva_prob',
        'sapsii_prob',
        'hospital_expire_flag',
    ],
    axis=1,
)

# Apply the same renaming and the binarizer already fitted on the training data.
X_test = X_test.rename(columns=REPLACE_DICT)
X_test, _ = binarizer.transform(X_test)
y_prob = fasterrisk_15_fix.predict_proba(X_test)

# ROC curve and its area.
fpr, tpr, _ = roc_curve(y_test, y_prob)
print(f"AUC: {auc(fpr, tpr)}")

# Precision-recall curve; bundle both curves and their areas for later use.
precision, recall, _ = precision_recall_curve(y_test, y_prob)
stats = {
    "precision": precision,
    "recall": recall,
    "auroc": auc(fpr, tpr),
    "auprc": auc(recall, precision),
    "fpr": fpr,
    "tpr": tpr,
}
joblib.dump(stats, "results/fasterrisk/fasterrisk-15-ood-stats-corrected")
print(f"AUPRC: {stats['auprc']}")

## group sparsity = 40

Perform monotonic correction on the following features:
* Max Sodium -- increasing
* Max Bilirubin -- increasing
* Min Respiratory Rate -- increasing 
* Min Bicarbonate -- increasing

In [None]:
from mimic_pipeline.feature import BinBinarizer
import pandas as pd

# Read the full MIMIC table and switch to the display-friendly column names.
entire = pd.read_csv("data/MIMIC-WHOLE.csv").rename(columns=REPLACE_DICT)

# Split the label column from the feature columns.
y_train = entire['hospital_expire_flag']
X_train = entire.drop(columns='hospital_expire_flag')

# Fit the binarizer on the training features; the second return value is
# kept as `group_idx`.
binarizer = BinBinarizer(
    interval_width=1,
    whether_interval=False,
    group_sparsity=True,
)
X_train, group_idx = binarizer.fit_transform(X_train)

print(X_train.shape)
X_train.head()

In [None]:
# Per-column (lower, upper) bound overrides, keyed by binarized column name.
# Every column whose name contains one of the listed feature labels gets the
# same (-100, 0) bound.
pairs = {}
for col in X_train.columns:
    if any(
        label in col
        for label in (
            'Max Bilirubin',
            'Max Sodium',
            'Min Respiratory Rate',
            'Min Bicarbonate',
        )
    ):
        pairs[col] = (-100, 0)

In [None]:
from mimic_pipeline.model import FasterRisk

# Expand the per-column overrides in `pairs` into full lower/upper bound lists,
# using -100 / 100 for every column without an override.
lb_list, ub_list = FasterRisk.define_bounds(
    X_train, feature_bound_pairs=pairs, lb_else=-100, ub_else=100
)

# NOTE(review): lb_list / ub_list / group_idx are only consumed by the
# commented-out constructors below; the active model uses scalar bounds and no
# group-sparsity arguments.
# fasterrisk_40_fix = FasterRisk(gap_tolerance=0.3, group_sparsity=40, k=80, lb=-70, select_top_m=1, ub=50, featureIndex_to_groupIndex=group_idx)
# fasterrisk_40_fix = FasterRisk(gap_tolerance=0.3, group_sparsity=40, k=80, lb=lb_list, select_top_m=1, ub=ub_list, featureIndex_to_groupIndex=group_idx)
fasterrisk_40_fix = FasterRisk(
    gap_tolerance=0.3,
    k=80,
    lb=-100,
    ub=100,
    select_top_m=1,
)
fasterrisk_40_fix.fit(X_train, y_train)

In [None]:
# Print the risk card of the fitted model, labelling features with the
# binarized column names.
fasterrisk_40_fix.print_risk_card(
    list(X_train.columns),
    X_train,
    y_train,
)

In [None]:
# Render the risk card graphically, using the binarized column names as labels.
fasterrisk_40_fix.visualize_risk_card(
    list(X_train.columns),
    X_train,
)

In [None]:
import pandas as pd
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, roc_curve
# joblib is needed for the dump below. No other cell in this section imports
# it, so without this line the section fails with a NameError when run on its
# own (e.g. after a kernel restart).
import joblib

# Load the eICU table and separate the label from the feature columns.
test = pd.read_csv("data/eICU-union.csv")
y_test = test['hospital_expire_flag']
X_test = test.drop(
    [
        'uniquepid',
        'patientunitstayid',
        'oasis_prob',
        'apache_iv_prob',
        'apache_iva_prob',
        'sapsii_prob',
        'hospital_expire_flag',
    ],
    axis=1,
)

# Apply the same renaming and the binarizer already fitted on the training data.
X_test = X_test.rename(columns=REPLACE_DICT)
X_test, _ = binarizer.transform(X_test)
y_prob = fasterrisk_40_fix.predict_proba(X_test)

# ROC curve and its area.
fpr, tpr, _ = roc_curve(y_test, y_prob)
print(f"AUC: {auc(fpr, tpr)}")

# Precision-recall curve; bundle both curves and their areas for later use.
precision, recall, _ = precision_recall_curve(y_test, y_prob)
stats = {
    "precision": precision,
    "recall": recall,
    "auroc": auc(fpr, tpr),
    "auprc": auc(recall, precision),
    "fpr": fpr,
    "tpr": tpr,
}
joblib.dump(stats, "results/fasterrisk/fasterrisk-40-ood-stats-corrected")
print(f"AUPRC: {stats['auprc']}")