## Import Libs

In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os
import random
import pickle
import joblib
from tqdm import tqdm
from sklearn.model_selection import GridSearchCV
from plot_metric.functions import BinaryClassification
from sklearn import linear_model, preprocessing
from sklearn.metrics import precision_recall_curve, auc, roc_curve, classification_report
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score


In [2]:
# check the system font
import matplotlib.font_manager as font_manager
font_manager.findSystemFonts(fontpaths=None, fontext='ttf')

# add the font wanted
font_dir = ['../../Latin-Modern-Roman']
for font in font_manager.findSystemFonts(font_dir):
    font_manager.fontManager.addfont(font)

# Set font family globally
plt.rcParams['font.family'] = 'Latin Modern Roman'
print(plt.rcParams['font.family'])

['Latin Modern Roman']


## Feature Selection

### Baseline Data

In [5]:
X_feature_baseline = ['HR', 'SaO2', 'Temp', 'SBP', 'MAP', 'DBP', 'RR', 'BaseExcess', 'HCO3',
       'PH', 'BUN', 'Calcium', 'Chloride', 'Creatinine', 'Glucose', 'Lactic',
       'Magnesium', 'Potassium', 'PTT', 'WBC', 'Platelet', 'age', 'gender']

y_feature = 'sepsis'

In [6]:
max_features_baseline = len(X_feature_baseline)
print('There are ',len(X_feature_baseline), 'Features inside baseline data with subject_id and sepsis excluded')

There are  23 Features inside baseline data with subject_id and sepsis excluded


In [5]:
# In Random Forest model, the data split in 5-fold crossvalidation is stored,
# It can be directly applied here

for k in tqdm(range(5)):

    print('---Fold{}/5---'.format(k+1))

    # load the train and val data
    filename = '../data/data_both/kFold_baseline/fold{}/train.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        train_baseline = pickle.load(f)

    filename = '../data/data_both/kFold_baseline/fold{}/val.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        val_baseline = pickle.load(f)
    
    X_train_baseline = train_baseline[X_feature_baseline]
    y_train_baseline = train_baseline[y_feature]

    X_val_baseline = val_baseline[X_feature_baseline]
    y_val_baseline = val_baseline[y_feature]

    #Standardize the dataset
    scaler = preprocessing.StandardScaler()
    # fit and transform data
    scaler.fit(X_train_baseline)
    X_train_baseline = scaler.transform(X_train_baseline)
    X_val_baseline = scaler.transform(X_val_baseline)

    # train a basic LR model
    LR_baseline = linear_model.LogisticRegression(C=10, solver='saga', max_iter=1000)
    LR_baseline.fit(X_train_baseline, y_train_baseline)

    # validation of the model
    yhat_baseline = LR_baseline.predict(X_val_baseline)
    acc_baseline = np.mean(yhat_baseline == y_val_baseline)
    print('Baseline Dataset {}: Accuracy on the training data is {}'.format(k+1, acc_baseline))

    # plot graph of feature importances for better visualization
    feat_importances = LR_baseline.coef_.ravel()
    feat_importances = pd.Series(feat_importances, index=X_feature_baseline)
    feat_importances.nlargest(len(X_feature_baseline)).plot(kind='barh')
    plt.savefig('./figs/feature_importance_baseline_{}.pdf'.format(k+1))
    plt.clf()


    # Interprete the Weight Vector for features
    W_baseline = LR_baseline.coef_.ravel()
    ind = np.argsort(np.abs(W_baseline))
    for j in range(1,len(ind)):
        i = ind[-j]
        name = X_feature_baseline[i]
        print('The {0:d} most significant feature is {1:s}'.format(j, name))

  0%|          | 0/5 [00:00<?, ?it/s]

---Fold1/5---
Baseline Dataset 1: Accuracy on the training data is 0.6074441788727503


 20%|██        | 1/5 [00:17<01:11, 17.82s/it]

The 1 most significant feature is HR
The 2 most significant feature is SaO2
The 3 most significant feature is PH
The 4 most significant feature is DBP
The 5 most significant feature is RR
The 6 most significant feature is WBC
The 7 most significant feature is BUN
The 8 most significant feature is Glucose
The 9 most significant feature is MAP
The 10 most significant feature is HCO3
The 11 most significant feature is gender
The 12 most significant feature is Creatinine
The 13 most significant feature is Chloride
The 14 most significant feature is age
The 15 most significant feature is Magnesium
The 16 most significant feature is SBP
The 17 most significant feature is Lactic
The 18 most significant feature is Platelet
The 19 most significant feature is PTT
The 20 most significant feature is Potassium
The 21 most significant feature is BaseExcess
The 22 most significant feature is Calcium
---Fold2/5---
Baseline Dataset 2: Accuracy on the training data is 0.5842594646927126


 40%|████      | 2/5 [00:33<00:50, 16.78s/it]

The 1 most significant feature is HR
The 2 most significant feature is SaO2
The 3 most significant feature is PH
The 4 most significant feature is DBP
The 5 most significant feature is MAP
The 6 most significant feature is RR
The 7 most significant feature is Glucose
The 8 most significant feature is WBC
The 9 most significant feature is BUN
The 10 most significant feature is SBP
The 11 most significant feature is Creatinine
The 12 most significant feature is PTT
The 13 most significant feature is gender
The 14 most significant feature is Lactic
The 15 most significant feature is age
The 16 most significant feature is Chloride
The 17 most significant feature is Potassium
The 18 most significant feature is BaseExcess
The 19 most significant feature is Platelet
The 20 most significant feature is Magnesium
The 21 most significant feature is HCO3
The 22 most significant feature is Temp
---Fold3/5---
Baseline Dataset 3: Accuracy on the training data is 0.59864592394786


 60%|██████    | 3/5 [00:49<00:32, 16.31s/it]

The 1 most significant feature is HR
The 2 most significant feature is SaO2
The 3 most significant feature is PH
The 4 most significant feature is DBP
The 5 most significant feature is RR
The 6 most significant feature is BUN
The 7 most significant feature is MAP
The 8 most significant feature is Glucose
The 9 most significant feature is WBC
The 10 most significant feature is Magnesium
The 11 most significant feature is Chloride
The 12 most significant feature is Creatinine
The 13 most significant feature is HCO3
The 14 most significant feature is PTT
The 15 most significant feature is SBP
The 16 most significant feature is gender
The 17 most significant feature is age
The 18 most significant feature is Platelet
The 19 most significant feature is Lactic
The 20 most significant feature is Temp
The 21 most significant feature is Potassium
The 22 most significant feature is BaseExcess
---Fold4/5---
Baseline Dataset 4: Accuracy on the training data is 0.6030393457862084


 80%|████████  | 4/5 [01:02<00:15, 15.05s/it]

The 1 most significant feature is HR
The 2 most significant feature is SaO2
The 3 most significant feature is PH
The 4 most significant feature is DBP
The 5 most significant feature is MAP
The 6 most significant feature is RR
The 7 most significant feature is BUN
The 8 most significant feature is WBC
The 9 most significant feature is Glucose
The 10 most significant feature is SBP
The 11 most significant feature is Chloride
The 12 most significant feature is gender
The 13 most significant feature is PTT
The 14 most significant feature is Magnesium
The 15 most significant feature is HCO3
The 16 most significant feature is Creatinine
The 17 most significant feature is Lactic
The 18 most significant feature is Temp
The 19 most significant feature is BaseExcess
The 20 most significant feature is Calcium
The 21 most significant feature is Potassium
The 22 most significant feature is Platelet
---Fold5/5---
Baseline Dataset 5: Accuracy on the training data is 0.5916755773100705


100%|██████████| 5/5 [01:15<00:00, 15.20s/it]

The 1 most significant feature is HR
The 2 most significant feature is SaO2
The 3 most significant feature is DBP
The 4 most significant feature is PH
The 5 most significant feature is RR
The 6 most significant feature is WBC
The 7 most significant feature is MAP
The 8 most significant feature is Glucose
The 9 most significant feature is BUN
The 10 most significant feature is Chloride
The 11 most significant feature is HCO3
The 12 most significant feature is gender
The 13 most significant feature is SBP
The 14 most significant feature is PTT
The 15 most significant feature is Lactic
The 16 most significant feature is age
The 17 most significant feature is Creatinine
The 18 most significant feature is Platelet
The 19 most significant feature is Potassium
The 20 most significant feature is Temp
The 21 most significant feature is Magnesium
The 22 most significant feature is BaseExcess





<Figure size 432x288 with 0 Axes>

Results:

In [7]:
# 9 most important features + 2 extra vital sign features
X_feature_baseline_fs = ['HR', 'SaO2', 'PH', 'DBP', 'RR', 'BUN', 'MAP', 'Glucose', 'WBC', 'Temp', 'SBP']

y_feature = 'sepsis'

### Engineered Data

In [8]:


X_feature_engineered = ['HR', 'SaO2', 'Temp', 'SBP', 'MAP', 'DBP', 'RR', 'BaseExcess', 'HCO3',
       'PH', 'BUN', 'Calcium', 'Chloride', 'Creatinine', 'Glucose', 'Lactic',
       'Magnesium', 'Potassium', 'PTT', 'WBC', 'Platelet', 'age', 'gender',
       'HR_dev_1', 'HR_dev_2', 'HR_dev_3', 'RR_dev_1',
       'RR_dev_2', 'RR_dev_3', 'Temp_dev_1', 'Temp_dev_2', 'Temp_dev_3',
       'Bradycardia', 'Tachycardia', 'Hypothermia', 'Fever', 'Hyperpyrexia']

y_feature = 'sepsis'
# omit the patient_id here

In [7]:
max_features_eng = len(X_feature_engineered)
print('There are ',len(X_feature_engineered), 'Features inside engineered data with subject_id and sepsis excluded')

There are  37 Features inside engineered data with subject_id and sepsis excluded


In [9]:
# In Random Forest model, the data split in 5-fold crossvalidation is stored,
# It can be directly applied here

for k in tqdm(range(5)):

    print('---Fold{}/5---'.format(k+1))

    # load the train and val data
    filename = '../data/data_both/kFold_engineered/fold{}/train.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        train_engineered = pickle.load(f)

    filename = '../data/data_both/kFold_engineered/fold{}/val.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        val_engineered = pickle.load(f)
    
    X_train_engineered = train_engineered[X_feature_engineered]
    y_train_engineered = train_engineered[y_feature]

    X_val_engineered = val_engineered[X_feature_engineered]
    y_val_engineered = val_engineered[y_feature]

    #Standardize the dataset
    scaler = preprocessing.StandardScaler()
    # fit and transform data
    scaler.fit(X_train_engineered)
    X_train_engineered = scaler.transform(X_train_engineered)
    X_val_engineered = scaler.transform(X_val_engineered)

    # train a basic LR model
    LR_engineered = linear_model.LogisticRegression(C=10, solver='saga', max_iter=1000)
    LR_engineered.fit(X_train_engineered, y_train_engineered)

    # validation of the model
    yhat_engineered = LR_engineered.predict(X_val_engineered)
    acc_engineered = np.mean(yhat_engineered == y_val_engineered)
    print('engineered Dataset {}: Accuracy on the training data is {}'.format(k+1, acc_engineered))

    # plot graph of feature importances for better visualization
    feat_importances = LR_engineered.coef_.ravel()
    feat_importances = pd.Series(feat_importances, index=X_feature_engineered)
    feat_importances.nlargest(len(X_feature_engineered)).plot(kind='barh')
    plt.savefig('./figs/feature_importance_engineered_{}.pdf'.format(k+1))
    plt.clf()


    # Interprete the Weight Vector for features
    W_engineered = LR_engineered.coef_.ravel()
    ind = np.argsort(np.abs(W_engineered))
    for j in range(1,len(ind)):
        i = ind[-j]
        name = X_feature_engineered[i]
        print('The {0:d} most significant feature is {1:s}'.format(j, name))

  0%|          | 0/5 [00:00<?, ?it/s]

---Fold1/5---
engineered Dataset 1: Accuracy on the training data is 0.7300021109544919


 20%|██        | 1/5 [01:19<05:17, 79.30s/it]

The 1 most significant feature is HR_dev_3
The 2 most significant feature is Temp
The 3 most significant feature is HR_dev_2
The 4 most significant feature is Hyperpyrexia
The 5 most significant feature is HR_dev_1
The 6 most significant feature is Temp_dev_3
The 7 most significant feature is Temp_dev_2
The 8 most significant feature is Temp_dev_1
The 9 most significant feature is Hypothermia
The 10 most significant feature is Fever
The 11 most significant feature is SaO2
The 12 most significant feature is HR
The 13 most significant feature is PH
The 14 most significant feature is RR
The 15 most significant feature is BUN
The 16 most significant feature is DBP
The 17 most significant feature is RR_dev_1
The 18 most significant feature is RR_dev_2
The 19 most significant feature is RR_dev_3
The 20 most significant feature is Glucose
The 21 most significant feature is SBP
The 22 most significant feature is WBC
The 23 most significant feature is MAP
The 24 most significant feature is PTT


 40%|████      | 2/5 [02:50<04:19, 86.44s/it]

The 1 most significant feature is HR_dev_3
The 2 most significant feature is Temp
The 3 most significant feature is Hyperpyrexia
The 4 most significant feature is HR_dev_2
The 5 most significant feature is HR_dev_1
The 6 most significant feature is Hypothermia
The 7 most significant feature is Temp_dev_3
The 8 most significant feature is Temp_dev_2
The 9 most significant feature is Temp_dev_1
The 10 most significant feature is Fever
The 11 most significant feature is SaO2
The 12 most significant feature is HR
The 13 most significant feature is PH
The 14 most significant feature is RR
The 15 most significant feature is DBP
The 16 most significant feature is BUN
The 17 most significant feature is SBP
The 18 most significant feature is MAP
The 19 most significant feature is PTT
The 20 most significant feature is RR_dev_2
The 21 most significant feature is RR_dev_1
The 22 most significant feature is RR_dev_3
The 23 most significant feature is Lactic
The 24 most significant feature is Gluco

 60%|██████    | 3/5 [03:51<02:29, 74.69s/it]

The 1 most significant feature is HR_dev_3
The 2 most significant feature is Temp
The 3 most significant feature is Hyperpyrexia
The 4 most significant feature is HR_dev_2
The 5 most significant feature is HR_dev_1
The 6 most significant feature is Hypothermia
The 7 most significant feature is Temp_dev_3
The 8 most significant feature is Temp_dev_2
The 9 most significant feature is Temp_dev_1
The 10 most significant feature is Fever
The 11 most significant feature is SaO2
The 12 most significant feature is HR
The 13 most significant feature is PH
The 14 most significant feature is RR
The 15 most significant feature is BUN
The 16 most significant feature is DBP
The 17 most significant feature is Magnesium
The 18 most significant feature is RR_dev_1
The 19 most significant feature is RR_dev_2
The 20 most significant feature is RR_dev_3
The 21 most significant feature is SBP
The 22 most significant feature is MAP
The 23 most significant feature is PTT
The 24 most significant feature is Pl

 80%|████████  | 4/5 [05:19<01:20, 80.09s/it]

The 1 most significant feature is Temp
The 2 most significant feature is Hyperpyrexia
The 3 most significant feature is HR_dev_3
The 4 most significant feature is HR_dev_2
The 5 most significant feature is Hypothermia
The 6 most significant feature is HR_dev_1
The 7 most significant feature is Temp_dev_3
The 8 most significant feature is Temp_dev_2
The 9 most significant feature is Temp_dev_1
The 10 most significant feature is Fever
The 11 most significant feature is HR
The 12 most significant feature is SaO2
The 13 most significant feature is PH
The 14 most significant feature is RR
The 15 most significant feature is DBP
The 16 most significant feature is BUN
The 17 most significant feature is MAP
The 18 most significant feature is SBP
The 19 most significant feature is PTT
The 20 most significant feature is RR_dev_2
The 21 most significant feature is RR_dev_1
The 22 most significant feature is RR_dev_3
The 23 most significant feature is WBC
The 24 most significant feature is Magnesiu

100%|██████████| 5/5 [06:19<00:00, 75.89s/it]

The 1 most significant feature is HR_dev_3
The 2 most significant feature is Temp
The 3 most significant feature is Hyperpyrexia
The 4 most significant feature is HR_dev_2
The 5 most significant feature is HR_dev_1
The 6 most significant feature is Hypothermia
The 7 most significant feature is Temp_dev_3
The 8 most significant feature is Temp_dev_2
The 9 most significant feature is Temp_dev_1
The 10 most significant feature is Fever
The 11 most significant feature is SaO2
The 12 most significant feature is HR
The 13 most significant feature is PH
The 14 most significant feature is RR
The 15 most significant feature is DBP
The 16 most significant feature is BUN
The 17 most significant feature is MAP
The 18 most significant feature is WBC
The 19 most significant feature is Glucose
The 20 most significant feature is PTT
The 21 most significant feature is SBP
The 22 most significant feature is RR_dev_2
The 23 most significant feature is RR_dev_1
The 24 most significant feature is RR_dev_3





<Figure size 432x288 with 0 Axes>

Results:

In [9]:
# 18 most important features + 1 extra vital sign features
X_feature_engineered_fs = ['HR_dev_3', 'Temp', 'Hyperpyrexia', 'HR_dev_2', 'HR_dev_1', 'Hypothermia', 'Temp_dev_3',
                            'Temp_dev_2', 'Temp_dev_1', 'Fever', 'SaO2', 'HR', 'PH', 'RR', 'DBP', 'BUN', 'MAP', 'WBC', 'SBP']

y_feature = 'sepsis'

## Hyper-parameter Tuning

### Baseline Data without Feature Selection

In [None]:
# In Random Forest model, the data split in 5-fold crossvalidation is stored,
# It can be directly applied here

for k in tqdm(range(5)):

    print('---Fold{}/5---'.format(k+1))

    # load the train and val data
    filename = '../data/data_both/kFold_baseline/fold{}/train.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        train_baseline = pickle.load(f)

    filename = '../data/data_both/kFold_baseline/fold{}/val.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        val_baseline = pickle.load(f)
    
    X_train_baseline = train_baseline[X_feature_baseline]
    y_train_baseline = train_baseline[y_feature]

    X_val_baseline = val_baseline[X_feature_baseline]
    y_val_baseline = val_baseline[y_feature]

    #Standardize the dataset
    scaler = preprocessing.StandardScaler()
    # fit and transform data
    scaler.fit(X_train_baseline)
    X_train_baseline = scaler.transform(X_train_baseline)
    X_val_baseline = scaler.transform(X_val_baseline)

    # create a logistic regression model
    model_baseline = linear_model.LogisticRegression(solver='saga', max_iter=1000)

    # create hyperparameter search space
    # create regularization penalty space
    penalty = ['l1', 'l2']
    # create regularization hyperparameter space
    C = np.logspace(-1, 4, 10)  #from 10e-1 to 10e4, evenly pick 10 number
    # create hyperparameter options
    hyperparameters = dict(C=C, penalty=penalty)

    #Grid search for best parameters
    clf = GridSearchCV(model_baseline, hyperparameters, verbose=2, scoring='accuracy')
    best_model = clf.fit(X_train_baseline, y_train_baseline)
    print('Best Parameters: ', best_model.best_params_)

Results:

Best Parameters:  {'C': 0.1, 'penalty': 'l1'}

### Baseline Data with Feature Selection

In [None]:
# In Random Forest model, the data split in 5-fold crossvalidation is stored,
# It can be directly applied here

for k in tqdm(range(5)):

    print('---Fold{}/5---'.format(k+1))

    # load the train and val data
    filename = '../data/data_both/kFold_baseline/fold{}/train.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        train_baseline = pickle.load(f)

    filename = '../data/data_both/kFold_baseline/fold{}/val.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        val_baseline = pickle.load(f)
    
    X_train_baseline = train_baseline[X_feature_baseline_fs]
    y_train_baseline = train_baseline[y_feature]

    X_val_baseline = val_baseline[X_feature_baseline_fs]
    y_val_baseline = val_baseline[y_feature]

    #Standardize the dataset
    scaler = preprocessing.StandardScaler()
    # fit and transform data
    scaler.fit(X_train_baseline)
    X_train_baseline = scaler.transform(X_train_baseline)
    X_val_baseline = scaler.transform(X_val_baseline)

    # create a logistic regression model
    model_baseline = linear_model.LogisticRegression(solver='saga', max_iter=1000)

    # create hyperparameter search space
    # create regularization penalty space
    penalty = ['l1', 'l2']
    # create regularization hyperparameter space
    C = np.logspace(-1, 4, 10)  #from 10e-1 to 10e4, evenly pick 10 number
    # create hyperparameter options
    hyperparameters = dict(C=C, penalty=penalty)

    #Grid search for best parameters
    clf = GridSearchCV(model_baseline, hyperparameters, verbose=2, scoring='accuracy')
    best_model = clf.fit(X_train_baseline, y_train_baseline)
    print('Best Parameters: ', best_model.best_params_)

Results:

Best Parameters:  {'C': 0.1, 'penalty': 'l1'}

### Engineered Data without Feature Engineering

In [None]:
# In Random Forest model, the data split in 5-fold crossvalidation is stored,
# It can be directly applied here

for k in tqdm(range(5)):

    print('---Fold{}/5---'.format(k+1))

    # load the train and val data
    filename = '../data/data_both/kFold_engineered/fold{}/train.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        train_engineered = pickle.load(f)

    filename = '../data/data_both/kFold_engineered/fold{}/val.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        val_engineered = pickle.load(f)
    
    X_train_engineered = train_engineered[X_feature_engineered]
    y_train_engineered = train_engineered[y_feature]

    X_val_engineered = val_engineered[X_feature_engineered]
    y_val_engineered = val_engineered[y_feature]

    #Standardize the dataset
    scaler = preprocessing.StandardScaler()
    # fit and transform data
    scaler.fit(X_train_engineered)
    X_train_engineered = scaler.transform(X_train_engineered)
    X_val_engineered = scaler.transform(X_val_engineered)

    # create a logistic regression model
    model_engineered = linear_model.LogisticRegression(solver='saga', max_iter=1000)

    # create hyperparameter search space
    # create regularization penalty space
    penalty = ['l1', 'l2']
    # create regularization hyperparameter space
    C = np.logspace(-1, 4, 10)  #from 10e-1 to 10e4, evenly pick 10 number
    # create hyperparameter options
    hyperparameters = dict(C=C, penalty=penalty)

    #Grid search for best parameters
    clf = GridSearchCV(model_engineered, hyperparameters, verbose=2, n_jobs = -1, scoring='accuracy')
    best_model = clf.fit(X_train_engineered, y_train_engineered)
    print('Best Parameters: ', best_model.best_params_)

    

Results:

Best Parameters:  {'C': 4.641588833612779, 'penalty': 'l2'}

### Engineered Data with Feature Engineering

In [None]:
# In Random Forest model, the data split in 5-fold crossvalidation is stored,
# It can be directly applied here

for k in tqdm(range(5)):

    print('---Fold{}/5---'.format(k+1))

    # load the train and val data
    filename = '../data/data_both/kFold_engineered/fold{}/train.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        train_engineered = pickle.load(f)

    filename = '../data/data_both/kFold_engineered/fold{}/val.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        val_engineered = pickle.load(f)
    
    X_train_engineered = train_engineered[X_feature_engineered_fs]
    y_train_engineered = train_engineered[y_feature]

    X_val_engineered = val_engineered[X_feature_engineered_fs]
    y_val_engineered = val_engineered[y_feature]

    #Standardize the dataset
    scaler = preprocessing.StandardScaler()
    # fit and transform data
    scaler.fit(X_train_engineered)
    X_train_engineered = scaler.transform(X_train_engineered)
    X_val_engineered = scaler.transform(X_val_engineered)

    # create a logistic regression model
    model_engineered = linear_model.LogisticRegression(solver='saga', max_iter=1000)

    # create hyperparameter search space
    # create regularization penalty space
    penalty = ['l1', 'l2']
    # create regularization hyperparameter space
    C = np.logspace(-1, 4, 10)
    # create hyperparameter options
    hyperparameters = dict(C=C, penalty=penalty)

    #Grid search for best parameters
    clf = GridSearchCV(model_engineered, hyperparameters, verbose=2, n_jobs = -1, scoring='accuracy')
    best_model = clf.fit(X_train_engineered, y_train_engineered)
    print('Best Parameters: ', best_model.best_params_)

Results:

Best Parameters:  {'C': 0.35938136638046275, 'penalty': 'l2'}

## Re-train Model with Optimized Params

In [16]:
!mkdir -p ./trained_models/BDWOFS
!mkdir -p ./trained_models/BDWFS
!mkdir -p ./trained_models/EDWOFS
!mkdir -p ./trained_models/EDWFS

### Baseline Data without Feature Selection

In [17]:
f1_list = []
accuracy_list = []
auprc_list = []
auroc_list = []

for k in tqdm(range(5)):

    print('---Fold{}/5---'.format(k+1))

    # load the train and val data
    filename = '../data/data_both/kFold_baseline/fold{}/train.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        train_baseline = pickle.load(f)

    filename = '../data/data_both/kFold_baseline/fold{}/val.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        val_baseline = pickle.load(f)
    
    X_train_baseline = train_baseline[X_feature_baseline]
    y_train_baseline = train_baseline[y_feature]

    X_val_baseline = val_baseline[X_feature_baseline]
    y_val_baseline = val_baseline[y_feature]

    #Standardize the dataset
    scaler = preprocessing.StandardScaler()
    # fit and transform data
    scaler.fit(X_train_baseline)
    X_train_baseline = scaler.transform(X_train_baseline)
    X_val_baseline = scaler.transform(X_val_baseline)

    # train the model using the optimized parameters
    model_LR = linear_model.LogisticRegression(penalty='l1', C=0.1,  solver='saga', max_iter=1000) # optimized parameters
    model_LR.fit(X_train_baseline, y_train_baseline)
    joblib.dump(model_LR, "./trained_models/BDWOFS/model{}.joblib".format(k+1))

    # validation results
    y_pred_class = model_LR.predict(X_val_baseline)
    y_pred_proba = model_LR.predict_proba(X_val_baseline)[::,1]
    precision_, recall_, thresholds_ = precision_recall_curve(y_val_baseline, y_pred_proba)
    auprc_ = auc(recall_, precision_)
    fpr_, tpr_, thresholds_ = roc_curve(y_val_baseline, y_pred_proba)
    auroc_ = auc(fpr_, tpr_)
    f1_ = f1_score(y_val_baseline,y_pred_class)
    accuracy_ = accuracy_score(y_val_baseline,y_pred_class)

    print('Classification Report:\n', classification_report(y_val_baseline, y_pred_class))
    print('F1 score:', f1_)
    print('Accuracy:', accuracy_)
    print('AUPRC:', auprc_)
    print('AUROC:', auroc_)
    print('AUROC threshold:', thresholds_)


    f1_list.append(f1_)
    accuracy_list.append(accuracy_)
    auprc_list.append(auprc_)
    auroc_list.append(auroc_)

    # Plot ROC,PRC and confusion matrix
    bc = BinaryClassification(y_val_baseline,y_pred_proba, labels=['Non-sepsis', 'Sepsis'])
    plt.figure(figsize=(20,20))
    plt.title('Validation Results for LR using baseline data without feature selection')
    plt.subplot2grid(shape=(2,4), loc=(0,0), colspan=2)
    bc.plot_roc_curve()
    plt.subplot2grid((2,4), (0,2), colspan=2)
    bc.plot_precision_recall_curve()
    plt.subplot2grid((2,4), (1,0), colspan=2)
    bc.plot_confusion_matrix()
    plt.subplot2grid((2,4), (1,2), colspan=2)
    bc.plot_confusion_matrix(normalize=True)
    plt.savefig('./figs/val_wo_fs_baseline_{}.pdf'.format(k+1))
    plt.clf()

def Average(lst):
    return sum(lst) / len(lst)

print('Averaged trauining results:\n F1_score:{}\n Accuracy:{}\n AUROC:{}\n AUPRC:{}\n'.format(Average(f1_list),Average(accuracy_list), Average(auroc_list), Average(auprc_list)))




  0%|          | 0/5 [00:00<?, ?it/s]

---Fold1/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.56      0.59     72765
           1       0.60      0.65      0.63     74088

    accuracy                           0.61    146853
   macro avg       0.61      0.61      0.61    146853
weighted avg       0.61      0.61      0.61    146853

F1 score: 0.6252623780713669
Accuracy: 0.6073284168522264
AUPRC: 0.6282841201768549
AUROC: 0.6466492202990883
AUROC threshold: [1.97742877 0.97742877 0.97258831 ... 0.08067428 0.07511855 0.07505978]


 20%|██        | 1/5 [00:17<01:11, 17.80s/it]

---Fold2/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.59      0.55      0.57     74649
           1       0.58      0.62      0.60     74088

    accuracy                           0.58    148737
   macro avg       0.58      0.58      0.58    148737
weighted avg       0.58      0.58      0.58    148737

F1 score: 0.5958681249754988
Accuracy: 0.5841384457129026
AUPRC: 0.6018530705842269
AUROC: 0.6270035549114701
AUROC threshold: [1.91742107 0.91742107 0.90623865 ... 0.09000774 0.08983486 0.07970339]


 40%|████      | 2/5 [00:37<00:56, 18.95s/it]

---Fold3/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.58      0.59     74049
           1       0.60      0.62      0.61     74244

    accuracy                           0.60    148293
   macro avg       0.60      0.60      0.60    148293
weighted avg       0.60      0.60      0.60    148293

F1 score: 0.6059282495185531
Accuracy: 0.5984503651554692
AUPRC: 0.6215604827106587
AUROC: 0.6451291433072998
AUROC threshold: [1.90733583 0.90733583 0.88964281 ... 0.07921267 0.07796248 0.06454534]


 60%|██████    | 3/5 [00:53<00:34, 17.35s/it]

---Fold4/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.56      0.59     74353
           1       0.59      0.65      0.62     74100

    accuracy                           0.60    148453
   macro avg       0.60      0.60      0.60    148453
weighted avg       0.60      0.60      0.60    148453

F1 score: 0.6191707165753283
Accuracy: 0.6030528180636295
AUPRC: 0.6212040157806212
AUROC: 0.6445474680878625
AUROC threshold: [1.91958633 0.91958633 0.91590903 ... 0.11125605 0.09620058 0.07449764]


 80%|████████  | 4/5 [01:10<00:17, 17.25s/it]

---Fold5/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.56      0.58     76409
           1       0.58      0.62      0.60     74160

    accuracy                           0.59    150569
   macro avg       0.59      0.59      0.59    150569
weighted avg       0.59      0.59      0.59    150569

F1 score: 0.5995583435930508
Accuracy: 0.5917287090968261
AUPRC: 0.6039914238437414
AUROC: 0.6371601574324447
AUROC threshold: [1.97359393 0.97359393 0.97293049 ... 0.10679033 0.10314931 0.07480676]


100%|██████████| 5/5 [01:25<00:00, 17.04s/it]

Averaged trauining results:
 F1_score:0.6091575625467596
 Accuracy:0.5969397509762108
 AUROC:0.6400979088076331
 AUPRC:0.6153786226192206






<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

### Baseline Data with Feature Selection

In [18]:
f1_list = []
accuracy_list = []
auprc_list = []
auroc_list = []

for k in tqdm(range(5)):

    print('---Fold{}/5---'.format(k+1))

    # load the train and val data
    filename = '../data/data_both/kFold_baseline/fold{}/train.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        train_baseline = pickle.load(f)

    filename = '../data/data_both/kFold_baseline/fold{}/val.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        val_baseline = pickle.load(f)
    
    X_train_baseline = train_baseline[X_feature_baseline_fs]
    y_train_baseline = train_baseline[y_feature]

    X_val_baseline = val_baseline[X_feature_baseline_fs]
    y_val_baseline = val_baseline[y_feature]

    #Standardize the dataset
    scaler = preprocessing.StandardScaler()
    # fit and transform data
    scaler.fit(X_train_baseline)
    X_train_baseline = scaler.transform(X_train_baseline)
    X_val_baseline = scaler.transform(X_val_baseline)

    # train the model using the optimized parameters
    model_LR = linear_model.LogisticRegression(penalty='l1', C=0.1,  solver='saga', max_iter=1000) # optimized parameters
    model_LR.fit(X_train_baseline, y_train_baseline)
    joblib.dump(model_LR, "./trained_models/BDWFS/model{}.joblib".format(k+1))

    # validation results
    y_pred_class = model_LR.predict(X_val_baseline)
    y_pred_proba = model_LR.predict_proba(X_val_baseline)[::,1]
    precision_, recall_, thresholds_ = precision_recall_curve(y_val_baseline, y_pred_proba)
    auprc_ = auc(recall_, precision_)
    fpr_, tpr_, thresholds_ = roc_curve(y_val_baseline, y_pred_proba)
    auroc_ = auc(fpr_, tpr_)
    f1_ = f1_score(y_val_baseline,y_pred_class)
    accuracy_ = accuracy_score(y_val_baseline,y_pred_class)

    print('Classification Report:\n', classification_report(y_val_baseline, y_pred_class))
    print('F1 score:', f1_)
    print('Accuracy:', accuracy_)
    print('AUPRC:', auprc_)
    print('AUROC:', auroc_)
    print('AUROC threshold:', thresholds_)


    f1_list.append(f1_)
    accuracy_list.append(accuracy_)
    auprc_list.append(auprc_)
    auroc_list.append(auroc_)

    # Plot ROC,PRC and confusion matrix
    bc = BinaryClassification(y_val_baseline,y_pred_proba, labels=['Non-sepsis', 'Sepsis'])
    plt.figure(figsize=(20,20))
    plt.subplot2grid(shape=(2,4), loc=(0,0), colspan=2)
    bc.plot_roc_curve()
    plt.subplot2grid((2,4), (0,2), colspan=2)
    bc.plot_precision_recall_curve()
    plt.subplot2grid((2,4), (1,0), colspan=2)
    bc.plot_confusion_matrix()
    plt.subplot2grid((2,4), (1,2), colspan=2)
    bc.plot_confusion_matrix(normalize=True)
    plt.title('Validation Results for LR using baseline data with feature selection')
    plt.savefig('./figs/val_w_fs_baseline_{}.pdf'.format(k+1))
    plt.clf()

def Average(lst):
    return sum(lst) / len(lst)

print('Averaged trauining results:\n F1_score:{}\n Accuracy:{}\n AUROC:{}\n AUPRC:{}\n'.format(Average(f1_list),Average(accuracy_list), Average(auroc_list), Average(auprc_list)))




  0%|          | 0/5 [00:00<?, ?it/s]

---Fold1/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.55      0.58     72765
           1       0.60      0.65      0.62     74088

    accuracy                           0.60    146853
   macro avg       0.60      0.60      0.60    146853
weighted avg       0.60      0.60      0.60    146853

F1 score: 0.6240852575488455
Accuracy: 0.6036785084404132
AUPRC: 0.6291044573430182
AUROC: 0.6452264603197085
AUROC threshold: [1.92295411 0.92295411 0.91545219 ... 0.09132688 0.07664526 0.07651829]


 20%|██        | 1/5 [00:10<00:43, 10.76s/it]

---Fold2/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.59      0.55      0.57     74649
           1       0.57      0.61      0.59     74088

    accuracy                           0.58    148737
   macro avg       0.58      0.58      0.58    148737
weighted avg       0.58      0.58      0.58    148737

F1 score: 0.5938192723953579
Accuracy: 0.5811398643242771
AUPRC: 0.6039768769569549
AUROC: 0.626133384902178
AUROC threshold: [1.91203787 0.91203787 0.89763862 ... 0.11112851 0.10834538 0.0788769 ]


 40%|████      | 2/5 [00:21<00:33, 11.02s/it]

---Fold3/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.57      0.59     74049
           1       0.59      0.63      0.61     74244

    accuracy                           0.60    148293
   macro avg       0.60      0.60      0.60    148293
weighted avg       0.60      0.60      0.60    148293

F1 score: 0.611146751155392
Accuracy: 0.599428159117423
AUPRC: 0.6209295940103094
AUROC: 0.6444836217434581
AUROC threshold: [1.90358228 0.90358228 0.87495849 ... 0.08517733 0.07947758 0.07687931]


 60%|██████    | 3/5 [00:34<00:23, 11.59s/it]

---Fold4/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.62      0.55      0.58     74353
           1       0.59      0.66      0.62     74100

    accuracy                           0.60    148453
   macro avg       0.61      0.60      0.60    148453
weighted avg       0.61      0.60      0.60    148453

F1 score: 0.623735934367855
Accuracy: 0.6042451146154002
AUPRC: 0.6216538004396014
AUROC: 0.6438094773966685
AUROC threshold: [1.91033539 0.91033539 0.90739824 ... 0.11395172 0.09307753 0.08363404]


 80%|████████  | 4/5 [00:46<00:11, 11.77s/it]

---Fold5/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.56      0.58     76409
           1       0.58      0.62      0.60     74160

    accuracy                           0.59    150569
   macro avg       0.59      0.59      0.59    150569
weighted avg       0.59      0.59      0.59    150569

F1 score: 0.6013766924281917
Accuracy: 0.5926983642051152
AUPRC: 0.6089811420729806
AUROC: 0.6375620600955147
AUROC threshold: [1.9782738  0.9782738  0.97772784 ... 0.10322481 0.09946816 0.07871643]


100%|██████████| 5/5 [00:57<00:00, 11.59s/it]

Averaged trauining results:
 F1_score:0.6108327815791285
 Accuracy:0.5962380021405258
 AUROC:0.6394430008915056
 AUPRC:0.6169291741645729






<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

### Engineered Data without Feature Selection

In [19]:
f1_list = []
accuracy_list = []
auprc_list = []
auroc_list = []

for k in tqdm(range(5)):

    print('---Fold{}/5---'.format(k+1))

    # load the train and val data
    filename = '../data/data_both/kFold_engineered/fold{}/train.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        train_engineered = pickle.load(f)

    filename = '../data/data_both/kFold_engineered/fold{}/val.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        val_engineered = pickle.load(f)
    
    X_train_engineered = train_engineered[X_feature_engineered]
    y_train_engineered = train_engineered[y_feature]

    X_val_engineered = val_engineered[X_feature_engineered]
    y_val_engineered = val_engineered[y_feature]

    #Standardize the dataset
    scaler = preprocessing.StandardScaler()
    # fit and transform data
    scaler.fit(X_train_engineered)
    X_train_engineered = scaler.transform(X_train_engineered)
    X_val_engineered = scaler.transform(X_val_engineered)

    # train the model using the optimized parameters
    model_LR = linear_model.LogisticRegression(penalty='l2', C=4.642,  solver='saga', max_iter=1000) # optimized parameters
    model_LR.fit(X_train_engineered, y_train_engineered)
    joblib.dump(model_LR, "./trained_models/EDWOFS/model{}.joblib".format(k+1))

    # train results
    y_pred_class = model_LR.predict(X_val_engineered)
    y_pred_proba = model_LR.predict_proba(X_val_engineered)[::,1]
    precision_, recall_, thresholds_ = precision_recall_curve(y_val_engineered, y_pred_proba)
    auprc_ = auc(recall_, precision_)
    fpr_, tpr_, thresholds = roc_curve(y_val_engineered, y_pred_proba)
    auroc_ = auc(fpr_, tpr_)
    f1_ = f1_score(y_val_engineered,y_pred_class)
    accuracy_ = accuracy_score(y_val_engineered,y_pred_class)

    print('Classification Report:\n', classification_report(y_val_engineered, y_pred_class))
    print('F1 score:', f1_)
    print('Accuracy:', accuracy_)
    print('AUPRC:', auprc_)
    print('AUROC:', auroc_)

    f1_list.append(f1_)
    accuracy_list.append(accuracy_)
    auprc_list.append(auprc_)
    auroc_list.append(auroc_)

    # Plot ROC,PRC and confusion matrix
    bc = BinaryClassification(y_val_engineered,y_pred_proba, labels=['Non-sepsis', 'Sepsis'])
    plt.figure(figsize=(20,20))
    plt.subplot2grid(shape=(2,4), loc=(0,0), colspan=2)
    bc.plot_roc_curve()
    plt.subplot2grid((2,4), (0,2), colspan=2)
    bc.plot_precision_recall_curve()
    plt.subplot2grid((2,4), (1,0), colspan=2)
    bc.plot_confusion_matrix()
    plt.subplot2grid((2,4), (1,2), colspan=2)
    bc.plot_confusion_matrix(normalize=True)
    plt.savefig('./figs/val_wo_fs_engineered_{}.pdf'.format(k+1))
    plt.clf()

def Average(lst):
    return sum(lst) / len(lst)

print('Averaged trauining results:\n F1_score:{}\n Accuracy:{}\n AUROC:{}\n AUPRC:{}\n'.format(Average(f1_list),Average(accuracy_list), Average(auroc_list), Average(auprc_list)))




  0%|          | 0/5 [00:00<?, ?it/s]

---Fold1/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.69      0.72     72765
           1       0.72      0.77      0.74     74088

    accuracy                           0.73    146853
   macro avg       0.73      0.73      0.73    146853
weighted avg       0.73      0.73      0.73    146853

F1 score: 0.7423082422367513
Accuracy: 0.7300021109544919
AUPRC: 0.7604508823451716
AUROC: 0.7966710722206118


 20%|██        | 1/5 [01:16<05:04, 76.19s/it]

---Fold2/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.69      0.72     74649
           1       0.71      0.76      0.74     74088

    accuracy                           0.73    148737
   macro avg       0.73      0.73      0.73    148737
weighted avg       0.73      0.73      0.73    148737

F1 score: 0.7362135581313665
Accuracy: 0.7273442384880695
AUPRC: 0.748809780899778
AUROC: 0.7918460533293871


 40%|████      | 2/5 [02:40<04:02, 80.71s/it]

---Fold3/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.72      0.73     74049
           1       0.73      0.75      0.74     74244

    accuracy                           0.73    148293
   macro avg       0.73      0.73      0.73    148293
weighted avg       0.73      0.73      0.73    148293

F1 score: 0.737112855324565
Accuracy: 0.7325834665156143
AUPRC: 0.7585649179716827
AUROC: 0.7953656469414427


 60%|██████    | 3/5 [03:48<02:30, 75.06s/it]

---Fold4/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.69      0.71     74353
           1       0.71      0.76      0.73     74100

    accuracy                           0.72    148453
   macro avg       0.72      0.72      0.72    148453
weighted avg       0.72      0.72      0.72    148453

F1 score: 0.7311788707974294
Accuracy: 0.722188167298741
AUPRC: 0.7674352451497981
AUROC: 0.795848082385857


 80%|████████  | 4/5 [05:24<01:23, 83.27s/it]

---Fold5/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.71      0.73     76409
           1       0.72      0.76      0.74     74160

    accuracy                           0.73    150569
   macro avg       0.74      0.74      0.73    150569
weighted avg       0.74      0.73      0.73    150569

F1 score: 0.7385340931420712
Accuracy: 0.7348856670363754
AUPRC: 0.7534872843549885
AUROC: 0.7969632923331479


100%|██████████| 5/5 [06:32<00:00, 78.58s/it]

Averaged trauining results:
 F1_score:0.7370695239264368
 Accuracy:0.7294007300586585
 AUROC:0.7953388294420892
 AUPRC:0.7577496221442838






<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

### Engineered Data with Feature Selection

In [20]:
f1_list = []
accuracy_list = []
auprc_list = []
auroc_list = []

for k in tqdm(range(5)):

    print('---Fold{}/5---'.format(k+1))

    # load the train and val data
    filename = '../data/data_both/kFold_engineered/fold{}/train.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        train_engineered = pickle.load(f)

    filename = '../data/data_both/kFold_engineered/fold{}/val.pickle'.format(k+1)
    with open(filename, 'rb') as f:
        val_engineered = pickle.load(f)
    
    X_train_engineered = train_engineered[X_feature_engineered_fs]
    y_train_engineered = train_engineered[y_feature]

    X_val_engineered = val_engineered[X_feature_engineered_fs]
    y_val_engineered = val_engineered[y_feature]

    #Standardize the dataset
    scaler = preprocessing.StandardScaler()
    # fit and transform data
    scaler.fit(X_train_engineered)
    X_train_engineered = scaler.transform(X_train_engineered)
    X_val_engineered = scaler.transform(X_val_engineered)

    # train the model using the optimized parameters
    model_LR = linear_model.LogisticRegression(penalty='l2', C=0.359,  solver='saga', max_iter=1000) # optimized parameters
    model_LR.fit(X_train_engineered, y_train_engineered)
    joblib.dump(model_LR, "./trained_models/EDWFS/model{}.joblib".format(k+1))

    # train results
    y_pred_class = model_LR.predict(X_val_engineered)
    y_pred_proba = model_LR.predict_proba(X_val_engineered)[::,1]
    precision_, recall_, thresholds_ = precision_recall_curve(y_val_engineered, y_pred_proba)
    auprc_ = auc(recall_, precision_)
    fpr_, tpr_, thresholds = roc_curve(y_val_engineered, y_pred_proba)
    auroc_ = auc(fpr_, tpr_)
    f1_ = f1_score(y_val_engineered,y_pred_class)
    accuracy_ = accuracy_score(y_val_engineered,y_pred_class)

    print('Classification Report:\n', classification_report(y_val_engineered, y_pred_class))
    print('F1 score:', f1_)
    print('Accuracy:', accuracy_)
    print('AUPRC:', auprc_)
    print('AUROC:', auroc_)

    f1_list.append(f1_)
    accuracy_list.append(accuracy_)
    auprc_list.append(auprc_)
    auroc_list.append(auroc_)

    # Plot ROC,PRC and confusion matrix
    bc = BinaryClassification(y_val_engineered,y_pred_proba, labels=['Non-sepsis', 'Sepsis'])
    plt.figure(figsize=(20,20))
    plt.subplot2grid(shape=(2,4), loc=(0,0), colspan=2)
    bc.plot_roc_curve()
    plt.subplot2grid((2,4), (0,2), colspan=2)
    bc.plot_precision_recall_curve()
    plt.subplot2grid((2,4), (1,0), colspan=2)
    bc.plot_confusion_matrix()
    plt.subplot2grid((2,4), (1,2), colspan=2)
    bc.plot_confusion_matrix(normalize=True)
    plt.savefig('./figs/val_w_fs_engineered_{}.pdf'.format(k+1))
    plt.clf()

def Average(lst):
    return sum(lst) / len(lst)

print('Averaged trauining results:\n F1_score:{}\n Accuracy:{}\n AUROC:{}\n AUPRC:{}\n'.format(Average(f1_list),Average(accuracy_list), Average(auroc_list), Average(auprc_list)))




  0%|          | 0/5 [00:00<?, ?it/s]

---Fold1/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.67      0.71     72765
           1       0.71      0.79      0.75     74088

    accuracy                           0.73    146853
   macro avg       0.73      0.73      0.73    146853
weighted avg       0.73      0.73      0.73    146853

F1 score: 0.7484423427165543
Accuracy: 0.7319428271809224
AUPRC: 0.7506091554115969
AUROC: 0.7940431020860471


 20%|██        | 1/5 [00:40<02:43, 40.82s/it]

---Fold2/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.67      0.70     74649
           1       0.70      0.76      0.73     74088

    accuracy                           0.72    148737
   macro avg       0.72      0.72      0.72    148737
weighted avg       0.72      0.72      0.72    148737

F1 score: 0.728613987799688
Accuracy: 0.7170441786509073
AUPRC: 0.7345815733471153
AUROC: 0.7817493438669931


 40%|████      | 2/5 [01:27<02:12, 44.16s/it]

---Fold3/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.70      0.72     74049
           1       0.72      0.75      0.73     74244

    accuracy                           0.73    148293
   macro avg       0.73      0.73      0.73    148293
weighted avg       0.73      0.73      0.73    148293

F1 score: 0.7330268531854862
Accuracy: 0.7262648944994032
AUPRC: 0.7449627792393205
AUROC: 0.7905162693272336


 60%|██████    | 3/5 [02:08<01:25, 42.86s/it]

---Fold4/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.68      0.71     74353
           1       0.70      0.77      0.73     74100

    accuracy                           0.72    148453
   macro avg       0.72      0.72      0.72    148453
weighted avg       0.72      0.72      0.72    148453

F1 score: 0.7345264054294672
Accuracy: 0.7228078920601133
AUPRC: 0.7535840144088678
AUROC: 0.790079525990228


 80%|████████  | 4/5 [03:11<00:50, 50.90s/it]

---Fold5/5---
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.69      0.72     76409
           1       0.70      0.76      0.73     74160

    accuracy                           0.72    150569
   macro avg       0.72      0.72      0.72    150569
weighted avg       0.72      0.72      0.72    150569

F1 score: 0.7287386215864758
Accuracy: 0.7229177320696824
AUPRC: 0.7423721936921293
AUROC: 0.7900242461144528


100%|██████████| 5/5 [04:08<00:00, 49.76s/it]

Averaged trauining results:
 F1_score:0.7346696421435344
 Accuracy:0.7241955048922057
 AUROC:0.789282497476991
 AUPRC:0.745221943219806






<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

<Figure size 1440x1440 with 0 Axes>

## Performance on Test Data

### Helper Functions

In [10]:
def save_challenge_predictions(file, scores, labels):
    with open(file, 'w') as f:
        f.write('PredictedProbability,PredictedLabel\n')
        for (s, l) in zip(scores, labels):
            f.write('%g,%d\n' % (s, l))

def save_challenge_testlabel(file, labels):
    with open(file, 'w') as f:
        f.write('sepsis\n')
        for l in labels:
            f.write('%d\n' % l)

def load_model_predict(X_test, k_fold, path):
    "ensemble the five LR  models by averaging their output probabilities"
    test_pred = np.zeros((X_test.shape[0], k_fold))
    for k in range(k_fold):
        # load the model
        model_path_name = path + 'model{}.joblib'.format(k+1)
        loaded_model = joblib.load(model_path_name)
        #Standardize the dataset
        scaler = preprocessing.StandardScaler()
        # fit and transform data
        scaler.fit(X_test)
        X_test = scaler.transform(X_test)
        # predict
        y_test_pred = loaded_model.predict_proba(X_test)[::,1]
        test_pred[:, k] = y_test_pred # save prediction results 5 times
    test_pred = pd.DataFrame(test_pred)
    result_pro = test_pred.mean(axis=1)

    return result_pro

def feature_extraction(case, data_features):
    labels = np.array(case['sepsis'])
    features = case[data_features]
    if 'time' in features.columns:
        features = features.drop(columns=['time'],axis = 1)

    return  features, labels    

def predict(data_set,
            data_dir,
            save_prediction_dir,
            save_label_dir,
            model_path,
            risk_threshold,
            data_features
            ):
    for csv in tqdm(data_set):
        csv = csv.replace('psv','csv')
        patient = pd.read_csv(data_dir+csv, sep=',')
        features, labels = feature_extraction(patient, data_features)

        predict_pro = load_model_predict(features, k_fold = 5, path = model_path)
        PredictedProbability = np.array(predict_pro)
        PredictedLabel = [0 if i <= risk_threshold else 1 for i in predict_pro]

        save_prediction_name = save_prediction_dir + csv
        save_challenge_predictions(save_prediction_name, PredictedProbability, PredictedLabel)
        save_testlabel_name = save_label_dir + csv
        save_challenge_testlabel(save_testlabel_name, labels)

### Score Functions

In [11]:
# This file contains functions for evaluating algorithms for the 2019 PhysioNet/
# CinC Challenge. You can run it as follows:

################################################################################

# The evaluate_scores function computes a normalized utility score for a cohort
# of patients along with several traditional scoring metrics.
#
# Inputs:
#   'label_directory' is a directory of pipe-delimited text files containing a
#   binary vector of labels for whether a patient is not septic (0) or septic
#   (1) for each time interval.
#
#   'prediction_directory' is a directory of pipe-delimited text files, where
#   the first column of the file gives the predicted probability that the
#   patient is septic at each time, and the second column of the file is a
#   binarized version of this vector. Note that there must be a prediction for
#   every label.
#
# Outputs:
#   'auroc' is the area under the receiver operating characteristic curve
#   (AUROC).
#
#   'auprc' is the area under the precision recall curve (AUPRC).
#
#   'accuracy' is accuracy.
#
#   'f_measure' is F-measure.
#
#   'normalized_observed_utility' is a normalized utility-based measure that we
#   created for the Challenge. This score is normalized so that a perfect score
#   is 1 and no positive predictions is 0.
#
# Example:
#   Omitted due to length. See the below examples.

import numpy as np, os, os.path, sys, warnings

def evaluate_sepsis_score(label_directory, prediction_directory):
    # Set parameters.
    label_header       = 'sepsis'
    prediction_header  = 'PredictedLabel'
    probability_header = 'PredictedProbability'

    dt_early   = -12
    dt_optimal = -6
    dt_late    = 3

    max_u_tp = 1
    min_u_fn = -2
    u_fp     = -0.05
    u_tn     = 0

    # Find label and prediction files.
    label_files = []
    for f in os.listdir(label_directory):
        g = os.path.join(label_directory, f)
        if os.path.isfile(g) and not f.lower().startswith('.') and f.lower().endswith('csv'):
            label_files.append(g)
    label_files = sorted(label_files)

    prediction_files = []
    for f in os.listdir(prediction_directory):
        g = os.path.join(prediction_directory, f)
        if os.path.isfile(g) and not f.lower().startswith('.') and f.lower().endswith('csv'):
            prediction_files.append(g)
    prediction_files = sorted(prediction_files)

    if len(label_files) != len(prediction_files):
        raise Exception('Numbers of label and prediction files must be the same.')

    # Load labels and predictions.
    num_files            = len(label_files)
    cohort_labels        = []
    cohort_predictions   = []
    cohort_probabilities = []

    for k in range(num_files):
        labels        = load_column(label_files[k], label_header, ',')
        predictions   = load_column(prediction_files[k], prediction_header, ',')
        probabilities = load_column(prediction_files[k], probability_header, ',')

        # Check labels and predictions for errors.
        if not (len(labels) == len(predictions) and len(predictions) == len(probabilities)):
            raise Exception('Numbers of labels and predictions for a file must be the same.')

        num_rows = len(labels)

        for i in range(num_rows):
            if labels[i] not in (0, 1):
                raise Exception('Labels must satisfy label == 0 or label == 1.')

            if predictions[i] not in (0, 1):
                raise Exception('Predictions must satisfy prediction == 0 or prediction == 1.')

            if not 0 <= probabilities[i] <= 1:
                warnings.warn('Probabilities do not satisfy 0 <= probability <= 1.')

        if 0 < np.sum(predictions) < num_rows:
            min_probability_positive = np.min(probabilities[predictions == 1])
            max_probability_negative = np.max(probabilities[predictions == 0])

            if min_probability_positive <= max_probability_negative:
                warnings.warn('Predictions are inconsistent with probabilities, i.e., a positive prediction has a lower (or equal) probability than a negative prediction.')

        # Record labels and predictions.
        cohort_labels.append(labels)
        cohort_predictions.append(predictions)
        cohort_probabilities.append(probabilities)

    # Compute AUC, accuracy, and F-measure.
    labels        = np.concatenate(cohort_labels)
    predictions   = np.concatenate(cohort_predictions)
    probabilities = np.concatenate(cohort_probabilities)

    auroc, auprc        = compute_auc(labels, probabilities)
    accuracy, f_measure = compute_accuracy_f_measure(labels, predictions)

    # Compute utility.
    observed_utilities = np.zeros(num_files)
    best_utilities     = np.zeros(num_files)
    worst_utilities    = np.zeros(num_files)
    inaction_utilities = np.zeros(num_files)

    for k in range(num_files):
        labels = cohort_labels[k]
        num_rows          = len(labels)
        observed_predictions = cohort_predictions[k]
        best_predictions     = np.zeros(num_rows)
        worst_predictions    = np.zeros(num_rows)
        inaction_predictions = np.zeros(num_rows)

        if np.any(labels):
            t_sepsis = np.argmax(labels) - dt_optimal
            best_predictions[max(0, t_sepsis + dt_early) : min(t_sepsis + dt_late + 1, num_rows)] = 1
        worst_predictions = 1 - best_predictions

        observed_utilities[k] = compute_prediction_utility(labels, observed_predictions, dt_early, dt_optimal, dt_late, max_u_tp, min_u_fn, u_fp, u_tn)
        best_utilities[k]     = compute_prediction_utility(labels, best_predictions, dt_early, dt_optimal, dt_late, max_u_tp, min_u_fn, u_fp, u_tn)
        worst_utilities[k]    = compute_prediction_utility(labels, worst_predictions, dt_early, dt_optimal, dt_late, max_u_tp, min_u_fn, u_fp, u_tn)
        inaction_utilities[k] = compute_prediction_utility(labels, inaction_predictions, dt_early, dt_optimal, dt_late, max_u_tp, min_u_fn, u_fp, u_tn)

    unnormalized_observed_utility = np.sum(observed_utilities)
    unnormalized__utility     = np.sum(best_utilities)
    unnormalized_worst_utility    = np.sum(worst_utilities)
    unnormalized_inaction_utility = np.sum(inaction_utilities)

    normalized_observed_utility = (unnormalized_observed_utility - unnormalized_inaction_utility) / (unnormalized__utility - unnormalized_inaction_utility)

    return auroc, auprc, accuracy, f_measure, normalized_observed_utility

# The load_column function loads a column from a table.
#
# Inputs:
#   'filename' is a string containing a filename.
#
#   'header' is a string containing a header.
#
# Outputs:
#   'column' is a vector containing a column from the file with the given
#   header.
#
# Example:
#   Omitted.

def load_column(filename, header, delimiter):
    column = []
    with open(filename, 'r') as f:
        for i, l in enumerate(f):
            arrs = l.strip().split(delimiter)
            if i == 0:
                try:
                    j = arrs.index(header)
                except:
                    raise Exception('{} must contain column with header {} containing numerical entries.'.format(filename, header))
            else:
                if len(arrs[j]):
                    column.append(float(arrs[j]))
    return np.array(column)

# The compute_auc function computes AUROC and AUPRC as well as other summary
# statistics (TP, FP, FN, TN, tpr_, TNR, PPV, NPV, etc.) that can be exposed
# from this function.
#
# Inputs:
#   'labels' is a binary vector, where labels[i] == 0 if the patient is not
#   labeled as septic at time i and labels[i] == 1 if the patient is labeled as
#   septic at time i.
#
#   'predictions' is a probability vector, where predictions[i] gives the
#   predicted probability that the patient is septic at time i.  Note that there
#   must be a prediction for every label, i.e, len(labels) ==
#   len(predictions).
#
# Outputs:
#   'auroc' is a scalar that gives the AUROC of the algorithm using its
#   predicted probabilities, where specificity is interpolated for intermediate
#   sensitivity values.
#
#   'auprc' is a scalar that gives the AUPRC of the algorithm using its
#   predicted probabilities, where precision is a piecewise constant function of
#   recall.
#
# Example:
#   In [1]: labels = [0, 0, 0, 0, 1, 1]
#   In [2]: predictions = [0.3, 0.4, 0.6, 0.7, 0.8, 0.8]
#   In [3]: auroc, auprc = compute_auc(labels, predictions)
#   In [4]: auroc
#   Out[4]: 1.0
#   In [5]: auprc
#   Out[5]: 1.0

def compute_auc(labels, predictions, check_errors=True):
    # Check inputs for errors.
    if check_errors:
        if len(predictions) != len(labels):
            raise Exception('Numbers of predictions and labels must be the same.')

        for label in labels:
            if not label in (0, 1):
                raise Exception('Labels must satisfy label == 0 or label == 1.')

        for prediction in predictions:
            if not 0 <= prediction <= 1:
                warnings.warn('Predictions do not satisfy 0 <= prediction <= 1.')

    # Find prediction thresholds.
    thresholds = np.unique(predictions)[::-1]
    if thresholds[0] != 1:
        thresholds = np.insert(thresholds, 0, 1)
    if thresholds[-1] == 0:
        thresholds = thresholds[:-1]

    n = len(labels)
    m = len(thresholds)

    # Populate contingency table across prediction thresholds.
    tp = np.zeros(m)
    fp = np.zeros(m)
    fn = np.zeros(m)
    tn = np.zeros(m)

    # Find indices that sort the predicted probabilities from largest to
    # smallest.
    idx = np.argsort(predictions)[::-1]

    i = 0
    for j in range(m):
        # Initialize contingency table for j-th prediction threshold.
        if j == 0:
            tp[j] = 0
            fp[j] = 0
            fn[j] = np.sum(labels)
            tn[j] = n - fn[j]
        else:
            tp[j] = tp[j - 1]
            fp[j] = fp[j - 1]
            fn[j] = fn[j - 1]
            tn[j] = tn[j - 1]

        # Update contingency table for i-th largest predicted probability.
        while i < n and predictions[idx[i]] >= thresholds[j]:
            if labels[idx[i]]:
                tp[j] += 1
                fn[j] -= 1
            else:
                fp[j] += 1
                tn[j] -= 1
            i += 1

    # Summarize contingency table.
    tpr_ = np.zeros(m)
    tnr = np.zeros(m)
    ppv = np.zeros(m)
    npv = np.zeros(m)

    for j in range(m):
        if tp[j] + fn[j]:
            tpr_[j] = tp[j] / (tp[j] + fn[j])
        else:
            tpr_[j] = 1
        if fp[j] + tn[j]:
            tnr[j] = tn[j] / (fp[j] + tn[j])
        else:
            tnr[j] = 1
        if tp[j] + fp[j]:
            ppv[j] = tp[j] / (tp[j] + fp[j])
        else:
            ppv[j] = 1
        if fn[j] + tn[j]:
            npv[j] = tn[j] / (fn[j] + tn[j])
        else:
            npv[j] = 1

    # Compute AUROC as the area under a piecewise linear function with tpr_ /
    # sensitivity (x-axis) and TNR / specificity (y-axis) and AUPRC as the area
    # under a piecewise constant with tpr_ / recall (x-axis) and PPV / precision
    # (y-axis).
    auroc = 0
    auprc = 0
    for j in range(m-1):
        auroc += 0.5 * (tpr_[j + 1] - tpr_[j]) * (tnr[j + 1] + tnr[j])
        auprc += (tpr_[j + 1] - tpr_[j]) * ppv[j + 1]

    return auroc, auprc

# The compute_accuracy_f_measure function computes the accuracy and F-measure
# for a patient.
#
# Inputs:
#   'labels' is a binary vector, where labels[i] == 0 if the patient is not
#   labeled as septic at time i and labels[i] == 1 if the patient is labeled as
#   septic at time i.
#
#   'predictions' is a binary vector, where predictions[i] == 0 if the patient
#   is not predicted to be septic at time i and predictions[i] == 1 if the
#   patient is predicted to be septic at time i.  Note that there must be a
#   prediction for every label, i.e, len(labels) == len(predictions).
#
# Output:
#   'accuracy' is a scalar that gives the accuracy of the predictions using its
#   binarized predictions.
#
#   'f_measure' is a scalar that gives the F-measure of the predictions using its
#   binarized predictions.
#
# Example:
#   In [1]: labels = [0, 0, 0, 0, 1, 1]
#   In [2]: predictions = [0, 0, 1, 1, 1, 1]
#   In [3]: accuracy, f_measure = compute_accuracy_f_measure(labels, predictions)
#   In [4]: accuracy
#   Out[4]: 0.666666666667
#   In [5]: f_measure
#   Out[5]: 0.666666666667

def compute_accuracy_f_measure(labels, predictions, check_errors=True):
    # Check inputs for errors.
    if check_errors:
        if len(predictions) != len(labels):
            raise Exception('Numbers of predictions and labels must be the same.')

        for label in labels:
            if not label in (0, 1):
                raise Exception('Labels must satisfy label == 0 or label == 1.')

        for prediction in predictions:
            if not prediction in (0, 1):
                raise Exception('Predictions must satisfy prediction == 0 or prediction == 1.')

    # Populate contingency table.
    n = len(labels)
    tp = 0
    fp = 0
    fn = 0
    tn = 0

    for i in range(n):
        if labels[i] and predictions[i]:
            tp += 1
        elif not labels[i] and predictions[i]:
            fp += 1
        elif labels[i] and not predictions[i]:
            fn += 1
        elif not labels[i] and not predictions[i]:
            tn += 1

    # Summarize contingency table.
    if tp + fp + fn + tn:
        accuracy = float(tp + tn) / float(tp + fp + fn + tn)
    else:
        accuracy = 1.0

    if 2 * tp + fp + fn:
        f_measure = float(2 * tp) / float(2 * tp + fp + fn)
    else:
        f_measure = 1.0

    return accuracy, f_measure

# The compute_prediction_utility function computes the total time-dependent
# utility for a patient.
#
# Inputs:
#   'labels' is a binary vector, where labels[i] == 0 if the patient is not
#   labeled as septic at time i and labels[i] == 1 if the patient is labeled as
#   septic at time i.
#
#   'predictions' is a binary vector, where predictions[i] == 0 if the patient
#   is not predicted to be septic at time i and predictions[i] == 1 if the
#   patient is predicted to be septic at time i.  Note that there must be a
#   prediction for every label, i.e, len(labels) == len(predictions).
#
# Output:
#   'utility' is a scalar that gives the total time-dependent utility of the
#   algorithm using its binarized predictions.
#
# Example:
#   In [1]: labels = [0, 0, 0, 0, 1, 1]
#   In [2]: predictions = [0, 0, 1, 1, 1, 1]
#   In [3]: utility = compute_prediction_utility(labels, predictions)
#   In [4]: utility
#   Out[4]: 3.388888888888889

def compute_prediction_utility(labels, predictions, dt_early=-12, dt_optimal=-6, dt_late=3.0, max_u_tp=1, min_u_fn=-2, u_fp=-0.05, u_tn=0, check_errors=True):
    # Check inputs for errors.
    if check_errors:
        if len(predictions) != len(labels):
            raise Exception('Numbers of predictions and labels must be the same.')

        for label in labels:
            if not label in (0, 1):
                raise Exception('Labels must satisfy label == 0 or label == 1.')

        for prediction in predictions:
            if not prediction in (0, 1):
                raise Exception('Predictions must satisfy prediction == 0 or prediction == 1.')

        if dt_early >= dt_optimal:
            raise Exception('The earliest beneficial time for predictions must be before the optimal time.')

        if dt_optimal >= dt_late:
            raise Exception('The optimal time for predictions must be before the latest beneficial time.')

    # Does the patient eventually have sepsis?
    if np.any(labels):
        is_septic = True
        t_sepsis = np.argmax(labels) - dt_optimal
    else:
        is_septic = False
        t_sepsis = float('inf')

    n = len(labels)

    # Define slopes and intercept points for utility functions of the form
    # u = m * t + b.
    m_1 = float(max_u_tp) / float(dt_optimal - dt_early)
    b_1 = -m_1 * dt_early
    m_2 = float(-max_u_tp) / float(dt_late - dt_optimal)
    b_2 = -m_2 * dt_late
    m_3 = float(min_u_fn) / float(dt_late - dt_optimal)
    b_3 = -m_3 * dt_optimal

    # Compare predicted and true conditions.
    u = np.zeros(n)
    for t in range(n):
        if t <= t_sepsis + dt_late:
            # TP
            if is_septic and predictions[t]:
                if t <= t_sepsis + dt_optimal:
                    u[t] = max(m_1 * (t - t_sepsis) + b_1, u_fp)
                elif t <= t_sepsis + dt_late:
                    u[t] = m_2 * (t - t_sepsis) + b_2
            # FP
            elif not is_septic and predictions[t]:
                u[t] = u_fp
            # FN
            elif is_septic and not predictions[t]:
                if t <= t_sepsis + dt_optimal:
                    u[t] = 0
                elif t <= t_sepsis + dt_late:
                    u[t] = m_3 * (t - t_sepsis) + b_3
            # TN
            elif not is_septic and not predictions[t]:
                u[t] = u_tn

    # Find total utility for patient.
    return np.sum(u)

### Baseline Data without Feature Selection

In [20]:
!mkdir -p ./prediction
!mkdir -p ./label

In [None]:
# load test data
test_set = np.load('../data/data_both/test_set.npy')
test_data_path = '../data/data_both/test_baseline/' 

# pathes
prediction_directory = './prediction/'
label_directory = './label/'
model_path = './trained_models/BDWOFS/' 

predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.5, X_feature_baseline)

auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory)
output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(
                auroc, auprc, accuracy, f_measure, utility)
print(output_string)

Results:

AUROC|AUPRC|Accuracy|F-measure|Utility

0.5213224697648841|0.10036421964552332|0.5518996912607901|0.1496520553841739|0.361013145579852

### Baseline Data with Feature Selection

In [25]:
!rm -r -f ./prediction
!rm -r -f ./label
!mkdir -p ./prediction
!mkdir -p ./label

In [None]:
# load test data
test_set = np.load('../data/data_both/test_set.npy')
test_data_path = '../data/data_both/test_baseline/' 

# pathes
prediction_directory = './prediction/'
label_directory = './label/'
model_path = './trained_models/BDWFS/' 

predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.5, X_feature_baseline_fs)

auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory)
output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(
                auroc, auprc, accuracy, f_measure, utility)
print(output_string)

Results:

AUROC|AUPRC|Accuracy|F-measure|Utility

0.5680442055030079|0.11604327516848185|0.5609854451515343|0.16759055720156743|0.41790978899004516

### Engineered Data without Feature Selection

In [33]:
!rm -r -f ./prediction
!rm -r -f ./label
!mkdir -p ./prediction
!mkdir -p ./label

In [None]:
# load test data
test_set = np.load('../data/data_both/test_set.npy')
test_data_path = '../data/data_both/test_engineered/' 

# pathes
prediction_directory = './prediction/'
label_directory = './label/'
model_path = './trained_models/EDWOFS/' 

predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.5, X_feature_engineered)

auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory)
output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(
                auroc, auprc, accuracy, f_measure, utility)
print(output_string)

Results:

AUROC|AUPRC|Accuracy|F-measure|Utility 

0.7557546797659839|0.3654222701215125|0.5709281078696995|0.22267881195351916|0.6322508083042628


### Engineered Data with Feature Selection

In [35]:
!rm -r -f ./prediction
!rm -r -f ./label
!mkdir -p ./prediction
!mkdir -p ./label

In [36]:
# load test data
test_set = np.load('../data/data_both/test_set.npy')
test_data_path = '../data/data_both/test_engineered/' 

# pathes
prediction_directory = './prediction/'
label_directory = './label/'
model_path = './trained_models/EDWFS/' 

predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.5, X_feature_engineered_fs)

auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory)
output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(
                auroc, auprc, accuracy, f_measure, utility)
print(output_string)

100%|██████████| 1446/1446 [00:27<00:00, 52.59it/s]


AUROC|AUPRC|Accuracy|F-measure|Utility
0.7448881931154562|0.3536970550301417|0.5667695797366266|0.21928011808788464|0.6215216327746108


Results:

AUROC|AUPRC|Accuracy|F-measure|Utility

0.7448881931154562|0.3536970550301417|0.5667695797366266|0.21928011808788464|0.6215216327746108

## Test Real Neonatal Data

In [24]:
neonate_sepsis = np.load('../../models_neoantes/data/all_sepsis.npy')
neonate_nonsepsis = np.load('../../models_neoantes/data/all_nonsepsis.npy')
neonate_nonsepsis_balanced = np.random.choice(neonate_nonsepsis, len(neonate_sepsis), replace=False)
test_set_balanced = np.concatenate((neonate_sepsis, neonate_nonsepsis_balanced))
np.save('../../models_neoantes/data/test_set_balanced.npy', test_set_balanced)

### BDWOFS

In [25]:
!rm -r -f ./prediction
!rm -r -f ./label
!mkdir -p ./prediction
!mkdir -p ./label

In [26]:
# load test data
test_set = np.load('../../models_neoantes/data/test_set_balanced.npy')
test_data_path = '../../datasets/MIMICIII/neonates/baseline_all/' 

# pathes
prediction_directory = './prediction/'
label_directory = './label/'
model_path = './trained_models/BDWOFS/' 

predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.5, X_feature_baseline)

auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory)
output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(
                auroc, auprc, accuracy, f_measure, utility)
print(output_string)

100%|██████████| 226/226 [00:05<00:00, 43.23it/s]


AUROC|AUPRC|Accuracy|F-measure|Utility
0.5387213805423651|0.05615803353392489|0.513461266443491|0.07560691570366389|0.18441979309485226


### BDWFS

In [27]:
!rm -r -f ./prediction
!rm -r -f ./label
!mkdir -p ./prediction
!mkdir -p ./label

In [29]:
# load test data
test_set = np.load('../../models_neoantes/data/test_set_balanced.npy')
test_data_path = '../../datasets/MIMICIII/neonates/baseline_all/' 

# pathes
prediction_directory = './prediction/'
label_directory = './label/'
model_path = './trained_models/BDWFS/' 

predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.5, X_feature_baseline_fs)

auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory)
output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(
                auroc, auprc, accuracy, f_measure, utility)
print(output_string)

100%|██████████| 226/226 [00:04<00:00, 47.76it/s]


AUROC|AUPRC|Accuracy|F-measure|Utility
0.5319552082091128|0.0541009365500336|0.5130369182894055|0.07471779251030282|0.17702438821720878


### EDWOFS

In [30]:
!rm -r -f ./prediction
!rm -r -f ./label
!mkdir -p ./prediction
!mkdir -p ./label

In [31]:
# load test data
test_set = np.load('../../models_neoantes/data/test_set_balanced.npy')
test_data_path = '../../datasets/MIMICIII/neonates/engineered_all/' 

# pathes
prediction_directory = './prediction/'
label_directory = './label/'
model_path = './trained_models/EDWOFS/' 

predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.5, X_feature_engineered)

auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory)
output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(
                auroc, auprc, accuracy, f_measure, utility)
print(output_string)

100%|██████████| 226/226 [00:06<00:00, 33.69it/s]


AUROC|AUPRC|Accuracy|F-measure|Utility
0.7838814788617027|0.2391587047850281|0.5659389881654014|0.1254037621128634|0.48729901533092357


### EDWFS

In [32]:
!rm -r -f ./prediction
!rm -r -f ./label
!mkdir -p ./prediction
!mkdir -p ./label

In [33]:
# load test data
test_set = np.load('../../models_neoantes/data/test_set_balanced.npy')
test_data_path = '../../datasets/MIMICIII/neonates/engineered_all/' 

# pathes
prediction_directory = './prediction/'
label_directory = './label/'
model_path = './trained_models/EDWFS/' 

predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.5, X_feature_engineered_fs)

auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory)
output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(
                auroc, auprc, accuracy, f_measure, utility)
print(output_string)

100%|██████████| 226/226 [00:05<00:00, 39.79it/s]


AUROC|AUPRC|Accuracy|F-measure|Utility
0.7707348774704764|0.2199808045974155|0.5685322268848131|0.12606245821793524|0.48459013669034856


## Test Artificial Neonatal Data
### BDWOFS

In [12]:
!rm -r -f ./prediction
!rm -r -f ./label
!mkdir -p ./prediction
!mkdir -p ./label

In [13]:
# load test data
test_set = np.load('../../artificial_neonatal_data/data/balanced_226/test_set_balanced.npy')
test_data_path = '../../artificial_neonatal_data/data/balanced_226/baseline_all/' 

# pathes
prediction_directory = './prediction/'
label_directory = './label/'
model_path = './trained_models/BDWOFS/' 

predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.5, X_feature_baseline)

auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory)
output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(
                auroc, auprc, accuracy, f_measure, utility)
print(output_string)

100%|██████████| 226/226 [00:06<00:00, 36.64it/s]


AUROC|AUPRC|Accuracy|F-measure|Utility
0.46690971473585635|0.04205330411922846|0.5060211474446837|0.06625335430739336|0.10099713324192952


### BDWFS

In [None]:
!rm -r -f ./prediction
!rm -r -f ./label
!mkdir -p ./prediction
!mkdir -p ./label

In [14]:
# load test data
test_set = np.load('../../artificial_neonatal_data/data/balanced_226/test_set_balanced.npy')
test_data_path = '../../artificial_neonatal_data/data/balanced_226/baseline_all/' 

# pathes
prediction_directory = './prediction/'
label_directory = './label/'
model_path = './trained_models/BDWFS/' 

predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.5, X_feature_baseline_fs)

auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory)
output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(
                auroc, auprc, accuracy, f_measure, utility)
print(output_string)

100%|██████████| 226/226 [00:05<00:00, 40.37it/s]


AUROC|AUPRC|Accuracy|F-measure|Utility
0.4654106997761587|0.04185025794738932|0.5052868611709418|0.06564349112426035|0.09874942872574685


### EDWOFS

In [15]:
!rm -r -f ./prediction
!rm -r -f ./label
!mkdir -p ./prediction
!mkdir -p ./label

In [17]:
# load test data
test_set = np.load('../../artificial_neonatal_data/data/balanced_226/test_set_balanced.npy')
test_data_path = '../../artificial_neonatal_data/data/balanced_226/engineered_all/' 

# pathes
prediction_directory = './prediction/'
label_directory = './label/'
model_path = './trained_models/EDWOFS/' 

predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.5, X_feature_engineered)

auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory)
output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(
                auroc, auprc, accuracy, f_measure, utility)
print(output_string)

100%|██████████| 226/226 [00:06<00:00, 33.27it/s]


AUROC|AUPRC|Accuracy|F-measure|Utility
0.8555088059403332|0.2790463515870136|0.5618269042490699|0.1384156319183752|0.572209065603058


### EDWFS

In [None]:
!rm -r -f ./prediction
!rm -r -f ./label
!mkdir -p ./prediction
!mkdir -p ./label

In [18]:
# load test data
test_set = np.load('../../artificial_neonatal_data/data/balanced_226/test_set_balanced.npy')
test_data_path = '../../artificial_neonatal_data/data/balanced_226/engineered_all/' 

# pathes
prediction_directory = './prediction/'
label_directory = './label/'
model_path = './trained_models/EDWFS/' 

predict(test_set, test_data_path, prediction_directory, label_directory, model_path, 0.5, X_feature_engineered_fs)

auroc, auprc, accuracy, f_measure, utility = evaluate_sepsis_score(label_directory, prediction_directory)
output_string = 'AUROC|AUPRC|Accuracy|F-measure|Utility\n{}|{}|{}|{}|{}'.format(
                auroc, auprc, accuracy, f_measure, utility)
print(output_string)

100%|██████████| 226/226 [00:05<00:00, 42.91it/s]


AUROC|AUPRC|Accuracy|F-measure|Utility
0.8404158420979196|0.26552646601277174|0.5564421382416291|0.1351531927078362|0.5542066558643899
