In [1]:
# Print the NVIDIA driver / GPU status table (per-device utilisation and
# memory in use) so the hardware state is recorded alongside the results.
!nvidia-smi

Mon Aug 22 14:06:32 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.60.02    Driver Version: 510.60.02    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Quadro RTX 6000     On   | 00000000:1A:00.0 Off |                  Off |
| 33%   26C    P8    31W / 260W |  19010MiB / 24576MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Quadro RTX 6000     On   | 00000000:1C:00.0 Off |                  Off |
| 53%   67C    P2   109W / 260W |   4777MiB / 24576MiB |      0%      Default |
|       

In [2]:
import os

# Silence TensorFlow's native (C++) logging. The variable has to be set before
# TensorFlow is first imported, so it stays ahead of every other import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import warnings

# NOTE(review): this hides *all* Python warnings for the rest of the notebook,
# including deprecation/convergence warnings from the libraries used below.
warnings.filterwarnings("ignore")

# Project-local helpers. NOTE(review): the wildcard imports hide where names
# such as `get_data_label`, `get_thresh` and `test_CI` are defined; prefer
# explicit imports once the owning modules are confirmed.
from Parse_TFrecords import *
from define_model import *
from load_label import *
from utilities import *
import gc
import tensorflow as tf
import joblib

# Keep the Python-side TensorFlow logger at ERROR level only.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

import numpy as np
# `pd` is used by the data-loading cells but was never imported explicitly
# (it presumably leaked in through one of the wildcard imports above).
import pandas as pd

# Let TensorFlow grow its GPU allocation on demand rather than reserving the
# devices' memory up front.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

print(tf.__version__)

2.8.0


## Model Ensemble stacking

In [3]:
# Base architectures whose saved predictions become the stacking features.
archis = ['DenseNet121', 'ResNet50V2', 'Xception', 'MobileNetV2']

data = 'mimic'
split = 'train'

# One flattened prediction vector per architecture, keyed by architecture name.
# Building the frame in a single call keeps the columns in `archis` order.
train_columns = {
    archi: np.load(
        'preds/{d}/{j}_preds/{i}_preds.npy'.format(d=data, j=split, i=archi)
    ).reshape(-1).tolist()
    for archi in archis
}
df_train = pd.DataFrame(train_columns)

# Labels for the same dataset/split, fetched through the project helper; they
# are the target the stacking classifiers are fitted against.
y_train = get_data_label(data, split)

In [7]:
# Same base architectures (and column order) as the training feature frame.
archis = ['DenseNet121', 'ResNet50V2', 'Xception', 'MobileNetV2']

data = 'mimic'
split = 'test'

# NOTE(review): the path combines `data='mimic'` with an `emory_` prefix --
# presumably predictions of the mimic-trained base models on the Emory test
# split; confirm against the script that wrote these files.
eval_columns = {
    archi: np.load(
        'preds/{d}/emory_{j}_preds/{i}_preds.npy'.format(d=data, j=split, i=archi)
    ).reshape(-1).tolist()
    for archi in archis
}
df_eval = pd.DataFrame(eval_columns)

# Labels of the Emory split, fetched through the project helper.
y_label = get_data_label('emory', split)

### Logistic Regression

In [5]:
from sklearn.linear_model import LogisticRegression

# Meta-learner: logistic regression fitted on the base models' predictions.
clf = LogisticRegression(random_state=0).fit(df_train, y_train)

filename = 'stacking_model/LR_model.sav'
# joblib.dump does not create missing directories; make sure the target folder
# exists so the cell also runs on a fresh checkout.
os.makedirs(os.path.dirname(filename), exist_ok=True)
joblib.dump(clf, filename)

['stacking_model_merged/LR_model.sav']

In [8]:
# Reload the persisted meta-learner so the evaluation uses the saved artefact.
clf = joblib.load('stacking_model/LR_model.sav')

# Probability assigned to the second class in `clf.classes_` (presumably the
# positive label) for every evaluation row.
y_pred = clf.predict_proba(df_eval)[:, 1]

# NOTE(review): the operating threshold is derived from the evaluation labels
# themselves, so the threshold-dependent metrics reported below are likely
# optimistic; consider choosing it on training/validation predictions instead.
thresh = get_thresh(y_label, y_pred, 'Youden')

test_CI(y_pred, y_label, thresh)

# One coefficient per stacked feature column, followed by the intercept.
print(clf.coef_, clf.intercept_)
# Trailing semicolon keeps the collector's return value out of the cell output.
gc.collect();

AUC       : 0.74, CI: [0.73 - 0.74]
Precision : 0.34, CI: [0.33 - 0.35]
Recall    : 0.63, CI: [0.61 - 0.64]
F1-Score  : 0.44, CI: [0.43 - 0.45]
AUPRC     : 0.41, CI: [0.39 - 0.42]
Balanced ACC : 0.67, CI: [0.66 - 0.68]
[[-1.42002334  4.11236233  4.57499224 -0.11277668]] [-4.22922766]


0

### XGBoost

In [7]:
from xgboost import XGBClassifier

# Meta-learner: gradient-boosted trees with library-default hyperparameters.
clf = XGBClassifier()
clf.fit(df_train, y_train)

filename = 'stacking_model/xgboost_model.sav'
# joblib.dump does not create missing directories; make sure the target folder
# exists so the cell also runs on a fresh checkout.
os.makedirs(os.path.dirname(filename), exist_ok=True)
joblib.dump(clf, filename)

['stacking_model_merged/xgboost_model.sav']

In [9]:
# Reload the persisted meta-learner so the evaluation uses the saved artefact.
clf = joblib.load('stacking_model/xgboost_model.sav')

# Probability assigned to the second class in `clf.classes_` (presumably the
# positive label) for every evaluation row.
y_pred = clf.predict_proba(df_eval)[:, 1]

# NOTE(review): the operating threshold is derived from the evaluation labels
# themselves, so the threshold-dependent metrics reported below are likely
# optimistic; consider choosing it on training/validation predictions instead.
thresh = get_thresh(y_label, y_pred, 'Youden')

test_CI(y_pred, y_label, thresh)

# Trailing semicolon keeps the collector's return value out of the cell output.
gc.collect();

AUC       : 0.73, CI: [0.73 - 0.74]
Precision : 0.33, CI: [0.32 - 0.34]
Recall    : 0.65, CI: [0.64 - 0.66]
F1-Score  : 0.44, CI: [0.43 - 0.45]
AUPRC     : 0.40, CI: [0.39 - 0.42]
Balanced ACC : 0.67, CI: [0.66 - 0.68]


0

### KNN

In [9]:
from sklearn.neighbors import KNeighborsClassifier

# Meta-learner: k-nearest neighbours over the base models' predictions.
# NOTE(review): k=30 is hard-coded with no recorded rationale or tuning.
clf = KNeighborsClassifier(n_neighbors=30)
clf.fit(df_train, y_train)

filename = 'stacking_model/KNN_model.sav'
# joblib.dump does not create missing directories; make sure the target folder
# exists so the cell also runs on a fresh checkout.
os.makedirs(os.path.dirname(filename), exist_ok=True)
joblib.dump(clf, filename)

['stacking_model_merged/KNN_model.sav']

In [10]:
# Reload the persisted meta-learner so the evaluation uses the saved artefact.
clf = joblib.load('stacking_model/KNN_model.sav')

# Probability assigned to the second class in `clf.classes_` (presumably the
# positive label) for every evaluation row.
y_pred = clf.predict_proba(df_eval)[:, 1]

# NOTE(review): the operating threshold is derived from the evaluation labels
# themselves, so the threshold-dependent metrics reported below are likely
# optimistic; consider choosing it on training/validation predictions instead.
thresh = get_thresh(y_label, y_pred, 'Youden')

test_CI(y_pred, y_label, thresh)

# Trailing semicolon keeps the collector's return value out of the cell output.
gc.collect();

AUC       : 0.72, CI: [0.72 - 0.73]
Precision : 0.33, CI: [0.32 - 0.34]
Recall    : 0.62, CI: [0.61 - 0.63]
F1-Score  : 0.43, CI: [0.42 - 0.44]
AUPRC     : 0.39, CI: [0.38 - 0.40]
Balanced ACC : 0.67, CI: [0.66 - 0.67]


0

### Random Forest

In [11]:
from sklearn.ensemble import RandomForestClassifier

# Meta-learner: shallow random forest (depth 3, fixed seed) with class weights
# balanced by label frequency.
clf = RandomForestClassifier(max_depth=3, random_state=0, class_weight='balanced')
clf.fit(df_train, y_train)

filename = 'stacking_model/RF_model.sav'
# joblib.dump does not create missing directories; make sure the target folder
# exists so the cell also runs on a fresh checkout.
os.makedirs(os.path.dirname(filename), exist_ok=True)
joblib.dump(clf, filename)

['stacking_model_merged/RF_model.sav']

In [11]:
# Reload the persisted meta-learner so the evaluation uses the saved artefact.
clf = joblib.load('stacking_model/RF_model.sav')

# Probability assigned to the second class in `clf.classes_` (presumably the
# positive label) for every evaluation row.
y_pred = clf.predict_proba(df_eval)[:, 1]

# NOTE(review): the operating threshold is derived from the evaluation labels
# themselves, so the threshold-dependent metrics reported below are likely
# optimistic; consider choosing it on training/validation predictions instead.
thresh = get_thresh(y_label, y_pred, 'Youden')

test_CI(y_pred, y_label, thresh)

# Trailing semicolon keeps the collector's return value out of the cell output.
gc.collect();

AUC       : 0.74, CI: [0.74 - 0.75]
Precision : 0.32, CI: [0.31 - 0.33]
Recall    : 0.71, CI: [0.70 - 0.72]
F1-Score  : 0.44, CI: [0.43 - 0.45]
AUPRC     : 0.41, CI: [0.40 - 0.43]
Balanced ACC : 0.68, CI: [0.67 - 0.68]


0