In [1]:
# Shell escape: print the driver/CUDA versions and the current per-GPU memory
# and utilisation so the state of the machine is recorded with the notebook.
!nvidia-smi

Sun Aug 28 09:59:46 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.60.02    Driver Version: 510.60.02    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Quadro RTX 6000     On   | 00000000:1A:00.0 Off |                  Off |
| 33%   25C    P8    31W / 260W |      3MiB / 24576MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Quadro RTX 6000     On   | 00000000:1C:00.0 Off |                  Off |
| 51%   74C    P2   194W / 260W |   4777MiB / 24576MiB |     42%      Default |
|       

In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import warnings
warnings.filterwarnings("ignore")

from Parse_TFrecords import *
from define_model import *
from load_label import *
from utilities import *
import gc
import joblib
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

import numpy as np
from sklearn.metrics import classification_report, average_precision_score

# Enumerate the GPUs TensorFlow can see and switch each one to on-demand
# memory growth, then print the device list for the record.
# NOTE(review): TensorFlow expects memory growth to be configured before the
# GPUs are first used, so this should stay ahead of any model/dataset code —
# confirm against the installed TF version.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
    
print(gpus)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:4', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:5', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:6', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:7', device_type='GPU')]


In [3]:
# Mini-batch size used by the tf.data pipelines in the cells below.
BATCH_SIZE = 32

# Positive / negative sample counts per cohort. Exactly one pair is active;
# to weight for a different cohort, swap in the corresponding pair.
#   All   : pos = 57925, neg = 212124
#   MIMIC : pos = 32631, neg = 91865
#   Emory : the active pair below
pos, neg = 25294, 120259

total = pos + neg
half_total = total / 2.0

# Inverse-frequency weights scaled so that each class carries half of the
# total weight mass (weight * class_count == total / 2 for both classes).
weight_for_0 = (1 / neg) * half_total
weight_for_1 = (1 / pos) * half_total

class_weight = {
    0: weight_for_0,
    1: weight_for_1,
}
print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))

Weight for class 0: 0.61
Weight for class 1: 2.88


## Joint Data Fusion

In [5]:
# Emory train / validation input pipelines for the joint-fusion model.
#
# `buffer_size` on TFRecordDataset is the size of the read buffer in BYTES,
# not a number of records, so the former `buffer_size=BATCH_SIZE` requested a
# 32-byte read buffer. The argument is omitted here so TensorFlow falls back
# to its own default buffer size.
record_file_train = 'tfrecords/copd_emory_train.tfrecords'
train_dataset = (
    tf.data.TFRecordDataset(
        record_file_train, compression_type=None, num_parallel_reads=32)
    .map(parse_TFrecord_demo)
    # NOTE(review): the shuffle window is a single batch worth of records, so
    # examples are only mixed locally. If the file is written in a sorted
    # order a larger window may be needed — confirm how the records are laid out.
    .shuffle(BATCH_SIZE)
    .batch(BATCH_SIZE)
)

# Validation data is read in file order (no shuffle).
record_file_val = 'tfrecords/copd_emory_val.tfrecords'
val_dataset = (
    tf.data.TFRecordDataset(
        record_file_val, compression_type=None, num_parallel_reads=32)
    .map(parse_TFrecord_demo)
    .batch(BATCH_SIZE)
)

In [6]:
# Weights of the best joint-fusion epoch (lowest validation loss) go here.
checkpoint_filepath = 'checkpoints_fusion/checkpoint_Xception'

# Keep only the weights of the epoch with the smallest val_loss.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
)

# `scheduler` is not defined in this notebook; presumably it is provided by
# one of the star-imported project modules — verify.
lr_schedule_callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

# Stop once val_loss has failed to improve for 3 consecutive epochs.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='min', patience=3)

callback = [
    lr_schedule_callback,
    early_stopping_callback,
    model_checkpoint_callback,
]

In [None]:
# Train the joint-fusion model on the Emory pipelines with class re-weighting.
model = get_model_demo()

# NOTE(review): the loss is configured with from_logits=True while the 'AUC'
# metric uses its default configuration, which expects probabilities. Confirm
# which of the two matches the output layer of get_model_demo().
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    # `metrics` is documented as a list of metrics.
    metrics=['AUC'],
)

# `shuffle=True` was dropped: Keras ignores that argument when the input is a
# tf.data.Dataset, so shuffling is controlled by the dataset pipeline itself.
model.fit(
    train_dataset,
    epochs=5,
    validation_data=val_dataset,
    callbacks=callback,
    class_weight=class_weight,
)

# Release the trained model; the best weights are on disk via the
# checkpoint callback and are reloaded for evaluation.
del model
gc.collect()

Epoch 1/5
Epoch 2/5
Epoch 3/5

In [None]:
# Emory test pipeline. There is no shuffle step, so predictions come out in
# file order, which later cells pair with the label arrays by position.
#
# `buffer_size` on TFRecordDataset is a read-buffer size in BYTES; the former
# `buffer_size=BATCH_SIZE` asked for a 32-byte buffer. It is omitted so the
# TensorFlow default is used.
BATCH_SIZE = 32
record_file_test = 'tfrecords/copd_emory_test.tfrecords'
test_dataset = (
    tf.data.TFRecordDataset(
        record_file_test, compression_type=None, num_parallel_reads=32)
    .map(parse_TFrecord_demo)
    .batch(BATCH_SIZE)
)

In [None]:
# Labels plus per-sample demographic records for the Emory cohort.
# NOTE(review): `split` is never assigned in this notebook; presumably one of
# the star-imported project modules defines it — confirm.
y_label, y_demo = get_data_label('emory', split, True)

# Rebuild the architecture and restore the best weights written during training.
model = get_model_demo()
model.load_weights(checkpoint_filepath)

# Score the Emory test pipeline.
y_preds = model.predict(test_dataset)

In [None]:
# Choose a decision threshold with get_thresh's 'Youden' option and pass it,
# with the predictions and labels, to test_CI.
# NOTE(review): the threshold is derived from the same test predictions it is
# then evaluated on; confirm this is intended rather than tuning on validation.
# NOTE(review): test_CI presumably reports metrics with confidence intervals —
# verify in the utilities module.
thresh = get_thresh(y_label, y_preds, 'Youden')

test_CI(y_preds, y_label, thresh)

gc.collect()

## Data Source Fusion

In [None]:
# Merged-cohort train / validation input pipelines for data-source fusion.
#
# `buffer_size` on TFRecordDataset is the size of the read buffer in BYTES,
# not a number of records, so the former `buffer_size=BATCH_SIZE` requested a
# 32-byte read buffer. The argument is omitted here so TensorFlow falls back
# to its own default buffer size.
record_file_train = 'tfrecords/copd_merged_train.tfrecords'
train_dataset = (
    tf.data.TFRecordDataset(
        record_file_train, compression_type=None, num_parallel_reads=32)
    .map(parse_TFrecord_train)
    # NOTE(review): the shuffle window is a single batch worth of records, so
    # examples are only mixed locally. If the merged file stores one source
    # after the other, a larger window may be needed — confirm the layout.
    .shuffle(BATCH_SIZE)
    .batch(BATCH_SIZE)
)

# Validation data is read in file order (no shuffle).
record_file_val = 'tfrecords/copd_merged_val.tfrecords'
val_dataset = (
    tf.data.TFRecordDataset(
        record_file_val, compression_type=None, num_parallel_reads=32)
    .map(parse_TFrecord_train)
    .batch(BATCH_SIZE)
)

In [None]:
# Train the data-source-fusion model on the merged pipelines.
archi = 'Xception'
checkpoint_filepath = 'checkpoints_merged/checkpoint_BCE_{i}'.format(i=archi)
monitor_ = 'val_loss'

# The notebook-level `class_weight` is computed from the Emory counts only,
# but this run trains on the merged file. Recompute the weights here from the
# merged ("All") counts, using the same inverse-frequency formula.
# NOTE(review): assumes the merged TFRecords are the Emory + MIMIC union that
# the "All" counts (57925 positive / 212124 negative) describe — confirm.
merged_pos = 57925
merged_neg = 212124
merged_total = merged_pos + merged_neg
class_weight_merged = {
    0: (1 / merged_neg) * (merged_total / 2.0),
    1: (1 / merged_pos) * (merged_total / 2.0),
}

# Keep only the weights of the epoch with the best monitored value.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_best_only=True,
    save_weights_only=True,
    monitor=monitor_,
    mode='min')

# `scheduler` is not defined in this notebook; presumably it is provided by
# one of the star-imported project modules — verify.
callback = [tf.keras.callbacks.LearningRateScheduler(scheduler),
            tf.keras.callbacks.EarlyStopping(mode='min', patience=3, monitor=monitor_),
            model_checkpoint_callback]

model = get_model_demo(archi)

# NOTE(review): the loss is configured with from_logits=True while the 'AUC'
# metric uses its default configuration, which expects probabilities. Confirm
# which of the two matches the output layer of get_model_demo().
loss_func = tf.keras.losses.BinaryCrossentropy(from_logits=True)

model.compile(
    loss=loss_func,
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    # `metrics` is documented as a list of metrics.
    metrics=['AUC'],
)

# `shuffle=True` was dropped: Keras ignores that argument when the input is a
# tf.data.Dataset, so shuffling is controlled by the dataset pipeline itself.
model.fit(
    train_dataset,
    epochs=5,
    validation_data=val_dataset,
    callbacks=callback,
    class_weight=class_weight_merged,
)

# Release the trained model; the best weights are on disk via the
# checkpoint callback and are reloaded for evaluation.
del model
gc.collect()

In [None]:
# Emory test pipeline used to evaluate the merged-data model. There is no
# shuffle step, so predictions come out in file order, which later cells pair
# with the label arrays by position.
#
# `buffer_size` on TFRecordDataset is a read-buffer size in BYTES; the former
# `buffer_size=BATCH_SIZE` asked for a 32-byte buffer. It is omitted so the
# TensorFlow default is used.
BATCH_SIZE = 32
record_file_test = 'tfrecords/copd_emory_test.tfrecords'
test_dataset = (
    tf.data.TFRecordDataset(
        record_file_test, compression_type=None, num_parallel_reads=32)
    .map(parse_TFrecord_demo)
    .batch(BATCH_SIZE)
)

In [None]:
checkpoint_filepath = 'checkpoints_merged/checkpoint_BCE_{i}'.format(i=archi)

# The checkpoint holds weights only, and they were saved from a model built
# with get_model_demo(archi). Rebuild it with the same constructor and
# argument; the earlier define_model() call created a model through a
# different entry point, so the restored weights need not match its layers.
model = get_model_demo(archi)

model.load_weights(checkpoint_filepath)

# NOTE(review): predictions below come from the Emory test file, while the
# labels are requested for 'merged'. Confirm the two are aligned row-for-row.
# NOTE(review): `split` is never assigned in this notebook; presumably one of
# the star-imported project modules defines it — confirm.
y_label, y_demo = get_data_label('merged', split, True)

y_preds = model.predict(test_dataset)

In [None]:
# Choose a decision threshold with get_thresh's 'Youden' option and pass it,
# with the predictions and labels, to test_CI.
# NOTE(review): the threshold is derived from the same test predictions it is
# then evaluated on; confirm this is intended rather than tuning on validation.
# NOTE(review): test_CI presumably reports metrics with confidence intervals —
# verify in the utilities module.
thresh = get_thresh(y_label, y_preds, 'Youden')

test_CI(y_preds, y_label, thresh)

gc.collect()

## Fairness Analysis

In [None]:
race_list = ['White', 'Black', 'Latino', 'Others', 'Asian']

# Per-race evaluation. A sample belongs to a group when its 'Race' entry in
# y_demo equals the group's position in race_list.
for race_num, race in enumerate(race_list):
    print(race)

    # Positions of the samples in this subgroup.
    group_idx = [row for row, demo in enumerate(y_demo) if demo['Race'] == race_num]

    group_preds = y_preds[group_idx]
    group_labels = y_label[group_idx]

    # The threshold is re-derived for each subgroup from its own samples.
    thresh = get_thresh(group_labels, group_preds, 'Youden')
    test_CI(group_preds, group_labels, thresh)

    gc.collect()

In [None]:
gender_list = ['Female', 'Male']

# Per-gender evaluation. A sample belongs to a group when its 'Gender' entry
# in y_demo equals the group's position in gender_list.
for gender_num, gender in enumerate(gender_list):
    print(gender)

    # Positions of the samples in this subgroup.
    group_idx = [row for row, demo in enumerate(y_demo) if demo['Gender'] == gender_num]

    group_preds = y_preds[group_idx]
    group_labels = y_label[group_idx]

    # The threshold is re-derived for each subgroup from its own samples.
    thresh = get_thresh(group_labels, group_preds, 'Youden')
    test_CI(group_preds, group_labels, thresh)

    gc.collect()

In [None]:
age_list = ['0-40', '40-60', '60-80', '80-']

# Per-age-band evaluation. A sample belongs to a band when its 'Age' entry in
# y_demo equals the band's position in age_list.
for age_num, age in enumerate(age_list):
    print(age)

    # Positions of the samples in this subgroup.
    group_idx = [row for row, demo in enumerate(y_demo) if demo['Age'] == age_num]

    group_preds = y_preds[group_idx]
    group_labels = y_label[group_idx]

    # The threshold is re-derived for each subgroup from its own samples.
    thresh = get_thresh(group_labels, group_preds, 'Youden')
    test_CI(group_preds, group_labels, thresh)

    gc.collect()