In [1]:
# This model was made using a docker image
# Docker image can be found at https://hub.docker.com/r/blackboxradiology/tf-2.6_with_pytorch
# docker pull blackboxradiology/tf-2.6_with_pytorch

# python version 3.6.9
# matplotlib version 3.3.4
# numpy version 1.19.5
# pandas version 1.1.5
# PIL version 8.2.0
# sklearn version 0.24.2
# tensorflow version 2.6.0

from datetime import datetime
import glob
import math
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import os
import pandas as pd
from PIL import Image
import random as python_random
import seaborn as sns
from sklearn.metrics import classification_report, roc_auc_score, roc_curve, precision_recall_curve
from sklearn.metrics import auc, accuracy_score, recall_score, precision_score, f1_score, confusion_matrix
from sklearn.utils import shuffle
import sys
import tensorflow as tf
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import GlobalAveragePooling2D, Input, Dense, Activation
from tensorflow.keras.models import Model
from tensorflow.keras import initializers
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
from tensorflow.keras.mixed_precision import experimental as mixed_precision

In [2]:
# Enable mixed precision for every layer created after this point.
# The non-experimental API is used because the `experimental` module is
# deprecated (TensorFlow prints a deprecation notice for it) and the
# optimizers in this notebook are already wrapped with the non-experimental
# tf.keras.mixed_precision.LossScaleOptimizer.
policy = tf.keras.mixed_precision.Policy('mixed_float16')
tf.keras.mixed_precision.set_global_policy(policy)

# All preprocessing steps of MIMIC .jpg images are included in this repository
# Image data preprocessing includes resizing to 320x320
# and normalizing images with ImageNet mean and standard deviation values using
# from tensorflow.keras.applications.densenet import preprocess_input

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPUs will likely run quickly with dtype policy mixed_float16 as they all have compute capability of at least 7.0
Instructions for updating:
Use tf.keras.mixed_precision.LossScaleOptimizer instead. LossScaleOptimizer now has all the functionality of DynamicLossScale


In [3]:
# Per-image metadata table: mimic-cxr-2.0.0-metadata.csv, available from
# https://physionet.org/content/mimic-cxr-jpg/2.0.0/
metadata_df = pd.read_csv('mimic-cxr-2.0.0-metadata.csv')
# Zero-row view: displays only the column headers.
metadata_df.head(0)

Unnamed: 0,dicom_id,subject_id,study_id,PerformedProcedureStepDescription,ViewPosition,Rows,Columns,StudyDate,StudyTime,ProcedureCodeSequence_CodeMeaning,ViewCodeSequence_CodeMeaning,PatientOrientationCodeSequence_CodeMeaning


In [4]:
# Admissions table: admissions.csv from the "core" directory of
# https://physionet.org/content/mimiciv/1.0/
demographic_df = pd.read_csv('admissions.csv')
# Zero-row view: displays only the column headers.
demographic_df.head(0)

Unnamed: 0,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,marital_status,ethnicity,edregtime,edouttime,hospital_expire_flag


In [5]:
# Per-study label table: mimic-cxr-2.0.0-chexpert.csv, available from
# https://physionet.org/content/mimic-cxr-jpg/2.0.0/
# Missing entries are replaced with 0 immediately after loading.
pathology_df = pd.read_csv('mimic-cxr-2.0.0-chexpert.csv').fillna(0)
# Zero-row view: displays only the column headers.
pathology_df.head(0)

Unnamed: 0,subject_id,study_id,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices


In [6]:
# One metadata row per image, so the row count is the image count.
print("Number of images: " + str(metadata_df.shape[0]))

Number of images: 377110


In [7]:
# Distinct subject_id values in the metadata table.
print("Number of patients: " + str(metadata_df["subject_id"].nunique()))

Number of patients: 65379


In [8]:
# Number of admission rows per documented ethnicity.
demographic_df["ethnicity"].value_counts()

WHITE                            337630
BLACK/AFRICAN AMERICAN            80293
HISPANIC/LATINO                   29823
OTHER                             26813
ASIAN                             24506
UNKNOWN                           19400
UNABLE TO OBTAIN                   3740
AMERICAN INDIAN/ALASKA NATIVE      1535
Name: ethnicity, dtype: int64

In [9]:
# Identify patients whose admissions record more than one ethnicity value,
# so that they can be excluded when the cohort is assembled.
# credit to github.com/robintibor
ethnicity_df = demographic_df[['subject_id', 'ethnicity']].drop_duplicates()

# After de-duplication, a subject_id that still occurs more than once has at
# least two different ethnicity values on record.
v = ethnicity_df['subject_id'].value_counts()
subject_id_more_than_once = v[v > 1].index

is_ambiguous = ethnicity_df['subject_id'].isin(subject_id_more_than_once)
ambiguous_ethnicity_df = ethnicity_df[is_ambiguous]
inconsistent_race = ambiguous_ethnicity_df['subject_id'].unique()

# Display how often each combination of conflicting values occurs; the values
# of one patient are sorted and joined with "_" to form the combination label.
grouped = ambiguous_ethnicity_df.groupby('subject_id')
combined_labels = grouped.aggregate(lambda values: "_".join(sorted(values)))
combined_labels['ethnicity'].value_counts()

OTHER_WHITE                                                   2489
UNKNOWN_WHITE                                                 1131
BLACK/AFRICAN AMERICAN_OTHER                                   560
UNABLE TO OBTAIN_WHITE                                         308
HISPANIC/LATINO_OTHER                                          307
HISPANIC/LATINO_WHITE                                          204
HISPANIC/LATINO_UNKNOWN                                        173
BLACK/AFRICAN AMERICAN_WHITE                                   168
BLACK/AFRICAN AMERICAN_UNKNOWN                                 156
OTHER_UNKNOWN                                                  130
BLACK/AFRICAN AMERICAN_HISPANIC/LATINO                         111
ASIAN_OTHER                                                     98
UNABLE TO OBTAIN_UNKNOWN                                        49
ASIAN_WHITE                                                     46
BLACK/AFRICAN AMERICAN_UNABLE TO OBTAIN                       

In [10]:
# Label columns taken from the pathology table (joined on study_id).
pathology_columns = [
    'study_id', 'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema',
    'Pleural Effusion', 'Enlarged Cardiomediastinum', 'Fracture',
    'Lung Lesion', 'Lung Opacity', 'No Finding', 'Pleural Other',
    'Pneumonia', 'Pneumothorax', 'Support Devices',
]

# Assemble the study cohort step by step:
#   1. attach the documented ethnicity to every image,
#   2. drop patients with conflicting ethnicity records,
#   3. keep only the three race groups studied here,
#   4. keep only AP / PA views,
#   5. attach the pathology labels of each study (left join keeps every image).
merge_df = (
    pd.merge(metadata_df, ethnicity_df, on='subject_id')
    .loc[lambda df: ~df.subject_id.isin(inconsistent_race)]
    .rename(columns={"ethnicity": "race"})
    .loc[lambda df: df.race.isin(['ASIAN', 'BLACK/AFRICAN AMERICAN', 'WHITE'])]
    .loc[lambda df: df.ViewPosition.isin(['AP', 'PA'])]
    .merge(pathology_df[pathology_columns], on='study_id', how='left')
)

In [11]:
# Images remaining after the race / view-position filters.
print("Total images after inclusion/exclusion criteria: " + str(merge_df.shape[0]))

Total images after inclusion/exclusion criteria: 183217


In [12]:
# Distinct patients remaining after the race / view-position filters.
print("Total patients after inclusion/exclusion criteria: " + str(merge_df["subject_id"].nunique()))

Total patients after inclusion/exclusion criteria: 43209


In [13]:
# Work on a copy: inserting the "split" column into an alias of merge_df would
# also modify merge_df, and re-running the cell would then add a second
# "split" column. With a fresh copy the cell is safe to re-run.
data_df = merge_df.copy()
data_df.insert(5, "split", "none")
unique_sub_id = data_df.subject_id.unique()

# Nominal patient-level proportions of the three splits.
train_percent, valid_percent, test_percent = 0.80, 0.10, 0.10

# Shuffle the patient ids with a fixed seed so the split is reproducible.
unique_sub_id = shuffle(unique_sub_id, random_state=2021)
value1 = round(len(unique_sub_id) * train_percent)   # patients in training set
value2 = round(len(unique_sub_id) * valid_percent)   # patients in validation set
value3 = value1 + value2                             # index where the test ids start
# The test set is every id from value3 onwards (unique_sub_id[value3:]), so its
# size is the remainder rather than an independently rounded share.
value4 = len(unique_sub_id) - value3

In [14]:
# value1 is the number of patient ids assigned to the training split.
print("Patients in training set: ", value1, sep="")

Patients in training set: 34567


In [15]:
# value2 is the number of patient ids assigned to the validation split.
print("Patients in validation set: ", value2, sep="")

Patients in validation set: 4321


In [16]:
# value4 is the patient count reported for the testing split.
print("Patients in testing set: ", value4, sep="")

Patients in testing set: 4321


In [17]:
# Shuffle the image rows with a fixed seed so the row order (and therefore the
# order in which the non-shuffling generators read the images) is reproducible.
data_df = shuffle(data_df, random_state=2021)

# Cut the shuffled patient ids into three consecutive, non-overlapping ranges.
train_sub_id = unique_sub_id[:value1]
validate_sub_id = unique_sub_id[value1:value3]
test_sub_id = unique_sub_id[value3:]

In [18]:
# Label every image row with the split its patient belongs to, so that all
# images of one patient end up in the same split.
for split_name, split_ids in (
    ("train", train_sub_id),
    ("validate", validate_sub_id),
    ("test", test_sub_id),
):
    data_df.loc[data_df.subject_id.isin(split_ids), "split"] = split_name

In [19]:
# Fraction of image rows in each split.
data_df["split"].value_counts(normalize=True)

train       0.795952
validate    0.104057
test        0.099991
Name: split, dtype: float64

In [20]:
# Number of image rows per race group.
data_df["race"].value_counts()

WHITE                     141873
BLACK/AFRICAN AMERICAN     34238
ASIAN                       7106
Name: race, dtype: int64

In [21]:
# Fraction of image rows per race group.
data_df["race"].value_counts(normalize=True)

WHITE                     0.774344
BLACK/AFRICAN AMERICAN    0.186871
ASIAN                     0.038785
Name: race, dtype: float64

In [22]:
# The ids are used as text below, so convert them to strings first.
data_df["subject_id"] = data_df["subject_id"].astype(str)
data_df["study_id"] = data_df["study_id"].astype(str)

# Relative image path: p<first two characters of subject_id>/p<subject_id>/s<study_id>/<dicom_id>.jpg
image_paths = (
    "p" + data_df["subject_id"].str[0:2]
    + "/p" + data_df["subject_id"]
    + "/s" + data_df["study_id"]
    + "/" + data_df["dicom_id"] + ".jpg"
)
# Place the new column at position 2.
data_df.insert(2, "path", image_paths)

# Shorter label name used from here on.
data_df = data_df.rename(columns={"Pleural Effusion": "Effusion"})

In [23]:
# Names of the 14 label columns, in the order in which they are passed to the
# data generators as y_col. (Despite its name, this is a list.)
pathology_dict = [
    'Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion',
    'Enlarged Cardiomediastinum', 'Fracture', 'Lung Lesion', 'Lung Opacity',
    'No Finding', 'Pleural Other', 'Pneumonia', 'Pneumothorax',
    'Support Devices',
]

In [24]:
# Label clean-up: every missing value and every negative label value (-1.0)
# becomes 0.0.

data_df = data_df.fillna(0)

for pathology in pathology_dict:
    print(pathology)
    is_negative = data_df[pathology] < 0.0
    data_df.loc[is_negative, pathology] = 0.0


Atelectasis
Cardiomegaly
Consolidation
Edema
Effusion
Enlarged Cardiomediastinum
Fracture
Lung Lesion
Lung Opacity
No Finding
Pleural Other
Pneumonia
Pneumothorax
Support Devices


In [25]:
# One frame per split, selected by the "split" label of each image row.
train_df = data_df.loc[data_df["split"] == "train"]
validation_df = data_df.loc[data_df["split"] == "validate"]
test_df = data_df.loc[data_df["split"] == "test"]

In [26]:
#False indicates no patient_id shared between groups

# Distinct patient ids of each split, then all three arrays joined into one
# flat array for the leakage check.
unique_train_id, unique_validation_id, unique_test_id = (
    frame.subject_id.unique() for frame in (train_df, validation_df, test_df)
)
all_id = np.concatenate(
    (unique_train_id, unique_validation_id, unique_test_id),
    axis=None,
)

def contains_duplicates(X):
    """Report whether any value occurs more than once in ``X``.

    Parameters
    ----------
    X : sequence or array
        Values to inspect; it must support ``len``.

    Returns
    -------
    bool
        True when the number of distinct values differs from ``len(X)``,
        False otherwise.
    """
    distinct_count = np.unique(X).size
    return distinct_count != len(X)

# Displays False when no subject_id appears in more than one split.
contains_duplicates(all_id)

False

In [27]:
from tensorflow.keras.applications.densenet import preprocess_input

# Input resolution used for the model and for the data generators.
HEIGHT = 320
WIDTH = 320

# Prefix embedded in the checkpoint file names.
arc_name = "".join([
    "MIMIC-", str(HEIGHT), "x", str(WIDTH),
    "_80-10-10-split-DenseNet121-Float16_pathology_detection_",
])

In [None]:
# Pathology classifier: ImageNet-initialised DenseNet121 backbone followed by
# global average pooling and a 14-unit sigmoid head (one unit per label).
# NOTE(review): the layer listing recorded further down shows random_flip and
# random_rotation layers that this cell does not create, and this cell has no
# execution count -- the saved outputs may come from a different version of
# this cell. Confirm before relying on them.
input_a = Input(shape=(HEIGHT, WIDTH, 3))
base_model = DenseNet121(
    input_tensor=input_a,
    include_top=False,
    input_shape=(HEIGHT, WIDTH, 3),
    weights='imagenet',
)
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(14, name='dense_logits')(x)
# The final activation is explicitly float32 while the global policy is
# mixed_float16.
output = Activation('sigmoid', dtype='float32', name='predictions')(x)
model = Model(inputs=[input_a], outputs=[output])

In [33]:
# Hyper-parameters for the pathology-detection stage.
learning_rate = 1e-3
momentum_val = 0.9
decay_val = 0.0
batch_s = 128  # may need to reduce batch size if OOM error occurs
train_batch_size = batch_s
test_batch_size = 128

# Multiply the learning rate by 0.1 after 2 epochs without val_loss
# improvement, never going below 1e-5.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.1,
    patience=2,
    min_lr=1e-5,
    verbose=1,
)

# Adam wrapped in a loss-scaling optimizer for mixed-precision training.
adam_opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.Adam(learning_rate=learning_rate, decay=decay_val)
)

model.compile(
    optimizer=adam_opt,
    loss=tf.losses.BinaryCrossentropy(),
    metrics=[
        tf.keras.metrics.AUC(curve='ROC', name='ROC-AUC'),
        tf.keras.metrics.AUC(curve='PR', name='PR-AUC'),
    ],
)

In [34]:
# Training generator: random rotation (up to 15 degrees), horizontal flip and
# zoom (up to 10%), followed by the DenseNet preprocessing function.
augmentation_options = dict(
    rotation_range=15,
    fill_mode='constant',
    horizontal_flip=True,
    zoom_range=0.1,
)
train_gen = ImageDataGenerator(preprocessing_function=preprocess_input, **augmentation_options)

# Validation / test generator: preprocessing only, no augmentation.
validate_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [35]:
# Up-sample the 'ASIAN' and 'BLACK/AFRICAN AMERICAN' classes: every non-WHITE
# training row is added a second time, placed ahead of the full training set.
train_df = data_df.loc[data_df["split"] == "train"]
other_df = train_df.loc[train_df["race"] != "WHITE"]
train_df = pd.concat([other_df, train_df])

In [36]:
# Options shared by the training and validation flows of the pathology stage:
# images are located through the "path" column and the 14 label columns are
# passed through unchanged (class_mode="raw").
pathology_flow_options = dict(
    directory="/path/to/directory/",
    x_col="path",
    y_col=pathology_dict,
    class_mode="raw",
    target_size=(HEIGHT, WIDTH),
    dtype='float32',
)

# Training batches are shuffled with a fixed seed; validation keeps frame order.
train_batches = train_gen.flow_from_dataframe(
    train_df,
    shuffle=True,
    seed=2021,
    batch_size=train_batch_size,
    **pathology_flow_options,
)
validate_batches = validate_gen.flow_from_dataframe(
    validation_df,
    shuffle=False,
    batch_size=test_batch_size,
    **pathology_flow_options,
)

Found 178330 validated image filenames.
Found 19065 validated image filenames.


In [37]:
# Batches needed to cover each frame once, rounded up (integer ceiling
# division written as negated floor division).
train_epoch = -(-len(train_df) // train_batch_size)
val_epoch = -(-len(validation_df) // test_batch_size)

In [38]:
# Timestamp that makes the checkpoint names of this run unique.
var_date = datetime.now().strftime("%Y%m%d-%H%M%S")

# Stop after 4 epochs without val_loss improvement and restore the best weights.
ES = EarlyStopping(monitor='val_loss', mode='min', patience=4, restore_best_weights=True)

# Checkpoint file name template. "_" is used as the separator after "epoch"
# and "val_loss" (instead of ":") so that the names match the checkpoint names
# of the race-detection stage and stay valid on filesystems that reject ":".
checkpoint_path = (
    "../saved_models/racial_bias/trials/"
    + str(arc_name)
    + "_CXR_LR-" + str(learning_rate)
    + "_" + var_date
    + "_epoch_{epoch:03d}_val_loss_{val_loss:.5f}.hdf5"
)
# Save the full model whenever val_loss reaches a new minimum.
checkloss = ModelCheckpoint(
    checkpoint_path,
    monitor='val_loss',
    mode='min',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
)

In [39]:
# Train the pathology model; the callbacks save the best checkpoint, lower the
# learning rate on plateaus and stop early.
fit_callbacks = [checkloss, reduce_lr, ES]
model.fit(
    train_batches,
    validation_data=validate_batches,
    epochs=100,
    steps_per_epoch=int(train_epoch),
    validation_steps=int(val_epoch),
    workers=32,
    max_queue_size=50,
    shuffle=True,
    callbacks=fit_callbacks,
)

Epoch 1/100
INFO:tensorflow:batch_all_reduce: 364 all-reduces with algorithm = nccl, num_packs = 1
INFO:tensorflow:batch_all_reduce: 364 all-reduces with algorithm = nccl, num_packs = 1

Epoch 00001: val_loss improved from inf to 0.28563, saving model to ../saved_models/racial_bias/trials/MIMIC-320x320_80-10-10-split-DenseNet121-Float16_pathology_detection__CXR_LR-0.001_20211013-165756_epoch:001_val_loss:0.28563.hdf5
Epoch 2/100

Epoch 00002: val_loss did not improve from 0.28563
Epoch 3/100

Epoch 00003: val_loss improved from 0.28563 to 0.28317, saving model to ../saved_models/racial_bias/trials/MIMIC-320x320_80-10-10-split-DenseNet121-Float16_pathology_detection__CXR_LR-0.001_20211013-165756_epoch:003_val_loss:0.28317.hdf5
Epoch 4/100

Epoch 00004: val_loss did not improve from 0.28317
Epoch 5/100

Epoch 00005: val_loss improved from 0.28317 to 0.27994, saving model to ../saved_models/racial_bias/trials/MIMIC-320x320_80-10-10-split-DenseNet121-Float16_pathology_detection__CXR_LR-0.0

<keras.callbacks.History at 0x7f5bd0ecf390>

In [40]:
# Keep a reference to the trained pathology model under a second name; this is
# the same object (no copy), so the name `model` can be reused for the new
# model built from its layers.
model_transfer = model

In [41]:
# Race classifier built on the pathology model: take the output of the layer
# four positions from the end (the last layer before the pooling /
# dense_logits / predictions head added when the pathology model was built)
# and attach a new pooling layer and a 3-unit softmax head.
backbone_output = model_transfer.layers[-4].output
x = GlobalAveragePooling2D()(backbone_output)
x = Dense(3, name='dense_logits')(x)
# The final activation is explicitly float32 while the global policy is
# mixed_float16.
output = Activation('softmax', dtype='float32', name='predictions')(x)
model = Model(inputs=[model_transfer.input], outputs=[output])

In [42]:
# Freeze every layer except the last two (dense_logits and predictions), so
# only the new head is trained.
frozen_layers = model.layers[:-2]
for layers in frozen_layers:
    layers.trainable = False

In [43]:
# List every layer together with its trainable flag to verify the freeze.
for layers in model.layers:
    layer_name, is_trainable = layers.name, layers.trainable
    print(str(layer_name) + " : " + str(is_trainable))

input_1 : False
random_flip : False
random_rotation : False
zero_padding2d : False
conv1/conv : False
conv1/bn : False
conv1/relu : False
zero_padding2d_1 : False
pool1 : False
conv2_block1_0_bn : False
conv2_block1_0_relu : False
conv2_block1_1_conv : False
conv2_block1_1_bn : False
conv2_block1_1_relu : False
conv2_block1_2_conv : False
conv2_block1_concat : False
conv2_block2_0_bn : False
conv2_block2_0_relu : False
conv2_block2_1_conv : False
conv2_block2_1_bn : False
conv2_block2_1_relu : False
conv2_block2_2_conv : False
conv2_block2_concat : False
conv2_block3_0_bn : False
conv2_block3_0_relu : False
conv2_block3_1_conv : False
conv2_block3_1_bn : False
conv2_block3_1_relu : False
conv2_block3_2_conv : False
conv2_block3_concat : False
conv2_block4_0_bn : False
conv2_block4_0_relu : False
conv2_block4_1_conv : False
conv2_block4_1_bn : False
conv2_block4_1_relu : False
conv2_block4_2_conv : False
conv2_block4_concat : False
conv2_block5_0_bn : False
conv2_block5_0_relu : False
c

In [44]:
# Number of weight tensors in total, trainable and frozen.
for label, weight_group in (
    ("weights:", model.weights),
    ("trainable_weights:", model.trainable_weights),
    ("non_trainable_weights:", model.non_trainable_weights),
):
    print(label, len(weight_group))

weights: 606
trainable_weights: 2
non_trainable_weights: 604


In [45]:
# Hyper-parameters for the race-detection stage. Compared with the pathology
# stage:
#   * the learning rate is 3e-3 instead of 1e-3,
#   * the learning rate is halved on plateaus instead of divided by 10,
#   * the training batch size is 512.

learning_rate = 3e-3
momentum_val = 0.9
decay_val = 0.0
batch_s = 512  # may need to reduce batch size if OOM error occurs
train_batch_size = batch_s
test_batch_size = 256

# Halve the learning rate after 2 epochs without val_loss improvement, never
# going below 1e-5.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.5,
    patience=2,
    min_lr=1e-5,
    verbose=1,
)

# Adam wrapped in a loss-scaling optimizer for mixed-precision training.
adam_opt = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.Adam(learning_rate=learning_rate, decay=decay_val)
)

model.compile(
    optimizer=adam_opt,
    loss=tf.losses.CategoricalCrossentropy(),
    metrics=[
        tf.keras.metrics.AUC(curve='ROC', name='ROC-AUC'),
        tf.keras.metrics.AUC(curve='PR', name='PR-AUC'),
    ],
)

In [46]:
# Up-sample the 'ASIAN' and 'BLACK/AFRICAN AMERICAN' classes: every non-WHITE
# training row is added a second time, placed ahead of the full training set.

train_df = data_df.loc[data_df["split"] == "train"]
other_df = train_df.loc[train_df["race"] != "WHITE"]
train_df = pd.concat([other_df, train_df])

In [47]:
# Training rows per race group after up-sampling.
train_df["race"].value_counts()

WHITE                     113334
BLACK/AFRICAN AMERICAN     54306
ASIAN                      10690
Name: race, dtype: int64

In [48]:
# Options shared by the training and validation flows of the race stage: the
# "race" column is the target and is one-hot encoded (class_mode="categorical").
race_flow_options = dict(
    directory="/path/to/directory/",
    x_col="path",
    y_col="race",
    class_mode="categorical",
    target_size=(HEIGHT, WIDTH),
    dtype='float32',
)

# Training batches are shuffled with a fixed seed; validation keeps frame order.
train_batches = train_gen.flow_from_dataframe(
    train_df,
    shuffle=True,
    seed=2021,
    batch_size=train_batch_size,
    **race_flow_options,
)
validate_batches = validate_gen.flow_from_dataframe(
    validation_df,
    shuffle=False,
    batch_size=test_batch_size,
    **race_flow_options,
)

Found 178330 validated image filenames belonging to 3 classes.
Found 19065 validated image filenames belonging to 3 classes.


In [49]:
# Batches needed to cover each frame once, rounded up (integer ceiling
# division written as negated floor division).
train_epoch = -(-len(train_df) // train_batch_size)
val_epoch = -(-len(validation_df) // test_batch_size)

In [50]:
# Timestamp that makes the checkpoint names of this run unique.
var_date = datetime.now().strftime("%Y%m%d-%H%M%S")

# Stop after 6 epochs without val_loss improvement and restore the best weights.
ES = EarlyStopping(monitor='val_loss', mode='min', patience=6, restore_best_weights=True)

# NOTE(review): arc_name still ends in "pathology_detection_" although this
# stage trains on the "race" column, so the checkpoint names do not reflect
# the task -- consider a dedicated prefix.
checkpoint_path = (
    "../saved_models/racial_bias/trials/"
    + str(arc_name)
    + "_CXR_LR-" + str(learning_rate)
    + "_" + var_date
    + "_epoch_{epoch:03d}_val_loss_{val_loss:.5f}.hdf5"
)
# Save the full model whenever val_loss reaches a new minimum.
checkloss = ModelCheckpoint(
    checkpoint_path,
    monitor='val_loss',
    mode='min',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
)

In [51]:
# Train the race head; the callbacks save the best checkpoint, lower the
# learning rate on plateaus and stop early.
fit_callbacks = [checkloss, reduce_lr, ES]
model.fit(
    train_batches,
    validation_data=validate_batches,
    epochs=100,
    steps_per_epoch=int(train_epoch),
    validation_steps=int(val_epoch),
    workers=32,
    max_queue_size=50,
    shuffle=True,
    callbacks=fit_callbacks,
)

Epoch 1/100
INFO:tensorflow:batch_all_reduce: 2 all-reduces with algorithm = nccl, num_packs = 1
INFO:tensorflow:batch_all_reduce: 2 all-reduces with algorithm = nccl, num_packs = 1

Epoch 00001: val_loss improved from inf to 0.52793, saving model to ../saved_models/racial_bias/trials/MIMIC-320x320_80-10-10-split-DenseNet121-Float16_pathology_detection__CXR_LR-0.003_20211013-212652_epoch_001_val_loss_0.52793.hdf5
Epoch 2/100

Epoch 00002: val_loss did not improve from 0.52793
Epoch 3/100

Epoch 00003: val_loss improved from 0.52793 to 0.51507, saving model to ../saved_models/racial_bias/trials/MIMIC-320x320_80-10-10-split-DenseNet121-Float16_pathology_detection__CXR_LR-0.003_20211013-212652_epoch_003_val_loss_0.51507.hdf5
Epoch 4/100

Epoch 00004: val_loss did not improve from 0.51507
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.51507

Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.001500000013038516.
Epoch 6/100

Epoch 00006: val_loss improved from 0.51507 to 

<keras.callbacks.History at 0x7f5a5d0ccba8>

In [53]:
# Test flow for the race model. It reads the same relative "path" column as
# the training and validation flows, so it uses the same image directory as
# they do instead of a machine-specific absolute path. shuffle=False keeps the
# predictions aligned with test_batches.classes.
test_batches = validate_gen.flow_from_dataframe(
    test_df,
    directory="/path/to/directory/",
    x_col="path",
    y_col="race",
    class_mode="categorical",
    target_size=(HEIGHT, WIDTH),
    shuffle=False,
    batch_size=test_batch_size,
    dtype='float32',
)

Found 18320 validated image filenames belonging to 3 classes.


In [54]:
# Predict class probabilities for every test image; the step count is the
# number of batches needed to cover test_df once, rounded up.
test_steps = math.ceil(len(test_df) / test_batch_size)
multilabel_predict_test = model.predict(
    test_batches,
    max_queue_size=10,
    verbose=1,
    steps=test_steps,
    workers=32,
)



In [56]:
# One-vs-rest ROC AUC for each race class on the test set.
result = multilabel_predict_test
labels = np.argmax(result, axis=1)   # predicted class index per image
# Display names by class index.
# NOTE(review): assumes the generator assigned indices 0/1/2 to
# ASIAN / BLACK/AFRICAN AMERICAN / WHITE -- verify against
# test_batches.class_indices.
target_names = ['Asian', 'Black', 'White']

print ('Classwise ROC AUC \n')
# Iterate over every output column rather than over the set of predicted
# labels: a class the model never predicts as the arg-max still has a score
# column and must still be reported.
for p in range(result.shape[1]):
    # Class p is the positive class; its predicted probability is the score.
    fpr, tpr, thresholds = roc_curve(test_batches.classes, result[:,p], pos_label = p)
    auroc = round(auc(fpr, tpr), 4)
    print ('Class - {} ROC-AUC- {}'.format(target_names[p], auroc))


Classwise ROC AUC 

Class - Asian ROC-AUC- 0.7986
Class - Black ROC-AUC- 0.8416
Class - White ROC-AUC- 0.8258
