In [None]:
# Shell escape: show the NVIDIA GPUs on this machine and their current load.
!nvidia-smi

In [None]:
from transformation import *
from utilities import *
from calculate_disparity import *
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
import cv2 as cv
import warnings
import skimage.transform as st
import gc
import os

print(tf.__version__)
warnings.filterwarnings("ignore")

# Restrict TensorFlow to a single physical GPU.
# GPU_INDEX is the preferred device; when the machine exposes fewer GPUs than
# that, fall back to the first one (or to CPU) instead of raising IndexError.
GPU_INDEX = 1

gpus = tf.config.list_physical_devices(device_type='GPU')
if gpus:
    selected_gpu = gpus[GPU_INDEX] if GPU_INDEX < len(gpus) else gpus[0]
    if selected_gpu is not gpus[GPU_INDEX if GPU_INDEX < len(gpus) else 0] or GPU_INDEX >= len(gpus):
        print('GPU {i} not available; using {name} instead.'.format(i=GPU_INDEX, name=selected_gpu.name))
    tf.config.set_visible_devices(devices=selected_gpu, device_type='GPU')
else:
    print('No GPU detected; TensorFlow will run on CPU.')

In [None]:
# Seed every random source used in this notebook with the same value.
seed = 2021
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this assignment
# is only seen by processes launched from this kernel, not by the kernel itself.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

# Radiological findings, listed in the same order in which get_data() builds
# each label vector.
Labels_diseases = [
    'Atelectasis',
    'Cardiomegaly',
    'Consolidation',
    'Edema',
    'Enlarged Cardiomediastinum',
    'Fracture',
    'Lung Lesion',
    'Lung Opacity',
    'No Finding',
    'Pleural Effusion',
    'Pleural Other',
    'Pneumonia',
    'Pneumothorax',
    'Support Devices',
]

In [None]:
# CheXpert demographics table. get_data() matches rows on the PATIENT column
# and reads the AGE_AT_CXR and GENDER columns from it.
df = pd.read_csv('../Data/Chexpert_demo.csv', index_col=0)


def get_age_interval(age):
    """Map an age in years to an age-group index.

    Returns 0 for ages below 40, 1 for [40, 60), 2 for [60, 80) and 3 for
    everything else. "Everything else" includes 80 and above as well as
    values that fail every comparison, such as NaN.
    """
    if age < 40:
        return 0
    if age < 60:
        return 1
    if age < 80:
        return 2
    # Reached for age >= 80 and for NaN (all comparisons above are False).
    return 3

def get_gender(gender):
    """Encode a gender string as an integer: 0 for 'Female', 1 for anything else."""
    return 0 if gender == 'Female' else 1

In [None]:
def _tfrecord_path(dataset, data_split):
    """Return the TFRecord file path for a dataset / split pair."""
    if dataset == 'mimic':
        if data_split == 'train':
            return 'data/mimic_train.tfrecords'
        if data_split == 'val':
            return 'data/mimic_val.tfrecords'
        return 'data/mimic_test.tfrecords'
    if dataset == 'chexpert':
        # CheXpert only has train and test files; any non-'train' split reads test.
        if data_split == 'train':
            return '../Data/Chexpert_train.tfrecords'
        return '../Data/Chexpert_test.tfrecords'
    raise ValueError("dataset must be 'mimic' or 'chexpert', got {!r}".format(dataset))


def _chexpert_demo_value(example, column):
    """Look up one column of the demographics table `df` for a CheXpert record."""
    patient_id = str(example.features.feature['id'].int64_list.value[0])
    patient_key = 'patient{i}'.format(i=patient_id.zfill(5))
    return df.loc[df['PATIENT'] == patient_key, column].values[0]


def _subgroup_code(example, dataset, types, race):
    """Return the integer subgroup code of a record for the attribute `types`."""
    features = example.features.feature
    if types == 'race':
        return race
    if types == 'age':
        if dataset == 'mimic':
            age = features['age'].int64_list.value[0]
            # Stored MIMIC age codes are shifted down by one when positive,
            # so codes 0 and 1 both end up as 0.
            return age - 1 if age > 0 else age
        return get_age_interval(_chexpert_demo_value(example, 'AGE_AT_CXR'))
    # Any other value of `types` selects gender.
    if dataset == 'mimic':
        return features['gender'].int64_list.value[0]
    return get_gender(_chexpert_demo_value(example, 'GENDER'))


def _random_augment(img_np):
    """Apply one of four augmentations, chosen uniformly with np.random."""
    choice = np.random.randint(0, 4)
    if choice == 0:
        shear = np.random.uniform(-np.pi/4, np.pi/4)
        return shear_transform(shear, img_np)
    if choice == 1:
        angle = np.random.uniform(-90, 90)
        return rotation_transformation(angle, img_np)
    if choice == 2:
        return fish(img_np, 0.4)
    scale = np.random.uniform(0.4, 1)
    return scaling_transformation(scale, img_np)


def get_data(dataset='mimic', data_split='test', types='race', feature_type=-1, random_aug=False):
    """Load images and 14-way binary labels from a TFRecord file.

    Parameters
    ----------
    dataset : str
        'mimic' or 'chexpert'.
    data_split : str
        'train', 'val' or 'test' for MIMIC. For CheXpert, 'train' reads the
        train file and every other value reads the test file.
    types : str
        Attribute used for subgroup selection: 'race', 'age', or anything else
        for gender. Only consulted when `feature_type` is not -1.
    feature_type : int
        Subgroup code to keep; -1 keeps every subgroup.
    random_aug : bool
        When true, each image gets one randomly chosen augmentation (shear,
        rotation, fish-eye or scaling) before resizing.

    Returns
    -------
    (X, y) : tuple of np.ndarray
        X holds the grayscale images resized to 224x224 as float32; y holds
        one row per image with a 0/1 entry per name in `Labels_diseases`.

    Raises
    ------
    ValueError
        If `dataset` is neither 'mimic' nor 'chexpert'.

    Notes
    -----
    Records whose 'race' code is not 0, 1 or 4 are always skipped, whatever
    `types` is. The NumPy global RNG is re-seeded with 2021 on every call so
    the augmentation sequence is repeatable.
    """
    np.random.seed(2021)
    filename = _tfrecord_path(dataset, data_split)

    X = []
    y = []
    for raw_record in tf.data.TFRecordDataset(filename):
        example = tf.train.Example()
        example.ParseFromString(raw_record.numpy())
        features = example.features.feature

        # The race filter applies to every query, so race is read
        # unconditionally (previously it was only bound for types == 'race').
        race = features['race'].int64_list.value[0]
        if race not in (0, 1, 4):
            continue

        # The subgroup code is only needed when a specific subgroup is requested.
        if feature_type != -1 and _subgroup_code(example, dataset, types, race) != feature_type:
            continue

        # A finding is positive only when its stored value is exactly 1.
        y.append([1 if features[name].float_list.value[0] == 1 else 0
                  for name in Labels_diseases])

        jpg = np.frombuffer(features['jpg_bytes'].bytes_list.value[0], dtype=np.uint8)
        img_np = cv.imdecode(jpg, cv.IMREAD_GRAYSCALE)
        if random_aug:
            img_np = _random_augment(img_np)

        X.append(np.float32(st.resize(img_np, (224, 224))))

    return np.array(X), np.array(y)

# Radiological Label Detection

In [None]:
# Train the "proposed" MIMIC model, keep the weights with the lowest validation
# loss, and save the decision thresholds computed on the validation set.
# `scheduler`, `BATCH_SIZE`, `define_model_diseases` and `cal_best_thresh` are
# not defined in this notebook; presumably they come from the star imports.
checkpoint_filepath = 'checkpoints/model_mimic_proposed'

model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_best_only=True,
    save_weights_only=True,
    monitor='val_loss',
    mode='min')

callback = [tf.keras.callbacks.LearningRateScheduler(scheduler),
            tf.keras.callbacks.EarlyStopping(mode='min', patience=4, monitor='val_loss'),
            model_checkpoint_callback]

# NOTE(review): these calls used to pass `combine=True`, which get_data() does
# not accept (TypeError), and omitted `types`. `random_aug=True` is the only
# augmentation switch get_data() offers, so it is used here -- confirm that this
# matches the intended "proposed" training set-up.
# mimic
X_train, y_train = get_data(dataset='mimic', data_split='train', types='race', feature_type=-1, random_aug=True)
X_val, y_val = get_data(dataset='mimic', data_split='val', types='race', feature_type=-1, random_aug=True)

# chexpert
# X_train, y_train = get_data(dataset='chexpert', data_split='train', types='race', feature_type=-1, random_aug=True)
# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=2021)

model = define_model_diseases()

model.fit(X_train, y_train, epochs=15, validation_data=(X_val, y_val), callbacks=callback, batch_size=BATCH_SIZE)

y_preds = model.predict(X_val)

best_thresh = cal_best_thresh(y_val, y_preds)

# Wrapped in a list so the thresholds are written as a single row.
np.savetxt('mimic_proposed_thresh.txt', [best_thresh])

# Release the training arrays before the evaluation cells load the test set.
del X_train, y_train
del X_val, y_val
gc.collect()

In [None]:
# Evaluate every saved checkpoint on the full MIMIC test split.
# feature_type=-1 keeps all subgroups; it is passed by keyword because the third
# positional parameter of get_data() is `types`, not `feature_type`.
X_test, y_test = get_data(dataset='mimic', data_split='test', types='race', feature_type=-1)

model = define_model_diseases()

CHECKPOINTS = (
    'checkpoints/model_mimic_baseline',
    'checkpoints/model_mimic_proposed',
    'checkpoints/model_chexpert_baseline',
    'checkpoints/model_chexpert_proposed',
)

for checkpoint in CHECKPOINTS:
    # Label each result so the four ROC plots / metric lines can be told apart.
    print(checkpoint)
    model.load_weights(checkpoint)
    y_preds = model.predict(X_test)
    # The label list defined in this notebook is `Labels_diseases`.
    plot_roc(y_test, y_preds, 'ROC', Labels_diseases)
    print(test(y_preds, y_test))

del model
gc.collect()

# Calculate TPR Disparity

In [None]:
# One MIMIC test subset per race code (0, 1 and 4; the variable names suggest
# white, black and asian -- confirm against the dataset encoding).
# random_aug=True means each image receives one randomly chosen augmentation.
X_test_white, y_test_white = get_data(
    dataset='mimic', data_split='test', types='race', feature_type=0, random_aug=True)
X_test_black, y_test_black = get_data(
    dataset='mimic', data_split='test', types='race', feature_type=1, random_aug=True)
X_test_asia, y_test_asia = get_data(
    dataset='mimic', data_split='test', types='race', feature_type=4, random_aug=True)

In [None]:
# Checkpoint / threshold-file pairs that can be evaluated for TPR disparity.
# Switch runs by changing EVAL_RUN instead of commenting lines in and out.
# The 'mimic_proposed' threshold file is the one written by the training cell
# (np.savetxt('mimic_proposed_thresh.txt', ...)).
EVAL_RUNS = {
    'mimic_baseline': ('checkpoints/model_mimic_baseline', 'original_thresh.txt'),
    'mimic_proposed': ('checkpoints/model_mimic_proposed', 'mimic_proposed_thresh.txt'),
    'chexpert_baseline': ('checkpoints/model_chexpert_baseline', 'chexpert_thresh.txt'),
    'chexpert_proposed': ('checkpoints/model_chexpert_proposed', 'chexpert_combine_thresh.txt'),
}
EVAL_RUN = 'mimic_baseline'

weights_path, thresh_path = EVAL_RUNS[EVAL_RUN]

model = define_model_diseases()
model.load_weights(weights_path)
best_thresh = np.loadtxt(thresh_path)

# Predicted scores for each race subgroup of the test set.
y_preds_white = model.predict(X_test_white)
y_preds_black = model.predict(X_test_black)
y_preds_asia = model.predict(X_test_asia)

In [None]:
# For each of the 14 labels, compute the true-positive rate of every race
# subgroup at that label's threshold, then record
#   - each subgroup's signed deviation from the median TPR, and
#   - the sum of the absolute deviations (the label's disparity).
disparity, tprs_white, tprs_black, tprs_asia = [], [], [], []

for label_idx in range(14):
    thresh = best_thresh[label_idx]
    tpr_white = get_tpr(y_test_white[:, label_idx], y_preds_white[:, label_idx], thresh)
    tpr_black = get_tpr(y_test_black[:, label_idx], y_preds_black[:, label_idx], thresh)
    tpr_asia = get_tpr(y_test_asia[:, label_idx], y_preds_asia[:, label_idx], thresh)

    median = np.median([tpr_white, tpr_black, tpr_asia])
    dev_white = tpr_white - median
    dev_black = tpr_black - median
    dev_asia = tpr_asia - median

    disparity.append(np.abs(dev_white) + np.abs(dev_black) + np.abs(dev_asia))
    tprs_white.append(dev_white)
    tprs_black.append(dev_black)
    tprs_asia.append(dev_asia)

In [None]:
# Mean of the per-label disparity values (shown as the cell output).
np.mean(disparity)

In [None]:
from sklearn.metrics import roc_auc_score

# Print the output of test() for each race subgroup: white, black, asia.
for group_preds, group_labels in (
    (y_preds_white, y_test_white),
    (y_preds_black, y_test_black),
    (y_preds_asia, y_test_asia),
):
    print(test(group_preds, group_labels))

In [None]:
# Bundle the per-label results: total disparity first, then the signed
# deviation from the median TPR for the white, black and asia subgroups.
result = [disparity, tprs_white, tprs_black, tprs_asia]