In [None]:
# Shell escape: runs the `nvidia-smi` command and shows its output in the cell.
!nvidia-smi

In [None]:
import os
import warnings

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides DeprecationWarnings from NumPy/TensorFlow/scikit-learn
# (e.g. convergence or deprecated-API warnings) - consider a narrower filter.
warnings.filterwarnings("ignore")
# Set in a cell that runs before the one importing tensorflow.
# NOTE(review): presumably lowers TensorFlow's native log verbosity - confirm the level meaning.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
%matplotlib inline
import pandas as pd
import skimage.transform as st
import os
from PIL import Image, ImageOps
import gc
import seaborn as sns
import itertools
from sklearn.metrics import roc_curve, auc, classification_report, accuracy_score, roc_auc_score
import re
from sklearn.multiclass import OneVsRestClassifier
from tensorflow.keras import backend as K
from sklearn.linear_model import LogisticRegression
from transformation import *
from utilities import *

 
print(tf.__version__)

# Make only the first physical GPU visible to TensorFlow.
# The list is empty on a machine without a GPU, so indexing it unconditionally
# would raise IndexError; in that case device visibility is left untouched.
gpus = tf.config.list_physical_devices(device_type='GPU')
if gpus:
    tf.config.set_visible_devices(devices=gpus[0], device_type='GPU')
else:
    print('No GPU detected; leaving device visibility unchanged.')

2.8.0


In [None]:
# Fix the random seeds used by this notebook so runs are repeatable.
seed = 2021
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so setting it
# here most likely only affects child processes, not this kernel - confirm.
os.environ['PYTHONHASHSEED']=str(seed)
# Seed TensorFlow's global generator.
tf.random.set_seed(seed)
# Seed NumPy's legacy global generator (used by the np.random.* calls further down).
np.random.seed(seed)

In [8]:
def get_age_interval(age):
    """Map an age to one of four bucket indices.

    Returns 0 for age < 40, 1 for 40 <= age < 60, 2 for 60 <= age < 80 and
    3 for everything else. "Everything else" includes age >= 80 and any value
    for which all comparisons are false (e.g. NaN).
    """
    if age < 40:
        return 0
    if age < 60:
        return 1
    if age < 80:
        return 2
    # age >= 80, or a value such as NaN that fails every comparison above.
    return 3

def get_gender(gender):
    """Encode a gender string: 0 for exactly 'Female', 1 for any other value."""
    return 0 if gender == 'Female' else 1

def get_demo_data_mimic(data_split='test', feature='race', only_labels=False, random_aug=False):
    """Load MIMIC images and one-hot demographic labels from a TFRecord file.

    Only records whose stored 'race' value is 0, 1 or 4 are kept, whatever
    `feature` is requested.

    Args:
        data_split: 'train' or 'val'; any other value selects the test file.
        feature: which label to build.
            'race'   -> 3-way one-hot for stored values 0, 1, 4 (in that order).
            'age'    -> 4-way one-hot; a positive stored value is decremented by
                        one first, so stored 0 and 1 both land in class 0.
                        Records that end up outside 0..3 are skipped.
            'gender' -> 2-way one-hot for stored values 0 and 1; others skipped.
            Any other string skips every record, so the result is empty.
        only_labels: when true, images are not decoded and only labels are returned.
        random_aug: when equal to True, the '*_transformed.tfrecords' files are read.

    Returns:
        `np.array(labels)` if `only_labels`, otherwise the tuple
        `(np.array(images), np.array(labels))` where every image has been
        resized to 224x224 and reshaped to (224, 224, 1).

    Note:
        Because 'age' and 'gender' may skip additional records, labels for
        those features are not guaranteed to line up with images/features that
        were loaded with feature='race'.
    """
    # Any split other than 'train'/'val' falls back to the test file.
    split = data_split if data_split in ('train', 'val') else 'test'
    # Equality test kept on purpose: only True (or 1) selects the transformed files.
    suffix = '_transformed' if random_aug == True else ''
    filename = 'data/mimic_{split}{suffix}.tfrecords'.format(split=split, suffix=suffix)

    # Stored integer code -> one-hot label, per requested feature.
    label_tables = {
        'race': {0: [1, 0, 0], 1: [0, 1, 0], 4: [0, 0, 1]},
        'age': {0: [1, 0, 0, 0], 1: [0, 1, 0, 0], 2: [0, 0, 1, 0], 3: [0, 0, 0, 1]},
        'gender': {0: [1, 0], 1: [0, 1]},
    }

    images = []
    labels = []

    for raw_record in tf.data.TFRecordDataset(filename):
        example = tf.train.Example()
        example.ParseFromString(raw_record.numpy())
        record = example.features.feature

        # The race filter applies to every feature, not just 'race'.
        ethnicity = record['race'].int64_list.value[0]
        if ethnicity not in (0, 1, 4):
            continue

        if feature == 'race':
            code = ethnicity
        elif feature == 'age':
            code = record['age'].int64_list.value[0]
            if code > 0:
                code -= 1
        elif feature == 'gender':
            code = record['gender'].int64_list.value[0]
        else:
            # Unsupported feature name: nothing is collected.
            continue

        label = label_tables[feature].get(code)
        if label is None:
            continue

        if not only_labels:
            jpg_bytes = record['jpg_bytes'].bytes_list.value[0]
            # np.frombuffer is the supported replacement for the deprecated
            # binary mode of np.fromstring.
            encoded = np.frombuffer(jpg_bytes, np.uint8)
            img_np = cv.imdecode(encoded, cv.IMREAD_GRAYSCALE)
            images.append(np.reshape(st.resize(img_np, (224, 224)), (224, 224, 1)))

        labels.append(label)

    if only_labels:
        return np.array(labels)
    return np.array(images), np.array(labels)

In [None]:
# Demographics table, loaded with the first CSV column as the index.
# get_demo_data_chexpert reads its 'PATIENT', 'AGE_AT_CXR' and 'GENDER' columns.
df = pd.read_csv('../Data/Chexpert_demo.csv', index_col=0)

def _chexpert_patient_value(example, patient_id_key, column):
    """Return `column` from the module-level `df` for the patient stored in `example`."""
    if patient_id_key is None:
        raise ValueError(
            "feature='age' and feature='gender' need the patient id of each record; "
            "pass patient_id_key=<name of the TFRecord feature holding the patient id>."
        )

    id_feature = example.features.feature[patient_id_key]
    # NOTE(review): the id's storage type is not visible in this notebook, so both a
    # bytes feature and an int64 feature are accepted - confirm against the writer.
    if id_feature.bytes_list.value:
        patient_id = id_feature.bytes_list.value[0].decode('utf-8')
    else:
        patient_id = str(id_feature.int64_list.value[0])

    patient = 'patient{i}'.format(i=patient_id.zfill(5))
    return df.loc[df['PATIENT'] == patient, column].values[0]


def get_demo_data_chexpert(data_split='test', feature='race', only_labels=False, random_aug=False,
                           patient_id_key=None):
    """Load CheXpert images and one-hot demographic labels from a TFRecord file.

    Only records whose stored 'race' value is 0, 1 or 4 are kept, whatever
    `feature` is requested. Age and gender are not read from the record; they
    are looked up in the module-level `df` by patient id.

    Args:
        data_split: 'train' selects the training file; any other value selects
            the test file.
        feature: 'race' (3-way one-hot for stored values 0, 1, 4), 'age'
            (4-way one-hot of `get_age_interval(AGE_AT_CXR)`) or 'gender'
            (2-way one-hot of `get_gender(GENDER)`). Any other string skips
            every record, so the result is empty.
        only_labels: when true, images are not decoded and only labels are returned.
        random_aug: when equal to True, each decoded image gets one randomly
            chosen transform (shear, rotation, fish, or scaling helper) before
            resizing.
        patient_id_key: name of the TFRecord feature holding the patient id.
            Required for feature='age'/'gender'. The previous code referenced
            the builtin `id` here and always failed with AttributeError.

    Returns:
        `np.array(labels)` if `only_labels`, otherwise the tuple
        `(np.array(images), np.array(labels))` where every image has been
        resized to 224x224 and reshaped to (224, 224, 1).

    Raises:
        ValueError: feature is 'age' or 'gender' and `patient_id_key` is None.
        IndexError: the patient id of a record has no row in `df`.
    """
    if data_split == 'train':
        filename = '../Data/Chexpert_train.tfrecords'
    else:
        filename = '../Data/Chexpert_test.tfrecords'

    # Integer code -> one-hot label.
    race_one_hot = {0: [1, 0, 0], 1: [0, 1, 0], 4: [0, 0, 1]}
    age_one_hot = {0: [1, 0, 0, 0], 1: [0, 1, 0, 0], 2: [0, 0, 1, 0], 3: [0, 0, 0, 1]}
    gender_one_hot = {0: [1, 0], 1: [0, 1]}

    images = []
    labels = []

    for raw_record in tf.data.TFRecordDataset(filename):
        example = tf.train.Example()
        example.ParseFromString(raw_record.numpy())

        # The race filter applies to every feature, not just 'race'.
        ethnicity = example.features.feature['race'].int64_list.value[0]
        if ethnicity not in (0, 1, 4):
            continue

        if feature == 'race':
            label = race_one_hot[ethnicity]
        elif feature == 'age':
            age = get_age_interval(_chexpert_patient_value(example, patient_id_key, 'AGE_AT_CXR'))
            label = age_one_hot.get(age)
        elif feature == 'gender':
            gender = get_gender(_chexpert_patient_value(example, patient_id_key, 'GENDER'))
            label = gender_one_hot.get(gender)
        else:
            # Unsupported feature name: nothing is collected.
            continue

        if label is None:
            continue

        if not only_labels:
            jpg_bytes = example.features.feature['jpg_bytes'].bytes_list.value[0]
            # np.frombuffer is the supported replacement for the deprecated
            # binary mode of np.fromstring.
            img_np = cv.imdecode(np.frombuffer(jpg_bytes, np.uint8), cv.IMREAD_GRAYSCALE)

            if random_aug == True:
                # One of four transforms, picked uniformly; the np.random call
                # order is kept so seeded runs draw the same numbers as before.
                choice = np.random.randint(0, 4)
                if choice == 0:
                    shear = np.random.uniform(-np.pi/4, np.pi/4)
                    img_np = shear_transform(shear, img_np)
                elif choice == 1:
                    angle = np.random.uniform(-90, 90)
                    img_np = rotation_transformation(angle, img_np)
                elif choice == 2:
                    img_np = fish(img_np, 0.4)
                else:
                    scale = np.random.uniform(0.4, 1)
                    img_np = scaling_transformation(scale, img_np)

            images.append(np.reshape(st.resize(img_np, (224, 224)), (224, 224, 1)))

        labels.append(label)

    if only_labels:
        return np.array(labels)
    return np.array(images), np.array(labels)

In [7]:
# Build the disease model and restore its weights.
model = define_model_diseases()

# Alternative checkpoint paths (swap into load_weights to use one of them):
#   checkpoints/model_mimic_combine
#   checkpoints/model_chexpert
#   checkpoints/model_chexpert_combine
model.load_weights('checkpoints/model_original')

# The first three layers of the model, in order.
input_layer, upsampling, denset121 = model.layers[:3]

# Cut the third layer (itself a model) at its 'max_pool' layer, then chain
# input -> upsampling -> truncated backbone to get a feature extractor.
pooled_backbone = tf.keras.Model(denset121.inputs, denset121.get_layer('max_pool').output)
Dnet = tf.keras.Sequential([input_layer, upsampling, pooled_backbone])

In [10]:
def _extract_split_features(split):
    """Load one MIMIC split (non-augmented) and return Dnet's predictions for its images.

    The labels returned alongside the images are discarded; the raw images go
    out of scope as soon as the function returns.
    """
    images, _unused_labels = get_demo_data_mimic(data_split=split, random_aug=False)
    return Dnet.predict(images)


X_train = _extract_split_features('train')
gc.collect()

X_test = _extract_split_features('test')
gc.collect()

In [None]:
# Display names for the classes of each demographic attribute.
Label_age = ['-40', '40-60', '60-80', '80-']
Label_gender = ['Female', 'Male']
Label_race = ['WHITE', 'BLACK', 'ASIAN']

# Fit one one-vs-rest logistic regression per attribute on the already-extracted
# X_train / X_test features, in the order age -> gender -> race. After the loop
# y_train, y_test, clf and y_preds hold the values from the last ('race') run.
for demo_feature in ('age', 'gender', 'race'):
    y_train = get_demo_data_mimic(data_split='train', feature=demo_feature, only_labels=True)
    y_test = get_demo_data_mimic(data_split='test', feature=demo_feature, only_labels=True)

    clf = OneVsRestClassifier(LogisticRegression()).fit(X_train, y_train)
    y_preds = clf.predict_proba(X_test)

    test(y_preds, y_test)

# The extracted features are not needed past this point.
del X_train, X_test
gc.collect()

In [None]:
# Random-guess baseline for gender.
# Reload the gender labels explicitly: the preceding cell leaves `y_test` holding
# the race labels, which do not match a 2-class guess.
y_test = get_demo_data_mimic(data_split='test', feature='gender', only_labels=True)

# One uniformly random class per test sample, one-hot encoded. The sample count
# comes from the labels rather than a hard-coded dataset size.
guess = np.random.randint(0, 2, (len(y_test),))
guess = np.eye(2)[guess]

test(guess, y_test)

plot(guess, y_test, Label_gender)

In [None]:
# Random-guess baseline for age.
# Reload the age labels explicitly so the 4-class guess is compared with
# 4-class labels instead of whatever `y_test` was left holding.
y_test = get_demo_data_mimic(data_split='test', feature='age', only_labels=True)

# One uniformly random class per test sample, one-hot encoded. The sample count
# comes from the labels rather than a hard-coded dataset size.
guess = np.random.randint(0, 4, (len(y_test),))
guess = np.eye(4)[guess]

test(guess, y_test)

plot(guess, y_test, Label_age)

In [None]:
# Random-guess baseline for race.
# Reload the race labels explicitly so this cell does not depend on which cell
# last assigned `y_test`.
y_test = get_demo_data_mimic(data_split='test', feature='race', only_labels=True)

# One uniformly random class per test sample, one-hot encoded. The sample count
# comes from the labels rather than a hard-coded dataset size.
guess = np.random.randint(0, 3, (len(y_test),))
guess = np.eye(3)[guess]

test(guess, y_test)

# The class-name list is defined as `Label_race`; `Labels_race` was a NameError.
plot(guess, y_test, Label_race)