In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import cv2 as cv
import warnings
from tensorflow.keras import layers, models
from tensorflow.keras import backend as K
import itertools
import skimage.transform as st
from enum import Enum
from sklearn.metrics import roc_curve, auc, classification_report, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns

# Record the TensorFlow version this notebook was run with.
print(tf.__version__)
# NOTE: this silences every Python warning for the rest of the session,
# including deprecation warnings raised by code in later cells.
warnings.filterwarnings("ignore")

# Optional (disabled): make only the first physical GPU visible to TensorFlow.
# gpus = tf.config.list_physical_devices(device_type='GPU')
# tf.config.set_visible_devices(devices=gpus[0], device_type='GPU')

# Mount Google Drive; the TFRecord and zip paths used in later cells are
# located under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

2.5.0
Mounted at /content/drive


# Dnet

In [8]:
def get_data(train=True):
    """Load 'No Finding' images and one-hot race labels from a TFRecord file.

    Every record is parsed as a ``tf.train.Example``. A record is kept only
    when its 'No Finding' feature equals 1 and its 'race' feature is one of
    the codes 0, 1 or 4; all other records are skipped.

    Parameters
    ----------
    train : bool, default True
        If True read ``mimic_train.tfrecords``, otherwise
        ``mimic_test.tfrecords`` (both under the mounted Drive folder).

    Returns
    -------
    images : np.ndarray
        Decoded grayscale images resized to 256x256, stacked along axis 0
        (no channel axis is added here).
    labels : np.ndarray
        int32 one-hot labels of shape (n_kept, 3); column 0 <-> race code 0,
        column 1 <-> race code 1, column 2 <-> race code 4.
        NOTE(review): presumably the same order as ``Labels_race`` -- confirm.
    """
    # Race code stored in the record -> one-hot label. Any other code is dropped.
    race_to_label = {
        0: (1, 0, 0),
        1: (0, 1, 0),
        4: (0, 0, 1),
    }

    if train:
        filename = '/content/drive/MyDrive/works/Data/mimic_train.tfrecords'
    else:
        filename = '/content/drive/MyDrive/works/Data/mimic_test.tfrecords'

    images = []
    labels = []

    for raw_record in tf.data.TFRecordDataset(filename):
        example = tf.train.Example()
        example.ParseFromString(raw_record.numpy())
        features = example.features.feature

        ethnicity = features['race'].float_list.value[0]

        # Keep only studies flagged as 'No Finding'.
        if features['No Finding'].float_list.value[0] != 1:
            continue

        # The stored code is a float; 0.0 / 1.0 / 4.0 hash like the int keys.
        label = race_to_label.get(ethnicity)
        if label is None:
            continue

        # np.frombuffer is the documented replacement for the deprecated
        # binary mode of np.fromstring and avoids copying the JPEG bytes.
        jpg_buffer = np.frombuffer(
            features['jpg_bytes'].bytes_list.value[0], np.uint8)
        img_np = cv.imdecode(jpg_buffer, cv.IMREAD_GRAYSCALE)

        images.append(st.resize(img_np, (256, 256)))
        labels.append(label)

    return np.array(images), np.array(labels, dtype=np.int32)

In [3]:
def test(y_preds, y_test, n_bootstraps=1000, rng_seed=42, ci_multiplier=2.262):
    """Bootstrap the ROC AUC of ``y_preds`` against ``y_test`` and report it.

    Samples are drawn with replacement ``n_bootstraps`` times, the ROC AUC of
    every resample is computed with ``roc_auc_score``, a histogram of the
    scores is shown, and the mean, standard deviation and an interval of
    ``mean +/- ci_multiplier * std`` are printed.

    Parameters
    ----------
    y_preds : np.ndarray
        Model scores, indexed by sample along axis 0.
    y_test : np.ndarray
        Ground-truth labels aligned with ``y_preds``.
    n_bootstraps : int, default 1000
        Number of bootstrap resamples.
    rng_seed : int, default 42
        Seed for the resampling RNG, so results are reproducible.
    ci_multiplier : float, default 2.262
        Factor applied to the bootstrap standard deviation to get the interval
        half-width. 2.262 equals the two-sided 95% Student-t critical value
        for 9 degrees of freedom.
        NOTE(review): for a bootstrap distribution a normal quantile (1.96) or
        a percentile interval is more usual -- confirm the intended value.

    Returns
    -------
    None
        Results are plotted and printed only.
    """
    bootstrapped_scores = []

    rng = np.random.RandomState(rng_seed)
    for _ in range(n_bootstraps):
        # bootstrap by sampling with replacement on the prediction indices
        indices = rng.randint(0, len(y_preds), len(y_preds))
        # The guard looks at the distinct values of the whole resampled label
        # array; ROC AUC needs both positives and negatives to be defined.
        if len(np.unique(y_test[indices])) < 2:
            continue

        score = roc_auc_score(y_test[indices], y_preds[indices])
        bootstrapped_scores.append(score)

    plt.hist(bootstrapped_scores, bins=100)
    plt.title('Histogram of the bootstrapped ROC AUC scores')
    plt.show()

    auc_score = np.array(bootstrapped_scores)

    mean_score = auc_score.mean()
    std_dev = auc_score.std()
    # The spread of the bootstrap distribution is used directly as the
    # standard error (the original divided by sqrt(1) through the `np.math`
    # alias, which no longer exists in NumPy 2.0).
    std_error = std_dev
    ci = ci_multiplier * std_error
    lower_bound = mean_score - ci
    upper_bound = mean_score + ci

    print("Sample auc mean: {:0.2f}". format(mean_score))
    print("Sample auc std: {:0.2f}".format(std_dev))
    print("Sample auc CI: {:0.2f}". format(ci))
    print("Confidence interval for the score: [{:0.2f} - {:0.2f}]".format(
        lower_bound, upper_bound))

In [None]:
# Shape of a single model input, passed as `input_shape` to DenseNet121 in
# define_model(): 256 x 256 with a trailing axis of size 1.
INPUT_SHAPE = (256, 256, 1)

def swish_activation(x):
    """Swish activation: the input scaled element-wise by its own sigmoid."""
    gate = K.sigmoid(x)
    return gate * x

def define_model():
    """Build the classifier: a DenseNet121 backbone plus a 3-way softmax head.

    The backbone is created without its classification top and without
    pretrained weights (``weights=None``), takes inputs of ``INPUT_SHAPE`` and
    ends in global max pooling. A ``Dense(3, softmax)`` layer is attached to
    the pooled output.

    Returns
    -------
    tf.keras.Model
        Uncompiled model named ``'model'``.
    """
    backbone = tf.keras.applications.densenet.DenseNet121(
        input_shape=INPUT_SHAPE,
        include_top=False,
        weights=None,
        pooling='max',
    )

    # Three output units, one per class.
    class_probs = tf.keras.layers.Dense(3, activation='softmax')(backbone.output)

    return tf.keras.Model(inputs=backbone.input, outputs=class_probs, name='model')

In [None]:
# Build a fresh network (define_model uses weights=None, so nothing is
# pretrained) and print its layer table.
model = define_model()

model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 256, 256, 1) 0                                            
__________________________________________________________________________________________________
zero_padding2d (ZeroPadding2D)  (None, 262, 262, 1)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1/conv (Conv2D)             (None, 128, 128, 64) 3136        zero_padding2d[0][0]             
__________________________________________________________________________________________________
conv1/bn (BatchNormalization)   (None, 128, 128, 64) 256         conv1/conv[0][0]                 
______________________________________________________________________________________________

In [None]:
def scheduler(epoch, lr):
    """Learning-rate schedule for ``tf.keras.callbacks.LearningRateScheduler``.

    On even epochs (including epoch 0) the rate is multiplied by exp(-0.05);
    on odd epochs it is left unchanged.

    Parameters
    ----------
    epoch : int
        Zero-based epoch index supplied by the callback.
    lr : float
        Current learning rate.

    Returns
    -------
    float
        The learning rate to use for this epoch. A plain Python float is
        returned (not a tensor) because newer Keras versions only accept
        float outputs from the schedule function.
    """
    if epoch % 2 == 0:
        return float(lr * np.exp(-0.05))
    return float(lr)

# Apply `scheduler` through Keras' learning-rate callback.
callback = [tf.keras.callbacks.LearningRateScheduler(scheduler)]

# `metrics` is given as a list: that is the documented form, and newer Keras
# versions reject a bare string.
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              metrics=['AUC'])

# get_data() defaults to train=True, i.e. the training TFRecord file.
X_train, y_train = get_data()

model.fit(X_train, y_train, epochs=10, shuffle=True, callbacks=callback)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f1bb00ee950>

In [4]:
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    Given a confusion matrix (cm), draw it as an annotated heat map.

    Arguments
    ---------
    cm:           confusion matrix, e.g. from sklearn.metrics.confusion_matrix
                  (rows are true classes, columns are predicted classes)

    target_names: the class names used as tick labels, for example
                  ['high', 'medium', 'low']; pass None to keep numeric ticks

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues (default: 'Blues')

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions (each row divided by its sum;
                  a row with no samples is shown as zeros)

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    cm = np.asarray(cm)

    # Accuracy is defined on raw counts, so compute it before normalizing.
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        # Normalize BEFORE drawing so that the heat-map colours, the printed
        # numbers and the text-colour threshold all refer to the same values.
        # Rows without any samples stay at zero instead of becoming NaN.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text, the rest black.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Run the layout pass after the labels exist so they are taken into account.
    plt.tight_layout()

    plt.show()
    
def plot_roc(y_test, preds, title, label, save_path='Dnet_race_wba_auc.svg'):
    """Plot one ROC curve per class, save the figure and show it.

    Parameters
    ----------
    y_test : np.ndarray
        Ground-truth indicator matrix; column ``i`` is the binary target for
        class ``i``.
    preds : np.ndarray
        Predicted scores with the same column layout as ``y_test``.
    title : str
        Figure title.
    label : sequence of str
        Class names, one per column; used in the legend together with the AUC.
    save_path : str or None, default 'Dnet_race_wba_auc.svg'
        Where to save the figure. Pass None to skip saving.
    """
    plt.figure(figsize=(8, 6))

    for i, class_name in enumerate(label):
        fpr, tpr, _ = roc_curve(y_test[:, i], preds[:, i])
        roc_auc = auc(fpr, tpr)
        # plot the roc curve for this class
        plt.plot(fpr, tpr, linestyle='solid',
                 label='{} AUC={:.3f}'.format(class_name, roc_auc))

    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc="lower right")
    # Chance-level diagonal; drawn after the legend call and without a label.
    plt.plot([0, 1], [0, 1], color='orange', linestyle='--')

    if save_path is not None:
        plt.savefig(save_path)

    plt.show()

In [5]:
# Display names of the three classes; plot() passes them to plot_roc as legend
# labels and to plot_confusion_matrix as tick labels.
# NOTE(review): the order presumably matches the one-hot columns produced by
# get_data (race codes 0, 1, 4) -- confirm.
Labels_race = ['WHITE', 'AFRICAN AMERICAN', 'ASIA']

def plot(y_preds, y_test):
    """Draw the per-class ROC curves and the confusion matrix of the predictions.

    Parameters
    ----------
    y_preds : array-like
        Predicted class scores, one row per sample and one column per entry
        of ``Labels_race``.
    y_test : array-like
        One-hot ground-truth labels with the same layout.
    """
    plot_roc(y_test, y_preds, 'ROC', Labels_race)

    num_classes = len(Labels_race)
    true_idx = np.argmax(y_test, axis=1)
    pred_idx = np.argmax(y_preds, axis=1)

    # Rows are true classes, columns are predicted classes. np.add.at
    # accumulates correctly when the same (true, pred) pair occurs many times.
    cm_race = np.zeros((num_classes, num_classes), dtype=int)
    np.add.at(cm_race, (true_idx, pred_idx), 1)

    plot_confusion_matrix(cm_race,
                          normalize=False,
                          target_names=Labels_race,
                          title='CM')

In [6]:
# Extract the previously saved model from Drive; the archive unpacks into
# saved_model/Dnet_race_No_finding/ under the current working directory.
!unzip '/content/drive/MyDrive/works/Dnet_race_No_finding_V2.zip'

Archive:  /content/drive/MyDrive/works/Dnet_race_No_finding_V2.zip
   creating: saved_model/Dnet_race_No_finding/
   creating: saved_model/Dnet_race_No_finding/assets/
  inflating: saved_model/Dnet_race_No_finding/saved_model.pb  
   creating: saved_model/Dnet_race_No_finding/variables/
  inflating: saved_model/Dnet_race_No_finding/variables/variables.index  
  inflating: saved_model/Dnet_race_No_finding/variables/variables.data-00000-of-00001  


In [None]:
# NOTE: as written this cell relies on a `model` object that already exists in
# the kernel. Uncomment the next line to evaluate the model extracted by the
# unzip cell instead.
# model = tf.keras.models.load_model('/content/saved_model/Dnet_race_No_finding')

# Load the test TFRecord file (train=False).
X_test, y_test = get_data(train=False)

# Predict class scores, then print the bootstrap AUC summary and draw the
# ROC curves and confusion matrix.
y_preds = model.predict(X_test)
test(y_preds, y_test)
plot(y_preds, y_test)

In [None]:
# Persist the current `model` to the relative path 'Dnet_race_No_finding'
# (resolved against the working directory).
model.save('Dnet_race_No_finding')

INFO:tensorflow:Assets written to: Dnet_race_No_finding/assets
