In [0]:
# The dataset lives on Google Drive, so Drive has to be attached to this Colab
# runtime first. drive.mount asks for authorization and then exposes the Drive
# contents under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Check that the drive is mounted by listing the mount point.
# NOTE: os.chdir changes the working directory for every later cell; the rest
# of this notebook uses absolute paths, so nothing below relies on it.
# `!ls` is an IPython shell escape and only works inside a notebook.
import os
os.chdir("/content/drive/")
!ls

In [0]:
# Import the modules used throughout the notebook.
# Nothing needs to be pip-installed here: `glob` is part of the Python standard
# library (the PyPI package "glob3" is an unrelated third-party project and was
# never what `import glob` resolves to).
from tensorflow import keras
import numpy as np
import glob
import scipy
import pandas as pd
import sklearn
import re

In [0]:
# List the frontal-view training images found in the "study1" folder of every
# directory directly under train/ (without recursive=True, "**" matches a single
# directory level). Only paths are collected here; pixels are read further down.
# glob returns files in arbitrary order, so the list is sorted: model.fit is
# later called with validation_split, and a stable order keeps the same images
# in the training/validation parts on every run.
file_train = sorted(glob.glob('/content/drive/My Drive/model1/CheXpert-v1.0-small/train/**/study1/*frontal.jpg'))


In [0]:
# Read every training X-ray as a single-channel 8-bit image resized to 320x320.
# The Keras image loader is used because scipy.ndimage.imread and
# scipy.misc.imresize no longer exist in current SciPy releases; bilinear
# interpolation matches the default that imresize used.
if len(file_train) == 0:
    # Fail early with a readable message instead of a shape error further down.
    raise FileNotFoundError('No training images matched the glob pattern - is Google Drive mounted?')

xrays_train = []

for file in file_train:
    img = keras.preprocessing.image.load_img(file,
                                             color_mode='grayscale',
                                             target_size=(320, 320),
                                             interpolation='bilinear')
    xrays_train.append(np.asarray(img, dtype=np.uint8))

# (num_images, 320, 320) -> (num_images, 320, 320, 1): add an explicit channel axis.
xrays_train = np.array(xrays_train)
xrays_train = np.expand_dims(xrays_train, axis=3)

In [0]:
# Sanity check of the loaded array. With 2-D (grayscale) source images the
# expand_dims call in the loading cell gives (num_images, 320, 320, 1).
xrays_train.shape

In [0]:
# Pixel intensities are stored as 8-bit values in the range 0-255, so dividing
# by 255 rescales every element of the array into [0, 1].
# The array is cast to float32 first: dividing an integer array directly yields
# float64, which needs twice the memory for no benefit to the network.
xrays_train = xrays_train.astype(np.float32) / 255.0



In [0]:
# Turn each absolute image path into the part that follows ".../train/",
# with forward slashes, so it can be matched against the index built from
# the 'Path' column of train.csv.
file_train = np.array([
    re.sub(".*train.", '', path).replace('\\', '/')
    for path in file_train
])


In [0]:
# Read the label sheet and index it by the image path relative to train/,
# then pick the rows in exactly the order of the loaded images.
y_train = pd.read_csv('/content/drive/My Drive/model1/CheXpert-v1.0-small/train.csv')
y1 = np.array(y_train['Path'].replace(to_replace='.*train/', value='', regex=True))
y_train = y_train.set_index(y1).loc[file_train]

# Keep only the columns from position 5 onward (the earlier ones, e.g. sex and
# age, are not used as targets), then map both missing entries and the
# uncertainty marker -1 to 0.
y_train = (
    y_train
    .iloc[:, 5:]
    .fillna(0)
    .replace(-1.0, 0)
)

In [0]:
# Print a small fixed table of AUC figures for five findings. The numbers are
# hard-coded here, not computed by this notebook. Every name is padded so that
# the "auc" column starts at the same position.
x = ' '

for finding, score in [('Atelectasis', 'auc: 0.718'),
                       ('Cardiomegaly', 'auc: 0.735'),
                       ('Consolidation', 'auc: 0.742'),
                       ('Edema', 'auc: 0.804'),
                       ('Pleural Effusion', 'auc: 0.751')]:
    print(finding, x * (22 - len(finding)), score)

In [0]:
# Inspect the prepared label table: one row per loaded training image,
# one column per label kept by the iloc[:, 5:] selection.
y_train

In [0]:
# Column-wise sum of the label table. Assuming the CSV only holds 1 / 0 / -1 /
# blank (TODO confirm), this is the number of positive images per finding.
y_train.sum()

In [0]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import backend as K
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.models import Model, model_from_json
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras import optimizers

#Xception (ImageNet weights) as backbone; all of its layers stay trainable
def build_model():
    """Build and compile the multi-output Xception classifier.

    The network is an ImageNet-initialised Xception without its top, followed
    by global average pooling, a 1024-unit ReLU layer and 14 independent heads
    named ``output00`` ... ``output13``. Each head is a 2-unit sigmoid Dense
    layer trained with binary cross-entropy.

    Returns:
        The compiled ``Model``; it takes 320x320x3 inputs and returns one
        tensor per head, in head order.
    """
    base_model = Xception(weights='imagenet', include_top=False, input_shape=(320,320,3))

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)

    # The heads are created with ordinary Python instead of exec(): creating
    # local variables through exec() inside a function is not reliable across
    # Python versions and hides the layer names from readers and tools.
    output_names = ['output' + str(i).zfill(2) for i in range(14)]
    outputs = [Dense(2, activation='sigmoid', name=name)(x) for name in output_names]

    model = Model(inputs=base_model.input, outputs=outputs)

    # Fine-tune the whole backbone rather than freezing it.
    for layer in base_model.layers:
        layer.trainable = True

    def mean_pred(y_true, y_pred):
        """Extra metric: the mean of the predicted values (y_true is ignored)."""
        return K.mean(y_pred)

    loss_dict = {name: 'binary_crossentropy' for name in output_names}
    # `learning_rate` is the documented argument name; `lr` is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(optimizer=optimizer,
                  loss=loss_dict,
                  metrics=["accuracy", mean_pred])

    return model


In [0]:
# Build the network once to print its layer summary.
# NOTE: the training cell further down calls build_model() again, so this
# instance is only used for the summary.
model = build_model()
model.summary()

In [0]:
# Targets for the multi-output model: label column i, one-hot encoded into two
# classes, is stored under the name of the matching head ('output00' ...
# 'output13'). Built with a dict comprehension instead of exec() on generated
# source strings.
y_dict_train = {
    'output' + str(i).zfill(2): keras.utils.to_categorical(y_train.iloc[:, i], 2)
    for i in range(14)
}
 


In [0]:
# The model is built for 3-channel inputs (input_shape=(320,320,3)), so the
# first channel of every image is repeated three times along axis 3.
stack_xrays_train = np.stack([xrays_train[:, :, :, 0]] * 3, axis=3)

In [0]:
# Visual sanity check: display one training image (index 5) after the
# channel replication.
import matplotlib.pyplot as plt 

imgplot = plt.imshow(stack_xrays_train[5])
plt.show()

In [0]:
# Release the path list and the single-channel array; only the 3-channel
# copy is needed from here on.
del file_train, xrays_train

In [0]:

from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Start training from a freshly built network.
model = build_model()

# Stop when val_loss has not improved for 4 epochs, and keep the model with
# the best val_loss on Drive.
es_cb = EarlyStopping(
    monitor='val_loss',
    patience=4,
    mode='auto',
    verbose=1,
)
cp_cb = ModelCheckpoint(
    filepath='/content/drive/My Drive/model1/CheXpert-v1.0-small/chexpert.hdf5',
    monitor='val_loss',
    save_best_only=True,
    mode='auto',
    verbose=1,
)
# Optional logging callbacks (disabled):
#tensorboard = TensorBoard(log_dir='/content/drive/My Drive/model1/CheXpert-v1.0-small/MyTensorboard', histogram_freq=1, write_graph=True, write_images=True)
#csv_logger = CSVLogger('/content/drive/My Drive/model1/CheXpert-v1.0-small/logging.csv', append=True, separator=';')

batch_size = 12
nb_epoch = 10

# 20% of the samples are held out by Keras for validation.
history = model.fit(
    stack_xrays_train,
    y_dict_train,
    epochs=nb_epoch,
    batch_size=batch_size,
    validation_split=0.20,
    shuffle=True,
    callbacks=[cp_cb, es_cb],
)

# EVALUATION

In [0]:
# Collect the frontal-view validation images, two directory levels below
# valid/ (without recursive=True each "**" matches one directory level).
# Sorted so that the order is the same on every run.
file_valid = sorted(glob.glob('/content/drive/My Drive/model1/CheXpert-v1.0-small/valid/**/**/*frontal.jpg'))
if len(file_valid) == 0:
    # Fail early with a readable message instead of a shape error further down.
    raise FileNotFoundError('No validation images matched the glob pattern - is Google Drive mounted?')

# Read every image as a single-channel 8-bit picture resized to 320x320.
# The Keras loader replaces scipy.ndimage.imread / scipy.misc.imresize, which
# no longer exist in current SciPy releases; bilinear interpolation matches
# the default that imresize used.
xrays_valid = []

for file in file_valid:
    img = keras.preprocessing.image.load_img(file,
                                             color_mode='grayscale',
                                             target_size=(320, 320),
                                             interpolation='bilinear')
    xrays_valid.append(np.asarray(img, dtype=np.uint8))

# (num_images, 320, 320) -> (num_images, 320, 320, 1)
xrays_valid = np.array(xrays_valid)
xrays_valid = np.expand_dims(xrays_valid, axis=3)

# Rescale 0-255 intensities to [0, 1]; float32 avoids the float64 array that a
# plain integer division would create.
xrays_valid = xrays_valid.astype(np.float32) / 255.0

# Reduce the absolute paths to the part after ".../valid/" (forward slashes)
# so they match the index built from valid.csv below.
file_valid = [re.sub(".*valid.",'',x) for x in file_valid]
file_valid = [x.replace('\\','/') for x in file_valid]
file_valid = np.array(file_valid)

# VALIDATE
# phenotypes
y_valid = pd.read_csv('/content/drive/My Drive/model1/CheXpert-v1.0-small/valid.csv')
y3 = y_valid['Path'].replace(to_replace='.*valid/',value='', regex=True)
y3 = np.array(y3)
y_valid = y_valid.set_index(y3)
# Select the label rows in the same order as the loaded images.
y_valid = y_valid.loc[file_valid]

# Keep the label columns (position 5 onward); missing entries and the
# uncertainty marker -1 both become 0.
y_valid = y_valid.iloc[:,5:].fillna(0).replace(-1.0,0)

# The model takes 3-channel inputs: repeat the single channel three times,
# giving (num_images, 320, 320, 3).
stack_xrays_valid = np.repeat(xrays_valid, 3, axis=3)

In [0]:
# Release the path list and the single-channel array; only the 3-channel
# copy is needed for evaluation.
del file_valid, xrays_valid

In [0]:
# Validation targets in the same layout as the training targets: label column
# i, one-hot encoded into two classes, keyed by the head name 'output00' ...
# 'output13'. Built with a dict comprehension instead of exec() on generated
# source strings.
y_dict_valid = {
    'output' + str(i).zfill(2): keras.utils.to_categorical(y_valid.iloc[:, i], 2)
    for i in range(14)
}

In [0]:
from sklearn.metrics import roc_curve, auc

# ROC AUC per finding on the validation set.
# model.predict returns one (num_images, 2) array per output head, in head
# order; column 1 is the score for the positive class, matching the
# to_categorical encoding used for y_dict_valid.
valid_predictions = model.predict(stack_xrays_valid)

valid_auc = {}
for i, y_pred in enumerate(valid_predictions):
    head = 'output' + str(i).zfill(2)
    finding = y_valid.columns[i]
    y_true = y_dict_valid[head][:, 1]
    if y_true.min() == y_true.max():
        # A ROC curve needs both classes; skip findings that have only one.
        print(finding, 'auc: n/a (only one class present in the validation labels)')
        continue
    fpr, tpr, _ = roc_curve(y_true, y_pred[:, 1])
    valid_auc[finding] = auc(fpr, tpr)
    print(finding, 'auc: %.3f' % valid_auc[finding])

In [0]:
# Run the trained model over the validation images in batches of 4 and keep
# the values returned by Keras (presumably the total loss followed by the
# per-head losses and metrics - check model.metrics_names to confirm).
evaluation = model.evaluate(stack_xrays_valid, y_dict_valid, batch_size=4, verbose=1) 


In [0]:
import matplotlib.pyplot as plt

%matplotlib inline

plt.plot(history.history['loss'],"o-",label="loss",)
plt.plot(history.history['val_loss'],"o-",label="val_loss")
plt.title('Model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(loc='upper right')
plt.show()

In [0]:
# Display names for the 14 output heads, used as legend text in the plots
# below; entry i is used for head 'output<i>'.
labels = [
    'No Finding',
    'Enlarged Cardiomediastinum',
    'Cardiomegaly',
    'Lung Opacity',
    'Lung Lesion',
    'Edema',
    'Consolidation',
    'Pneumonia',
    'Atelectasis',
    'Pneumothorax',
    'Pleural Effusion',
    'Pleural Other',
    'Fracture',
    'Support Devices',
]

In [0]:
# One panel per finding: training and validation loss of the matching head.
# The history is indexed directly rather than through exec() on generated
# source, so a label containing a quote character cannot break the cell.
fig = plt.figure(figsize=(16,20))
axes = []
for i in range(14):
    head = 'output' + str(i).zfill(2)
    ax = fig.add_subplot(5, 3, i+1)
    ax.plot(history.history[head + '_loss'], 'o-', label=labels[i] + '_loss')
    ax.plot(history.history['val_' + head + '_loss'], 'o-', label='val_' + labels[i] + '_loss')
    ax.legend(loc='upper right')
    axes.append(ax)

plt.show()

In [0]:
# One panel per finding: training and validation accuracy of the matching head.
# The history is indexed directly rather than through exec() on generated source.
fig = plt.figure(figsize=(16,20))
axes = []
for i in range(14):
    head = 'output' + str(i).zfill(2)
    # Keras records the "accuracy" metric as '<head>_acc' in older releases and
    # as '<head>_accuracy' in newer ones; use whichever key is present.
    acc_suffix = '_acc' if head + '_acc' in history.history else '_accuracy'
    ax = fig.add_subplot(5, 3, i+1)
    ax.plot(history.history[head + acc_suffix], 'o-', label=labels[i] + '_acc')
    ax.plot(history.history['val_' + head + acc_suffix], 'o-', label='val_' + labels[i] + '_acc')
    ax.set_ylim([0, 1])
    ax.legend(loc='lower right')
    axes.append(ax)

plt.show()

In [0]:
# Predict the 14 heads for one validation image (index 24).
# The array is called stack_xrays_valid, and the slice 24:25 (rather than [24])
# keeps the leading batch axis, giving the (1, 320, 320, 3) input the model
# expects.
prediction = model.predict(stack_xrays_valid[24:25])
print(prediction)