In [None]:
# Mount Google Drive inside the Colab runtime so the dataset stored there is reachable.
from google.colab import drive
drive.mount('/content/gdrive')
# Make the dataset folder the working directory: the relative paths used later in
# this notebook (e.g. 'Only_labels/Images', 'models') are resolved against it.
%cd /content/gdrive/My Drive/hackathons/DATASET_MED_My
# List the folder contents as a quick sanity check that the mount and cd worked.
%ls

Mounted at /content/gdrive
/content/gdrive/.shortcut-targets-by-id/1EMMRWEB_9gCSmnaLRszJOWMg0gcogEsh/hackathons/DATASET_MED_My
CRX14.ipynb      [0m[01;34mmodels[0m/       [01;34msrc[0m/                        [01;34mWith_bbox[0m/
environment.yml  [01;34mOnly_labels[0m/  test_labels_file__hack.txt
LICENSE          README.md     Untitled0.ipynb


In [None]:
import glob
import gzip
import os
import tarfile
import time
import warnings
from urllib.request import urlretrieve

import pandas as pd

import keras
from keras.applications import DenseNet121, ResNet50
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Dense, Flatten
from keras.metrics import AUC
from keras.models import load_model, Model
from keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

import tensorflow as tf
# `tf.test.is_gpu_available()` is deprecated (see the warning it prints); query
# the visible physical GPU devices instead. `bool(...)` keeps the displayed
# result a simple True/False, as before.
bool(tf.config.list_physical_devices('GPU'))

# import efficientnet.keras as efn

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

In [None]:
import tensorflow as tf

# `tf.test.is_gpu_available()` is deprecated (see the warning it prints); query
# the visible physical GPU devices instead. `bool(...)` keeps the displayed
# result a simple True/False, as before.
bool(tf.config.list_physical_devices('GPU'))

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

In [None]:
# --- Paths (relative to the current working directory) ----------------------
ROOT_DIR = '.'
DATA_PATH = 'Only_labels/Images'
CHECKPOINT_PATH = 'models'

# --- Training hyper-parameters ----------------------------------------------
SAMPLE_RATE = 1.00
EPOCHS = 50
BATCH_SIZE = 64
CHECKPOINT_RATE = 2

# --- Target labels ------------------------------------------------------------
# The order matters: it fixes the column order of the label matrix fed to the
# model and therefore the meaning of each output unit / prediction column.
CLASSES = [
    'Hernia', 'Pneumonia', 'Fibrosis', 'Edema', 'Emphysema',
    'Cardiomegaly', 'Pleural_Thickening', 'Consolidation', 'Pneumothorax',
    'Mass', 'Nodule', 'Atelectasis', 'Effusion', 'Infiltration',
]

# Build the label-file location from DATA_PATH instead of repeating the folder
# name, so the data directory only has to be changed in one place.
df = pd.read_csv(os.path.join(DATA_PATH, 'labels.csv'))

# Fail fast with a readable message if the file lacks a column that the data
# generators below rely on (the image path column and one column per class).
missing_columns = [col for col in ['Image Index'] + CLASSES if col not in df.columns]
assert not missing_columns, 'labels.csv is missing columns: {}'.format(missing_columns)

# Preprocessing

In [None]:
# Fixed seed so the train/test partition (and every metric computed on it) is
# reproducible across kernel restarts.
SPLIT_SEED = 42

# Hold out 20% of the rows as the test set; the remaining 80% is later divided
# into training and validation subsets by the image data generator.
# NOTE(review): this is a row-wise split. If several rows can belong to the same
# patient, a grouped split may be needed to avoid leakage — confirm against the
# columns available in labels.csv.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=SPLIT_SEED)

# Sanity check: the split must neither drop nor duplicate rows.
assert len(train_df) + len(test_df) == len(df)

print('Training/Validation Samples:  {}'.format(len(train_df)))
print('Test Samples:  {}'.format(len(test_df)))

Training/Validation Samples:  17155
Test Samples:  4289


In [None]:
# Both generators only rescale pixel values from [0, 255] to [0, 1]; no
# augmentation is applied.
# NOTE(review): the ImageNet ResNet50 weights used later were presumably trained
# with the dedicated `preprocess_input` transform rather than a plain 1/255
# rescale — confirm whether that mismatch matters here.

# Test images: rescaling only.
test_datagen = ImageDataGenerator(rescale=1. / 255)

# Training images: same rescaling, with 10% of the rows reserved so that the
# 'training' and 'validation' subsets can be drawn from the same dataframe.
train_datagen = ImageDataGenerator(validation_split=0.1, rescale=1. / 255)

In [None]:
# Training stream: the 'training' subset of train_df (the part not reserved by
# train_datagen's validation_split), reshuffled and served in batches.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    subset='training',
    # No base directory is given, so the 'Image Index' values are used as the
    # file paths themselves.
    directory=None,
    x_col='Image Index',
    # class_mode='raw' hands the CLASSES columns through unchanged as the
    # target array, one column per class.
    y_col=CLASSES,
    class_mode='raw',
    target_size=(224, 224),
    batch_size=BATCH_SIZE,
    shuffle=True
)

Found 15440 validated image filenames.


In [None]:
# Validation stream: the 'validation' subset of train_df reserved by
# train_datagen's validation_split, served in batches.
valid_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    subset='validation',
    # No base directory is given, so the 'Image Index' values are used as the
    # file paths themselves.
    directory=None,
    x_col='Image Index',
    # class_mode='raw' hands the CLASSES columns through unchanged as the
    # target array, one column per class.
    y_col=CLASSES,
    class_mode='raw',
    target_size=(224, 224),
    batch_size=BATCH_SIZE,
    shuffle=True
)

Found 1715 validated image filenames.


In [None]:
# Test stream over the held-out test_df rows.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    # No base directory is given, so the 'Image Index' values are used as the
    # file paths themselves.
    directory=None,
    x_col='Image Index',
    # class_mode='raw' hands the CLASSES columns through unchanged as the
    # target array, one column per class.
    y_col=CLASSES,
    class_mode='raw',
    target_size=(224, 224),
    batch_size=BATCH_SIZE,
    # Keep the dataframe order so predictions can be compared row by row with
    # the labels in test_df.
    shuffle=False
)

Found 4289 validated image filenames.


# Modeling

In [None]:
class TimeHistory(keras.callbacks.Callback):
    """Keras callback that records how long each training epoch takes.

    After training, ``self.times`` holds one wall-clock duration in seconds
    per finished epoch, in the order the epochs ran.

    The ``logs`` arguments are accepted only to satisfy the callback
    interface; they are never read. They default to ``None`` rather than a
    shared mutable ``{}``.
    """

    def on_train_begin(self, logs=None):
        """Start a fresh list of epoch durations for this training run."""
        self.times = []

    def on_epoch_begin(self, batch, logs=None):
        """Remember the wall-clock time at which the epoch started.

        ``batch`` is unused; despite its name this hook is invoked once per
        epoch (the parameter name is kept unchanged for compatibility).
        """
        self.epoch_time_start = time.time()

    def on_epoch_end(self, batch, logs=None):
        """Append the elapsed seconds since the matching ``on_epoch_begin``.

        ``batch`` is unused (see ``on_epoch_begin``).
        """
        self.times.append(time.time() - self.epoch_time_start)

In [None]:
# Backbone: ResNet50 without its ImageNet classification head, initialised with
# ImageNet weights. pooling='avg' makes the backbone end in a global average
# pool, so its output is a flat feature vector per image.
resnet_base = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
    pooling='avg'
)

# Multi-label head: one independent sigmoid unit per entry of CLASSES. The
# width is derived from CLASSES (instead of a hard-coded 14) so the model
# always matches the label columns produced by the data generators.
output = Dense(len(CLASSES), activation='sigmoid')(resnet_base.output)

resnet = Model(inputs=resnet_base.input, outputs=output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Print the layer-by-layer summary of the assembled model (layer types, output
# shapes, parameter counts and connectivity).
resnet.summary()


Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
_______________________________________________________________________________________

In [None]:
# Configure training. Binary cross-entropy pairs with the per-class sigmoid
# outputs of the model: every class is scored as its own yes/no decision.
resnet.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'binary_accuracy',
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ],
)

In [None]:
# Make sure the checkpoint folder exists before ModelCheckpoint tries to write
# into it (no-op when it is already there).
os.makedirs(CHECKPOINT_PATH, exist_ok=True)

# Callbacks:
#   * resnet_time       - records the duration of every epoch,
#   * resnet_stopping   - stops after 5 epochs without improvement of the
#                         monitored quantity (left at the Keras default) and
#                         restores the best weights,
#   * resnet_checkpoint - keeps only the best model seen so far on disk.
resnet_time = TimeHistory()
resnet_stopping = EarlyStopping(patience=5, restore_best_weights=True)
resnet_checkpoint = ModelCheckpoint(
    filepath=CHECKPOINT_PATH + '/resnet-best_new.hdf5',
    save_best_only=True
)

# `Model.fit_generator` is deprecated; `Model.fit` accepts the generator
# directly. `shuffle=True` is passed on unchanged from the original call.
resnet_history = resnet.fit(
    x=train_generator,
    epochs=EPOCHS,
    shuffle=True,
    validation_data=valid_generator,
    callbacks=[resnet_time, resnet_stopping, resnet_checkpoint]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50


In [None]:
# Reload the best checkpoint written during training. compile=False skips
# restoring the loss/optimizer/metrics, which are not needed for inference.
# Note that this rebinds `resnet` to the reloaded model.
resnet = load_model(CHECKPOINT_PATH + '/resnet-best_new.hdf5',
                    compile=False)

# `Model.predict_generator` is deprecated (see the warning it prints);
# `Model.predict` accepts the generator directly. test_generator was created
# with shuffle=False, so the rows of `resnet_pred` follow the order of test_df,
# and its columns follow the order of CLASSES.
resnet_pred = resnet.predict(
    test_generator,
    verbose=1
)

Instructions for updating:
Please use Model.predict, which supports generators.


In [None]:
# Report the ROC AUC of every class separately: column `col` of the prediction
# matrix is scored against the ground-truth column of the same class in test_df.
for col, label in enumerate(CLASSES):
    class_auc = roc_auc_score(test_df[label], resnet_pred[:, col])
    print(f'{label} AUC:  ', class_auc)

Hernia AUC:   0.9007534388608558
Pneumonia AUC:   0.7378528799819113
Fibrosis AUC:   0.7651106997549919
Edema AUC:   0.8728330679354107
Emphysema AUC:   0.8202901587467885
Cardiomegaly AUC:   0.9013198499738879
Pleural_Thickening AUC:   0.6934678053660496
Consolidation AUC:   0.735363198964043
Pneumothorax AUC:   0.7895499594325608
Mass AUC:   0.7445542944485999
Nodule AUC:   0.7107298988270294
Atelectasis AUC:   0.7247192476325526
Effusion AUC:   0.7721840580139895
Infiltration AUC:   0.6665275245525459
