In [95]:
import numpy as np
import pandas as pd
import os
from glob import glob
import matplotlib.pyplot as plt
from itertools import chain
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.applications.mobilenet import MobileNet
from keras.layers import GlobalAveragePooling2D, Dense, Dropout
from keras.models import Sequential

In [96]:
# Load and preprocess data
all_xray_df = pd.read_csv("E:/A__CVPR/Dataset/bbox/Data_Entry_2017.csv")
all_image_paths = {os.path.basename(x): x for x in glob(os.path.join('D:/New CX/CXR8/images', '*.png'))}
print('Scans found:', len(all_image_paths), ', Total Headers', all_xray_df.shape[0])
all_xray_df['path'] = all_xray_df['Image Index'].map(all_image_paths.get)

Scans found: 112120 , Total Headers 112120


In [97]:
label_counts = all_xray_df['Finding Labels'].value_counts()[:15]
all_xray_df['Finding Labels'] = all_xray_df['Finding Labels'].map(lambda x: x.replace('No Finding', ''))
all_labels = np.unique(list(chain(*all_xray_df['Finding Labels'].map(lambda x: x.split('|')).tolist())))
all_labels = [x for x in all_labels if len(x) > 0]
print('All Labels ({}): {}'.format(len(all_labels), all_labels))
for c_label in all_labels:
    if len(c_label) > 1:
        all_xray_df[c_label] = all_xray_df['Finding Labels'].map(lambda finding: 1.0 if c_label in finding else 0)

All Labels (14): ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion', 'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'Nodule', 'Pleural_Thickening', 'Pneumonia', 'Pneumothorax']


In [98]:
MIN_CASES = 1000
all_labels = [c_label for c_label in all_labels if all_xray_df[c_label].sum() > MIN_CASES]
print('Clean Labels ({})'.format(len(all_labels)), [(c_label, int(all_xray_df[c_label].sum())) for c_label in all_labels])

Clean Labels (13) [('Atelectasis', 11559), ('Cardiomegaly', 2776), ('Consolidation', 4667), ('Edema', 2303), ('Effusion', 13317), ('Emphysema', 2516), ('Fibrosis', 1686), ('Infiltration', 19894), ('Mass', 5782), ('Nodule', 6331), ('Pleural_Thickening', 3385), ('Pneumonia', 1431), ('Pneumothorax', 5302)]


In [99]:
sample_weights = all_xray_df['Finding Labels'].map(lambda x: len(x.split('|')) if len(x) > 0 else 0).values + 4e-2
sample_weights /= sample_weights.sum()
all_xray_df = all_xray_df.sample(40000, weights=sample_weights)

In [100]:
train_df, valid_df = train_test_split(all_xray_df, test_size=0.25, random_state=2018, stratify=all_xray_df['Finding Labels'].map(lambda x: x[:4]))
print('train', train_df.shape[0], 'validation', valid_df.shape[0])
valid_df['newLabel'] = valid_df.apply(lambda x: x['Finding Labels'].split('|'), axis=1)
train_df['newLabel'] = train_df.apply(lambda x: x['Finding Labels'].split('|'), axis=1)

train 30000 validation 10000


In [101]:
core_idg = ImageDataGenerator(samplewise_center=True, samplewise_std_normalization=True, horizontal_flip=True, vertical_flip=False, height_shift_range=0.05, width_shift_range=0.1, rotation_range=5, shear_range=0.1, fill_mode='reflect', zoom_range=0.15)

In [102]:
weight_path = "{}_weights.best.hdf5".format('xray_class')
checkpoint = ModelCheckpoint(weight_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min', save_weights_only=True)
early = EarlyStopping(monitor="val_loss", mode="min", patience=5)
callbacks_list = [checkpoint, early]

In [103]:
# Define the model creation function
def MakeModel(IMG_SIZE, bs, channels=1):
    model = Sequential()
    base_mobilenet_model = MobileNet(input_shape=(*IMG_SIZE, channels), include_top=False, weights=None)
    model.add(base_mobilenet_model)
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(0.5))
    model.add(Dense(512))
    model.add(Dropout(0.5))
    model.add(Dense(len(all_labels), activation='sigmoid'))
    return model

In [104]:
IMG_SIZE_LIST = [(1024, 1024), (512, 512), (256, 256), (224, 224), (192, 192), (128, 128), (64, 64)]
BATCH_SIZE_LIST = [4, 8, 16, 32, 32, 32, 64]
STEPS_PER_EPOCH = 10000
EPOCHS = 10

In [109]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

class AdamAccumulate(Adam):
    def __init__(self, lr=0.001, accum_iters=1, **kwargs):
        super(AdamAccumulate, self).__init__(learning_rate=lr, **kwargs)
        self.accum_iters = accum_iters
        self.iterations = tf.Variable(0, dtype=tf.int64, trainable=False)
        self.accum_grads = None

    def apply_gradients(self, grads_and_vars, name=None, experimental_aggregate_gradients=True):
        if self.accum_grads is None:
            self.accum_grads = [tf.Variable(tf.zeros_like(var), trainable=False) for grad, var in grads_and_vars]

        for (grad, var), accum_grad in zip(grads_and_vars, self.accum_grads):
            accum_grad.assign_add(grad)

        self.iterations.assign_add(1)

        def apply_accumulated_grads():
            apply_grads = [(accum_grad / tf.cast(self.accum_iters, tf.float32), var) for accum_grad, (grad, var) in zip(self.accum_grads, grads_and_vars)]
            super(AdamAccumulate, self).apply_gradients(apply_grads, name, experimental_aggregate_gradients)
            for accum_grad in self.accum_grads:
                accum_grad.assign(tf.zeros_like(accum_grad))

        tf.cond(tf.equal(self.iterations % self.accum_iters, 0), apply_accumulated_grads, lambda: None)

In [110]:

# Training loop
run_this_code = True
if run_this_code:
    train_results = defaultdict(dict)
    test_results = defaultdict(dict)
    
    lr = 0.0005
    syntheticBatch = 256
    
    plt.figure(figsize=(20, 5))

    for imageSize, batchSize in zip(IMG_SIZE_LIST, BATCH_SIZE_LIST):
        batch = int(256 / batchSize)
        
        train_gen = core_idg.flow_from_dataframe(dataframe=train_df, directory=None, x_col='path', y_col='newLabel', class_mode='categorical', classes=all_labels, target_size=imageSize, color_mode='grayscale', batch_size=batchSize)
        valid_gen = core_idg.flow_from_dataframe(dataframe=valid_df, directory=None, x_col='path', y_col='newLabel', class_mode='categorical', classes=all_labels, target_size=imageSize, color_mode='grayscale', batch_size=batchSize)

        print('Running Image Size:', imageSize, 'Running Batch size:', batchSize, 'Learning Rate:', lr)
        
        predictions_train = pd.DataFrame()
        predictions_test = pd.DataFrame()

        opt = AdamAccumulate(lr=lr, accum_iters=batch)
        
        multi_disease_model = MakeModel(imageSize, batchSize)
        multi_disease_model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['binary_accuracy', 'mae'])

        history = multi_disease_model.fit(train_gen, steps_per_epoch=STEPS_PER_EPOCH, validation_data=valid_gen, epochs=EPOCHS, callbacks=callbacks_list, validation_steps=1000)

        plt.plot(history.history['val_loss'])

        p = history.history['val_loss'][0]

        del multi_disease_model, history
        gc.collect()
        print('*' * 50)
        print('')

        test_results[imageSize[0]][lr] = p
        imageSizeFile = pd.DataFrame(test_results)
        imageSizeFile.to_csv("imageSize.csv", index=True)

    plt.legend([str(x[0]) for x in IMG_SIZE_LIST], loc='upper right')
    plt.title('model accuracy')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.show()
    plt.savefig('image_size_selection.png', bbox_inches='tight')

Found 28308 validated image filenames belonging to 13 classes.
Found 9438 validated image filenames belonging to 13 classes.
Running Image Size: (1024, 1024) Running Batch size: 4 Learning Rate: 0.0005
Epoch 1/10


UnknownError: Graph execution error:

Detected at node 'mod' defined at (most recent call last):
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
      app.start()
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\ipykernel\kernelapp.py", line 739, in start
      self.io_loop.start()
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\tornado\platform\asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\asyncio\base_events.py", line 603, in run_forever
      self._run_once()
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\asyncio\base_events.py", line 1909, in _run_once
      handle._run()
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell
      await result
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\ipykernel\ipkernel.py", line 359, in execute_request
      await super().execute_request(stream, ident, parent)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\ipykernel\ipkernel.py", line 446, in do_execute
      res = shell.run_cell(
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\IPython\core\interactiveshell.py", line 3075, in run_cell
      result = self._run_cell(
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\IPython\core\interactiveshell.py", line 3130, in _run_cell
      result = runner(coro)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\IPython\core\interactiveshell.py", line 3334, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\IPython\core\interactiveshell.py", line 3517, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Mohammad Sheakh\AppData\Local\Temp\ipykernel_13488\752811114.py", line 28, in <module>
      history = multi_disease_model.fit(train_gen, steps_per_epoch=STEPS_PER_EPOCH, validation_data=valid_gen, epochs=EPOCHS, callbacks=callbacks_list, validation_steps=1000)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\keras\engine\training.py", line 997, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "d:\Application Installed\anacondaa\envs\sheakh310\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 579, in minimize
    File "C:\Users\Mohammad Sheakh\AppData\Local\Temp\ipykernel_13488\3721505913.py", line 26, in apply_gradients
      tf.cond(tf.equal(self.iterations % self.accum_iters, 0), apply_accumulated_grads, lambda: None)
Node: 'mod'
JIT compilation failed.
	 [[{{node mod}}]] [Op:__inference_train_function_60538]

<Figure size 2000x500 with 0 Axes>