In [1]:
from keras.models import Sequential
"""Import from keras_preprocessing not from keras.preprocessing, because Keras may or maynot contain the features discussed here depending upon when you read this article, until the keras_preprocessed library is updated in Keras use the github version."""
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.engine import Input, Model
from keras import regularizers, optimizers
import pandas as pd
import numpy as np
from itertools import chain

Using TensorFlow backend.


In [2]:
# Load data
BBox_df = pd.read_csv('../BBox_List_2017.csv',dtype=str)
de_df = pd.read_csv('../Data_Entry_2017.csv',dtype=str)
train_filter = pd.read_csv('../train_val_list.txt',dtype=str)
train_filter.columns=['Image Index']
test_filter = pd.read_csv('../test_list.txt',dtype=str)
test_filter.columns=['Image Index']

#Clean Data

# remove people over 100, Found some people that are labeled "412" years old
bad_ages = de_df[de_df['Patient Age'].astype(int) >= 100].index
de_df = de_df.drop(bad_ages)

# Break the | into columns
all_labels = np.unique(list(chain(*de_df['Finding Labels'].map(lambda x: x.split('|')).tolist())))
all_labels = [x for x in all_labels if len(x) > 0]

# Take the Labels and one hot encode them
for c_label in all_labels:
    if len(c_label)>1: # leave out empty labels
        de_df[c_label] = de_df['Finding Labels'].map(lambda finding: 1 if c_label in finding else 0)

# remove the unneeded columns from the df
de_df = de_df.drop(columns=['Finding Labels','Unnamed: 11','Patient Gender','View Position','Patient Age','Patient ID','Follow-up #','OriginalImagePixelSpacing[x','y]','OriginalImage[Width','Height]'])
NoF_df = de_df.sample(20000)

all_labels.remove('No Finding')

print('All Labels ({}): {}'.format(len(all_labels), all_labels))


All Labels (14): ['Atelectasis', 'Cardiomegaly', 'Consolidation', 'Edema', 'Effusion', 'Emphysema', 'Fibrosis', 'Hernia', 'Infiltration', 'Mass', 'Nodule', 'Pleural_Thickening', 'Pneumonia', 'Pneumothorax']


In [3]:
de_df.head()

Unnamed: 0,Image Index,Atelectasis,Cardiomegaly,Consolidation,Edema,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
0,00000001_000.png,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,00000001_001.png,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0
2,00000001_002.png,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
3,00000002_000.png,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4,00000003_000.png,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0


In [4]:
datagen=ImageDataGenerator(validation_split=0.2, rescale=1./255)
test_datagen=ImageDataGenerator(rescale=1./255)

# Set this to use the included split with the data set.
train_df = pd.merge(train_filter,de_df , on='Image Index', how='inner')
test_df = pd.merge(test_filter,de_df , on='Image Index', how='inner')

batch_fit = 512

train_generator=datagen.flow_from_dataframe(
                dataframe=train_df,
                directory="../images/",
                x_col="Image Index",
                y_col=all_labels,
                batch_size=batch_fit,
                seed=42,
                shuffle=True,
                class_mode= "other",
                subset='training',
                target_size=(128,128),
                color_mode="grayscale")


valid_generator=datagen.flow_from_dataframe(
                dataframe=train_df,
                directory="../images/",
                x_col="Image Index",
                y_col=all_labels,
                batch_size=batch_fit,
                seed=42,
                shuffle=True,
                class_mode= "other",
                subset='validation',
                target_size=(128,128),
                color_mode="grayscale")

test_generator=test_datagen.flow_from_dataframe(
                dataframe=test_df,
                directory="../images/",
                x_col="Image Index",
                batch_size=all_labels,
                seed=42,
                shuffle=False,
                class_mode=None,
                target_size=(128,128),
                color_mode="grayscale")

Found 69209 validated image filenames.
Found 17302 validated image filenames.
Found 25591 validated image filenames.


In [5]:
inp = Input(shape=(128,128,1))
x = Conv2D(32, (3, 3), padding = 'same')(inp)
x = Activation('relu')(x)
x = Conv2D(32, (3, 3))(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size = (2, 2))(x)
x = Dropout(0.25)(x)
x = Conv2D(64, (3, 3), padding = 'same')(x)
x = Activation('relu')(x)
x = Conv2D(64, (3, 3))(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size = (2, 2))(x)
x = Dropout(0.25)(x)
x = Flatten()(x)
x = Dense(256)(x)
x = Activation('relu')(x)
x = Dropout(0.25)(x)

output1 = Dense(1, activation = 'sigmoid')(x)
output2 = Dense(1, activation = 'sigmoid')(x)
output3 = Dense(1, activation = 'sigmoid')(x)
output4 = Dense(1, activation = 'sigmoid')(x)
output5 = Dense(1, activation = 'sigmoid')(x)
output6 = Dense(1, activation = 'sigmoid')(x)
output7 = Dense(1, activation = 'sigmoid')(x)
output8 = Dense(1, activation = 'sigmoid')(x)
output9 = Dense(1, activation = 'sigmoid')(x)
output10 = Dense(1, activation = 'sigmoid')(x)
output11 = Dense(1, activation = 'sigmoid')(x)
output12 = Dense(1, activation = 'sigmoid')(x)
output13 = Dense(1, activation = 'sigmoid')(x)
output14 = Dense(1, activation = 'sigmoid')(x)

model = Model(inp,[output1,
                   output2,
                   output3,
                   output4,
                   output5,
                   output6,
                   output7,
                   output8,
                   output9,
                   output10,
                   output11,
                   output12,
                   output13,
                   output14
                  ])

model.compile(optimizers.rmsprop(lr = 0.001),
loss = ["binary_crossentropy",
        "binary_crossentropy",
        "binary_crossentropy",
        "binary_crossentropy",
        "binary_crossentropy",
        "binary_crossentropy",
        "binary_crossentropy",
        "binary_crossentropy",
        "binary_crossentropy",
        "binary_crossentropy",
        "binary_crossentropy",
        "binary_crossentropy",
        "binary_crossentropy",
        "binary_crossentropy"],metrics = ["accuracy"])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [6]:
def generator_wrapper(generator):
    for batch_x,batch_y in generator:
        # 14 for the, 14 labels
        yield (batch_x,[batch_y[:,i] for i in range(14)])

In [7]:
STEP_SIZE_TRAIN=train_generator.n // train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n // valid_generator.batch_size
STEP_SIZE_TEST=test_generator.n // batch_fit


model.fit_generator(generator=generator_wrapper(train_generator),
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=generator_wrapper(valid_generator),
                    validation_steps=STEP_SIZE_VALID,
                    epochs=4,
                    verbose=1)

Instructions for updating:
Use tf.cast instead.
Epoch 1/4
  1/135 [..............................] - ETA: 28:30 - loss: 9.9333 - dense_2_loss: 0.7010 - dense_3_loss: 0.7167 - dense_4_loss: 0.6680 - dense_5_loss: 0.7615 - dense_6_loss: 0.7296 - dense_7_loss: 0.7664 - dense_8_loss: 0.6938 - dense_9_loss: 0.5833 - dense_10_loss: 0.7284 - dense_11_loss: 0.7445 - dense_12_loss: 0.6904 - dense_13_loss: 0.7709 - dense_14_loss: 0.6901 - dense_15_loss: 0.6887 - dense_2_acc: 0.4434 - dense_3_acc: 0.2910 - dense_4_acc: 0.7188 - dense_5_acc: 0.0879 - dense_6_acc: 0.2422 - dense_7_acc: 0.0410 - dense_8_acc: 0.4980 - dense_9_acc: 0.9941 - dense_10_acc: 0.2383 - dense_11_acc: 0.1250 - dense_12_acc: 0.5547 - dense_13_acc: 0.0488 - dense_14_acc: 0.5293 - dense_15_acc: 0.5547

RuntimeError: Your generator is NOT thread-safe.Keras requires a thread-safe generator when`use_multiprocessing=False, workers > 1`.For more information see issue #1638.

In [None]:
test_generator.reset()

In [None]:
pred = model.predict_generator(test_generator,
                                workers = 24,
                                steps=STEP_SIZE_TEST,
                                verbose=1)

In [None]:
model.save(f'Models3/MOSM.h5')