In [1]:
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if "GPU" not in device_name:
    print("GPU device not found")
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [2]:
import os
import numpy as np
import pandas as pd
from glob import glob
from itertools import chain
from sklearn.metrics import roc_curve, auc, roc_auc_score, accuracy_score, average_precision_score
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
import cv2

import tensorflow as tf
os.getcwd()

Using TensorFlow backend.


'/kaggle/working'

In [3]:
data_directory = '/kaggle/input/data/'
image_size = 256
batch_size = 32
os.chdir(data_directory)

In [4]:
df = pd.read_csv('Data_Entry_2017.csv')
os.chdir('/kaggle/working')

In [5]:
data_image_paths = {os.path.basename(x): x for x in glob(os.path.join(data_directory, 'images*', '*', '*.png'))}
df['path'] = df['Image Index'].map(data_image_paths.get)
df.head(5)

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,y],Unnamed: 11,path
0,00000001_000.png,Cardiomegaly,0,1,58,M,PA,2682,2749,0.143,0.143,,/kaggle/input/data/images_001/images/00000001_...
1,00000001_001.png,Cardiomegaly|Emphysema,1,1,58,M,PA,2894,2729,0.143,0.143,,/kaggle/input/data/images_001/images/00000001_...
2,00000001_002.png,Cardiomegaly|Effusion,2,1,58,M,PA,2500,2048,0.168,0.168,,/kaggle/input/data/images_001/images/00000001_...
3,00000002_000.png,No Finding,0,2,81,M,PA,2500,2048,0.171,0.171,,/kaggle/input/data/images_001/images/00000002_...
4,00000003_000.png,Hernia,0,3,81,F,PA,2582,2991,0.143,0.143,,/kaggle/input/data/images_001/images/00000003_...


In [6]:
# generating labels for binary classificatiion
l=[]
for i in list(df['Finding Labels']):
    if i=='Effusion':
        l.append('Present')
    else:
        l.append('Absent')
df['labels']=l

In [7]:
labels=['Absent','Present']
df['Absent']=list(map((lambda x: 1.0 if x=='False' else 0.0),list(df['labels'])))
df['Present']=list(map((lambda x: 0.0 if x=='False' else 1.0),list(df['labels'])))

In [8]:
# Removing normal/NO-Finding labelled images and Hernia Images
df=df.drop(df[df['Finding Labels']=='No Finding'].index,axis=0)


In [9]:
#generating more balanced dataset
df1=df.drop(df[df['labels']=='False'].index,axis=0)
df1=df1.reset_index()
df2=df.drop(df[df['labels']=='True'].index,axis=0)
df2=df2.reset_index()
df2=df2.drop('index',axis=1)

df3=df2[df2.index<4000]
df=pd.concat([df1, df3], ignore_index=True)


In [10]:
df = df.sample(frac=1).reset_index(drop=True)
df=df.drop(['index'],axis=1)


In [11]:
train_df, valid_df = train_test_split(df, test_size=0.20, random_state=2018,stratify=df['labels'].map(lambda x: x[:4]))

In [12]:
core_idg = ImageDataGenerator(rescale=1 / 255,
                                  samplewise_center=True,
                                  samplewise_std_normalization=True,
                                  horizontal_flip=True,
                                  vertical_flip=False,
                                  height_shift_range=0.05,
                                  width_shift_range=0.1,
                                  rotation_range=5,
                                  shear_range=0.1,
                                  fill_mode='reflect',
                                  zoom_range=0.15)

train_gen = core_idg.flow_from_dataframe(dataframe=train_df,
                                             directory=None,
                                             x_col='path',
                                             y_col='labels',
                                             class_mode='categorical',
                                             batch_size=batch_size,
                                             classes=labels,
                                             target_size=(image_size, image_size))

valid_gen = core_idg.flow_from_dataframe(dataframe=valid_df,
                                             directory=None,
                                             x_col='path',
                                             y_col='labels',
                                             class_mode='categorical',
                                             batch_size=batch_size,
                                             classes=labels,
                                             target_size=(image_size, image_size))

test_X, test_Y = next(core_idg.flow_from_dataframe(dataframe=valid_df,
                                                       directory=None,
                                                       x_col='path',
                                                       y_col='labels',
                                                       class_mode='categorical',
                                                       batch_size=1024,
                                                       classes=labels,
                                                       target_size=(image_size, image_size)))

Found 44607 validated image filenames belonging to 2 classes.
Found 11152 validated image filenames belonging to 2 classes.
Found 11152 validated image filenames belonging to 2 classes.


In [13]:
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2

base_model = InceptionResNetV2(include_top=False, weights='imagenet', input_shape=(256, 256, 3))
x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.Dropout(0.5)(x)
output = tf.keras.layers.Dense(len(labels), activation="sigmoid")(x)
model = tf.keras.Model(base_model.input, output)
model.compile(optimizer=tf.keras.optimizers.Adam(), loss='binary_crossentropy', metrics=['accuracy'])

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.7/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [14]:
def get_callbacks(model_name):
    callbacks = []
    tensor_board = tf.keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0)
    callbacks.append(tensor_board)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=f'{model_name}.h5',
        verbose=1,
        save_best_only=True)
    callbacks.append(checkpoint)
    return callbacks

In [15]:
callbacks = get_callbacks('multi_class')
model.fit(train_gen,
              steps_per_epoch=100,
              validation_data=(test_X, test_Y),
              epochs=20,
              callbacks=callbacks)

Train for 100 steps, validate on 1024 samples
Epoch 1/20
Epoch 00001: val_loss improved from inf to 10.47966, saving model to multi_class.h5
Epoch 2/20
Epoch 00002: val_loss improved from 10.47966 to 5.03630, saving model to multi_class.h5
Epoch 3/20
Epoch 00003: val_loss improved from 5.03630 to 0.30306, saving model to multi_class.h5
Epoch 4/20
Epoch 00004: val_loss did not improve from 0.30306
Epoch 5/20
Epoch 00005: val_loss improved from 0.30306 to 0.27008, saving model to multi_class.h5
Epoch 6/20
Epoch 00006: val_loss improved from 0.27008 to 0.26353, saving model to multi_class.h5
Epoch 7/20
Epoch 00007: val_loss did not improve from 0.26353
Epoch 8/20
Epoch 00008: val_loss did not improve from 0.26353
Epoch 9/20
Epoch 00009: val_loss did not improve from 0.26353
Epoch 10/20
Epoch 00010: val_loss did not improve from 0.26353
Epoch 11/20
Epoch 00011: val_loss did not improve from 0.26353
Epoch 12/20
Epoch 00012: val_loss improved from 0.26353 to 0.26123, saving model to multi_cl

<tensorflow.python.keras.callbacks.History at 0x7f8db6ec5c90>

In [16]:
y_pred = model.predict(test_X)

In [18]:
y_pred = model.predict(test_X)
predicted = [np.argmax(i) for i in y_pred]
actual=[np.argmax(i) for i in test_Y]