In [1]:
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if "GPU" not in device_name:
    print("GPU device not found")
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [2]:
import os
import numpy as np
import pandas as pd
from glob import glob
from itertools import chain
from sklearn.metrics import roc_curve, auc, roc_auc_score, accuracy_score, average_precision_score
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf

In [3]:
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [4]:
DATA_DIR = '../input/data/'
image_size = 256
batch_size = 32

## Data preprocessing

### Preprocessing Metadata

In [5]:
df = pd.read_csv(f'{DATA_DIR}Data_Entry_2017.csv')

In [6]:
data_image_paths = {os.path.basename(x): x for x in glob(os.path.join(DATA_DIR, 'images*', '*', '*.png'))}

In [7]:
df['path'] = df['Image Index'].map(data_image_paths.get)

In [8]:
df['Finding Labels'] = df['Finding Labels'].map(lambda x: x.replace('No Finding', ''))

In [9]:
labels = np.unique(list(chain(*df['Finding Labels'].map(lambda x: x.split('|')).tolist())))
labels = [x for x in labels if len(x) > 0]

In [10]:
labels

['Atelectasis',
 'Cardiomegaly',
 'Consolidation',
 'Edema',
 'Effusion',
 'Emphysema',
 'Fibrosis',
 'Hernia',
 'Infiltration',
 'Mass',
 'Nodule',
 'Pleural_Thickening',
 'Pneumonia',
 'Pneumothorax']

In [11]:
for label in labels:
    if len(label) > 1:
        df[label] = df['Finding Labels'].map(lambda finding: 1.0 if label in finding else 0.0)

In [12]:
df.head()

Unnamed: 0,Image Index,Finding Labels,Follow-up #,Patient ID,Patient Age,Patient Gender,View Position,OriginalImage[Width,Height],OriginalImagePixelSpacing[x,...,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,Mass,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax
0,00000001_000.png,Cardiomegaly,0,1,58,M,PA,2682,2749,0.143,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,00000001_001.png,Cardiomegaly|Emphysema,1,1,58,M,PA,2894,2729,0.143,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,00000001_002.png,Cardiomegaly|Effusion,2,1,58,M,PA,2500,2048,0.168,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,00000002_000.png,,0,2,81,M,PA,2500,2048,0.171,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,00000003_000.png,Hernia,0,3,81,F,PA,2582,2991,0.143,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
labels = [label for label in labels if df[label].sum() > 1000]

In [None]:
labels

In [13]:
train_df, valid_df = train_test_split(df, test_size=0.20, random_state=2018, stratify=df['Finding Labels'].map(lambda x: x[:4]))

In [14]:
train_df['labels'] = train_df.apply(lambda x: x['Finding Labels'].split('|'), axis=1)
valid_df['labels'] = valid_df.apply(lambda x: x['Finding Labels'].split('|'), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


### Define DataGenerator

In [15]:
core_idg = ImageDataGenerator(rescale=1 / 255,
                                  samplewise_center=True,
                                  samplewise_std_normalization=True,
                                  horizontal_flip=True,
                                  vertical_flip=False,
                                  height_shift_range=0.05,
                                  width_shift_range=0.1,
                                  rotation_range=5,
                                  shear_range=0.1,
                                  fill_mode='reflect',
                                  zoom_range=0.15)

train_gen = core_idg.flow_from_dataframe(dataframe=train_df,
                                             directory=None,
                                             x_col='path',
                                             y_col='labels',
                                             class_mode='categorical',
                                             batch_size=batch_size,
                                             classes=labels,
                                             target_size=(image_size, image_size))

valid_gen = core_idg.flow_from_dataframe(dataframe=valid_df,
                                             directory=None,
                                             x_col='path',
                                             y_col='labels',
                                             class_mode='categorical',
                                             batch_size=batch_size,
                                             classes=labels,
                                             target_size=(image_size, image_size))

test_X, test_Y = next(core_idg.flow_from_dataframe(dataframe=valid_df,
                                                       directory=None,
                                                       x_col='path',
                                                       y_col='labels',
                                                       class_mode='categorical',
                                                       batch_size=1024,
                                                       classes=labels,
                                                       target_size=(image_size, image_size)))

Found 41407 validated image filenames belonging to 14 classes.
Found 10352 validated image filenames belonging to 14 classes.
Found 10352 validated image filenames belonging to 14 classes.


## Create model

In [16]:
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.nasnet import NASNetMobile
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2

base_model = InceptionResNetV2(include_top=False, weights='imagenet', input_shape=(256, 256, 3))
x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
output = tf.keras.layers.Dense(len(labels), activation="softmax")(x)
model = tf.keras.Model(base_model.input, output)
model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='binary_crossentropy', metrics=['accuracy'])

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.7/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [17]:
def get_callbacks(model_name):
    callbacks = []
    tensor_board = tf.keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0)
    callbacks.append(tensor_board)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=f'model.{model_name}.h5',
        verbose=1,
        save_best_only=True)
    # erly = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
    callbacks.append(checkpoint)
    # callbacks.append(erly)
    return callbacks

## Train model

In [18]:
callbacks = get_callbacks('inceptionresnetv2')
model.fit(train_gen,
              steps_per_epoch=128,
              validation_data=(test_X, test_Y),
              epochs=100,
              callbacks=callbacks)

Train for 128 steps, validate on 1024 samples
Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.44087, saving model to model.inceptionresnetv2.h5
Epoch 2/100
Epoch 00002: val_loss did not improve from 0.44087
Epoch 3/100
Epoch 00003: val_loss improved from 0.44087 to 0.35086, saving model to model.inceptionresnetv2.h5
Epoch 4/100
Epoch 00004: val_loss improved from 0.35086 to 0.34284, saving model to model.inceptionresnetv2.h5
Epoch 5/100
Epoch 00005: val_loss did not improve from 0.34284
Epoch 6/100
Epoch 00006: val_loss did not improve from 0.34284
Epoch 7/100
Epoch 00007: val_loss improved from 0.34284 to 0.31585, saving model to model.inceptionresnetv2.h5
Epoch 8/100
Epoch 00008: val_loss improved from 0.31585 to 0.30040, saving model to model.inceptionresnetv2.h5
Epoch 9/100
Epoch 00009: val_loss did not improve from 0.30040
Epoch 10/100
Epoch 00010: val_loss improved from 0.30040 to 0.29833, saving model to model.inceptionresnetv2.h5
Epoch 11/100
Epoch 00011: val_loss impr

<tensorflow.python.keras.callbacks.History at 0x7f7cd27f1110>

In [27]:
model.save('Inceptionmodel.hdf5')

In [28]:
model.save('Inception_Model.h5')

In [20]:
from keras.models import load_model
inception = tf.keras.models.load_model('Inception_model.h5')

Using TensorFlow backend.


In [21]:
test_X.shape

(1024, 256, 256, 3)

In [22]:
sample_image = test_X[0]
sample_image = sample_image.reshape((1, 256,256,3))
y_pred = inception.predict(sample_image)

# From Here trying to predict roc auc score

In [24]:
y_pred

array([[0.20432581, 0.07241496, 0.02441477, 0.00288388, 0.04341677,
        0.00486424, 0.01700941, 0.00156534, 0.4571745 , 0.02062774,
        0.10504288, 0.01822759, 0.02369078, 0.00434129]], dtype=float32)

In [None]:
from sklearn.metrics import roc_auc_score

print(y_pred)
# auc scores
auc_score1 = roc_auc_score(test_Y, y_pred[:,1], multi_class='ovr')
print(auc_score1)

In [None]:
y_pred

In [None]:
clf = OneVsRestClassifier(LinearSVC(random_state=0))
y_score = inception.fit(X_train, y_train).decision_function(X_test)

# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot of a ROC curve for a specific class
for i in range(n_classes):
    plt.figure()
    plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f)' % roc_auc[i])
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.show()

In [None]:
from sklearn.metrics import roc_auc_score
test_Y = np.argmax(test_Y, axis=-1)
y_pred = np.argmax(y_pred, axis=-1)
fpr_roc, tpr_roc, thresholds_roc = roc_curve(test_Y, y_pred)
roc_auc = metrics.auc(fpr_roc, tpr_roc)

In [None]:
print('test binary accuracy = ',model.evaluate(test_X,test_Y, verbose=0)[1])

In [None]:
from sklearn.metrics import plot_confusion_matrix
titles_options = [("Confusion matrix, without normalization", None),
                  ("Normalized confusion matrix", 'true')]
for title, normalize in titles_options:
    disp = plot_confusion_matrix(model.fit(train_gen, valid_gen), X_test, y_test,
                                 display_labels=labels,
                                 cmap=plt.cm.Blues,
                                 normalize=normalize)
    disp.ax_.set_title(title)

    print(title)
    print(disp.confusion_matrix)

plt.show()

In [26]:
test_X.shape
sample_image = test_X[1]
sample_image = sample_image.reshape((1, 256,256,3))
x = inception.predict(sample_image)
def disease(arr):
    for i in range(arr.size):
        print("Disease:",disease_name[i],"Probability:",arr[i])
    
disease(x[0]) 

Disease: Atelectasis Probability: 0.010536501
Disease: Cardiomegaly Probability: 0.0019368976
Disease: Effusion Probability: 0.0071056136
Disease: Infiltration Probability: 0.0019988636
Disease: Mass Probability: 0.028456708
Disease: Nodule Probability: 0.0024924085
Disease: Pneumonia Probability: 0.004790143
Disease: Pneumothorax Probability: 0.00035487764
Disease: Consolidation Probability: 0.030642252
Disease: Edema Probability: 0.41488007
Disease: Emphysema Probability: 0.47335756
Disease: Fibrosis Probability: 0.011693604
Disease: Pleural Thickening Probability: 0.0016613251
Disease: Hernia Probability: 0.010093181


In [None]:
test_X.shape

In [None]:
sample_image = test_X[0]
sample_image = sample_image.reshape((1, 256,256,3))

In [None]:
x = model.predict(sample_image)

In [None]:
x

In [25]:
disease_name = ["Atelectasis","Cardiomegaly","Effusion","Infiltration","Mass","Nodule","Pneumonia","Pneumothorax","Consolidation","Edema","Emphysema","Fibrosis","Pleural Thickening","Hernia"] 

In [None]:
def disease(arr):
    for i in range(arr.size):
        print("Disease:",disease_name[i],"Probability:",arr[i])
    
disease(x[0])  

**Code for prediction**

In [None]:
import cv2
img = cv2.imread('../input/data/images_008/images/00016051_010.png')
width = int(img.shape[1]*0.250)
height = int(img.shape[0]*0.250)
sample_image2 = cv2.resize(img, (width, height), interpolation = cv2.INTER_AREA)
print(sample_image2.shape)

In [None]:
sample_image2 = sample_image2.reshape((1, 256, 256, 3))

In [None]:
model.predict(sample_image2)

In [None]:
x

In [None]:
disease_name = ["Atelectasis","Cardiomegaly","Effusion","Infiltration","Mass","Nodule","Pneumonia","Pneumothorax","Consolidation","Edema","Emphysema","Fibrosis","Pleural Thickening","Hernia"] 

In [None]:
def disease(arr):
    max_val = arr[0]
    for i in range(arr.size):
        print(arr[i])
        if arr[i] > max_val:
            max_val = arr[i]
            index = i
    
    print("Disease:",disease_name[index],"Probability:",max_val)

In [None]:
disease(x[0])

In [None]:
# Prediction
import cv2
img = cv2.imread('../input/data/images_010/images/00020945_050.png')
width = int(img.shape[1]*0.250)
height = int(img.shape[0]*0.250)
sample_image2 = cv2.resize(img, (width, height), interpolation = cv2.INTER_AREA)
print(sample_image2.shape)

sample_image2 = sample_image2.reshape((1, 256, 256, 3))
x = model.predict(sample_image2)

print(x)

disease(x[0])