In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import tensorflow as tf
import cv2
AUTOTUNE = tf.data.AUTOTUNE
from pathlib import Path

In [4]:
# Resolve the dataset root and its three split directories.
path = Path('../input/disease')
data_dir = path / 'chest_xray'
train_dir, test_dir, val_dir = (data_dir / split for split in ('train', 'test', 'val'))

# Collect every file two levels below each split (<split>/<class>/<file>).
# Files found under 'val' are merged into the training list.
train_images = tf.io.gfile.glob(f"{train_dir}/*/*") + tf.io.gfile.glob(f"{val_dir}/*/*")
test_images = tf.io.gfile.glob(f"{test_dir}/*/*")

print(f"total number of images in train set : {len(train_images)}")
print(f"total number of images in test set : {len(test_images)}")

# Grand total over the training list (train + val) and the test list.
total = len(train_images) + len(test_images)
print(total)

# Per-class file lists, taken from the 'train' directory only.
normal = tf.io.gfile.glob(f"{train_dir}/NORMAL/*")
pneumonic = tf.io.gfile.glob(f"{train_dir}/PNEUMONIA/*")
normal_image_count, pneumonic_image_count = len(normal), len(pneumonic)

In [5]:
# Target image size fed to the networks, and the mini-batch size.
img_height, img_width = 224, 224
batch_size = 32


def show_image_grid(image_paths, rows=3, cols=3):
  """Show the first ``rows * cols`` images of ``image_paths`` in a grid.

  Each panel is titled with the shape of the decoded array. If fewer paths
  than panels are supplied, the remaining panels are left empty.

  Args:
    image_paths: sequence of image file paths readable by ``cv2.imread``.
    rows: number of grid rows.
    cols: number of grid columns.

  Raises:
    ValueError: if a file cannot be decoded by OpenCV.
  """
  # squeeze=False keeps `axes` two-dimensional even for a 1x1 grid.
  _, axes = plt.subplots(rows, cols, figsize = (10, 10), sharey = True, squeeze = False)
  for panel, image_path in zip(axes.flat, image_paths):
    img = cv2.imread(image_path)
    if img is None:
      # cv2.imread reports failure by returning None rather than raising.
      raise ValueError(f"could not read image: {image_path}")
    # OpenCV decodes to BGR channel order; matplotlib expects RGB.
    panel.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    panel.set_title(f"Shape : ({img.shape})")


print("Normal Images-----------")
show_image_grid(normal)
print("Pneumonic Images--------")
show_image_grid(pneumonic)

In [6]:
# List the training files without shuffling so the element count can be read.
image_ds = tf.data.Dataset.list_files(train_images, shuffle=False)
image_count = len(image_ds)

# Shuffle once with a buffer covering the whole dataset; with
# reshuffle_each_iteration=False the shuffled order is not redrawn per iteration.
image_ds = image_ds.shuffle(image_count, reshuffle_each_iteration=False)

test_ds = tf.data.Dataset.list_files(test_images, shuffle=False)

# Distinct entry names two levels below data_dir; np.unique returns them sorted.
class_names = np.unique([entry.name for entry in data_dir.glob('*/*/')])
print(class_names)

In [7]:
# Hold out the first 20% of the shuffled file dataset for validation;
# everything after that prefix is used for training.
val_size = int(image_count * 0.2)
val_ds, train_ds = image_ds.take(val_size), image_ds.skip(val_size)

train_image_count, val_image_count = len(train_ds), len(val_ds)

print(f"size of train dataset : {train_image_count}")
print(f"size of val dataset : {val_image_count}")

In [8]:
def encode_decode(file_path = data_dir):
  """Load one image file and derive its integer class label from its path.

  Args:
    file_path: path of a JPEG file whose parent directory name is the class.

  Returns:
    An ``(image, label)`` pair: the decoded 3-channel image resized to
    ``(img_height, img_width)``, and the position of the parent directory
    name within ``class_names``.
  """
  raw_bytes = tf.io.read_file(file_path)
  pixels = tf.io.decode_jpeg(raw_bytes, channels = 3)
  resized = tf.image.resize(pixels, (img_height, img_width))

  # The second-to-last path component is the class directory.
  parent_dir = tf.strings.split(file_path, sep = '/')[-2]
  # argmax over the boolean match mask yields the index of the matching name.
  class_index = tf.argmax(parent_dir == class_names)
  return (resized, class_index)
  
# Convert each dataset of file paths into (image, label) pairs, decoding in parallel.
train_ds, val_ds, test_ds = (
    split.map(encode_decode, num_parallel_calls = AUTOTUNE)
    for split in (train_ds, val_ds, test_ds)
)

In [9]:
def configure_for_performance(ds):
  """Return ``ds`` wrapped as a cached, shuffled, endlessly repeating batch stream.

  The stages are applied in this order: cache, shuffle (buffer of 1000),
  repeat, batch (``batch_size``), prefetch (``AUTOTUNE``).

  Args:
    ds: a ``tf.data.Dataset`` of individual examples.

  Returns:
    The transformed dataset. Because of ``repeat()`` it never ends, so
    consumers have to bound the number of steps themselves.
  """
  return (
      ds.cache()
      .shuffle(buffer_size = 1000)
      .repeat()
      .batch(batch_size)
      .prefetch(buffer_size = AUTOTUNE)
  )

# Training and validation streams get the full cache/shuffle/repeat pipeline.
train_ds, val_ds = configure_for_performance(train_ds), configure_for_performance(val_ds)
# The test stream is only batched and prefetched, so it keeps its order and ends.
test_ds = test_ds.batch(batch_size)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

In [10]:
# Pull a single training batch and show its first nine images with class names.
img_batch, label_batch = next(iter(train_ds))
plt.figure(figsize = (10,10))
for idx in range(9):
  plt.subplot(3, 3, idx + 1)
  # Cast to uint8 so imshow treats the values as 0-255 intensities.
  plt.imshow(img_batch[idx].numpy().astype('uint8'))
  plt.title(class_names[label_batch[idx]])
  plt.axis("off")

In [11]:
# Whole batches per epoch; the step counts are needed because the training
# and validation datasets repeat indefinitely.
train_steps, val_steps = train_image_count // batch_size, val_image_count // batch_size

# Preprocessing stack placed in front of every model: scale pixel values by
# 1/255, then apply a random zoom and a random rotation.
# NOTE(review): both zoom ranges are written (-0.05, -0.15), i.e. the first
# bound is larger than the second — confirm the intended order.
data_augment = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1./255),
    tf.keras.layers.RandomZoom(height_factor=(-0.05, -0.15), width_factor=(-0.05, -0.15)),
    tf.keras.layers.RandomRotation(0.2),
])

# Metrics reported during training and evaluation.
METRICS = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]
# Inverse-frequency class weights: weight_c = N / (2 * n_c), so both classes
# contribute equally to the loss overall.
# N must be the size of the population the per-class counts were taken from.
# `total` also includes the validation and test files, which are not part of
# normal_image_count / pneumonic_image_count, so the sum of the two class
# counts is used instead.
counted_total = normal_image_count + pneumonic_image_count
weight_for_normal = (1 / normal_image_count) * (counted_total / 2.0)
weight_for_pneumonic = (1 / pneumonic_image_count) * (counted_total / 2.0)

# NOTE(review): assumes label 0 is NORMAL and label 1 is PNEUMONIA, i.e. that
# class_names is exactly ['NORMAL', 'PNEUMONIA'] — confirm.
class_weights = {0: weight_for_normal, 1: weight_for_pneumonic}

print('Weight for class 0: {:.2f}'.format(weight_for_normal))
print('Weight for class 1: {:.2f}'.format(weight_for_pneumonic))

In [12]:
# Class directory name -> integer label.
label_ids = {'NORMAL': 0, 'PNEUMONIA': 1}

# The parent directory of each test file is its class name.
test_labels = [image_path.split('/')[-2] for image_path in test_images]
# Names outside the mapping are skipped.
labels = [label_ids[name] for name in test_labels if name in label_ids]

In [13]:
def plot_graph(history):
  """Plot training vs. validation accuracy and loss, one panel each.

  Args:
    history: object whose ``history`` attribute is a mapping with the keys
      'accuracy', 'val_accuracy', 'loss' and 'val_loss', each holding a
      sequence of per-epoch values.
  """
  curves = history.history
  acc, val_acc, loss, val_loss = (
      curves[key] for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
  )

  plt.figure(figsize=(8, 8))

  # Upper panel: accuracy, keeping the automatic lower limit and capping at 1.
  plt.subplot(2, 1, 1)
  for series, legend_text in ((acc, 'Training Accuracy'), (val_acc, 'Validation Accuracy')):
    plt.plot(series, label=legend_text)
  plt.legend(loc='lower right')
  plt.ylabel('Accuracy')
  lowest_shown = min(plt.ylim())
  plt.ylim([lowest_shown, 1])
  plt.title('Training and Validation Accuracy')

  # Lower panel: loss, with the y-axis fixed to [0, 1].
  plt.subplot(2, 1, 2)
  for series, legend_text in ((loss, 'Training Loss'), (val_loss, 'Validation Loss')):
    plt.plot(series, label=legend_text)
  plt.legend(loc='upper right')
  plt.ylabel('Cross Entropy')
  plt.ylim([0,1.0])
  plt.title('Training and Validation Loss')
  plt.xlabel('epoch')
  plt.show()

In [14]:
def conv_block(filters):
    """Build a small convolutional stack with ``filters`` output channels.

    Args:
        filters: number of filters for both convolution layers.

    Returns:
        A ``tf.keras.Sequential`` made of a 3x3 Conv2D, a 3x3 SeparableConv2D
        (both ReLU-activated with 'same' padding) and a BatchNormalization layer.
    """
    layers = tf.keras.layers
    conv_options = dict(activation='relu', padding='same')
    return tf.keras.Sequential([
        layers.Conv2D(filters, 3, **conv_options),
        layers.SeparableConv2D(filters, 3, **conv_options),
        layers.BatchNormalization(),
    ])
def dense_block(units, dropout_rate):
    """Build a fully connected stack: Dense(ReLU) -> BatchNormalization -> Dropout.

    Args:
        units: width of the Dense layer.
        dropout_rate: rate passed to the Dropout layer.

    Returns:
        A ``tf.keras.Sequential`` containing the three layers in that order.
    """
    layers = tf.keras.layers
    return tf.keras.Sequential([
        layers.Dense(units, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),
    ])

In [15]:
from tensorflow.keras.applications import DenseNet201

IMG_SHAPE = (img_height, img_width, 3)

# ImageNet-pretrained DenseNet201 without its classification top; frozen so
# its weights are not updated during training.
base_model_1 = DenseNet201(weights = 'imagenet', include_top = False, input_shape = IMG_SHAPE)
base_model_1.trainable = False

# Head on top of the backbone: four conv blocks, each followed by
# Dropout(0.3), then four dense blocks.
head_1 = []
for n_filters in (64, 128, 256, 512):
    head_1 += [conv_block(n_filters), tf.keras.layers.Dropout(0.3)]
for n_units, drop in ((512, 0.2), (256, 0.5), (128, 0.5), (64, 0.2)):
    head_1.append(dense_block(n_units, drop))

# Full model: preprocessing -> backbone -> head -> flatten -> one sigmoid unit.
model_1 = tf.keras.Sequential([
    data_augment,
    base_model_1,
    *head_1,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1, activation = 'sigmoid'),
])

In [16]:
# Binary classification setup: Adam optimiser, binary cross-entropy loss.
model_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

# Train for 50 epochs. Step counts are passed explicitly because the datasets
# repeat; class_weight scales each sample's loss by its class.
fit_options_1 = dict(
    validation_data=val_ds,
    epochs=50,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    class_weight=class_weights,
)
history_1 = model_1.fit(train_ds, **fit_options_1)

In [17]:
# Score the DenseNet201-based model on the test dataset; as the cell's last
# expression, the returned values are displayed.
model_1.evaluate(test_ds)

In [18]:
# Save the trained model to 'dense_net_1.h5' in the current working directory.
model_1.save('dense_net_1.h5')

In [19]:
# Plot the accuracy and loss curves recorded while fitting model_1.
plot_graph(history_1)

In [20]:
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
pred_1 = (model_1.predict(test_ds) > 0.5).astype("int32")
# classification_report takes (y_true, y_pred); passing them the other way
# round swaps precision and recall in the printed report.
# NOTE(review): assumes `labels` is in the same order as the samples yielded
# by test_ds — confirm.
print(classification_report(labels, pred_1))

In [33]:
from tensorflow.keras.utils import plot_model
# Render the architecture of model_1 (top-to-bottom, nested models expanded)
# and write the diagram to 'dense_net_1.png'.
plot_model(
    model_1,
    to_file = 'dense_net_1.png',
    show_shapes=True,
    show_layer_names=True,
    rankdir='TB',
    expand_nested=True,
)

In [22]:
from tensorflow.keras.applications import MobileNetV2

IMG_SHAPE = (img_height, img_width, 3)

# ImageNet-pretrained MobileNetV2 without its classification top; frozen so
# its weights are not updated during training.
base_model_2 = MobileNetV2(include_top = False, input_shape = IMG_SHAPE, weights = 'imagenet')

base_model_2.trainable = False

# Same head as the other models, but on the MobileNetV2 backbone. The backbone
# must be base_model_2 here: using base_model_1 would silently build a second
# DenseNet201 model and leave MobileNetV2 unused.
model_2 = tf.keras.Sequential([data_augment,
                               base_model_2,
                               conv_block(64),
                               tf.keras.layers.Dropout(0.3),
                               conv_block(128),
                               tf.keras.layers.Dropout(0.3),
                               conv_block(256),
                               tf.keras.layers.Dropout(0.3),
                               conv_block(512),
                               tf.keras.layers.Dropout(0.3),
                               dense_block(512, 0.2),
                               dense_block(256, 0.5),
                               dense_block(128, 0.5),
                               dense_block(64, 0.2),
                               tf.keras.layers.Flatten(),
                               tf.keras.layers.Dense(1, activation = 'sigmoid')
                             ])

In [23]:
# Binary classification setup: Adam optimiser, binary cross-entropy loss.
model_2.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

# Train for 50 epochs with explicit step counts (the datasets repeat) and
# per-class loss weighting.
fit_options_2 = dict(
    validation_data=val_ds,
    epochs=50,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    class_weight=class_weights,
)
history_2 = model_2.fit(train_ds, **fit_options_2)

In [24]:
# Score model_2 on the test dataset; the returned values are displayed as the
# cell output.
model_2.evaluate(test_ds)

In [25]:
# Plot the accuracy and loss curves recorded while fitting model_2.
plot_graph(history_2)

In [26]:
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
pred_2 = (model_2.predict(test_ds) > 0.5).astype("int32")
# classification_report takes (y_true, y_pred); passing them the other way
# round swaps precision and recall in the printed report.
# NOTE(review): assumes `labels` is in the same order as the samples yielded
# by test_ds — confirm.
print(classification_report(labels, pred_2))

In [27]:
# Save the trained model to 'mobile_net_1.h5' in the current working directory.
model_2.save('mobile_net_1.h5')

In [32]:
from tensorflow.keras.utils import plot_model
# Render the architecture of model_2 (top-to-bottom, nested models expanded)
# and write the diagram to 'mobile_net_1.png'.
plot_model(
    model_2,
    to_file = 'mobile_net_1.png',
    show_shapes=True,
    show_layer_names=True,
    rankdir='TB',
    expand_nested=True,
)

In [34]:
from tensorflow.keras.applications import Xception

IMG_SHAPE = (img_height, img_width, 3)

# ImageNet-pretrained Xception without its classification top; frozen so its
# weights are not updated during training.
base_model_3 = Xception(weights = 'imagenet', include_top = False, input_shape = IMG_SHAPE)
base_model_3.trainable = False

# Head on top of the backbone: four conv blocks, each followed by
# Dropout(0.3), then four dense blocks.
head_3 = []
for n_filters in (64, 128, 256, 512):
    head_3 += [conv_block(n_filters), tf.keras.layers.Dropout(0.3)]
for n_units, drop in ((512, 0.2), (256, 0.5), (128, 0.5), (64, 0.2)):
    head_3.append(dense_block(n_units, drop))

# Full model: preprocessing -> backbone -> head -> flatten -> one sigmoid unit.
model_3 = tf.keras.Sequential([
    data_augment,
    base_model_3,
    *head_3,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1, activation = 'sigmoid'),
])

In [35]:
# Binary classification setup: Adam optimiser, binary cross-entropy loss.
model_3.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

# Train for 50 epochs with explicit step counts (the datasets repeat) and
# per-class loss weighting.
fit_options_3 = dict(
    validation_data=val_ds,
    epochs=50,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    class_weight=class_weights,
)
history_3 = model_3.fit(train_ds, **fit_options_3)

In [36]:
# Score the Xception-based model on the test dataset; the returned values are
# displayed as the cell output.
model_3.evaluate(test_ds)

In [37]:
# Plot the accuracy and loss curves recorded while fitting model_3.
plot_graph(history_3)

In [38]:
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
pred_3 = (model_3.predict(test_ds) > 0.5).astype("int32")
# classification_report takes (y_true, y_pred); passing them the other way
# round swaps precision and recall in the printed report.
# NOTE(review): assumes `labels` is in the same order as the samples yielded
# by test_ds — confirm.
print(classification_report(labels, pred_3))

In [39]:
# Save the trained model to 'xception_1.h5' in the current working directory.
model_3.save('xception_1.h5')

In [40]:
from tensorflow.keras.utils import plot_model
# Render the architecture of model_3 (top-to-bottom, nested models expanded)
# and write the diagram to 'xception_1.png'.
plot_model(
    model_3,
    to_file = 'xception_1.png',
    show_shapes=True,
    show_layer_names=True,
    rankdir='TB',
    expand_nested=True,
)

In [41]:
# Members of the ensemble.
# NOTE(review): model_3 is not included — confirm this is intended.
en_models = [model_1, model_2]

# A single symbolic input shared by all members.
# NOTE(review): the shape is spelled out as (224, 224, 3) rather than derived
# from img_height / img_width — keep the two in sync.
en_model_input = tf.keras.Input(shape = (224, 224, 3))

# Apply every member to the shared input and collect their outputs.
en_model_outputs = []
for member in en_models:
    en_model_outputs.append(member(en_model_input))

In [44]:
# The ensemble prediction is the element-wise mean of the member outputs.
averaging_layer = tf.keras.layers.Average()
ensemble_output = averaging_layer(en_model_outputs)
ensemble_model = tf.keras.Model(inputs = en_model_input, outputs = ensemble_output)

In [45]:
# Binary classification setup: Adam optimiser, binary cross-entropy loss.
ensemble_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=METRICS)

# Fit the averaged model for 50 epochs with explicit step counts (the datasets
# repeat) and per-class loss weighting.
fit_options_4 = dict(
    validation_data=val_ds,
    epochs=50,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    class_weight=class_weights,
)
history_4 = ensemble_model.fit(train_ds, **fit_options_4)

In [46]:
# Score the ensemble on the test dataset; the returned values are displayed as
# the cell output.
ensemble_model.evaluate(test_ds)

In [47]:
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score

# Threshold the averaged sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
pred_4 = (ensemble_model.predict(test_ds) > 0.5).astype("int32")
# classification_report takes (y_true, y_pred); passing them the other way
# round swaps precision and recall in the printed report.
# NOTE(review): assumes `labels` is in the same order as the samples yielded
# by test_ds — confirm.
print(classification_report(labels, pred_4))

In [48]:
# Save the ensemble to 'ensemble_model.h5' in the current working directory.
ensemble_model.save('ensemble_model.h5')

In [49]:
from tensorflow.keras.utils import plot_model
# Render the ensemble architecture (top-to-bottom, nested models expanded)
# and write the diagram to 'ensemble.png'.
plot_model(
    ensemble_model,
    to_file = 'ensemble.png',
    show_shapes=True,
    show_layer_names=True,
    rankdir='TB',
    expand_nested=True,
)

In [50]:
# Plot the accuracy and loss curves recorded while fitting the ensemble.
plot_graph(history_4)

In [51]:
# Bare expression: displays the repr of the object returned by
# ensemble_model.fit as the cell output.
history_4