# Detecting Pneumonia from X-rays




> #### *Dataset from [Kaggle](https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia/data)*
*https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia/data*




## *Week 04 homework:*
---

### *Importing libraries*

In [0]:
# Library imports
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import shutil
import urllib.request
import seaborn as sns
import tensorflow as tf
import numpy as np
import random

from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense, BatchNormalization, \
  GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.inception_v3 import InceptionV3, \
  preprocess_input
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, \
  ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils.class_weight import compute_class_weight

from itertools import product
from functools import partial

try:
    from google.colab import drive
except:
    pass


### *Mounting and Defining Filepaths*

In [0]:
# Mount Google Drive to access the dataset from it
drive.mount('/content/drive')

In [0]:
# Define filepaths
filepath = '/content/drive/My Drive/Week_04/'

train_dir =  filepath + 'train/'
validation_dir = filepath + 'val/'
test_dir = filepath + 'test/'

### *Building the model with custom layers on top*

In [4]:
# Number of classes to predict (Normal or Pneumonia(2))
NUM_CLASSES = 2

# Build Model
def create_model(input_shape, num_classes):

  ## Create and return tensorflow model (building ontop of InceptionV3).  

  # Blank slate
  K.clear_session()

  # Inception v3 for base model
  base_model = InceptionV3(weights='imagenet', include_top=False,\
                           input_shape=input_shape)
  
  # Add 3 custom layers on top
  x = base_model.output
  x = GlobalAveragePooling2D(name='avg_pool')(x)
  x = Dense(512, activation='relu')(x)
  x = Dropout(0.3)(x)
  x = Dense(256, activation='relu')(x)
  x = Dropout(0.3)(x)
  x = Dense(128, activation='relu')(x)
  x = Dropout(0.3)(x)
  
  # Using base model for feature extraction. layer weights don't change
  for layer in base_model.layers:
    layer.trainable = False

  # Final prediction layer is dense
  predictions = Dense(num_classes, activation='sigmoid')(x)
  
  # Instantiate model 
  model = Model(inputs=base_model.inputs, outputs=predictions)

  return model

# Height & Width is 150 and 3 channels for Inception
model = create_model((150, 150, 3), NUM_CLASSES)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


### *Define training & testing accuracy / loss*

In [0]:
# Training Loss
training_loss = tf.keras.metrics.Mean('training_loss', dtype=tf.float32)

# Training Accuracy
training_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    'training_accuracy', dtype=tf.float32)

# Test Loss 
test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)

# Test Accuracy 
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    'test_accuracy', dtype=tf.float32)

# Print model summary 
model.summary()

### *Define optimizer and compile the model*

In [0]:
# Adam Optimizer 
optimizer = Adam(lr=0.0001)

# Compile 
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer, 
              metrics=['accuracy'])

### *File count function*

In [0]:
def dir_file_count(directory):

  # Number of files present inside the 'directory'.  
  return sum([len(files) for r, d, files in os.walk(directory)])

### *Configure paramaters, setup generators*

In [8]:
# Config parameters 
rescale = 1./255
target_size = (150, 150)
batch_size = 500
class_mode = 'categorical'

# Augment the Training dataset images 
train_datagen = ImageDataGenerator(rescale=rescale,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   validation_split=0.2)
# Load the images in the generator 
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=target_size,
                                                    class_mode=class_mode,
                                                    batch_size=batch_size,
                                                    shuffle=True)
# Augment the validation dataset images
val_datagen = ImageDataGenerator(rescale=rescale)

# Load the images in the generator
val_generator = val_datagen.flow_from_directory(validation_dir, 
                                                target_size=target_size,
                                                class_mode=class_mode,
                                                batch_size=dir_file_count(validation_dir),
                                                shuffle=False)
# Augment the test dataset images
test_datagen = ImageDataGenerator(rescale=rescale)

# Load the images in the generator
test_generator = test_datagen.flow_from_directory(test_dir,
                                                  target_size=target_size,
                                                  class_mode=class_mode,
                                                  batch_size=dir_file_count(test_dir),
                                                  shuffle=False)

Found 5241 images belonging to 2 classes.
Found 16 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


### *Weights*

In [9]:
y = train_generator.classes
labels = np.unique(y)

train_class_weights = compute_class_weight('balanced', labels, y)
print(train_class_weights)

[1.94111111 0.67347726]


### *Start training and save model file*

In [10]:
# Start the training
history = model.fit_generator(train_generator,
                              steps_per_epoch=len(train_generator),
                              epochs=10,
                              verbose=1,
                              validation_data=val_generator,
                              validation_steps=len(val_generator),
                              class_weight=train_class_weights,
                              workers=20)

# Save the model after the training is complete
MODEL_FILE = 'pneumonia_v1.hd5'
model.save(MODEL_FILE)

Epoch 1/10
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### *Move model file*

In [0]:
# Move Model file to Google Drive 
!mv {MODEL_FILE} '/content/drive/My Drive/Week_04'

### *Load model file*

In [0]:
# Load the Model File from Google Drive 
MODEL_FILE = f'/content/drive/My Drive/Week_04/{MODEL_FILE}'
model = tf.keras.models.load_model(MODEL_FILE)

### *Evaluate test generator*

In [13]:
# Evaluate test generator
result = model.evaluate_generator(test_generator, steps=len(test_generator),\
                                  verbose=1)

print("%s%.2f  "% ("Loss     : ", result[0]))
print("%s%.2f%s"% ("Accuracy : ", result[1]*100, "%"))

Loss     : 1.38  
Accuracy : 62.02%


### *Results / predict*

In [14]:
# Randomly generate Test Batch
num_of_batch = len(test_generator) # This is 1 in our case 
batch_no = random.randint(0, num_of_batch - 1)

# Fetch batch data
y_img_batch, y_true_batch = test_generator.__getitem__(batch_no)
y_true_batch = y_true_batch.argmax(axis=-1)

# Make predictions 
y_pred_batch = model.predict(y_img_batch)
y_pred_batch = y_pred_batch.argmax(axis=-1)

# Print results 
print("-"*35)
print("%s%d"%     ("Selected Batch No       : ", batch_no))
print("-"*35)
print("%s%d"%     ("Batch Size              : ", len(y_pred_batch)))
print("-"*35)
print("%s%.2f%s"% ("Accuracy                : ", np.mean(y_true_batch==y_pred_batch)*100, "%"))
print("-"*35)



-----------------------------------
Selected Batch No       : 0
-----------------------------------
Batch Size              : 624
-----------------------------------
Accuracy                : 62.02%
-----------------------------------
