<h1 style = "background:lightblue;border:0">Introduction</h1>

<p style = "color:black;font-weight:500;text-indent:20px;font-size:16px">The dataset is organized into 3 folders (train, test, val) and contains subfolders for each image category (Pneumonia/Normal). There are 5,863 X-Ray images (JPEG) and 2 categories (Pneumonia/Normal).</p>

<p style = "color:black;font-weight:500;text-indent:20px;font-size:16px">The chest X-ray images (anterior-posterior) were selected from retrospective cohorts of pediatric patients aged one to five years at Guangzhou Women and Children’s Medical Center, Guangzhou. All chest X-ray imaging was performed as part of the patients’ routine clinical care.</p>
    

<h2 style = "background:lightblue;border:0">Contents</h2>

<ul>
    <li style = "color:grey;font-size:15px"> <a href = "#1" style = "color:black;font-weight:bold"> File paths </a> </li>
    <li style = "color:grey;font-size:15px"> <a href = "#2" style = "color:black;font-weight:bold"> Reading pictures and Train - Test Split </a> </li>   
    <li style = "color:grey;font-size:15px"> <a href = "#3" style = "color:black;font-weight:bold"> Data Augmentation </a> </li>
    <li style = "color:grey;font-size:15px"> <a href = "#4" style = "color:black;font-weight:bold"> Examine transformed pictures </a> </li>
    <li style = "color:grey;font-size:15px"> <a href = "#5" style = "color:black;font-weight:bold"> Model Building (CNN) </a> </li>
    <li style = "color:grey;font-size:15px"> <a href = "#6" style = "color:black;font-weight:bold"> Visualization </a> </li>
    <li style = "color:grey;font-size:15px"> <a href = "#7" style = "color:black;font-weight:bold"> Using VGG16 Section </a> </li>
    <li style = "color:grey;font-size:15px"> <a href = "#8" style = "color:black;font-weight:bold"> Fine-tuning </a> </li>  
    
</ul>

<h2 style = "background:yellow;border:0">Import libraries</h2>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
'''
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
'''
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

import warnings
warnings.filterwarnings('ignore')
from keras.preprocessing import image
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPooling2D , Flatten , Dropout , BatchNormalization
from keras.callbacks import ReduceLROnPlateau,EarlyStopping
from keras import optimizers

<a id ='1' ></a>
<h2 style = "background:yellow;border:0">File paths</h2>

In [None]:
# Root folder of the Kaggle chest X-ray dataset; each split lives in its own sub-folder.
DATA_ROOT = '/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray'

train_dir = f'{DATA_ROOT}/train'
valid_dir = f'{DATA_ROOT}/val'
test_dir = f'{DATA_ROOT}/test'

<a id ='2' ></a>
<h2 style = "background:yellow;border:0">Reading pictures and Train - Test Split</h2>


In [None]:
def data_preprocessing(_dir):
    """Load one dataset split into memory as image and label arrays.

    Expects ``_dir`` to contain the class sub-folders ``NORMAL`` (label 0)
    and ``PNEUMONIA`` (label 1). Every non-hidden file inside them is read as
    a single-channel (grayscale) image resized to 128x128.

    Anything else found directly under ``_dir`` (hidden entries, stray files,
    unrelated folders) is skipped: the downstream model is a binary
    classifier, so a third label would silently corrupt the targets, and a
    stray file would make the directory listing fail.

    Parameters
    ----------
    _dir : str
        Path of the split directory (e.g. the train, val or test folder).

    Returns
    -------
    X : numpy.ndarray
        Stacked image arrays as returned by ``image.img_to_array``, one entry
        per loaded file (empty if no image was found).
    Y : numpy.ndarray
        Integer class labels aligned with ``X`` (0 = NORMAL, 1 = PNEUMONIA).
    """
    class_labels = {'NORMAL': 0, 'PNEUMONIA': 1}

    x = []
    y = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore seeded shuffling downstream) reproducible.
    for dir_name in sorted(os.listdir(_dir)):
        if dir_name not in class_labels:
            continue

        class_dir = os.path.join(_dir, dir_name)
        if not os.path.isdir(class_dir):
            continue

        label = class_labels[dir_name]

        for fname in sorted(os.listdir(class_dir)):
            # Skip hidden files such as OS metadata.
            if fname.startswith('.'):
                continue

            # color_mode='grayscale' replaces the deprecated grayscale=True flag.
            img = image.load_img(os.path.join(class_dir, fname),
                                 color_mode='grayscale',
                                 target_size=(128, 128))
            x.append(image.img_to_array(img))
            y.append(label)

    X = np.asarray(x)
    # Explicit integer dtype keeps an empty split usable with np.bincount.
    Y = np.asarray(y, dtype=int)

    return X, Y

In [None]:
# Load the training split into memory as image and label arrays.
train_X , train_y = data_preprocessing(train_dir)

In [None]:
# Load the validation split into memory as image and label arrays.
val_X , val_y = data_preprocessing(valid_dir)

In [None]:
# Load the test split into memory as image and label arrays.
test_X , test_y = data_preprocessing(test_dir)

In [None]:
# Count how many training samples carry each class label (0: NORMAL, 1: PNEUMONIA)
# to check how balanced the classes are.
np.bincount(train_y)


In [None]:
# Per-class sample counts of the validation split (0: NORMAL, 1: PNEUMONIA).
np.bincount(val_y)

In [None]:
# Per-class sample counts of the test split (0: NORMAL, 1: PNEUMONIA).
np.bincount(test_y)

In [None]:
# Shape of the stacked training images; train_X.shape[1:] is later used as the CNN input shape.
train_X.shape

In [None]:
# Shape of the stacked test images.
test_X.shape

<a id ='3' ></a>
<h2 style = "background:yellow;border:0">Data Augmentation</h2>

In [None]:
# Augmentation applied to training images only: pixel values are scaled by
# 1/255, and each image is randomly rotated, shifted, zoomed and mirrored.
augmentation_options = {
    'rescale': 1./255,
    'rotation_range': 30,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'zoom_range': 0.2,
    'horizontal_flip': True,
}
train_datagen = image.ImageDataGenerator(**augmentation_options)
                                                                                                   
      
        

In [None]:
# Evaluation images are only rescaled by 1/255 (same scaling as training); no augmentation is applied.
test_datagen = image.ImageDataGenerator(rescale=1./255)
    
                

In [None]:
# Yield augmented training batches of 32 images from the in-memory arrays,
# shuffled with a fixed seed.
train_generator = train_datagen.flow(train_X , train_y , batch_size=32 , shuffle=True , seed=123 )


In [None]:
# Yield rescaled test batches of 32 images from the in-memory arrays.
# NOTE(review): shuffle=True is presumably unnecessary for evaluation data and makes
# per-epoch validation subsets vary when fewer steps than batches are run - confirm intent.
test_generator = test_datagen.flow(test_X , test_y  , batch_size=32 , shuffle=True , seed=123 )

<a id ='4' ></a>
<h2 style = "background:yellow;border:0">Examine transformed pictures</h2>

In [None]:
# Draw a single augmented batch and show up to its first 8 images in a
# 2x4 grid, each titled with its label.
sample_images, sample_labels = next(iter(train_generator))

plt.figure(figsize=(12,8))
# Slicing keeps the grid at 8 cells and tolerates a batch shorter than 8.
for j, (img, label) in enumerate(zip(sample_images[:8], sample_labels[:8])):
    plt.subplot(2,4,j+1)
    plt.imshow(image.array_to_img(img),cmap = 'gray')
    plt.title(label)

plt.show()

<a id ='5' ></a>
<h2 style = "background:yellow;border:0">Model Building (CNN)</h2>

In [None]:

# CNN for binary classification: five convolutional stages with widening
# filter counts, followed by a dense head ending in a single sigmoid unit.
model = Sequential()

# Stage 1: takes the per-image shape of the training array; no dropout here.
model.add(Conv2D(32,(3,3),padding = 'same',activation='relu',input_shape = train_X.shape[1:]))
model.add(BatchNormalization())
model.add(MaxPooling2D((2,2)))

# Stages 2-5 share one layout: conv -> dropout -> batch norm -> 2x2 max-pool.
for n_filters in (64, 64, 128, 256):
    model.add(Conv2D(n_filters,(3,3),padding = 'same',activation='relu'))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2,2)))

# Classifier head.
model.add(Flatten())
model.add(Dropout(0.3))
model.add(Dense(512,activation='relu'))
model.add(Dense(1,activation='sigmoid'))




In [None]:
# Print the layer-by-layer architecture of the CNN.
model.summary()

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit of the model.
# `learning_rate` is the supported keyword; the older `lr` alias is deprecated
# and rejected by newer Keras releases.
model.compile(loss = 'binary_crossentropy',
              optimizer = optimizers.Adam(learning_rate=1e-4),
              metrics = ['accuracy'] )

In [None]:
#callbacks
# EarlyStopping: stop once val_loss has not improved for 20 epochs and roll the
# model back to its best weights, so later evaluation does not use weights
# from up to 20 epochs past the optimum.
# ReduceLROnPlateau: multiply the learning rate by 0.3 (down to 1e-6) when
# val_accuracy stalls for 2 epochs.
callbacks_list = [EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20,
                                restore_best_weights=True),
                  ReduceLROnPlateau(monitor='val_accuracy', patience = 2, verbose=1,factor=0.3, min_lr=0.000001)
                 ]
 

In [None]:
# Train the CNN on augmented batches for up to 100 epochs (callbacks may stop earlier).
# Model.fit accepts generators directly; fit_generator is deprecated.
# Step counts are taken from the generators so the final partial batch is
# included instead of being dropped by floor division.
# NOTE(review): the test generator doubles as validation data, so early stopping
# and LR scheduling are tuned on the same images later passed to model.evaluate;
# val_X / val_y are loaded earlier but never used - consider validating on them.
history = model.fit(train_generator,
                    steps_per_epoch = len(train_generator),
                    epochs=100,
                    verbose=2,
                    validation_data=test_generator,
                    validation_steps = len(test_generator),
                    callbacks = callbacks_list
                   )

<a id ='6' ></a>
<h2 style = "background:yellow;border:0">Visualization</h2>

In [None]:
# Pull the per-epoch curves recorded during training.
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

epochs = range(1,len(acc)+1)

# One figure per metric: training values as dots, validation values as a line.
curve_specs = [
    (acc, val_acc, 'Training acc', 'Validation acc', 'Training and validation acc'),
    (loss, val_loss, 'Training loss', 'Validation loss', 'Training and validation loss'),
]
for position, (train_curve, val_curve, train_label, val_label, heading) in enumerate(curve_specs):
    if position > 0:
        # Start a fresh figure for every metric after the first.
        plt.figure()
    plt.plot(epochs, train_curve, 'o', label=train_label)
    plt.plot(epochs, val_curve, label=val_label)
    plt.title(heading)
    plt.legend()

plt.show()

In [None]:

# Evaluate the trained CNN on the test generator (reports the compiled loss and accuracy metric).
model.evaluate(test_generator)


<a id ='7' ></a>
<h2 style = "background:pink;border:0">Using VGG16 Section</h2>


In [None]:
from keras.applications import VGG16



In [None]:
# Side length (in pixels) of the square input images used for the VGG16 section.
image_size = 256

In [None]:
# VGG16 convolutional base with ImageNet weights; include_top=False drops the
# original classifier so a custom head can be attached. Input is 3-channel.
vgg_model = VGG16(input_shape= (image_size, image_size,3), 
                  weights='imagenet',
                  include_top=False)

In [None]:
# Freeze the whole VGG16 base so only the newly added head is trained at first.
vgg_model.trainable = False

In [None]:
# Transfer-learning model: the VGG16 base followed by a small dense head
# that ends in a single sigmoid unit for the binary decision.
transfer_model = Sequential([
    vgg_model,
    Flatten(),
    Dense(512,activation='relu'),
    Dense(128,activation='relu'),
    Dense(1,activation='sigmoid'),
])

In [None]:
# Print the architecture of the transfer-learning model.
transfer_model.summary()

In [None]:
# Stream augmented training images straight from disk, resized to the VGG16
# input size, with binary labels derived from the class sub-folders.
# Note: this rebinds train_generator, replacing the in-memory generator above.
train_flow_options = dict(
    target_size=(image_size, image_size),
    class_mode='binary',
    batch_size=32,
    shuffle=True,
    seed=123,
)
train_generator = train_datagen.flow_from_directory(train_dir, **train_flow_options)


In [None]:
# Stream rescaled test images straight from disk, resized to the VGG16
# input size, with binary labels derived from the class sub-folders.
# Note: this rebinds test_generator, replacing the in-memory generator above.
test_flow_options = dict(
    target_size=(image_size, image_size),
    class_mode='binary',
    batch_size=32,
    shuffle=True,
    seed=123,
)
test_generator = test_datagen.flow_from_directory(test_dir, **test_flow_options)

In [None]:
# Draw a single batch from the directory-based training generator and show up
# to its first 8 images in a 2x4 grid, each titled with its label.
sample_images, sample_labels = next(iter(train_generator))

plt.figure(figsize=(12,8))
# Slicing keeps the grid at 8 cells and tolerates a batch shorter than 8.
for j, (img, label) in enumerate(zip(sample_images[:8], sample_labels[:8])):
    plt.subplot(2,4,j+1)
    plt.imshow(image.array_to_img(img))
    plt.title(label)

plt.show()

In [None]:
# Compile the transfer model for binary classification with the Adam
# optimizer at its default settings and accuracy as the reported metric.
transfer_model.compile(loss = 'binary_crossentropy', 
                          optimizer = 'adam',
                          metrics = ['accuracy'] )

In [None]:
# Train the dense head on top of the frozen VGG16 base for up to 100 epochs.
# Model.fit accepts generators directly; fit_generator is deprecated.
# Step counts are taken from the directory generators actually being iterated,
# rather than from the train_X / test_X arrays of the earlier section, and
# include the final partial batch.
# NOTE(review): the test generator doubles as validation data, so early stopping
# and LR scheduling are tuned on the same images later passed to evaluate().
history = transfer_model.fit(train_generator,
                             steps_per_epoch = len(train_generator),
                             epochs=100,
                             verbose=2,
                             validation_data=test_generator,
                             validation_steps = len(test_generator),
                             callbacks = callbacks_list
                            )

In [None]:
# Pull the per-epoch curves recorded while training the transfer model.
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

epochs = range(1,len(acc)+1)

# One figure per metric: training values as dots, validation values as a line.
curve_specs = [
    (acc, val_acc, 'Training acc', 'Validation acc', 'Training and validation acc'),
    (loss, val_loss, 'Training loss', 'Validation loss', 'Training and validation loss'),
]
for position, (train_curve, val_curve, train_label, val_label, heading) in enumerate(curve_specs):
    if position > 0:
        # Start a fresh figure for every metric after the first.
        plt.figure()
    plt.plot(epochs, train_curve, 'o', label=train_label)
    plt.plot(epochs, val_curve, label=val_label)
    plt.title(heading)
    plt.legend()

plt.show()

In [None]:
# Evaluate the transfer model (frozen base + trained head) on the test generator.
transfer_model.evaluate(test_generator)


<a id ='8' ></a>
<h2 style = "background:pink;border:0">Fine-tuning</h2>

In [None]:
# Unfreeze the VGG16 base from 'block5_conv1' onward; every layer before it
# stays frozen so only the last convolutional block is fine-tuned.
vgg_model.trainable = True
set_trainable = False
for layer in vgg_model.layers:
    # The flag latches to True at 'block5_conv1' and stays True afterwards.
    set_trainable = set_trainable or layer.name == 'block5_conv1'
    layer.trainable = set_trainable

In [None]:
# Recompile after changing the trainable flags, using a small learning rate
# for fine-tuning. `learning_rate` is the supported keyword; the older `lr`
# alias is deprecated and rejected by newer Keras releases.
transfer_model.compile(loss = 'binary_crossentropy',
                          optimizer = optimizers.Adam(learning_rate=2e-5),
                          metrics = ['accuracy'] )

In [None]:
# Print the architecture again to inspect the model after the trainable flags were changed.
transfer_model.summary()

In [None]:
# Fine-tune the partially unfrozen transfer model for up to 100 epochs.
# Model.fit accepts generators directly; fit_generator is deprecated.
# Step counts are taken from the directory generators actually being iterated,
# rather than from the train_X / test_X arrays of the earlier section, and
# include the final partial batch.
# NOTE(review): the test generator doubles as validation data, so early stopping
# and LR scheduling are tuned on the same images later passed to evaluate().
history = transfer_model.fit(train_generator,
                             steps_per_epoch = len(train_generator),
                             epochs=100,
                             verbose=2,
                             validation_data=test_generator,
                             validation_steps = len(test_generator),
                             callbacks = callbacks_list
                            )

In [None]:
# Pull the per-epoch curves recorded during fine-tuning.
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

epochs = range(1,len(acc)+1)

# One figure per metric: training values as dots, validation values as a line.
curve_specs = [
    (acc, val_acc, 'Training acc', 'Validation acc', 'Training and validation acc'),
    (loss, val_loss, 'Training loss', 'Validation loss', 'Training and validation loss'),
]
for position, (train_curve, val_curve, train_label, val_label, heading) in enumerate(curve_specs):
    if position > 0:
        # Start a fresh figure for every metric after the first.
        plt.figure()
    plt.plot(epochs, train_curve, 'o', label=train_label)
    plt.plot(epochs, val_curve, label=val_label)
    plt.title(heading)
    plt.legend()

plt.show()

In [None]:
# Evaluate the fine-tuned transfer model on the test generator.
transfer_model.evaluate(test_generator)