<a href="https://colab.research.google.com/github/Stavrospanakakis/pneumonia-diagnosis/blob/master/pneumonia_diagnosis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Requirements
- Create a Kaggle account

- Go to https://www.kaggle.com/USERNAME/account (replace `USERNAME` with your Kaggle username)
and create a new API token

![Imgur](https://i.imgur.com/4MyzhNM.png)

- Upload your API token file (`kaggle.json`) using the cell below

In [None]:
!echo "Please upload your API info"
from google.colab import files
files.upload()
!echo "Files Uploaded"

!echo "Creating the kaggle folder"
!mkdir ~/.kaggle

!echo "Copying the api info to the main folder"
!cp /content/kaggle.json ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json
%cd ~/.kaggle

!echo "Downloading the dataset"
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

!echo "Unziping the dataset"
!unzip \*.zip  && rm *.zip
!echo "Done."

In [None]:
import cv2
import os
import random
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from keras.models import Sequential
from keras.layers import Conv2D, Dense, Flatten, Activation, MaxPooling2D, Dropout
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
def Get_dataset(datatype, image_size=120,
                base_dir="/root/.kaggle/chest_xray/chest_xray"):
    """Load one split of the chest X-ray dataset as shuffled, normalized arrays.

    Every file found in the ``NORMAL`` and ``PNEUMONIA`` sub-folders of
    ``<base_dir>/<datatype>`` is read as a grayscale image and resized to
    ``image_size`` x ``image_size``. Files that OpenCV cannot decode are
    skipped, and the number of skipped files is reported once at the end.

    Args:
        datatype: name of the split sub-folder (the notebook uses "train",
            "test" and "val").
        image_size: width and height, in pixels, every image is resized to.
        base_dir: folder that contains the split sub-folders.

    Returns:
        A tuple ``(images, labels)``:
        images -- array of shape (n, image_size, image_size, 1), pixel values
            divided by 255.0.
        labels -- array of length n, 0 for NORMAL and 1 for PNEUMONIA.
        Both arrays are in the same, randomly shuffled order.

    Raises:
        OSError: if a category folder does not exist or cannot be listed
            (raised by ``os.listdir``).
    """
    # dataset directory
    directory = os.path.join(base_dir, datatype)

    # pneumonia categories; the position in this list is the numeric label
    categories = ["NORMAL", "PNEUMONIA"]

    # (image, label) pairs collected from both categories
    data = []

    # files that could not be decoded as images
    skipped = 0

    for class_num, category in enumerate(categories):
        path = os.path.join(directory, category)

        for img in tqdm(os.listdir(path)):
            # read the file as a grayscale array; cv2.imread returns None
            # instead of raising when the file is not a readable image
            image_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
            if image_array is None:
                skipped += 1
                continue

            # resize the image
            resized_array = cv2.resize(image_array, (image_size, image_size))

            # add image and its category to the list
            data.append((resized_array, class_num))

    # one summary line instead of silently dropping files
    if skipped:
        print("Skipped {} unreadable file(s) under {}".format(skipped, directory))

    # shuffle images and labels together so the pairs stay aligned
    random.shuffle(data)

    # split the pairs into images and labels
    images = [features for features, _ in data]
    labels = [label for _, label in data]

    # add the single channel axis
    images = np.array(images).reshape(-1, image_size, image_size, 1)
    labels = np.array(labels)

    # normalize the data
    images = images / 255.0

    return images, labels

In [None]:
# training split: images and their 0/1 labels
train_images, train_labels = Get_dataset(datatype="train")

# test split: images and their 0/1 labels
test_images, test_labels = Get_dataset(datatype="test")

In [None]:
# image generator that applies small random flips, zooms, shifts and rotations
datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.1,
    height_shift_range=0.1,
    width_shift_range=0.1,
    rotation_range=10,
)

# iterator over augmented (images, labels) batches built from the training split
data_augmentation = datagen.flow(x=train_images, y=train_labels)

In [None]:
# Build, compile and train the binary classifier (NORMAL vs PNEUMONIA).

# number of epochs (upper bound; early stopping may end training sooner)
EPOCHS = 10

# optimizer
OPTIMIZER = Adam(learning_rate=0.0001)

# model type
model = Sequential()

# first convolutional and pooling layers
model.add(Conv2D(32, (3, 3), input_shape=train_images.shape[1:]))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# second convolutional and pooling layers
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# dense layers
# NOTE(review): this Dense layer is added before Flatten, so it receives the
# pooled feature maps rather than a flattened vector and is applied along the
# last (channel) axis only. Left unchanged to preserve the architecture --
# confirm whether Flatten was meant to come first.
model.add(Dense(128))
model.add(Activation('relu'))

# dropout layer to prevent overfitting
model.add(Dropout(0.1))

# flatten the data and map it to a single sigmoid output
model.add(Flatten())
model.add(Dense(1))
model.add(Activation('sigmoid'))

# callbacks
# restore_best_weights=True: the in-memory `model` ends up with the weights of
# the best val_loss epoch (the same ones ModelCheckpoint writes to disk)
# instead of the weights of the last epoch that happened to run.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=5,
    restore_best_weights=True
)

# keep only the best model (lowest val_loss) on disk
model_checkpoint = ModelCheckpoint(
    '/content/model.h5',
    monitor='val_loss',
    mode='min',
    verbose=1,
    save_best_only=True
)

callbacks = [early_stopping, model_checkpoint]

# compile the model
model.compile(
    loss='binary_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

# train the model
# NOTE(review): the test split is used as validation data, so early stopping
# and checkpoint selection are tuned on the test set; metrics reported on it
# are optimistic. Consider holding out part of the training split instead.
history = model.fit(
    data_augmentation,
    epochs=EPOCHS,
    validation_data=(test_images, test_labels),
    callbacks=callbacks
)


In [None]:
# visualizing losses and accuracy per epoch for the training data and the
# validation data (the test split) recorded during model.fit
train_loss = history.history['loss']
val_loss = history.history['val_loss']
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

# Early stopping can end training before EPOCHS is reached, so the x-axis is
# derived from the recorded history; range(EPOCHS) would then have a different
# length than the curves and plt.plot would raise.
epochs_ran = range(len(train_loss))

# Labels are attached to each line directly: a set literal passed to
# plt.legend has no defined order and could swap the two captions.
plt.figure()
plt.plot(epochs_ran, train_loss, label='Training Data')
plt.plot(epochs_ran, val_loss, label='Test Data')
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.figure()
plt.plot(epochs_ran, train_acc, label='Training Data')
plt.plot(epochs_ran, val_acc, label='Test Data')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.show()

In [None]:
# predictions on the "val" split, which is not used for training or validation
pred_images, pred_labels = Get_dataset('val')

predictions = model.predict(pred_images)

# index in this list == numeric label produced by Get_dataset
class_names = ["NORMAL", "PNEUMONIA"]

# The model ends in a single sigmoid unit, so each prediction is one score;
# scores above 0.5 are read as PNEUMONIA, everything else as NORMAL.
# Both the true class and the predicted class are printed, so a line can no
# longer be mistaken for "the model predicted <true class>".
for actual, score in zip(pred_labels, predictions.ravel()):
    predicted = 1 if score > 0.5 else 0
    print("\nactual: {} | predicted: {} (score={:.3f})".format(
        class_names[actual], class_names[predicted], float(score)))