Uploading the Dataset from my Drive

In [1]:
# ! gdown https://drive.google.com/drive/folders/1eJPlwS6bd_3SUlm-rh_t7KglTaR4zrIQ?usp=sharing

from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


Importing libraries

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
import random
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

Creating the DataSet, it spelt to: training/testing/validation data

In [3]:
IMG_SIZE = 150
categories = ["NORMAL", "PNEUMONIA"]

def create_data(data_path="/content/gdrive/MyDrive/chest_xray/train"):
    Created_data = []
    for category in categories:

        path = os.path.join(data_path,category)
        class_num = categories.index(category) 
        count = 0
        for img in tqdm(os.listdir(path)):
            count += 1
            if class_num == 1 and count == 1352:
                break
            try:
                img_array = cv2.imread(os.path.join(path,img) ,cv2.IMREAD_GRAYSCALE)
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE)) 
                if class_num == 0:
                    array = [1, 0]
                else:
                    array = [0, 1]
                Created_data.append([new_array, array])
            except Exception as e: 
                pass

    return Created_data

print("Creating training Data: ")
training_data = create_data("/content/gdrive/MyDrive/chest_xray/train")

print("Creating testing Data: ")
test_data = create_data("/content/gdrive/MyDrive/chest_xray/test")

print("Creating validation Data: ")
val_data = create_data("/content/gdrive/MyDrive/chest_xray/val")

Creating training Data: 


100%|██████████| 1352/1352 [00:44<00:00, 30.27it/s]
 35%|███▍      | 1351/3876 [01:42<03:11, 13.18it/s]


Creating testing Data: 


100%|██████████| 244/244 [00:07<00:00, 30.99it/s]
100%|██████████| 390/390 [00:08<00:00, 43.47it/s]


Creating validation Data: 


100%|██████████| 9/9 [00:03<00:00,  2.99it/s]
100%|██████████| 9/9 [00:01<00:00,  4.81it/s]


Shuffling the Data to purpose of reducing variance and making sure that models remain general and overfit less.

In [4]:
random.shuffle(training_data)
random.shuffle(test_data)
random.shuffle(val_data)

Spilting data into categories

In [5]:
x_train, x_test, x_val = [], [], []
y_train, y_test, y_val = [], [], []

for features,label in training_data:
    x_train.append(features)
    y_train.append(label)
    
for features,label in test_data:
    x_test.append(features)
    y_test.append(label)

for features,label in val_data:
    x_val.append(features)
    y_val.append(label)

Saving data model for using it in another algorithms

In [6]:
np.save("/content/saved_data/x_train.npy", x_train)
np.save("/content/saved_data/y_train.npy", y_train)

np.save("/content/saved_data/x_test.npy", x_test)
np.save("/content/saved_data/y_test.npy", y_test)

np.save("/content/saved_data/x_val.npy", x_val)
np.save("/content/saved_data/y_val.npy", y_val)

reshaping the data to maximize the accuracy

In [7]:
x_train = np.array(x_train).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y_train = np.array(y_train)

x_test = np.array(x_test).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y_test = np.array(y_test)

x_val = np.array(x_val).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y_val = np.array(y_val)

x_train = x_train/255.
x_test = x_test/255.
x_val = x_val/255.

Running CNN algorithm and checking model accuracy

In [11]:
x_train = np.load("/content/gdrive/MyDrive/saved_data1/x_train.npy", allow_pickle=True)
x_test = np.load("/content/gdrive/MyDrive/saved_data1/x_test.npy", allow_pickle=True)
y_train = np.load("/content/gdrive/MyDrive/saved_data1/y_train.npy", allow_pickle=True)
y_test = np.load("/content/gdrive/MyDrive/saved_data1/y_test.npy", allow_pickle=True)
y_val = np.load("/content/gdrive/MyDrive/saved_data1/y_val.npy", allow_pickle=True)
y_val = np.load("/content/gdrive/MyDrive/saved_data1/y_val.npy", allow_pickle=True)

x_train = np.array(x_train).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y_train = np.array(y_train)

x_test = np.array(x_test).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y_test = np.array(y_test)

x_val = np.array(x_val).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y_val = np.array(y_val)

x_train = x_train/255.
x_test = x_test/255.
x_val = x_val/255.

In [12]:
model = Sequential()
model.add(Conv2D(16, (3, 3), activation='relu', padding="same", input_shape=x_train.shape[1:]))
model.add(Conv2D(16, (3, 3), padding="same", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3), activation='relu', padding="same", input_shape=x_train.shape[1:]))
model.add(Conv2D(32, (3, 3), padding="same", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu', padding="same"))
model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(96, (3, 3), dilation_rate=(2, 2), activation='relu', padding="same"))
model.add(Conv2D(96, (3, 3), padding="valid", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, (3, 3), dilation_rate=(2, 2), activation='relu', padding="same"))
model.add(Conv2D(128, (3, 3), padding="valid", activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

model.add(Dense(64, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(2 , activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.fit(x_train, y_train, batch_size=32, epochs=8, validation_data=(x_val, y_val))

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f87f1e4a310>