In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os
import math
import shutil
import glob
import cv2
import imutils
from tqdm import tqdm
from sklearn.utils import shuffle
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

In [3]:
ROOT_DIR = './dataset/'
number_images = {}

for dir in os.listdir(os.path.join(ROOT_DIR,'Training')):
    number_images[dir] = len(os.listdir(os.path.join(ROOT_DIR,'Training',dir)))
print(number_images)

{'glioma': 1321, 'meningioma': 1339, 'notumor': 1595, 'pituitary': 1457}


## Brain Tumor

### What is Brain Tumor
   
A Brain Tumor is a collection, or mass, of abnormal cells in your brain. It is growth inside your head than can be cause serious problems.

As I calculated before there are four type of brain tumors. Brain tumors can be cancerous(malignment) or noncancerous (benign). When benign or malignant tumors grow, they can cause the pressure inside your skull to increase. This can cause brain damage, and it can be life-threatening.

# About Dataset

dataset extracted from https://www.kaggle.com/datasets/masoudnickparvar/brain-tumor-mri-dataset

dataset contains 7023 images of human brain MRI images which are classified into 4 classes:
`'glioma': 1321, 'meningioma': 1339, 'notumor': 1595, 'pituitary': 1457`


In [4]:
# identify size of each images 
path = np.random.choice(os.listdir(os.path.join(ROOT_DIR,'Training')))

for img in np.random.choice(os.listdir(os.path.join(ROOT_DIR,'Training', path)),size=10):
    arr = cv2.imread(os.path.join(ROOT_DIR, 'Training',path,img))
    print(arr.shape)

(326, 276, 3)
(361, 642, 3)
(442, 442, 3)
(215, 235, 3)
(225, 225, 3)
(1024, 1024, 3)
(269, 236, 3)
(280, 420, 3)
(251, 201, 3)
(198, 150, 3)


Dataset have different size of images. So need to convert all images same size.  

In [5]:
IMAGE_SIZE = 256
SAVE_PATH = './dataset/Training_clean/'

def crop_img(img):

    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)


    thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.erode(thresh, None, iterations=2)
    thresh = cv2.dilate(thresh, None, iterations=2)


    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    c = max(cnts, key=cv2.contourArea)


    extLeft = tuple(c[c[:, :, 0].argmin()][0])
    extRight = tuple(c[c[:, :, 0].argmax()][0])
    extTop = tuple(c[c[:, :, 1].argmin()][0])
    extBot = tuple(c[c[:, :, 1].argmax()][0])
    ADD_PIXELS = 0
    new_img = img[extTop[1]-ADD_PIXELS:extBot[1]+ADD_PIXELS, extLeft[0]-ADD_PIXELS:extRight[0]+ADD_PIXELS].copy()
    
    return new_img



if __name__ == "__main__":
    
    for i in os.listdir(os.path.join(ROOT_DIR, 'Training')):
        os.chdir(SAVE_PATH)
        if not os.path.exists(i):
            os.mkdir(i)
        os.chdir('../../')
        for j in os.listdir(os.path.join(ROOT_DIR,'Training',i)):
            image = cv2.imread(os.path.join(ROOT_DIR,'Training',i, j))
            new_img = crop_img(image)
            new_img = cv2.resize(new_img, (IMAGE_SIZE,IMAGE_SIZE))
            cv2.imwrite(os.path.join(SAVE_PATH, i, j), new_img)
        

In [6]:
## save test data

SAVE_PATH_TESTING = './dataset/Testing_clean/'
for i in os.listdir(os.path.join(ROOT_DIR, 'Testing')):
    os.chdir(SAVE_PATH_TESTING)
    if not os.path.exists(i):
        os.mkdir(i)
    os.chdir('../../')
    for j in os.listdir(os.path.join(ROOT_DIR,'Testing',i)):
        image = cv2.imread(os.path.join(ROOT_DIR,'Testing',i, j))
        new_img = crop_img(image)
        new_img = cv2.resize(new_img, (IMAGE_SIZE,IMAGE_SIZE))
        cv2.imwrite(os.path.join(SAVE_PATH_TESTING, i, j), new_img)

In [7]:
number_images

{'glioma': 1321, 'meningioma': 1339, 'notumor': 1595, 'pituitary': 1457}

In [8]:
number_images.keys()

dict_keys(['glioma', 'meningioma', 'notumor', 'pituitary'])

In [9]:
# get train data
images = []
lables = []
for i in os.listdir(SAVE_PATH):
    for j in os.listdir(os.path.join(SAVE_PATH, i)):
        images.append(cv2.imread(os.path.join(SAVE_PATH, i, j)))
        lables.append(i)

In [10]:
images = np.array(images)
lables = np.array(lables)

In [11]:
# now lables variable have many 4 labels with form of categorical. Now convert this categorical variables int numeric values
# ohe = OneHotEncoder(drop='first')
# y_train = ohe.fit_transform(lables.reshape(-1,1))
# y_train = y_train.toarray()

lb = LabelEncoder()
y_train = lb.fit_transform(lables.reshape(-1,1))


In [12]:
images.shape, y_train.shape

((5712, 256, 256, 3), (5712,))

In [13]:
X_train = images / 255

In [14]:
# get test data 
images_test = []
lables_test = []
for i in os.listdir(SAVE_PATH_TESTING):
    for j in os.listdir(os.path.join(SAVE_PATH_TESTING, i)):
        images_test.append(cv2.imread(os.path.join(SAVE_PATH_TESTING, i, j)))
        lables_test.append(i)
images_test = np.array(images_test)
lables_test = np.array(lables_test)

# ohe = OneHotEncoder(drop='first')
y_test = lb.fit_transform(lables_test.reshape(-1,1))


X_test = images_test / 255

In [15]:
## now training data need to shuffle
X_train, y_train = shuffle(X_train, y_train, random_state=42)

In [16]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
    
    tf.keras.layers.Conv2D(64, (3,3) , activation='relu'),
    tf.keras.layers.MaxPool2D((2,2)),
    
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPool2D((2,2)),
    
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Flatten(),
    
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(4, activation='softmax'),
    
])

In [17]:
model.summary()

In [18]:
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(),
    metrics = ['accuracy']
)

In [19]:
history = model.fit(X_train , y_train, batch_size=32, validation_data=(X_test, y_test), epochs=5)

Epoch 1/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m616s[0m 3s/step - accuracy: 0.6488 - loss: 0.8356 - val_accuracy: 0.6415 - val_loss: 0.9497
Epoch 2/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m621s[0m 3s/step - accuracy: 0.8307 - loss: 0.4838 - val_accuracy: 0.5111 - val_loss: 2.3850
Epoch 3/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m639s[0m 4s/step - accuracy: 0.9106 - loss: 0.2814 - val_accuracy: 0.5118 - val_loss: 1.3989
Epoch 4/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m642s[0m 4s/step - accuracy: 0.9540 - loss: 0.1558 - val_accuracy: 0.5050 - val_loss: 1.8070
Epoch 5/5
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m586s[0m 3s/step - accuracy: 0.9822 - loss: 0.0745 - val_accuracy: 0.8574 - val_loss: 0.4785


In [20]:
y_pred = model.predict(X_test)

[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 502ms/step


In [25]:
y_arg = []
for i in y_pred:
    y_arg.append(np.argmax(i))

In [29]:
# y_arg

In [23]:
from sklearn.metrics import accuracy_score

In [28]:
accuracy_score(y_arg, y_test)

0.8573607932875668