## Introduction

This document is part of a project that compares the performance of deep learning models in the classification of a disease or disorder using 3D and 2D brain images.
In this document, I train models using 2D brain images of people with and without brain tumor.

The data is accessible here: https://www.kaggle.com/datasets/ahmedhamada0/brain-tumor-detection

The original dataset contains 3000 images. However, to allow a closer comparison with the 3D image classification model (for which I used a 3D autism brain image dataset), I only included 628 images.
The subset was equally distributed between the two classes of the data, that is:

- **Tumor**
- **No Tumor**

In [None]:
import os
import cv2
from PIL import Image
import numpy as np
import tensorflow as tf
from tensorflow import keras 
from sklearn.model_selection import train_test_split
from keras.utils import normalize
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import recall_score, precision_score, f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

import seaborn as sns
import matplotlib.pyplot as plt

## Data preparation

In [None]:
image_directory = ' BrainTumor_subsetData/'

In [None]:
no_tumor = os.listdir(image_directory + 'no')
yes_tumor = os.listdir(image_directory + 'yes')

In [None]:
print(len(no_tumor))

In [None]:
dataset = []
label = []

In [None]:
# label 0 for no_tumor images
for i, image_name in enumerate(no_tumor):
    if(image_name.split('.')[1] == 'jpg'):
        image = cv2.imread(image_directory + 'no/' + image_name)
        image = Image.fromarray(image, 'RGB')
        image = image.resize((64,64))
        dataset.append(np.array(image))
        label.append(0)
        

In [None]:
# label 1 for no_tumor images
for i, image_name in enumerate(yes_tumor):
    if(image_name.split('.')[1] == 'jpg'):
        image = cv2.imread(image_directory + 'yes/' + image_name)
        image = Image.fromarray(image, 'RGB')
        image = image.resize((64,64))
        dataset.append(np.array(image))
        label.append(1)

In [None]:
print(label)

In [None]:
print(len(label))

In [None]:
print(len(dataset))

In [None]:
dataset = np.array(dataset)
label = np.array(label)

In [None]:
# train-test split
x_train, x_test, y_train, y_test = train_test_split(dataset, label, test_size = 0.2, random_state = 0)

In [None]:
x_train = normalize(x_train, axis = 1)
x_test = normalize(x_test, axis = 1)

In [None]:
# use when we use categorical crossentropy
y_train = to_categorical(y_train, num_classes = 2)
y_test = to_categorical(y_test, num_classes = 2)

## Model building

In [None]:
print(x_train.shape)
print(x_test.shape)

In [None]:
print(y_train.shape)
print(y_test.shape)

In [None]:
model = Sequential()

In [None]:
#input_size = 64 
model.add(Conv2D(32, (3,3),input_shape = (64,64, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2,2)))

In [None]:
model.add(Conv2D(32, (3,3),kernel_initializer = 'he_uniform'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2,2)))

In [None]:
model.add(Conv2D(64, (3,3),kernel_initializer = 'he_uniform'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2,2)))

In [None]:
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.1))
model.add(Dense(2)) #categorical cross entropy so we use 2
model.add(Activation('softmax')) #softmax for categorical entropy

In [None]:
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [None]:
# Create an instance of EarlyStopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [None]:
%%time


model_train = model.fit(x_train, y_train, batch_size = 10, verbose = 1, epochs = 60, validation_data = (x_test, y_test), 
          shuffle = False, callbacks=[early_stopping])

In [None]:
model.save('Braintumor_categorical.h5')

## Model testing

In [None]:
model = load_model('Braintumor_categorical.h5')

In [None]:
pred_image = cv2.imread('Br35H-Brain Tumor Detection 2020/pred/pred8.jpg')

In [None]:
img = Image.fromarray(pred_image)

In [None]:
img = img.resize((64,64))


In [None]:
img = np.array(img)

In [None]:
print(img)

In [None]:
input_img = np.expand_dims(img, axis = 0)

In [None]:
result = model.predict(input_img)

In [None]:
print(result)
#the sum shows the sum of the probabilities for both classes will always be 1.0. 

## Testing model's classification accuracy

The test dataset contained 1000 images, equally distributed between the two classes. That is:

- **Tumor**: 500 images

- **No Tumor**: 500 images

In [None]:
# Load and preprocess the images
data_root = 'BrainTumor_TestDF'
no_tumor_folder = os.path.join(data_root, 'NoTumor')
tumor_folder = os.path.join(data_root, 'Tumor')

# Display names indexed by class label (0 = no tumor, 1 = tumor).
CLASS_NAMES = ('No Tumor', 'Tumor')


def preprocess_image(image):
    """Resize `image` to 64x64 and scale it the way the training batch was scaled.

    Training called ``normalize(x_train, axis=1)`` on a 4-D batch array, so the
    normalised axis was the first axis of each image. For a single 3-D image
    that is axis 0, not axis 1.

    Args:
        image: Image array as returned by ``cv2.imread``.

    Returns:
        The resized, normalised image array.
    """
    img = cv2.resize(image, (64, 64))  # Resize the image
    img = normalize(img, axis=0)  # Same axis as the batch-level training normalisation
    return img


test_images = []
test_labels = []

# Label 0 for no tumor, label 1 for tumor
for folder, class_label in ((no_tumor_folder, 0), (tumor_folder, 1)):
    # Sorted so the per-image printout below has a stable order between runs.
    for image_filename in sorted(os.listdir(folder)):
        image_path = os.path.join(folder, image_filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for files it cannot read; passing that
            # to cv2.resize would raise.
            print(f"Skipping unreadable file: {image_path}")
            continue
        test_images.append(preprocess_image(image))
        test_labels.append(class_label)

# Convert test_images and test_labels to numpy arrays
test_images = np.array(test_images)
test_labels = to_categorical(np.array(test_labels), num_classes=2)  # Convert to one-hot encoded format

# Load the trained model
model = load_model('Braintumor_categorical.h5')  # Replace with the actual path to your model

# Make predictions using the model
predictions = model.predict(test_images)
predicted_labels = np.argmax(predictions, axis=1)

# Calculate accuracy
actual_labels = np.argmax(test_labels, axis=1)
accuracy = np.mean(predicted_labels == actual_labels) * 100
print(f"Accuracy: {accuracy:.2f}%")

# Print actual and predicted classifications
for image_number, (actual, predicted) in enumerate(zip(actual_labels, predicted_labels), start=1):
    actual_classification = CLASS_NAMES[actual]
    predicted_classification = CLASS_NAMES[predicted]

    print(f"Image {image_number} - Actual: {actual_classification} - Predicted: {predicted_classification}")


## Confusion matrix

In [None]:
# Confusion matrix
conf_matrix = confusion_matrix(test_labels.argmax(axis=1), predicted_labels)
print("Confusion Matrix:")
print(conf_matrix)


In [None]:
# heatmap
plt.figure(figsize=(8, 6))
sns.set(font_scale=1.5)
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="RdPu", xticklabels=["No Tumor", "Tumor"], yticklabels=["No Tumor", "Tumor"])
plt.xlabel("Predicted",  fontsize=14, fontweight='bold')
plt.ylabel("Actual",  fontsize=14, fontweight='bold')
plt.title("Confusion Matrix",  fontsize=14, fontweight='bold')
plt.savefig("Tumor_CategoricalClass_Confusion Matrix.png", dpi = 300)
plt.show()


In [None]:
# Calculating classification report
class_report = classification_report(np.argmax(test_labels, axis=1), predicted_labels)
print("Classification Report:")
print(class_report)


In [None]:
recall = recall_score(test_labels.argmax(axis=1), predicted_labels)
precision = precision_score(test_labels.argmax(axis=1), predicted_labels)
f1 = f1_score(test_labels.argmax(axis=1), predicted_labels)

print(f"Recall: {recall:.2f}")
print(f"Precision: {precision:.2f}")
print(f"F1-Score: {f1:.2f}")


## Results


**Accuracy: 92.70%**

**Recall: 89%**

**Precision: 96%**