In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input, Dense, Flatten 
from sklearn import metrics
from keras.callbacks import EarlyStopping

2024-04-17 19:14:47.937464: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
curr_dir = os.getcwd()
data_folder_path = os.path.join(curr_dir, 'dataset')
dataset_path = os.path.join(data_folder_path, 'Brain Tumor Data Set')

healthy_brain_images_path = os.path.join(dataset_path, 'Healthy')
brain_tumor_images_path = os.path.join(dataset_path, 'Brain Tumor')

training_metadata_file_path = os.path.join(data_folder_path, 'metadata.csv')

In [3]:
training_metdata_df = pd.read_csv(training_metadata_file_path)
training_metdata_df = training_metdata_df[training_metdata_df["image"].str.contains("jpg")]
training_metdata_df.head(5)

Unnamed: 0.1,Unnamed: 0,image,class,format,mode,shape
0,0,Cancer (1).jpg,tumor,JPEG,RGB,"(512, 512, 3)"
3,3,Cancer (10).jpg,tumor,JPEG,RGB,"(512, 512, 3)"
5,5,Cancer (100).jpg,tumor,JPEG,RGB,"(512, 512, 3)"
6,6,Cancer (1000).jpg,tumor,JPEG,RGB,"(290, 250, 3)"
7,7,Cancer (1001).jpg,tumor,JPEG,RGB,"(620, 620, 3)"


In [4]:
print(len(training_metdata_df))

4432


In [5]:
new_training_metadata_df = training_metdata_df

In [6]:
# # For faster training reduce dataset size
# import random

# new_training_metadata_df = pd.DataFrame()

# non_healthy_data = training_metdata_df[training_metdata_df["class"] == "tumor"]
# healthy_data = training_metdata_df[training_metdata_df["class"] == "normal"]
# print(len(healthy_data), len(non_healthy_data), len(non_healthy_data) + len(healthy_data))

# random_non_healthy_data = non_healthy_data.sample(n=200)
# random_healthy_data = healthy_data.sample(n=300)

# new_training_metadata_df = pd.concat([random_non_healthy_data, random_healthy_data], ignore_index=True)

In [7]:
TEST_SIZE = 0.2
RANDOM_STATE = 42
EPOCHS = 3
BATCH_SIZE = 32
INPUT_SHAPE = (256, 256, 3)

In [8]:
train_ids, test_ids = train_test_split(new_training_metadata_df["image"], test_size = TEST_SIZE, random_state = RANDOM_STATE)

In [9]:
len(train_ids), len(test_ids)

(3545, 887)

In [10]:
def resize_image(image, size=(256, 256)):
    resized_image = image.resize(size)
    return resized_image.convert("RGB")

def load_image(image_path):
    return Image.open(image_path)

def rotate_image(image, angle):
    return image.rotate(angle, expand=True)

def flip_image(image):
    return image.transpose(Image.FLIP_LEFT_RIGHT)

def convert_image_to_numpy_array(image):
    return np.array(image)

def load_images(image_ids):
    X_classification = []
    y_classification = []

    for image_id in image_ids:
        if "Not Cancer" in image_id:
            image_path = healthy_brain_images_path + "/" + image_id
        else:
            image_path = brain_tumor_images_path + "/" + image_id

        image = load_image(image_path)
        image_resized = resize_image(image)

        classification_label = new_training_metadata_df[new_training_metadata_df['image'] == image_id]['class'].values[0]
        if classification_label == "tumor":
            classification_label = 1
        else:
            classification_label = 0

        # rotated_image_60 = resize_image(rotate_image(image, 60))
        # rotated_image_120 = resize_image(rotate_image(image, 120))

        # flipped_image_original = flip_image(image_resized)
        # flipped_image_60 = flip_image(rotated_image_60)
        # flipped_image_120 = flip_image(rotated_image_120)

        # print("Image Resized Shape:", np.array(image_resized).shape)
        # print("Rotated Image 60 Shape:", np.array(rotated_image_60).shape)
        # print("Rotated Image 120 Shape:", np.array(rotated_image_120).shape)
        # print("Flipped Image Original Shape:", np.array(flipped_image_original).shape)
        # print("Flipped Image 60 Shape:", np.array(flipped_image_60).shape)
        # print("Flipped Image 120 Shape:", np.array(flipped_image_120).shape)

        # X_classification.extend([convert_image_to_numpy_array(image_resized),
        #                          convert_image_to_numpy_array(rotated_image_60),
        #                          convert_image_to_numpy_array(rotated_image_120),
        #                          convert_image_to_numpy_array(flipped_image_original),
        #                          convert_image_to_numpy_array(flipped_image_60),
        #                          convert_image_to_numpy_array(flipped_image_120)])
        # y_classification.extend([classification_label] * 6)

        X_classification.append(convert_image_to_numpy_array(image_resized))
        y_classification.append(classification_label)

    return np.array(X_classification), np.array(y_classification)

In [11]:
X_train, y_train = load_images(train_ids)

In [12]:
X_test, y_test = load_images(test_ids)

In [13]:
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)

Shape of X_train: (21270, 256, 256, 3)
Shape of y_train: (21270,)


In [14]:
print("Shape of X_val:", X_test.shape)
print("Shape of y_val:", y_test.shape)

Shape of X_val: (5322, 256, 256, 3)
Shape of y_val: (5322,)


In [15]:
early_stopping_callback = EarlyStopping(monitor='accuracy', patience=2, restore_best_weights=True)

In [16]:
# Basic convolutional neural network (CNN) Model
def create_classification_model(input_shape):
    inputs = Input(shape=input_shape)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(32, (3, 3), activation='relu', padding='same')(pool1)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    flatten = Flatten()(pool2)
    dense1 = Dense(64, activation='relu')(flatten)
    outputs = Dense(1, activation='sigmoid')(dense1)
    
    model = Model(inputs=inputs, outputs=outputs)
    return model

classification_model = create_classification_model(INPUT_SHAPE)
classification_model.summary()

In [17]:
classification_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [18]:
classification_model.fit(X_train, y_train, epochs=EPOCHS, batch_size = BATCH_SIZE, callbacks=[early_stopping_callback])

Epoch 1/3
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m968s[0m 1s/step - accuracy: 0.7301 - loss: 15.5633
Epoch 2/3
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m791s[0m 1s/step - accuracy: 0.9600 - loss: 0.1220
Epoch 3/3
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m730s[0m 1s/step - accuracy: 0.9797 - loss: 0.0781


<keras.src.callbacks.history.History at 0x7fd4d4deaaf0>

In [19]:
y_pred = classification_model.predict(X_test)

[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 274ms/step


In [20]:
y_pred_class = (y_pred > 0.5).astype(int)
# y_pred_class

In [21]:
accuracy = metrics.accuracy_score(y_test, y_pred_class)
precision = metrics.precision_score(y_test, y_pred_class)
recall = metrics.recall_score(y_test, y_pred_class)
f1_score = metrics.f1_score(y_test, y_pred_class)
auc_score = metrics.roc_auc_score(y_test, y_pred_class)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1_score}")
print(f"AUC Score: {auc_score}")

Accuracy: 0.9573468620819241
Precision: 0.9516704508595524
Recall: 0.9741035856573705
F1 Score: 0.9627563576702215
AUC Score: 0.9548007105776031


In [22]:
confusion_matrix = metrics.confusion_matrix(y_test, y_pred_class)
print(confusion_matrix)

[[2161  149]
 [  78 2934]]
