In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input, Dense, Flatten 
from sklearn import metrics
from keras.callbacks import EarlyStopping

In [2]:
curr_dir = os.getcwd()
data_folder_path = os.path.join(curr_dir, 'dataset')
dataset_path = os.path.join(data_folder_path, 'Brain Tumor Data Set')

healthy_brain_images_path = os.path.join(dataset_path, 'Healthy')
brain_tumor_images_path = os.path.join(dataset_path, 'Brain Tumor')

training_metadata_file_path = os.path.join(data_folder_path, 'metadata.csv')

In [3]:
training_metdata_df = pd.read_csv(training_metadata_file_path)
training_metdata_df = training_metdata_df[training_metdata_df["image"].str.contains("jpg")]
training_metdata_df.head(5)

Unnamed: 0.1,Unnamed: 0,image,class,format,mode,shape
0,0,Cancer (1).jpg,tumor,JPEG,RGB,"(512, 512, 3)"
3,3,Cancer (10).jpg,tumor,JPEG,RGB,"(512, 512, 3)"
5,5,Cancer (100).jpg,tumor,JPEG,RGB,"(512, 512, 3)"
6,6,Cancer (1000).jpg,tumor,JPEG,RGB,"(290, 250, 3)"
7,7,Cancer (1001).jpg,tumor,JPEG,RGB,"(620, 620, 3)"


In [4]:
print(len(training_metdata_df))

4432


In [5]:
new_training_metadata_df = training_metdata_df

In [6]:
# # For faster training reduce dataset size
# import random

# new_training_metadata_df = pd.DataFrame()

# non_healthy_data = training_metdata_df[training_metdata_df["class"] == "tumor"]
# healthy_data = training_metdata_df[training_metdata_df["class"] == "normal"]
# print(len(healthy_data), len(non_healthy_data), len(non_healthy_data) + len(healthy_data))

# random_non_healthy_data = random.sample(list(non_healthy_data["image"].values), 1000)
# random_healthy_data = random.sample(list(non_healthy_data["image"].values), 1200)

# def get_row_from_image_id(image_id):
#     return training_metdata_df[training_metdata_df["image"] == image_id]

# for image_id in random_non_healthy_data:
#     row = get_row_from_image_id(image_id)
#     new_training_metadata_df = new_training_metadata_df.append(row, ignore_index=True)

# for image_id in random_healthy_data:
#     row = get_row_from_image_id(image_id)
#     new_training_metadata_df = new_training_metadata_df.append(row, ignore_index=True)

In [7]:
TEST_SIZE = 0.2
RANDOM_STATE = 42
EPOCHS = 3 
INPUT_SHAPE = (256, 256, 3)

In [8]:
train_ids, test_ids = train_test_split(new_training_metadata_df["image"], test_size = TEST_SIZE, random_state = RANDOM_STATE)

In [9]:
len(train_ids), len(test_ids)

(3545, 887)

In [10]:
def open_image_and_convert_to_numpy_array(path):
    return np.array(Image.open(path))

def resize_image(image):
    return resize(image, (256, 256, 3), preserve_range=True, anti_aliasing=True)

def load_image_and_mask(image_path):
    return open_image_and_convert_to_numpy_array(image_path)

def load_images(image_ids):
    X_classification = []
    y_classification = []

    for image_id in image_ids:
        if "Not Cancer" in image_id:
            image_path = healthy_brain_images_path + "/" + image_id
        else:
            image_path = brain_tumor_images_path + "/" + image_id

        image = load_image_and_mask(image_path)

        image_resized = resize_image(image) / 255
        
        classification_label = new_training_metadata_df[new_training_metadata_df['image'] == image_id]['class'].values[0]
        if classification_label == "tumor":
            classification_label = 1
        else:
            classification_label = 0

        X_classification.append(image_resized)
        y_classification.append(classification_label) 

    return np.array(X_classification), np.array(y_classification)
        

In [11]:
X_train, y_train = load_images(train_ids)

In [12]:
X_test, y_test = load_images(test_ids)

In [13]:
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)

Shape of X_train: (3545, 256, 256, 3)
Shape of y_train: (3545,)


In [14]:
print("Shape of X_val:", X_test.shape)
print("Shape of y_val:", y_test.shape)

Shape of X_val: (887, 256, 256, 3)
Shape of y_val: (887,)


In [15]:
early_stopping_callback = EarlyStopping(monitor='accuracy', patience=2, restore_best_weights=True)

In [16]:
def create_classification_model(input_shape):
    inputs = Input(shape=input_shape)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    flatten = Flatten()(pool1)
    dense1 = Dense(64, activation='relu')(flatten)
    outputs = Dense(1, activation='sigmoid')(dense1)
    
    model = Model(inputs=inputs, outputs=outputs)
    return model

classification_model = create_classification_model(INPUT_SHAPE)
classification_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 256, 256, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 128, 128, 32)      0         
 D)                                                              
                                                                 
 flatten (Flatten)           (None, 524288)            0         
                                                                 
 dense (Dense)               (None, 64)                33554496  
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                             

In [17]:
classification_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [18]:
classification_model.fit(X_train, y_train, epochs=EPOCHS, callbacks=[early_stopping_callback])

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7fcac2d5afa0>

In [19]:
y_pred = classification_model.predict(X_test)



In [20]:
y_pred_class = (y_pred > 0.5).astype(int)
# y_pred_class

In [21]:
accuracy = metrics.accuracy_score(y_test, y_pred_class)
precision = metrics.precision_score(y_test, y_pred_class)
recall = metrics.recall_score(y_test, y_pred_class)
f1_score = metrics.f1_score(y_test, y_pred_class)
auc_score = metrics.roc_auc_score(y_test, y_pred_class)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1_score}")
print(f"AUC Score: {auc_score}")

Accuracy: 0.8410372040586246
Precision: 0.7963875205254516
Recall: 0.9661354581673307
F1 Score: 0.8730873087308731
AUC Score: 0.8220287680447044


In [22]:
confusion_matrix = metrics.confusion_matrix(y_test, y_pred_class)
print(confusion_matrix)

[[261 124]
 [ 17 485]]
