In [45]:
import numpy as np
import pandas as pd

import os
# Traverse every directory under the dataset root. Each file's full path is
# built but intentionally not stored or printed.
for root, _subdirs, files in os.walk('brain-tumor-mri-dataset'):
    for name in files:
        os.path.join(root, name)

In [46]:
import keras
from keras.models import Sequential
from keras.layers import Conv2D,Flatten,Dense,MaxPooling2D,Dropout
from keras import backend as K

import ipywidgets as widgets
import io
from PIL import Image
import tqdm
from sklearn.model_selection import train_test_split
import cv2
from sklearn.utils import shuffle
import tensorflow as tf

import matplotlib.pyplot as plt
import seaborn as sns

In [47]:
labels = ['glioma', 'meningioma', 'notumor', 'pituitary']
image_size = 75
train_folder_path = 'brain-tumor-mri-dataset/Training'
test_folder_path = 'brain-tumor-mri-dataset/Testing'


def load_image_folder(root_path, class_names, size):
    """Read every image below ``root_path/<class_name>`` and resize it.

    Parameters
    ----------
    root_path : str
        Directory that contains one sub-directory per class.
    class_names : list of str
        Sub-directory names; a class's position in this list is its label.
    size : int
        Images are resized to ``(size, size)``.

    Returns
    -------
    (list, list)
        The resized images as returned by ``cv2.resize`` and the matching
        integer class indices, in the same order.
    """
    images, targets = [], []
    for class_index, class_name in enumerate(class_names):
        folder_path = os.path.join(root_path, class_name)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # seeded shuffle below reproducible across machines.
        for filename in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, filename)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread signals an unreadable / non-image file with None;
                # passing that to cv2.resize would raise an opaque error.
                print(f"Skipping unreadable image: {file_path}")
                continue
            images.append(cv2.resize(img, (size, size)))
            targets.append(class_index)
    return images, targets


train_images, train_labels = load_image_folder(train_folder_path, labels, image_size)
test_images, test_labels = load_image_folder(test_folder_path, labels, image_size)

X_train = np.array(train_images)
Y_train = np.array(train_labels).astype(int)
X_test = np.array(test_images)
Y_test = np.array(test_labels).astype(int)

# Shuffle images and labels together so classes are interleaved.
X_train, Y_train = shuffle(X_train, Y_train, random_state=101)
X_train.shape

(5712, 75, 75, 3)

In [48]:
from keras.preprocessing.image import ImageDataGenerator

train_dir = 'brain-tumor-mri-dataset/Training'
test_dir = 'brain-tumor-mri-dataset/Testing'

img_height = 128
img_width = 128

# Augmentation is applied to the training stream only; pixel values are
# scaled to [0, 1] for both streams.
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    rescale=1./255
)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    class_mode='categorical')

# flow_from_directory shuffles by default. The evaluation stream must keep
# a fixed order so that anything predicted from it lines up with
# `test_generator.classes`, which later cells read as ground truth.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    class_mode='categorical',
    shuffle=False)

Found 5712 images belonging to 4 classes.
Found 1311 images belonging to 4 classes.


In [49]:
# Hold out 10% of the training images under their own names (X_val / Y_val).
# Previously the hold-out was written to X_test / Y_test, which discarded the
# images loaded from the Testing folder.
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.1, random_state=101)

# One-hot encode the integer class indices for categorical cross-entropy.
Y_train = tf.keras.utils.to_categorical(Y_train, num_classes=len(labels))
Y_val = tf.keras.utils.to_categorical(Y_val, num_classes=len(labels))
Y_test = tf.keras.utils.to_categorical(Y_test, num_classes=len(labels))

In [50]:
from tensorflow.keras.layers import BatchNormalization
# Two Conv -> BatchNorm -> MaxPool stages followed by a dense classifier head.
model = Sequential()
# Hidden convolution layers use ReLU. The first layer previously used softmax,
# which normalises each pixel's 64 filter responses to sum to 1 and belongs
# only on the final classification layer.
model.add(Conv2D(64, (3, 3), activation='relu', input_shape=(image_size, image_size, 3)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
# One output unit per tumour class (4 classes), softmax for class probabilities.
model.add(Dense(4, activation='softmax'))

In [51]:
# Print the layer-by-layer summary of the model built above.
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 73, 73, 64)        1792      
_________________________________________________________________
batch_normalization_6 (Batch (None, 73, 73, 64)        256       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 34, 34, 128)       73856     
_________________________________________________________________
batch_normalization_7 (Batch (None, 34, 34, 128)       512       
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 17, 17, 128)       0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 17, 17, 128)      

In [52]:
from keras.metrics import Precision, Recall
from keras import backend as K

def f1_score(y_true, y_pred):
    """Micro-averaged F1 score over all entries of the batch.

    Both inputs are rounded to 0/1 first, then true positives, false
    positives and false negatives are summed over every element.

    Parameters
    ----------
    y_true : tensor
        Ground-truth indicators (rounded to 0/1).
    y_pred : tensor
        Predicted scores (rounded to 0/1).

    Returns
    -------
    tensor
        Scalar ``2 * precision * recall / (precision + recall)``, with
        ``K.epsilon()`` added to each denominator to avoid division by zero.
    """
    y_true = K.round(y_true)
    y_pred = K.round(y_pred)
    tp = K.sum(y_true * y_pred)
    fp = K.sum((1 - y_true) * y_pred)
    fn = K.sum(y_true * (1 - y_pred))
    # Precision divides by predicted positives (tp + fp). The earlier version
    # used fn here, which made precision identical to recall.
    precision = tp / (tp + fp + K.epsilon())
    recall = tp / (tp + fn + K.epsilon())
    f1 = 2 * ((precision * recall) / (precision + recall + K.epsilon()))
    return f1

from keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import RMSprop

# Halt training when the validation loss has not improved for 3 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

# Categorical cross-entropy with RMSprop; track accuracy, the custom F1
# metric, and the built-in precision / recall metrics.
model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(learning_rate=0.001),
    metrics=['accuracy', f1_score, Precision(), Recall()],
)

In [53]:
# Write the compiled model to 'projectbraintumor.h5'.
# NOTE(review): this cell sits before the model.fit cell, so in top-to-bottom
# execution the file is written before any training - confirm this is intended.
model.save('projectbraintumor.h5')

In [54]:
# Train for up to 20 epochs; 10% of X_train is held out by Keras for
# validation and early stopping watches that validation loss.
history = model.fit(
    X_train,
    Y_train,
    batch_size=64,
    epochs=20,
    validation_split=0.1,
    callbacks=[early_stopping],
)

# Persist the model after training so the file on disk holds the learned
# weights (a save issued before fit only captures the initial weights).
model.save('projectbraintumor.h5')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20


In [None]:
labels = ['glioma', 'meningioma', 'notumor', 'pituitary']
image_size = 75
train_folder_path = 'brain-tumor-mri-dataset/Training'
test_folder_path = 'brain-tumor-mri-dataset/Testing'

# Reload both splits from disk (the test split is used for evaluation below).
# One loop handles both folders instead of two copy-pasted loops.
train_images, train_labels = [], []
test_images, test_labels = [], []
for split_path, split_images, split_labels in (
    (train_folder_path, train_images, train_labels),
    (test_folder_path, test_images, test_labels),
):
    for i, label in enumerate(labels):
        folder_path = os.path.join(split_path, label)
        # os.listdir order is arbitrary; sort so the load order (and the
        # seeded shuffle below) is reproducible.
        for filename in sorted(os.listdir(folder_path)):
            img = cv2.imread(os.path.join(folder_path, filename))
            if img is None:
                # cv2.imread returns None for unreadable / non-image files;
                # cv2.resize would fail on it.
                print(f"Skipping unreadable image: {os.path.join(folder_path, filename)}")
                continue
            img = cv2.resize(img, (image_size, image_size))
            split_images.append(img)
            split_labels.append(i)

# Convert the image data and label lists to NumPy arrays (labels as integers)
X_train = np.array(train_images)
Y_train = np.array(train_labels).astype(int)
X_test = np.array(test_images)
Y_test = np.array(test_labels).astype(int)

# Shuffle the training images and labels together
X_train, Y_train = shuffle(X_train, Y_train, random_state=101)
X_train.shape

In [None]:
from sklearn.metrics import classification_report

# Take the ground truth from Y_test, the labels that belong to X_test, rather
# than from the separate directory generator, so both arrays always have the
# same length and order. Y_test may be integer class indices or one-hot rows
# depending on which cells have run; reduce either form to class indices.
Y_true = np.argmax(Y_test, axis=-1) if np.ndim(Y_test) > 1 else np.asarray(Y_test)
Y_pred = model.predict(X_test)
predicted_labels = np.argmax(Y_pred, axis=-1)

print(classification_report(Y_true, predicted_labels, target_names=labels))

In [None]:
# Training vs. validation accuracy, one point per completed epoch.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
epochs = range(len(acc))  # x-axis values: zero-based epoch index

fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(epochs, acc, 'r', label="Training Accuracy")
ax.plot(epochs, val_acc, 'b', label="Validation Accuracy")
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend(loc='upper left')
# Save a copy of the figure next to the notebook before displaying it.
fig.savefig('Graphs of Training and Validation Accuracy.png')
plt.show()

In [None]:
# Training vs. validation loss, one point per completed epoch.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(loss))

fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(epochs, loss, 'r', label="Training loss")
ax.plot(epochs, val_loss, 'b', label="Validation loss")
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
# Fix the y-range so the curves are shown on a 0 to 1.1 scale.
ax.set_ylim(0, 1.1)
# Save a copy of the figure next to the notebook before displaying it.
fig.savefig('Graphs of Training and Validation Loss.png')
plt.show()

In [None]:
from tensorflow.keras.preprocessing import image

sample_path = 'brain-tumor-mri-dataset/Testing/pituitary/Te-piTr_0000.jpg'

# Model input: read with OpenCV, resize to 75x75 and add a leading batch axis.
img = cv2.imread(sample_path)
img = cv2.resize(img, (75, 75))
img_array = np.array(img).reshape(1, 75, 75, 3)

# Display the same file at its original resolution and save the figure.
img = image.load_img(sample_path)
plt.imshow(img, interpolation='nearest')
plt.savefig('Pituitary.png')
plt.show()

In [None]:
# Run the model on the single prepared image and show the index of the
# largest output value.
a = model.predict(img_array)
indices = np.argmax(a)
indices

In [None]:
from collections import defaultdict

# Simulated stratified 80/10/10 split on random placeholder data.

# Sample count matches the image totals used elsewhere in this notebook
# (5712 training + 1311 testing = 7023).
num_total_samples = 7023

# Seed before drawing the random data so the demo is reproducible; seeding
# after the draw (as was done inside the loop) had no effect.
np.random.seed(101)
dataset = np.random.randint(0, 10, size=(num_total_samples, 2))
# Named sample_labels so the class-name list `labels` is not overwritten.
sample_labels = np.random.randint(0, 4, size=(num_total_samples,))

train_ratio = 0.8
test_ratio = 0.1
val_ratio = 0.1  # informational: validation receives whatever remains

# Group the samples by class label.
class_samples = defaultdict(list)
for i, label in enumerate(sample_labels):
    class_samples[label].append(dataset[i])

train_set = []
test_set = []
val_set = []
split_counts = {}  # class label -> (train, test, validation) sizes

for class_label, samples in class_samples.items():
    num_samples = len(samples)
    num_train_samples = int(num_samples * train_ratio)
    num_test_samples = int(num_samples * test_ratio)
    # Validation takes the remainder so every sample lands in exactly one set.
    num_val_samples = num_samples - num_train_samples - num_test_samples
    split_counts[class_label] = (num_train_samples, num_test_samples, num_val_samples)

    test_end = num_train_samples + num_test_samples
    train_set.extend(samples[:num_train_samples])
    test_set.extend(samples[num_train_samples:test_end])
    val_set.extend(samples[test_end:])

train_set = np.array(train_set)
test_set = np.array(test_set)
val_set = np.array(val_set)

# Report the per-class sizes recorded while splitting.
for class_label, (n_train, n_test, n_val) in split_counts.items():
    print("Class Label:", class_label)
    print("Number of samples in train set:", n_train)
    print("Number of samples in test set:", n_test)
    print("Number of samples in validation set:", n_val)

In [None]:
# Pie chart of how the images are distributed over the four classes.
# `labels` is reassigned here to display names for the chart.
labels = ['Glioma tumor', 'Meningioma tumor', 'No tumor', 'Pituitary tumor']
colors = ['#99c2ff', '#6699ff', '#3377ff', '#0052cc']

# Image counts per class, in the same order as `labels`.
numbers = [1621, 1645, 2000, 1757]
total = sum(numbers)
percent = [(count / total) * 100 for count in numbers]

fig1, ax1 = plt.subplots()
ax1.pie(percent, colors=colors, labels=labels, autopct='%1.1f%%', startangle=90)
ax1.axis('equal')  # equal aspect ratio keeps the pie circular
ax1.set_title("Class Distribution of the Dataset")
# Save a copy of the figure next to the notebook before displaying it.
fig1.savefig('Class Distribution of the Dataset.png')
plt.show()

In [None]:
# Pie chart of the share of images in the Training and Testing folders.
train = 5712
test = 1311
total = train + test
train_percent = (train / total) * 100
test_percent = (test / total) * 100

# `labels` is reassigned here to the two folder names for the chart.
labels = ['Training', 'Testing']
percent = [train_percent, test_percent]
colors = ['#3377ff', '#cc6666']

fig1, ax1 = plt.subplots()
ax1.pie(percent, colors=colors, labels=labels, autopct='%1.1f%%', startangle=90)
ax1.axis('equal')  # equal aspect ratio keeps the pie circular
ax1.set_title("Distribution of Images in the Training and Testing Folders")
# Save a copy of the figure next to the notebook before displaying it.
fig1.savefig('Distribution of Images in the Training and Testing Folders.png')
plt.show()

In [None]:

from sklearn.metrics import confusion_matrix

labels = ['Glioma tumor', 'Meningioma tumor', 'No tumor', 'Pituitary tumor']

# Take the ground truth from Y_test, the labels that belong to X_test, rather
# than from the separate directory generator, so both arrays always have the
# same length and order. Y_test may be integer class indices or one-hot rows
# depending on which cells have run; reduce either form to class indices.
true_labels = np.argmax(Y_test, axis=-1) if np.ndim(Y_test) > 1 else np.asarray(Y_test)
y_pred = model.predict(X_test)
predicted_labels = np.argmax(y_pred, axis=-1)

print("Shape of true_labels:", true_labels.shape)
print("Data type of true_labels:", true_labels.dtype)
print("Shape of predicted_labels:", predicted_labels.shape)
print("Data type of predicted_labels:", predicted_labels.dtype)

# Pin the class set so the matrix always has one row/column per entry of
# `labels`, even if some class never occurs; otherwise the tick labels below
# would be attached to the wrong cells.
cm = confusion_matrix(true_labels, predicted_labels, labels=np.arange(len(labels)))

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, cmap="Blues", fmt="d", cbar=False)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
# Heatmap cells are centred at i + 0.5, so ticks are placed there.
plt.xticks(ticks=np.arange(len(labels)) + 0.5, labels=labels)
plt.yticks(rotation=0, ticks=np.arange(len(labels)) + 0.5, labels=labels)
plt.savefig('Confusion Matrix.png')
plt.show()