# Import Some libraries to use it in our project

In [20]:
from PIL import Image
import os
import hashlib
import cv2 #module from the OpenCV (Open Source Computer Vision) library, . It provides functions for reading, writing, and manipulating images .  cv2 module is used to read the image files from the dataset directory, resize them to a fixed size, convert them to RGB color space, and normalize their pixel values to range [0, 1].
import numpy as np
from skimage import io, exposure
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.layers import Flatten
from keras.layers import Dense
from keras.models import Sequential
from keras.layers import BatchNormalization
from sklearn.metrics import classification_report
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import SGD

# Read data device 

In [2]:
categories = os.listdir(r"F:\LEVEL 3 1\Data Science Tools and Software\brain mri\brain_tumor_dataset")
num_categories = len(categories)
print("Number of categories:", num_categories)


Number of categories: 2


In [3]:
category_to_label = {}
for i, category in enumerate(categories):
    category_to_label[category] = i
print("Category to label mapping:", category_to_label)


Category to label mapping: {'no': 0, 'yes': 1}


# Data Cleaning

## Remove corrupt

In [4]:
# Define a function to remove corrupt images
def remove_corrupt_images(directory):
    # Loop through all files in the directory
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        try:
            # Attempt to open the image file
            with Image.open(filepath) as img:
                img.verify()
        except (IOError, SyntaxError) as e:
            # Delete the corrupt image file
            os.remove(filepath)
            print("Removed corrupt image:", filepath)

remove_corrupt_images(r'F:\LEVEL 3 1\Data Science Tools and Software\brain mri\brain_tumor_dataset\yes')

remove_corrupt_images(r'F:\LEVEL 3 1\Data Science Tools and Software\brain mri\brain_tumor_dataset\no')

## Resize images

In [5]:
target_size = (224, 224)
# Define a function to resize images in a directory
def resize_images(directory):
    
    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
        # Check if the file is an image
        if filepath.endswith(".jpg") or filepath.endswith(".png") or filepath.endswith(".jpeg"):
            
            with Image.open(filepath) as img:
                # Resize the image to the target size
                resized_img = img.resize(target_size)
                
                # Convert RGBA to RGB if the image has an alpha channel
                if resized_img.mode == 'RGBA':
                    resized_img = resized_img.convert('RGB')
                
        
                resized_img.save(filepath)



resize_images(r'F:\LEVEL 3 1\Data Science Tools and Software\brain mri\brain_tumor_dataset\yes')

resize_images(r'F:\LEVEL 3 1\Data Science Tools and Software\brain mri\brain_tumor_dataset\no')


## Normalize images

In [6]:
# Define a function to normalize images in a directory
def normalize_images(directory):

    for filename in os.listdir(directory):
        filepath = os.path.join(directory, filename)
    
        if filepath.endswith(".jpg") or filepath.endswith(".png"):
            
            img = io.imread(filepath)
            
            normalized_img = exposure.rescale_intensity(img, in_range='image', out_range=(0,1)) 
            
            io.imsave(filepath, normalized_img)

normalize_images(r'F:\LEVEL 3 1\Data Science Tools and Software\brain mri\brain_tumor_dataset\yes')

normalize_images(r'F:\LEVEL 3 1\Data Science Tools and Software\brain mri\brain_tumor_dataset\no')



# Data Agumation

In [7]:
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   rotation_range=45,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   validation_split = 0.3)
test_datagen = ImageDataGenerator(rescale = 1./255,
                                  validation_split = 0.3)

# Split our data to training and Validation sets

In [8]:
training_set = train_datagen.flow_from_directory(r"F:\LEVEL 3 1\Data Science Tools and Software\brain mri\brain_tumor_dataset",
                                                 target_size=target_size,
                                                batch_size=32,
                                                 class_mode='categorical',
                                                subset='training')

validation_set = test_datagen.flow_from_directory(r"F:\LEVEL 3 1\Data Science Tools and Software\brain mri\brain_tumor_dataset",
                                                  target_size=target_size,
                                                 batch_size=32,
                                      class_mode='categorical',
                                               shuffle = False,
                                           subset='validation')

Found 177 images belonging to 2 classes.
Found 75 images belonging to 2 classes.


# ANN ALGORITHM 

In [9]:
# ANN ALGORITHM 
model = Sequential()
model.add(Flatten(input_shape=(224, 224, 3)))
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(32, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(16, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(2, activation='softmax'))


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(training_set,
          epochs=100,
          validation_data=validation_set)

  super().__init__(**kwargs)


Epoch 1/100


  self._warn_if_super_not_called()


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 220ms/step - accuracy: 0.4889 - loss: 1.0771 - val_accuracy: 0.6133 - val_loss: 3.3956
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 123ms/step - accuracy: 0.6694 - loss: 0.7478 - val_accuracy: 0.6267 - val_loss: 2.6960
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 121ms/step - accuracy: 0.6817 - loss: 0.6014 - val_accuracy: 0.6533 - val_loss: 1.5669
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 121ms/step - accuracy: 0.6164 - loss: 0.6810 - val_accuracy: 0.6533 - val_loss: 1.1119
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 121ms/step - accuracy: 0.6594 - loss: 0.6898 - val_accuracy: 0.6533 - val_loss: 1.1882
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 119ms/step - accuracy: 0.6945 - loss: 0.7372 - val_accuracy: 0.6533 - val_loss: 1.2648
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1d95025a390>

## Test accuracy for ANN algorithm

In [10]:
test_generator = test_datagen.flow_from_directory(
    directory=r"F:\LEVEL 3 1\Data Science Tools and Software\brain mri\brain_tumor_dataset",
    target_size=target_size,
    color_mode='rgb',
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

test_loss, test_acc = model.evaluate(test_generator)
print("Test accuracy for ANN ALGORITHM :", test_acc)

Found 252 images belonging to 2 classes.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.7369 - loss: 0.5545
Test accuracy for ANN ALGORITHM : 0.7976190447807312


### The classification_report provides the following metrics for each class:

1. Precision: The proportion of correctly predicted positive instances (true positives) out of all instances predicted as positive (true positives + false positives).
2. Recall: The proportion of correctly predicted positive instances (true positives) out of all actual positive instances (true positives + false negatives).
3. F1-score: The harmonic mean of precision and recall, giving a balance between the two metrics. It is calculated as 2 * (precision * recall) / (precision + recall).
4. Support: The number of actual occurrences of the class in the specified dataset.
5. Accuracy: The proportion of correctly classified instances out of the total instances.

In [11]:
# Get the predicted labels for the validation set
y_pred = model.predict(validation_set)
y_pred = np.argmax(y_pred, axis=1)

# Get the true labels for the validation set
y_true = validation_set.classes

# Compute the precision, recall, f1-score, and support
target_names = ['no', 'yes']

print("Report for ANN ALGORITHM ")

print(classification_report(y_true, y_pred, target_names=target_names))

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Report for ANN ALGORITHM 
              precision    recall  f1-score   support

          no       0.77      0.59      0.67        29
         yes       0.77      0.89      0.83        46

    accuracy                           0.77        75
   macro avg       0.77      0.74      0.75        75
weighted avg       0.77      0.77      0.77        75



# Tests each ANN model with new input data.

In [12]:
# Load the image
img = cv2.imread(r"Downloads\yes2.jpeg")

# Preprocess the image

img = cv2.resize(img, target_size)
img = img.astype('float32') / 255.0
img = np.expand_dims(img, axis=0)

#Make the prediction
prediction = model.predict(img)

# Print the prediction
if prediction[0][0] > 0.5:
       print('The model predicts that the image shows no tumor.')
else:
       print('The model predicts that the image shows yes tumor.')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
The model predicts that the image shows yes tumor.


In [13]:
# Load the image
img = cv2.imread(r"Downloads\no.jpg")

# Preprocess the image

img = cv2.resize(img, target_size)
img = img.astype('float32') / 255.0
img = np.expand_dims(img, axis=0)

#Make the prediction
prediction = model.predict(img)

# Print the prediction
if prediction[0][0] > 0.5:
       print('The model predicts that the image shows no tumor.')
else:
       print('The model predicts that the image shows yes tumor.')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
The model predicts that the image shows no tumor.


# CNN ALGORITHM 

In [14]:
# CNN ALGORITHM 
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224,224, 3)))
model.add(MaxPool2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPool2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPool2D((2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(training_set,
          epochs=100,
          validation_data=validation_set)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 1s/step - accuracy: 0.5451 - loss: 1.8166 - val_accuracy: 0.6533 - val_loss: 0.5969
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - accuracy: 0.6701 - loss: 0.5834 - val_accuracy: 0.7600 - val_loss: 0.5791
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1s/step - accuracy: 0.7284 - loss: 0.5936 - val_accuracy: 0.7733 - val_loss: 0.5640
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1s/step - accuracy: 0.7440 - loss: 0.5370 - val_accuracy: 0.7733 - val_loss: 0.5535
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 955ms/step - accuracy: 0.7591 - loss: 0.5551 - val_accuracy: 0.7733 - val_loss: 0.5408
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 954ms/step - accuracy: 0.7565 - loss: 0.5557 - val_accuracy: 0.7733 - val_loss: 0.5352
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1d963130990>

## Test accuracy for CNN algorithm

In [15]:
test_generator = test_datagen.flow_from_directory(
    directory=r"F:\LEVEL 3 1\Data Science Tools and Software\brain mri\brain_tumor_dataset",
    target_size=target_size,
    color_mode='rgb',
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

test_loss, test_acc = model.evaluate(test_generator)
print("Test accuracy for CNN ALGORITHM :", test_acc)


Found 252 images belonging to 2 classes.
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 232ms/step - accuracy: 0.5625 - loss: 1.0706

  self._warn_if_super_not_called()


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 167ms/step - accuracy: 0.7959 - loss: 0.5062
Test accuracy for CNN ALGORITHM : 0.8888888955116272


### The classification_report provides the following metrics for each class:

1. Precision: The proportion of correctly predicted positive instances (true positives) out of all instances predicted as positive (true positives + false positives).
2. Recall: The proportion of correctly predicted positive instances (true positives) out of all actual positive instances (true positives + false negatives).
3. F1-score: The harmonic mean of precision and recall, giving a balance between the two metrics. It is calculated as 2 * (precision * recall) / (precision + recall).
4. Support: The number of actual occurrences of the class in the specified dataset.
5. Accuracy: The proportion of correctly classified instances out of the total instances.

In [16]:
# Get the predicted labels for the validation set
y_pred = model.predict(validation_set)
y_pred = np.argmax(y_pred, axis=1)

# Get the true labels for the validation set
y_true = validation_set.classes

# Compute the precision, recall, f1-score, and support
target_names = ['no', 'yes']

print("Report for CNN ALGORITHM ")

print(classification_report(y_true, y_pred, target_names=target_names))

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 146ms/step
Report for CNN ALGORITHM 
              precision    recall  f1-score   support

          no       0.89      0.55      0.68        29
         yes       0.77      0.96      0.85        46

    accuracy                           0.80        75
   macro avg       0.83      0.75      0.77        75
weighted avg       0.82      0.80      0.79        75



# Tests each CNN model with new input data.

In [17]:
# Load the image
img = cv2.imread("Downloads\yes.jpeg")

# Preprocess the image
img = cv2.resize(img, target_size)
img = img.astype('float32') / 255.0
img = np.expand_dims(img, axis=0)

#Make the prediction
prediction = model.predict(img)

# Print the prediction
if prediction[0][0] > 0.5:
       print('The model predicts that the image shows no tumor.')
else:
       print('The model predicts that the image shows yes tumor.')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step
The model predicts that the image shows yes tumor.


In [18]:
# Load the image
img = cv2.imread("Downloads\yes2.jpeg")

# Preprocess the image
img = cv2.resize(img, target_size)
img = img.astype('float32') / 255.0
img = np.expand_dims(img, axis=0)

#Make the prediction
prediction = model.predict(img)

# Print the prediction
if prediction[0][0] > 0.5:
       print('The model predicts that the image shows no tumor.')
else:
       print('The model predicts that the image shows yes tumor.')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
The model predicts that the image shows yes tumor.


# SVM ALGORITHM 

In [21]:
model = Sequential()
model.add(Flatten(input_shape=(224, 224, 3)))
model.add(Dense(64, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(32, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(16, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(2, activation='softmax'))

# Compile the model with a custom loss function for SVM
model.compile(optimizer=SGD(), loss='hinge', metrics=['accuracy'])

# Train the SVM model using fit_generator
history = model.fit(training_set,
          epochs=100,
          validation_data=validation_set)

Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 107ms/step - accuracy: 0.5228 - loss: 0.9936 - val_accuracy: 0.5867 - val_loss: 0.9664
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 59ms/step - accuracy: 0.5696 - loss: 0.9326 - val_accuracy: 0.6000 - val_loss: 0.9639
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 56ms/step - accuracy: 0.6136 - loss: 0.9069 - val_accuracy: 0.6667 - val_loss: 0.9179
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step - accuracy: 0.6784 - loss: 0.8531 - val_accuracy: 0.6133 - val_loss: 0.9463
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 52ms/step - accuracy: 0.6499 - loss: 0.8863 - val_accuracy: 0.6533 - val_loss: 0.9428
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - accuracy: 0.7161 - loss: 0.8333 - val_accuracy: 0.5600 - val_loss: 0.9398
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━

# Test accuracy for SVM algorithm

In [22]:
test_generator = test_datagen.flow_from_directory(
    directory=r"F:\LEVEL 3 1\Data Science Tools and Software\brain mri\brain_tumor_dataset",
    target_size=target_size,
    color_mode='rgb',
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

test_loss, test_acc = model.evaluate(test_generator)
print("Test accuracy for SVM ALGORITHM :", test_acc)


Found 252 images belonging to 2 classes.
[1m5/8[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m0s[0m 35ms/step - accuracy: 0.6901 - loss: 0.8133

  self._warn_if_super_not_called()


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.7409 - loss: 0.7745
Test accuracy for SVM ALGORITHM : 0.8095238208770752


### The classification_report provides the following metrics for each class:

1. Precision: The proportion of correctly predicted positive instances (true positives) out of all instances predicted as positive (true positives + false positives).
2. Recall: The proportion of correctly predicted positive instances (true positives) out of all actual positive instances (true positives + false negatives).
3. F1-score: The harmonic mean of precision and recall, giving a balance between the two metrics. It is calculated as 2 * (precision * recall) / (precision + recall).
4. Support: The number of actual occurrences of the class in the specified dataset.
5. Accuracy: The proportion of correctly classified instances out of the total instances.

In [23]:
# Get the predicted labels for the validation set
y_pred = model.predict(validation_set)
y_pred = np.argmax(y_pred, axis=1)

# Get the true labels for the validation set
y_true = validation_set.classes

# Compute the precision, recall, f1-score, and support
target_names = ['no', 'yes']

print("Report for SVM ALGORITHM ")

print(classification_report(y_true, y_pred, target_names=target_names))

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Report for SVM ALGORITHM 
              precision    recall  f1-score   support

          no       0.76      0.66      0.70        29
         yes       0.80      0.87      0.83        46

    accuracy                           0.79        75
   macro avg       0.78      0.76      0.77        75
weighted avg       0.78      0.79      0.78        75



# Tests each SVM model with new input data.

In [24]:
# Load the image
img = cv2.imread(r"Downloads\yes2.jpeg")

# Preprocess the image
img = cv2.resize(img, target_size)
img = img.astype('float32') / 255.0
img = np.expand_dims(img, axis=0)

#Make the prediction
prediction = model.predict(img)

# Print the prediction
if prediction[0][0] > 0.5:
       print('The model predicts that the image shows no tumor.')
else:
       print('The model predicts that the image shows yes tumor.')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
The model predicts that the image shows yes tumor.


In [25]:
# Load the image
img = cv2.imread(r"Downloads\yes.jpeg")

# Preprocess the image
img = cv2.resize(img, target_size)
img = img.astype('float32') / 255.0
img = np.expand_dims(img, axis=0)

#Make the prediction
prediction = model.predict(img)

# Print the prediction
if prediction[0][0] > 0.5:
       print('The model predicts that the image shows no tumor.')
else:
       print('The model predicts that the image shows yes tumor.')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
The model predicts that the image shows yes tumor.
