<a href="https://www.kaggle.com/code/mohitsingh351/brain-tumor-mri-classification?scriptVersionId=190235820" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# **Import necessary libraries**

In [16]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="whitegrid")
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.models import Model
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16

import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt

# **Collecting File Paths and Labels from Directory**

In [17]:
parent_dir = "/kaggle/input/brain-mri-images-for-brain-tumor-detection"

filepaths = []
labels = []

# Directory containing the "brain_tumor_dataset" folder
directory = "/kaggle/input/brain-mri-images-for-brain-tumor-detection/brain_tumor_dataset"
folders = os.listdir(directory)  #['no', 'yes']

for folder in folders:
    file_path = os.path.join(parent_dir, folder)
    images = os.listdir(file_path)
    for image in images:
        image_path = os.path.join(file_path, image)
        filepaths.append(image_path)
        labels.append(folder)

# **Creating DataFrame from File Paths and Labels**

In [18]:
file_path_series = pd.Series(filepaths, name="filepath")
label_series = pd.Series(labels, name="label")

df_train = pd.concat([file_path_series, label_series], axis=1)

In [19]:
print(f"Shape of 'df_train': {df_train.shape}")
df_train

Shape of 'df_train': (253, 2)


Unnamed: 0,filepath,label
0,/kaggle/input/brain-mri-images-for-brain-tumor...,no
1,/kaggle/input/brain-mri-images-for-brain-tumor...,no
2,/kaggle/input/brain-mri-images-for-brain-tumor...,no
3,/kaggle/input/brain-mri-images-for-brain-tumor...,no
4,/kaggle/input/brain-mri-images-for-brain-tumor...,no
...,...,...
248,/kaggle/input/brain-mri-images-for-brain-tumor...,yes
249,/kaggle/input/brain-mri-images-for-brain-tumor...,yes
250,/kaggle/input/brain-mri-images-for-brain-tumor...,yes
251,/kaggle/input/brain-mri-images-for-brain-tumor...,yes


# **Splitting Data into Training, Validation, and Test Sets**

In [20]:
# Spliting in Train and Dummy 
train_df , dummy_df = train_test_split(df_train ,train_size = 0.8 , shuffle = True ,random_state = 42 )
# Spliting in Validation and Test From Dummy 
valid_df , test_df = train_test_split(dummy_df ,test_size= 0.5 , shuffle = True ,random_state = 42)

In [21]:
print(f"The shape of train data is: {train_df.shape}")
print(f"The shape of validation data is: {valid_df.shape}")
print(f"The shape of test data is: {test_df.shape}")

The shape of train data is: (202, 2)
The shape of validation data is: (25, 2)
The shape of test data is: (26, 2)


# **Data Augmentation and Generators for Training, Validation, and Test Sets**

In [22]:
image_size = 224
batch_size = 32

train_datagen = ImageDataGenerator(
    rescale=1./255, rotation_range=20, zoom_range=0.2, horizontal_flip=True, shear_range=0.15
)

validation_test_datagenerator = ImageDataGenerator(rescale=1./255)

print("Training data")
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='filepath',
    y_col='label',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode='binary'
)

print('Validation data')
validation_generator = validation_test_datagenerator.flow_from_dataframe(
    valid_df,
    x_col='filepath',
    y_col='label',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode='binary'
)

print('Test data')
test_generator = validation_test_datagenerator.flow_from_dataframe(
    test_df,
    x_col='filepath',
    y_col='label',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False
)

Training data
Found 202 validated image filenames belonging to 2 classes.
Validation data
Found 25 validated image filenames belonging to 2 classes.
Test data
Found 26 validated image filenames belonging to 2 classes.


# **Building and Compiling the VGG16-Based Model**

In [23]:
base_model1 = VGG16(weights="imagenet", include_top=False, input_shape=(image_size, image_size, 3))

model1 = Sequential()
model1.add(base_model1)
model1.add(Flatten())
model1.add(Dense(256, activation="relu"))
model1.add(Dropout(0.5))
model1.add(Dense(1, activation='sigmoid'))

base_model1.trainable = False  # Freeze the base model

model1.compile(optimizer="Adam", loss='binary_crossentropy', metrics=['accuracy'])

# **Training the Model with Early Stopping**

In [24]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = model1.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=20,
    callbacks=[early_stopping]
)

Epoch 1/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 8s/step - accuracy: 0.5801 - loss: 1.6771 - val_accuracy: 0.3600 - val_loss: 2.5225
Epoch 2/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 8s/step - accuracy: 0.5743 - loss: 1.9744 - val_accuracy: 0.8000 - val_loss: 0.6910
Epoch 3/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 8s/step - accuracy: 0.6734 - loss: 1.0400 - val_accuracy: 0.7200 - val_loss: 0.7340
Epoch 4/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 8s/step - accuracy: 0.7657 - loss: 0.5886 - val_accuracy: 0.8800 - val_loss: 0.4906
Epoch 6/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 8s/step - accuracy: 0.7561 - loss: 0.4054 - val_accuracy: 0.8400 - val_loss: 0.4426
Epoch 7/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 8s/step - accuracy: 0.8068 - loss: 0.3648 - val_accuracy: 0.8400 - val_loss: 0.4329
Epoch 8/20
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

# **Evaluating Model Performance on Test Set**

In [25]:
loss, accuracy = model1.evaluate(test_generator)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step - accuracy: 0.9615 - loss: 0.1568


In [26]:
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Test Accuracy: 96.15%


# **Generating and Processing Predictions on Test Set**

In [27]:
predictions = model.predict(test_generator)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step


In [28]:
predicted_labels = (predictions > 0.5).astype(int)

# **Confusion Matrix**

In [29]:
confusion_matrix(test_generator.labels, predicted_labels)

array([[11,  0],
       [ 2, 13]])

# **Classification Report**

In [30]:
print(classification_report(test_generator.labels, predicted_labels, target_names=['No Tumor', 'Tumor']))

              precision    recall  f1-score   support

    No Tumor       0.85      1.00      0.92        11
       Tumor       1.00      0.87      0.93        15

    accuracy                           0.92        26
   macro avg       0.92      0.93      0.92        26
weighted avg       0.93      0.92      0.92        26

