In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline
import os

In [None]:
from keras.models import Sequential, Model, load_model
import cv2
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import img_to_array, load_img
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
import tensorflow as tf
from tensorflow.keras import regularizers
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam,SGD
from keras.applications.efficientnet import EfficientNetB3
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
from tensorflow.keras.backend import clear_session
from keras.applications.vgg16 import VGG16
from keras.applications.vgg19 import VGG19
from keras.applications.resnet import ResNet50

In [None]:
# Kaggle input directories of the two MRI collections that get merged
path_dataset1, path_dataset2 = (
    '/kaggle/input/brain-tumor-classification-mri',
    '/kaggle/input/brain-tumor',
)

# Empty table that the loader fills with one (image path, label) row per file
df = pd.DataFrame(columns=["image_paths", "label"])

# Function to add data to the dataframe
def add_data_to_df(path, label_map):
    """Append one ``[image_path, label]`` row to the global ``df`` per file found.

    ``path`` is scanned for sub-directories. Each sub-directory name is
    translated through ``label_map`` and every entry inside it becomes a row.
    Entries directly under ``path`` that are not directories are ignored.

    Directory listings are sorted so rows are produced in the same order on
    every run: ``os.listdir`` returns entries in arbitrary order, and the
    seeded ``train_test_split`` calls only reproduce the same split when the
    rows arrive in the same order.

    Rows are collected first and appended with a single ``pd.concat`` instead
    of growing the frame with one ``df.loc[len(df)]`` assignment per image,
    which gets slower as the frame grows.

    Args:
        path: Directory whose immediate sub-directories each hold one class.
        label_map: Mapping from sub-directory name to the label to store.

    Raises:
        KeyError: If a non-empty sub-directory name is missing from ``label_map``.
    """
    global df

    rows = []
    for label in sorted(os.listdir(path)):
        label_path = os.path.join(path, label)
        if not os.path.isdir(label_path):
            continue
        # The lookup stays inside the per-image expression so that an empty,
        # unmapped directory is still tolerated, exactly as before.
        rows.extend(
            (os.path.join(label_path, image), label_map[label])
            for image in sorted(os.listdir(label_path))
        )

    if not rows:
        return

    new_rows = pd.DataFrame(rows, columns=df.columns)
    # Rebinds the module-level name; ignore_index keeps a clean 0..n-1 index.
    df = pd.concat([df, new_rows], ignore_index=True)

# Dataset 1: class folder name -> unified label
label_map_dataset1 = dict(
    glioma_tumor='glioma',
    meningioma_tumor='meningioma',
    no_tumor='no_tumor',
    pituitary_tumor='pituitary',
)

# Dataset 2: numbered class folder -> unified label
label_map_dataset2 = dict(zip('123', ('glioma', 'meningioma', 'pituitary')))

# Dataset 1 ships a Training and a Testing folder; both are pooled into df
for subfolder in ('Training', 'Testing'):
    split_dir = os.path.join(path_dataset1, subfolder)
    add_data_to_df(split_dir, label_map_dataset1)

# Dataset 2 has its class folders directly under its root
add_data_to_df(path_dataset2, label_map_dataset2)

In [None]:
# Show the last ten rows (dataset 2 is the last one added to df)
df.tail(10)

In [None]:
# Show the first ten rows (dataset 1 is the first one added to df)
df.head(10)

In [None]:
# Hold out 20% of all rows for testing ...
train, test = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)
# ... then carve 25% of the remainder off for validation (about 60/20/20 overall)
training, validation = train_test_split(
    train,
    test_size=0.25,
    random_state=42,
    shuffle=True,
)

# Preprocessing of MRI Images and Data augmentation

In [None]:
# Size every image is resized to when the generators load it
target_size = (224, 224)

# Random transformations applied to training images only
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.4, 1.2],
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Plain generator for validation and test data: no augmentation and no
# rescaling is configured, so pixel values are passed through unchanged
valid_test_datagen = ImageDataGenerator()

def create_dataset(df, is_training=False):
    """Build a Keras image iterator over the rows of ``df``.

    Args:
        df: Frame with an ``image_paths`` column (file names) and a ``label``
            column (class names).
        is_training: When truthy, the augmenting ``train_datagen`` is used and
            the iterator shuffles; otherwise ``valid_test_datagen`` is used
            and the row order is kept.

    Returns:
        Whatever ``flow_from_dataframe`` returns for the chosen generator,
        configured for one-hot (``categorical``) labels and ``target_size``.
    """
    datagen = train_datagen if is_training else valid_test_datagen
    flow_options = {
        'x_col': 'image_paths',
        'y_col': 'label',
        'class_mode': 'categorical',
        'shuffle': is_training,
        'target_size': target_size,
    }
    return datagen.flow_from_dataframe(df, **flow_options)

# One iterator per split: augmented and shuffled for training, plain and
# in row order for validation and test
train_dataset, valid_dataset, test_dataset = (
    create_dataset(training, is_training=True),
    create_dataset(validation),
    create_dataset(test),
)

In [None]:
# Class name -> integer index, as assigned by the training iterator
label_dict = train_dataset.class_indices
# Class names in dict order, used below to turn an index back into a name
tumor_type = list(label_dict)
# Pull one (augmented) batch of images and one-hot labels for inspection
images, labels = next(train_dataset)

In [None]:
from matplotlib import rcParams

plt.figure(figsize=(20, 20))
rcParams['axes.titlepad'] = 20

# Lay the first 20 images of the batch out on a 4 x 5 grid
for i in range(20):
    ax = plt.subplot(4, 5, i + 1)

    # The generators do not rescale, so divide by 255 for display
    image = images[i] / 255
    ax.imshow(image)

    # One-hot label -> class name; healthy scans are titled in green
    index = np.argmax(labels[i])
    tumor_type_name = tumor_type[index]
    if tumor_type_name == 'no_tumor':
        color = 'green'
    else:
        color = 'red'

    ax.set_title(tumor_type_name, fontsize=16, fontweight='bold', color=color)
    ax.axis('off')

plt.show()

In [None]:
# Fixing the seed for random number generators
import random

SEED = 42

clear_session()
# Seed NumPy, the standard library and TensorFlow with the same value
for set_seed in (np.random.seed, random.seed, tf.random.set_seed):
    set_seed(SEED)

# CNN MODEL 

In [None]:
# Archived Optuna search, kept inside a string literal so that it is not executed.
# NOTE(review): the snippet refers to `optuna`, which none of the visible import
# cells bring in, so it would need that import before it could be re-enabled.
# Presumably this search produced the values hard-coded in the fine-tuning cell
# (5 unfrozen layers, learning rate 0.00379778) -- confirm before relying on it.
"""def create_model(trial):
    # Load the pre-trained model
    loaded_model = load_model('Tumor_pitu_glioma_menin_model.h5')


    # Unfreeze the top N layers for fine-tuning
    N = trial.suggest_int('unfreeze_layers', 1, 5)  # Adjust the range as needed
    for layer in loaded_model.layers[-N:]:
        layer.trainable = True

    # Suggest hyperparameters
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)

    # Modify the model (if needed, e.g., change dropout rate)
    model = Sequential()
    for layer in loaded_model.layers[:-1]:  # Exclude the last Dense layer
        model.add(layer)
    model.add(Dropout(0.24377169383011632))
    model.add(Dense(4, activation='softmax'))  # Assuming 4 classes

    # Compile the model
    optimizer = SGD(learning_rate=learning_rate, momentum=0.9)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

def objective(trial):
    model = create_model(trial)
    early_stop = EarlyStopping(monitor='val_loss', patience=3)

    # Train the model
    history = model.fit(train_dataset, 
                        validation_data=valid_dataset, 
                        epochs=1,
                        callbacks = [early_stop],
                        verbose=1)  # Adjust as per your dataset

    # Evaluate the model
    val_loss, val_accuracy = model.evaluate(valid_dataset, verbose=0)

    return val_accuracy

study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=60))
study.enqueue_trial({'learning_rate': 0.008853510984551376})

study.optimize(objective, n_trials=40)

print("Best hyperparameters: ", study.best_trial.params)"""

# Optuna Hyperparameter Tuning Results:

* Number of unfrozen top layers: 5
* Learning Rate: 0.00379778

Number of Units in Dense layer: 432

In [None]:
# Load the saved Keras model (.h5 file) that the following cells fine-tune
loaded_model = load_model('/kaggle/input/tumor-detection-model/Tumor_pitu_glioma_menin_model.h5')

In [None]:
# Print the layer-by-layer summary of the loaded model
loaded_model.summary()

In [None]:
unfreezed_layers = 5
learning_rate = 0.00379778

# Mark the last `unfreezed_layers` layers as trainable; every other layer keeps
# whatever trainable state it had when the model was loaded.
for layer in loaded_model.layers[-unfreezed_layers:]:
    layer.trainable = True

# Re-compile after changing `trainable` so the change is picked up by training.
optimizer = SGD(learning_rate=learning_rate, momentum=0.9)
loaded_model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Stop once val_loss has not improved for 3 epochs and roll the model back to
# the epoch with the best val_loss. Without restore_best_weights the model that
# is evaluated and saved afterwards keeps the weights of the final epoch, i.e.
# the ones reached after the epochs that showed no improvement.
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

history = loaded_model.fit(train_dataset,
                    epochs = 100,
                    validation_data = valid_dataset,
                    callbacks = [early_stop])

In [None]:
# Per-epoch metrics recorded by fit()
train_acc, train_loss = history.history['accuracy'], history.history['loss']
val_acc, val_loss = history.history['val_accuracy'], history.history['val_loss']

# 1-based epoch numbers for the x axis
epochs = range(1, len(train_acc) + 1)

plt.figure(figsize=(20, 10))
plt.style.use('Solarize_Light2')

# Left panel: loss, right panel: accuracy; training in red, validation in green
panels = (
    ('MODEL LOSS', 'Loss', train_loss, val_loss),
    ('MODEL ACCURACY', 'Accuracy', train_acc, val_acc),
)
for position, (panel_title, y_label, train_curve, val_curve) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs, train_curve, 'r', label='Training')
    plt.plot(epochs, val_curve, 'g', label='Validation')
    plt.title(panel_title, fontsize=20, fontweight='bold')
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Score every split over the whole iterator. The previous `steps=10` stopped
# after ten batches, so the table described only a slice of each split and
# did not match the confusion matrix, which is computed on the full test set.
# Note: train_dataset is the augmented, shuffled iterator, so the training
# figures are measured on augmented images.
train_score = loaded_model.evaluate(train_dataset, verbose=1)
valid_score = loaded_model.evaluate(valid_dataset, verbose=1)
test_score = loaded_model.evaluate(test_dataset, verbose=1)

# evaluate() returns [loss, accuracy] because the model was compiled with
# metrics=['accuracy']
cnn_eval_dict = {
    'Evaluation': ['Loss', 'Accuracy'],
    'Training': [train_score[0], train_score[1]],
    'Testing': [test_score[0], test_score[1]],
    'Validation': [valid_score[0], valid_score[1]],

}

cnn_eval_df = pd.DataFrame(cnn_eval_dict)

In [None]:
# Display the loss/accuracy table for the three splits
cnn_eval_df

In [None]:
# Write the fine-tuned model to an .h5 file in the current working directory
loaded_model.save('Tumor_GliMeninPitu_model.h5')

In [None]:
from sklearn.metrics import confusion_matrix

def make_confusion_matrix(cf,
                          group_names=None,
                          categories='auto',
                          count=True,
                          percent=True,
                          cbar=True,
                          xyticks=True,
                          xyplotlabels=True,
                          sum_stats=True,
                          figsize=None,
                          cmap='Blues',
                          title=None):
    """Plot a confusion matrix as an annotated heatmap with row and column totals.

    A "Total Predicted" row (column sums) and a "Total Actual" column (row
    sums) are appended to ``cf`` before plotting; the bottom-right cell holds
    the grand total. The totals are shaded light grey and the grand total grey.

    Args:
        cf: Confusion matrix as a 2-D array or a ``DataFrame``.
        group_names: Optional per-cell text. Only used when its length equals
            the number of cells of the matrix *including* the totals.
        categories: Tick labels for the classes, or ``'auto'`` to number them.
        count: Annotate every cell with its count.
        percent: Annotate every cell with its share of the grand total.
        cbar: Draw the colour bar.
        xyticks: Show tick labels. ``False`` hides them on both axes.
        xyplotlabels: Accepted for compatibility; not used by this implementation.
        sum_stats: Accepted for compatibility; not used by this implementation.
        figsize: Figure size; defaults to matplotlib's ``figure.figsize``.
        cmap: Accepted for compatibility; not applied. The heatmap always uses
            the cubehelix palette built inside this function.
        title: Optional plot title.
    """
    # Check if the input is a DataFrame
    if isinstance(cf, pd.DataFrame):
        cf = cf.values

    # Add 'Total Predicted' row and 'Total Actual' column to the matrix
    total_predicted = np.sum(cf, axis=0)
    total_actual = np.sum(cf, axis=1)

    cf = np.vstack([cf, total_predicted])
    cf = np.hstack([cf, np.append(total_actual, np.sum(total_actual)).reshape(-1, 1)])
    grand_total = cf[-1, -1]
    n_rows, n_cols = cf.shape

    masked_cmap = sns.cubehelix_palette(start=.5, rot=-.75, as_cmap=True)

    # BUILD THE TEXT SHOWN IN EVERY CELL
    blanks = [''] * cf.size

    if group_names and len(group_names) == cf.size:
        group_labels = ["{}\n".format(value) for value in group_names]
    else:
        group_labels = blanks

    if count:
        group_counts = ["{0:0.0f}\n".format(value) for value in cf.flatten()]
    else:
        group_counts = blanks

    if percent:
        # An all-zero matrix has a grand total of 0; divide by 1 instead so the
        # cells read 0.00% rather than nan%.
        denominator = grand_total if grand_total else 1
        group_percentages = ["{0:.2%}".format(value) for value in cf.flatten() / denominator]
    else:
        group_percentages = blanks

    box_labels = [f"{v1}{v2}{v3}".strip() for v1, v2, v3 in zip(group_labels, group_counts, group_percentages)]
    box_labels = np.asarray(box_labels).reshape(n_rows, n_cols)

    # SET FIGURE PARAMETERS ACCORDING TO OTHER ARGUMENTS
    if figsize is None:
        # Get default figure size if not set
        figsize = plt.rcParams.get('figure.figsize')

    if not xyticks:
        # seaborn hides the tick labels when given False. Previously this path
        # set categories to False and then crashed on ``False.copy()``.
        x_labels = False
        y_labels = False
    else:
        if isinstance(categories, str) and categories == 'auto':
            x_labels = list(range(n_cols - 1))  # excluding the 'Total' column
            y_labels = list(range(n_rows - 1))  # excluding the 'Total' row
        else:
            # list() copies the caller's labels and also accepts tuples/arrays
            x_labels = list(categories)
            y_labels = list(categories)
        x_labels.append("Total Predicted")
        y_labels.append("Total Actual")

    # MAKE THE HEATMAP VISUALIZATION
    plt.figure(figsize=figsize)
    sns.heatmap(cf, annot=box_labels, fmt="", cbar=cbar, xticklabels=x_labels, cmap=masked_cmap, yticklabels=y_labels, linewidths=0.5, linecolor="white")

    ax = plt.gca()
    ax.xaxis.tick_top()

    # Shade the totals. ``facecolor`` is used instead of ``color`` because
    # ``color`` overrides ``edgecolor`` and makes matplotlib emit a warning.
    ax.add_patch(plt.Rectangle((n_cols - 1, 0), 1, n_rows, fill=True, facecolor='lightgrey', edgecolor='white', linewidth=0.5))
    ax.add_patch(plt.Rectangle((0, n_rows - 1), n_cols, 1, fill=True, facecolor='lightgrey', edgecolor='white', linewidth=0.5))
    # Grand-total cell. The former hard-coded x=2 only hit this cell for a
    # 2-class matrix and shaded several column totals for larger ones.
    ax.add_patch(plt.Rectangle((n_cols - 1, n_rows - 1), 1, 1, fill=True, facecolor='grey', edgecolor='white', linewidth=0.5))

    # Redraw the white cell borders on top of the shaded totals
    for i in range(n_cols):
        ax.add_patch(plt.Rectangle((i, n_rows - 1), 1, 1, fill=False, edgecolor='white', linewidth=0.5))
        ax.add_patch(plt.Rectangle((n_cols - 1, i), 1, 1, fill=False, edgecolor='white', linewidth=0.5))

    if title:
        plt.title(title)

    plt.show()


# Performance of the Model on the Test Dataset

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Model outputs for every test image. The test iterator was created with
# shuffle=False, so the rows are in the same order as test_dataset.classes.
predicted_probabilities = loaded_model.predict(test_dataset)

In [None]:
# Index of the highest score in each row = predicted class index
predicted_labels = np.argmax(predicted_probabilities, axis=1)
# Class index of every test image as recorded by the iterator
true_labels = test_dataset.classes

In [None]:
# Rows are the true classes, columns the predicted classes
cm = confusion_matrix(y_true=true_labels, y_pred=predicted_labels)

In [None]:
# Display names for the four classes, in class-index order
class_display_names = ['glioma', 'meningioma', 'no Tumor', 'Pituitary']
make_confusion_matrix(cm, categories=class_display_names, figsize=(12, 10), cbar=False)

In [None]:
# Per-class precision, recall and F1 on the test set
class_report = classification_report(y_true=true_labels, y_pred=predicted_labels)
print("\nClassification Report:", class_report, sep="\n")