<a href="https://colab.research.google.com/github/SibiShanmuga/Tumor-Detection-Algorithm/blob/main/brain_tumor_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# --- Setup and Imports ---
# Deep-learning stack: TensorFlow plus the Keras layers/models namespaces
# that the model-definition cell refers to as `layers` and `models`.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
# Numerical and plotting helpers used by the visualisation/evaluation cells.
import numpy as np
import matplotlib.pyplot as plt
import os

# Show which TensorFlow release this run is using.
print("TensorFlow version:", tf.__version__)


In [None]:
!pip -q install kaggle

import os
from google.colab import files
from google.colab import userdata


kaggle_json = userdata.get('KAGGLE_JSON')
if kaggle_json is None:
    files.upload()

# Write kaggle.json securely
kaggle_dir = os.path.expanduser("~/.kaggle")
os.makedirs(kaggle_dir, exist_ok=True)

with open(os.path.join(kaggle_dir, "kaggle.json"), "w") as f:
    f.write(kaggle_json)

# Lock down permissions
os.chmod(os.path.join(kaggle_dir, "kaggle.json"), 0o600)

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
print("kaggle.json installed.")

# Download + unzip dataset
!kaggle datasets download -d masoudnickparvar/brain-tumor-mri-dataset
!unzip -o brain-tumor-mri-dataset.zip -d data


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Directories produced by unzipping the dataset into ./data
train_dir = '/content/data/Training'
test_dir = '/content/data/Testing'

# Settings shared by every directory iterator created in this cell.
flow_options = dict(
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
    seed=42,
)

# Augmentation applied to the training subset only.
augmentation_options = dict(
    rotation_range=12,
    width_shift_range=0.10,
    height_shift_range=0.10,
    zoom_range=0.15,
    horizontal_flip=True,
    vertical_flip=False,         # medical images shouldn't flip vertically
    shear_range=0.00,
    brightness_range=(0.85, 1.15),
    fill_mode='nearest',
)

# Training generator: rescale to [0, 1], augment, hold out 20% for validation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    **augmentation_options
)

# Validation generator: same rescale and split fraction, no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Test generator: rescale only.
test_datagen = ImageDataGenerator(rescale=1./255)

# Training subset of the Training directory (shuffled).
train_gen = train_datagen.flow_from_directory(
    train_dir,
    subset='training',
    shuffle=True,
    **flow_options
)

# Validation subset of the same directory; kept in file order.
val_gen = val_datagen.flow_from_directory(
    train_dir,
    subset='validation',
    shuffle=False,
    **flow_options
)

# Held-out test images; kept in file order.
test_gen = test_datagen.flow_from_directory(
    test_dir,
    shuffle=False,
    **flow_options
)



In [None]:
# --- Preview a 3x3 grid of training images with their class names ---
images, labels = next(train_gen)  # pull a single batch from the training iterator

# Class names in the order of class_indices' keys; the title lookup below
# relies on that order matching the class index.
class_names = list(train_gen.class_indices.keys())

# Collapse each label vector to the position of its largest entry.
label_ids = np.argmax(labels, axis=1)

fig, axes = plt.subplots(3, 3, figsize=(8, 8))
for i, ax in enumerate(axes.flat):
    ax.imshow(images[i])
    ax.set_title(class_names[label_ids[i]])
    ax.axis("off")
plt.show()

Training the model on the loaded dataset

In [None]:
# --- CNN definition: four conv blocks with BatchNorm, then a pooled softmax head ---
from tensorflow.keras.regularizers import l2


def _conv_bn(filters, weight_decay, **conv_kwargs):
    """Return [Conv2D, BatchNormalization] for one 3x3 ReLU convolution.

    The convolution uses an L2 kernel regularizer with factor `weight_decay`;
    any extra keyword arguments (e.g. `input_shape`) go to Conv2D unchanged.
    """
    return [
        layers.Conv2D(
            filters,
            (3, 3),
            strides=(1, 1),
            activation='relu',
            kernel_regularizer=l2(weight_decay),
            **conv_kwargs
        ),
        layers.BatchNormalization(),
    ]


model = models.Sequential([
    # Block 1: 2 x 32 filters
    *_conv_bn(32, 0.0001, input_shape=(128, 128, 3)),
    *_conv_bn(32, 0.0001),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.2),

    # Block 2: 2 x 64 filters
    *_conv_bn(64, 0.0005),
    *_conv_bn(64, 0.0005),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Block 3: 2 x 128 filters
    *_conv_bn(128, 0.001),
    *_conv_bn(128, 0.001),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.35),

    # Block 4: a single 192-filter convolution for extra depth
    *_conv_bn(192, 0.0015),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.4),

    # Head: global average pooling, dropout, 4-way softmax
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.4),
    layers.Dense(4, activation='softmax')
])

model.summary()

In [None]:
# --- Model Training with Advanced Callbacks ---
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.models import load_model

# Training configuration
CHECKPOINT_PATH = "best_brain_tumor_model.keras"
TOTAL_EPOCHS = 100          # overall epoch budget shared by the first run and all restarts
RESTART_BASE_LR = 0.0004    # learning rate that each restart decays from
RESTART_LR_DECAY = 0.9      # multiplicative decay applied once per restart

# Adjusted class_weight for 4 classes
class_weight = {0: 1.0, 1: 1.67, 2: 1.0, 3: 1.1}  # adjust if you detect imbalance

# Callbacks: stop when val_loss stalls, shrink the LR on plateaus, and keep the
# weights with the lowest val_loss on disk.
# NOTE(review): the same callback objects are reused for every restart, while
# the restart runs use a different label-smoothing value (0.01 vs 0.04), so
# val_loss from different runs may not be directly comparable - confirm this is
# intended.
callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=7,
        restore_best_weights=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.6,
        patience=3,
        min_lr=1e-7,
        verbose=1
    ),
    ModelCheckpoint(
        CHECKPOINT_PATH,
        monitor="val_loss",
        save_best_only=True,
        verbose=1
    )
]

history_list = []  # one History object per fit() call, in training order

# Compile with optimized settings
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.04),
    metrics=['accuracy'],
    run_eagerly=False
)

history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=TOTAL_EPOCHS,
    callbacks=callbacks,
    verbose=1,
    class_weight=class_weight
)
history_list.append(history)

# If early stopping ended the run before the epoch budget was used up, reload
# the best checkpoint and keep training with a freshly created optimizer.
current_epoch = len(history.history['accuracy'])
restarts = 1

while current_epoch < TOTAL_EPOCHS:

    print("Restarting Model\n")
    print(f"Starting at Epoch: {current_epoch}\n")

    restarted_model = load_model(CHECKPOINT_PATH)

    restarted_model.compile(
        # New Adam optimizer starting at RESTART_BASE_LR, reduced by a further
        # 10% for every restart (0.0004 * 0.9 ** restarts).
        optimizer=keras.optimizers.Adam(
            learning_rate=RESTART_BASE_LR * (RESTART_LR_DECAY ** restarts)
        ),
        loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.01),
        metrics=['accuracy'],
        run_eagerly=False
    )

    train_gen.reset()
    history = restarted_model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=TOTAL_EPOCHS,
        initial_epoch=current_epoch,
        callbacks=callbacks,
        verbose=1,
        class_weight=class_weight
    )

    # Keep `model` bound to the most recently trained network; otherwise later
    # cells that use `model` (e.g. saving the final model) would silently get
    # the network from the first run only.
    model = restarted_model

    history_list.append(history)
    epochs_run = len(history.history['accuracy'])
    if epochs_run == 0:
        # Nothing was recorded for this restart; stop instead of looping forever.
        print("Restart recorded no epochs; stopping restarts.")
        break
    current_epoch += epochs_run
    restarts += 1


In [None]:
# --- Plot Training Results with Detailed Analysis ---
import matplotlib.pyplot as plt


def _plot_metric(ax, histories, metric, title, train_color, val_color):
    """Draw one metric for every fit() run, back to back, on `ax`.

    Args:
        ax: Matplotlib Axes to draw on.
        histories: list of Keras History objects in training order.
        metric: history key of the training metric ('accuracy' or 'loss');
            the validation series is read from 'val_' + metric.
        title: human-readable metric name used in labels and the title.
        train_color, val_color: line colours for the two series.

    Each run is plotted as its own segment, shifted by the number of epochs
    already drawn. A dotted vertical line marks every point where training was
    restarted, i.e. between consecutive runs but not after the last one.
    """
    epoch_offset = 0
    last_run = len(histories) - 1
    for run_idx, run in enumerate(histories):
        n_epochs = len(run.history[metric])
        epochs = range(epoch_offset, epoch_offset + n_epochs)
        first = run_idx == 0  # only the first segment contributes legend entries
        ax.plot(epochs, run.history[metric], color=train_color,
                label=f'Training {title}' if first else None, linewidth=2)
        ax.plot(epochs, run.history[f'val_{metric}'], color=val_color,
                label=f'Validation {title}' if first else None, linewidth=2)
        epoch_offset += n_epochs
        if run_idx < last_run:
            ax.axvline(x=epoch_offset, linestyle=':',
                       label='restarted model' if first else None, linewidth=2)

    ax.set_title(f'Model {title}', fontsize=14)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(title)
    ax.legend()
    ax.grid(True, alpha=0.3)


def _best(histories, key, pick):
    """Return the best value of `key` over all runs; `pick` is `max` or `min`."""
    return pick(pick(run.history[key]) for run in histories)


# Best values across every run, computed once and reused for the figure and the text summary.
best_train_acc = _best(history_list, 'accuracy', max)
best_val_acc = _best(history_list, 'val_accuracy', max)
best_train_loss = _best(history_list, 'loss', min)
best_val_loss = _best(history_list, 'val_loss', min)

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 5))
_plot_metric(ax_acc, history_list, 'accuracy', 'Accuracy', 'darkslategrey', 'orangered')
_plot_metric(ax_loss, history_list, 'loss', 'Loss', 'firebrick', 'darkcyan')

# Reserve the bottom quarter of the figure for the statistics block, which is
# placed in figure coordinates so it does not depend on the axes' data ranges.
fig.tight_layout(rect=(0, 0.25, 1, 1))
fig.text(0.02, 0.19, "Statistics:", fontsize=15)
fig.text(0.02, 0.15, f"Best Training Accuracy: {best_train_acc:.4f}")
fig.text(0.02, 0.11, f"Best Validation Accuracy: {best_val_acc:.4f}")
fig.text(0.02, 0.07, f"Best Training Loss: {best_train_loss:.4f}")
fig.text(0.02, 0.03, f"Best Validation Loss: {best_val_loss:.4f}")
plt.show()

# Print summary ("final" values come from the last epoch of the last run)
final_history = history_list[-1]
print("\nTraining Summary:")
print(f"Final Train Accuracy: {final_history.history['accuracy'][-1]:.4f}")
print(f"Final Validation Accuracy: {final_history.history['val_accuracy'][-1]:.4f}")
print(f"Final Train Loss: {final_history.history['loss'][-1]:.4f}")
print(f"Final Validation Loss: {final_history.history['val_loss'][-1]:.4f}")
print("\n")
print(f"Best Train Accuracy: {best_train_acc:.4f}")
print(f"Best Validation Accuracy: {best_val_acc:.4f}")
print(f"Best Train Loss: {best_train_loss:.4f}")
print(f"Best Validation Loss: {best_val_loss:.4f}")
print("\n")
print(f"Total Epochs Trained: {sum(len(h.history['accuracy']) for h in history_list)}")
# The first entry of history_list is the initial run, not a restart.
print(f"Total Restarts: {len(history_list) - 1}")

print("\n")

import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

# --- Confusion matrix / per-class report on the validation subset ---
# Score the checkpointed model rather than whichever model is in memory.
matrix_model = load_model('best_brain_tumor_model.keras')

val_gen.reset()  # rewind the iterator before predicting
probs = matrix_model.predict(val_gen)
y_true = val_gen.classes

# Multi-column output: pick the most probable class per row.
# Single-column output: threshold the score at 0.5.
multi_class = probs.shape[1] > 1
y_pred = np.argmax(probs, axis=1) if multi_class else (probs.ravel() >= 0.5).astype(int)

print(confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred, digits=4))



In [None]:
# --- Save Models and Evaluation ---
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import load_model

# Save the final trained model.
# NOTE(review): this exports whatever `model` is bound to when this cell runs;
# the restart loop in the training cell trains `restarted_model`, so confirm
# that `model` is the network you intend to export.
model.save('brain_tumor_cnn_model_final.keras')
print("Final model saved as 'brain_tumor_cnn_model_final.keras'")

# The best model is already on disk: the ModelCheckpoint callback monitors
# val_loss with save_best_only=True, so the file holds the lowest-val_loss weights.
print("Best model saved as 'best_brain_tumor_model.keras' (lowest validation loss)")

# Load and evaluate the best model on the held-out test set
best_model = load_model('best_brain_tumor_model.keras')

print("\n--- Best Model Performance ---")
eval_results = best_model.evaluate(test_gen)  # indexed below as [loss, accuracy]
print(f"Testing Accuracy: {eval_results[1]:.4f}")
print(f"Testing Loss: {eval_results[0]:.4f}")

# Per-class breakdown: rewind the iterator before predicting.
test_gen.reset()
probs = best_model.predict(test_gen)
y_true = test_gen.classes

# Multi-column output -> most probable class; single column -> 0.5 threshold.
if probs.shape[1] > 1:
    y_pred = np.argmax(probs, axis=1)
else:
    y_pred = (probs.ravel() >= 0.5).astype(int)

print(confusion_matrix(y_true, y_pred))
print(classification_report(y_true, y_pred, digits=4))
