In [None]:
# ===========================
# Google Colab – Dataset Setup
# ===========================
# Setup cell: downloads the Kaggle brain-tumor MRI dataset with kagglehub and
# copies it to a fixed folder on Google Drive, replacing any previous copy.

# 1. Mount Google Drive at /content/drive (the copy target below lives there)
from google.colab import drive
drive.mount('/content/drive')

# 2. Install kagglehub; the leading "!" is an IPython shell escape, so this
#    line only works inside a notebook, not in a plain Python interpreter
!pip install -q kagglehub

# 3. Imports (kagglehub must come after the install step above)
import kagglehub
import shutil
import os

# 4. Download the dataset from Kaggle; dataset_download returns the local
#    path it downloaded to, which is printed for reference
print("Downloading dataset...")
source_path = kagglehub.dataset_download("masoudnickparvar/brain-tumor-mri-dataset")
print("Downloaded to:", source_path)

# 5. Destination folder on the mounted Drive
target_path = "/content/drive/MyDrive/brain_tumor_dataset"

# 6. Delete any previous copy first: shutil.copytree (as called below, without
#    dirs_exist_ok) raises if the destination already exists
if os.path.exists(target_path):
    print("Removing old dataset folder...")
    shutil.rmtree(target_path)

# 7. Recursively copy the downloaded dataset to Drive
print("Copying dataset to Google Drive...")
shutil.copytree(source_path, target_path)

print("\n✅ Dataset is ready at:")
print(target_path)


In [None]:
from google.colab import drive
import os
import json
import random
from pathlib import Path
from tqdm import tqdm

import numpy as np
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as T
import torchvision.models as models

from sklearn.metrics import classification_report, confusion_matrix

# Mount Drive first: the dataset, checkpoint and label-map paths configured
# below are all located under /content/drive/MyDrive.
drive.mount('/content/drive')

# NOTE: the Kaggle download and copy-to-Drive steps are intentionally not
# repeated in this cell; it expects the dataset to already exist at DATA_ROOT.

# ---------------------------
# CONFIG
# ---------------------------
# Dataset root on the mounted Drive and its two split sub-folders.
DATA_ROOT = "/content/drive/MyDrive/brain_tumor_dataset"
TRAIN_FOLDER = os.path.join(DATA_ROOT, "Training")
TEST_FOLDER = os.path.join(DATA_ROOT, "Testing")

BATCH_SIZE = 16
# Ask for at most 4 DataLoader workers, but never more than the machine has
# CPU cores: oversubscribing makes DataLoader emit a warning and can slow
# loading down. os.cpu_count() may return None, hence the `or 1`.
NUM_WORKERS = min(4, os.cpu_count() or 1)
NUM_EPOCHS = 15
LR = 1e-4
WEIGHT_DECAY = 1e-5
RANDOM_SEED = 42  # seeds torch, random, numpy and the train/val split
INPUT_SIZE = 224  # model input (images are resized/cropped to INPUT_SIZE x INPUT_SIZE)
# Train on the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Full Drive path of the best-model checkpoint file.
MODEL_SAVE_PATH = "/content/drive/MyDrive/brain_tumor_model_best.pth"
# Full Drive path of the JSON file holding the index -> class-name map.
LABELS_SAVE = "/content/drive/MyDrive/label_map.json"

# ---------------------------
# Dataset
# ---------------------------
class ImageFolderDataset(Dataset):
    """
    Simple dataset that expects folders per class:
    root/class_x/xxx.png
    root/class_y/yyy.png

    Class indices follow the sorted order of the sub-folder names, and the
    files inside each class folder are listed in sorted order as well, so
    ``samples`` is identical every time the same directory tree is scanned
    (raw ``iterdir()`` order is filesystem dependent).

    Args:
        root_dir: directory whose immediate sub-directories are the classes.
        transform: optional callable applied to every loaded RGB PIL image.

    Attributes:
        samples: list of ``(file_path_str, class_index)`` tuples.
        class_to_idx: dict mapping class folder name -> class index.

    Raises:
        RuntimeError: if no file with a supported extension is found.
    """

    # Lower-case file suffixes that are accepted as images.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")

    def __init__(self, root_dir, transform=None):
        self.root_dir = Path(root_dir)
        self.transform = transform
        self.samples = []
        self.class_to_idx = {}
        classes = sorted(d.name for d in self.root_dir.iterdir() if d.is_dir())
        for idx, cls in enumerate(classes):
            self.class_to_idx[cls] = idx
            # Sort the directory listing so sample order is deterministic.
            for p in sorted((self.root_dir / cls).iterdir()):
                if p.suffix.lower() in self.IMAGE_EXTENSIONS:
                    self.samples.append((str(p), idx))
        # sanity: an empty dataset is almost certainly a wrong path
        if not self.samples:
            raise RuntimeError(f"No images found in {root_dir}. Check path and file extensions.")

    def __len__(self):
        """Return the number of image files found."""
        return len(self.samples)

    def __getitem__(self, i):
        """Load sample ``i`` and return ``(image, class_index)``."""
        path, label = self.samples[i]
        # Convert to RGB (some MRIs might be grayscale); the context manager
        # closes the underlying file handle as soon as the pixels are copied.
        with Image.open(path) as raw:
            image = raw.convert("RGB")
        # `is not None` rather than truthiness: a transform is a callable and
        # some containers of transforms can evaluate as falsy.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# ---------------------------
# Transforms
# ---------------------------
# Per-channel normalization statistics shared by both pipelines, so training
# and evaluation inputs are scaled identically.
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

# Training pipeline: random augmentations first, then tensor conversion and
# normalization.
train_transform = T.Compose(
    [
        T.RandomResizedCrop(INPUT_SIZE, scale=(0.8, 1.0)),
        T.RandomHorizontalFlip(),
        T.RandomRotation(15),
        T.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
        T.ToTensor(),
        T.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
    ]
)

# Evaluation pipeline: deterministic resize only, no augmentation.
val_transform = T.Compose(
    [
        T.Resize((INPUT_SIZE, INPUT_SIZE)),
        T.ToTensor(),
        T.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
    ]
)

# ---------------------------
# Helpers: model creation
# ---------------------------
def create_model(num_classes):
    """
    Build an ImageNet-pretrained classifier with a new ``num_classes``-way head.

    EfficientNet-B0 is tried first. If anything in that path raises, the
    error is printed and a ResNet50 is built instead. No parameter is frozen
    in either branch, so the whole network is fine-tuned.

    Args:
        num_classes: number of output logits of the replacement head.

    Returns:
        The model with its final linear layer replaced (not yet moved to a
        device).
    """
    try:
        # efficientnet_b0 / EfficientNet_B0_Weights only exist in newer
        # torchvision releases.
        model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)
        # The classifier is a Sequential whose last layer (index 1) is the
        # linear head; swap it for one sized to our classes.
        head_inputs = model.classifier[1].in_features
        model.classifier[1] = nn.Linear(head_inputs, num_classes)
        print("Using EfficientNet-B0")
    except Exception as err:
        print("EfficientNet-B0 failed, falling back to ResNet50:", err)
        model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        # ResNet exposes its head as the single `fc` layer.
        head_inputs = model.fc.in_features
        model.fc = nn.Linear(head_inputs, num_classes)
        print("Using ResNet50")

    return model

# ---------------------------
# Training / Validation loops
# ---------------------------
def train_one_epoch(model, loader, criterion, optimizer):
    """
    Run one optimization pass over ``loader``.

    Each batch loss is weighted by the batch size before averaging, so the
    returned loss is a per-sample mean when ``criterion`` returns a batch mean.

    Returns:
        ``(average_loss, accuracy)`` over all samples seen in the epoch.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    for images, labels in tqdm(loader, desc="train", leave=False):
        images, labels = images.to(DEVICE), labels.to(DEVICE)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(images)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item() * images.size(0)
        # Predicted class = index of the largest logit along dim 1.
        preds = logits.max(1).indices
        n_correct += (preds == labels).sum().item()
        n_seen += labels.size(0)

    return loss_sum / n_seen, n_correct / n_seen

def eval_model(model, loader, criterion):
    """
    Evaluate ``model`` on ``loader`` without computing gradients.

    Returns:
        ``(average_loss, accuracy, predictions, labels)`` where the last two
        are NumPy arrays holding one entry per evaluated sample, in loader
        order.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    pred_log, label_log = [], []
    with torch.no_grad():
        for images, labels in tqdm(loader, desc="eval", leave=False):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            logits = model(images)
            # Weight the batch loss by batch size for a per-sample average.
            loss_sum += criterion(logits, labels).item() * images.size(0)
            # Predicted class = index of the largest logit along dim 1.
            preds = logits.max(1).indices
            n_correct += (preds == labels).sum().item()
            n_seen += labels.size(0)
            # Keep plain Python ints so the logs can be turned into arrays.
            pred_log += preds.cpu().tolist()
            label_log += labels.cpu().tolist()

    return loss_sum / n_seen, n_correct / n_seen, np.array(pred_log), np.array(label_log)

# ---------------------------
# Main training function
# ---------------------------
def main():
    """
    Train the classifier, checkpoint the best epoch and report test metrics.

    Steps:
      1. Seed torch / random / numpy with RANDOM_SEED.
      2. Scan TRAIN_FOLDER and write the index -> class-name map to LABELS_SAVE.
      3. Split the training data 90/10 into train and validation subsets
         (the validation subset uses ``val_transform``, i.e. no augmentation).
      4. Train for NUM_EPOCHS, halving the learning rate when validation
         accuracy plateaus, and save a checkpoint to MODEL_SAVE_PATH every
         time validation accuracy improves.
      5. If TEST_FOLDER exists and is non-empty, reload the best checkpoint and
         print loss, accuracy and a classification report for the test set.

    Returns early (after printing an error) when the training or test data
    cannot be loaded or the dataset is too small to split.
    """
    # Function-scope import: only needed to clone the dataset object below.
    import copy

    torch.manual_seed(RANDOM_SEED)
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)

    # create dataset
    try:
        full_train_dataset = ImageFolderDataset(TRAIN_FOLDER, transform=train_transform)
    except (RuntimeError, OSError) as e:
        # RuntimeError: folder contains no images. OSError: the folder itself
        # is missing or unreadable (raised while listing the directory).
        print(f"ERROR: Could not load data. Please check if {TRAIN_FOLDER} exists and contains images.")
        print(f"Details: {e}")
        return

    # save class map
    class_to_idx = full_train_dataset.class_to_idx
    # JSON object keys are always strings, so the indices are stored as str.
    idx_to_class = {str(v): k for k, v in class_to_idx.items()}
    with open(LABELS_SAVE, "w", encoding="utf-8") as f:
        json.dump(idx_to_class, f, indent=2)
    print(f"Class map saved to: {LABELS_SAVE}")
    print("Classes:", class_to_idx)
    num_classes = len(class_to_idx)

    # Computed once, outside the epoch loop, so the test report below can use
    # them even if the loop body never runs. Passing explicit labels keeps
    # classification_report from raising when a class is absent from a split.
    report_labels = list(range(num_classes))
    sorted_class_names = [idx_to_class[str(i)] for i in report_labels]

    # train/val split (e.g., 90/10)
    n_total = len(full_train_dataset)
    # Ensure there is at least 1 image in the validation set
    n_val = max(1, int(0.10 * n_total))
    n_train = n_total - n_val

    # Make sure we have a valid split
    if n_train <= 0:
        print("ERROR: Not enough data for a train/validation split.")
        return

    train_dataset, val_dataset = random_split(full_train_dataset, [n_train, n_val],
                                              generator=torch.Generator().manual_seed(RANDOM_SEED))

    # random_split only yields index views over the one underlying dataset,
    # which carries the augmenting train_transform. For validation, make a
    # shallow copy that holds just the validation samples and uses
    # val_transform. Copying (instead of scanning the folder a second time)
    # guarantees the indices refer to exactly the same files and avoids a
    # second directory walk.
    val_dataset_eval = copy.copy(full_train_dataset)
    val_dataset_eval.transform = val_transform
    val_dataset_eval.samples = [full_train_dataset.samples[i] for i in val_dataset.indices]

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset_eval, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

    model = create_model(num_classes).to(DEVICE)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
    # mode='max' because the monitored quantity is validation accuracy.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2)

    best_val_acc = 0.0
    checkpoint_saved = False
    for epoch in range(1, NUM_EPOCHS+1):
        print(f"\n=== Epoch {epoch}/{NUM_EPOCHS} ===")
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_acc, val_preds, val_labels = eval_model(model, val_loader, criterion)
        print(f"Train loss: {train_loss:.4f}  acc: {train_acc:.4f}")
        print(f"Val   loss: {val_loss:.4f}  acc: {val_acc:.4f}")

        scheduler.step(val_acc)

        # print classification report for this epoch
        print("\nVal classification report:")
        print(classification_report(val_labels, val_preds, labels=report_labels,
                                    target_names=sorted_class_names, zero_division=0))

        # save best
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save({
                "model_state_dict": model.state_dict(),
                "class_to_idx": class_to_idx,
                "input_size": INPUT_SIZE
            }, MODEL_SAVE_PATH)
            checkpoint_saved = True
            print(f"Saved best model to {MODEL_SAVE_PATH} (val_acc={best_val_acc:.4f})")

    # final evaluation on TEST_FOLDER (if provided)
    if os.path.isdir(TEST_FOLDER) and os.listdir(TEST_FOLDER):
        print("\nRunning final evaluation on test set...")
        try:
            test_dataset = ImageFolderDataset(TEST_FOLDER, transform=val_transform)
        except (RuntimeError, OSError) as e:
            print(f"Could not load test data. Details: {e}")
            return

        # Report metrics for the weights that were actually saved (the best
        # validation epoch), not for whatever the last epoch produced.
        if checkpoint_saved:
            checkpoint = torch.load(MODEL_SAVE_PATH, map_location=DEVICE)
            model.load_state_dict(checkpoint["model_state_dict"])
            print(f"Restored best checkpoint (val_acc={best_val_acc:.4f}) for test evaluation.")

        test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
        # Labels are only comparable if both folders yield the same
        # name -> index mapping, not merely the same number of classes.
        if test_dataset.class_to_idx != class_to_idx:
            print("WARNING: Test set class folders differ from the training set; labels may be misaligned.")
            # Proceed, but use the class names from the training set for the report

        test_loss, test_acc, test_preds, test_labels = eval_model(model, test_loader, criterion)
        print(f"Test loss: {test_loss:.4f}  acc: {test_acc:.4f}")
        print("Test classification report:")
        # Use the training set class names for the report
        print(classification_report(test_labels, test_preds, labels=report_labels,
                                    target_names=sorted_class_names, zero_division=0))
    else:
        print(f"\nSkipping test evaluation: {TEST_FOLDER} does not exist or is empty.")

# Entry point: run the whole train / validate / test pipeline when this code
# is executed directly (as a script or as a notebook cell).
if __name__ == "__main__":
    main()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Class map saved to: /content/drive/MyDrive/label_map.json
Classes: {'glioma': 0, 'meningioma': 1, 'notumor': 2, 'pituitary': 3}




Using EfficientNet-B0

=== Epoch 1/15 ===




Train loss: 0.4342  acc: 0.8599
Val   loss: 0.1566  acc: 0.9457

Val classification report:
              precision    recall  f1-score   support

      glioma       0.98      0.88      0.93       128
  meningioma       0.87      0.92      0.89       129
     notumor       0.96      1.00      0.98       149
   pituitary       0.97      0.96      0.97       165

    accuracy                           0.95       571
   macro avg       0.95      0.94      0.94       571
weighted avg       0.95      0.95      0.95       571

Saved best model to /content/drive/MyDrive/brain_tumor_model_best.pth (val_acc=0.9457)

=== Epoch 2/15 ===




Train loss: 0.1641  acc: 0.9459
Val   loss: 0.0879  acc: 0.9790

Val classification report:
              precision    recall  f1-score   support

      glioma       0.98      0.98      0.98       128
  meningioma       0.96      0.97      0.97       129
     notumor       0.98      1.00      0.99       149
   pituitary       0.99      0.97      0.98       165

    accuracy                           0.98       571
   macro avg       0.98      0.98      0.98       571
weighted avg       0.98      0.98      0.98       571

Saved best model to /content/drive/MyDrive/brain_tumor_model_best.pth (val_acc=0.9790)

=== Epoch 3/15 ===




Train loss: 0.1011  acc: 0.9654
Val   loss: 0.0559  acc: 0.9790

Val classification report:
              precision    recall  f1-score   support

      glioma       0.99      0.96      0.98       128
  meningioma       0.94      0.98      0.96       129
     notumor       0.99      1.00      1.00       149
   pituitary       0.99      0.98      0.98       165

    accuracy                           0.98       571
   macro avg       0.98      0.98      0.98       571
weighted avg       0.98      0.98      0.98       571


=== Epoch 4/15 ===




Train loss: 0.0751  acc: 0.9761
Val   loss: 0.0370  acc: 0.9895

Val classification report:
              precision    recall  f1-score   support

      glioma       0.99      0.98      0.98       128
  meningioma       0.96      0.99      0.98       129
     notumor       1.00      1.00      1.00       149
   pituitary       1.00      0.99      0.99       165

    accuracy                           0.99       571
   macro avg       0.99      0.99      0.99       571
weighted avg       0.99      0.99      0.99       571

Saved best model to /content/drive/MyDrive/brain_tumor_model_best.pth (val_acc=0.9895)

=== Epoch 5/15 ===




Train loss: 0.0498  acc: 0.9823
Val   loss: 0.0327  acc: 0.9912

Val classification report:
              precision    recall  f1-score   support

      glioma       0.99      0.99      0.99       128
  meningioma       0.99      0.99      0.99       129
     notumor       0.98      1.00      0.99       149
   pituitary       1.00      0.98      0.99       165

    accuracy                           0.99       571
   macro avg       0.99      0.99      0.99       571
weighted avg       0.99      0.99      0.99       571

Saved best model to /content/drive/MyDrive/brain_tumor_model_best.pth (val_acc=0.9912)

=== Epoch 6/15 ===




Train loss: 0.0436  acc: 0.9864
Val   loss: 0.0191  acc: 0.9965

Val classification report:
              precision    recall  f1-score   support

      glioma       1.00      0.99      1.00       128
  meningioma       0.98      1.00      0.99       129
     notumor       1.00      1.00      1.00       149
   pituitary       1.00      0.99      1.00       165

    accuracy                           1.00       571
   macro avg       1.00      1.00      1.00       571
weighted avg       1.00      1.00      1.00       571

Saved best model to /content/drive/MyDrive/brain_tumor_model_best.pth (val_acc=0.9965)

=== Epoch 7/15 ===




Train loss: 0.0327  acc: 0.9914
Val   loss: 0.0205  acc: 0.9965

Val classification report:
              precision    recall  f1-score   support

      glioma       1.00      0.99      1.00       128
  meningioma       0.98      1.00      0.99       129
     notumor       1.00      1.00      1.00       149
   pituitary       1.00      0.99      1.00       165

    accuracy                           1.00       571
   macro avg       1.00      1.00      1.00       571
weighted avg       1.00      1.00      1.00       571


=== Epoch 8/15 ===




Train loss: 0.0341  acc: 0.9905
Val   loss: 0.0267  acc: 0.9895

Val classification report:
              precision    recall  f1-score   support

      glioma       1.00      0.99      1.00       128
  meningioma       0.96      0.99      0.98       129
     notumor       1.00      1.00      1.00       149
   pituitary       0.99      0.98      0.98       165

    accuracy                           0.99       571
   macro avg       0.99      0.99      0.99       571
weighted avg       0.99      0.99      0.99       571


=== Epoch 9/15 ===




Train loss: 0.0290  acc: 0.9911
Val   loss: 0.0287  acc: 0.9895

Val classification report:
              precision    recall  f1-score   support

      glioma       1.00      0.98      0.99       128
  meningioma       0.98      0.98      0.98       129
     notumor       0.97      1.00      0.99       149
   pituitary       1.00      0.99      0.99       165

    accuracy                           0.99       571
   macro avg       0.99      0.99      0.99       571
weighted avg       0.99      0.99      0.99       571


=== Epoch 10/15 ===




Train loss: 0.0242  acc: 0.9928
Val   loss: 0.0207  acc: 0.9895

Val classification report:
              precision    recall  f1-score   support

      glioma       1.00      0.98      0.99       128
  meningioma       0.97      1.00      0.98       129
     notumor       0.99      1.00      0.99       149
   pituitary       1.00      0.98      0.99       165

    accuracy                           0.99       571
   macro avg       0.99      0.99      0.99       571
weighted avg       0.99      0.99      0.99       571


=== Epoch 11/15 ===




Train loss: 0.0137  acc: 0.9959
Val   loss: 0.0239  acc: 0.9930

Val classification report:
              precision    recall  f1-score   support

      glioma       1.00      0.98      0.99       128
  meningioma       0.98      1.00      0.99       129
     notumor       0.99      1.00      0.99       149
   pituitary       1.00      0.99      0.99       165

    accuracy                           0.99       571
   macro avg       0.99      0.99      0.99       571
weighted avg       0.99      0.99      0.99       571


=== Epoch 12/15 ===




Train loss: 0.0118  acc: 0.9971
Val   loss: 0.0243  acc: 0.9912

Val classification report:
              precision    recall  f1-score   support

      glioma       1.00      0.99      1.00       128
  meningioma       0.98      1.00      0.99       129
     notumor       0.99      1.00      0.99       149
   pituitary       1.00      0.98      0.99       165

    accuracy                           0.99       571
   macro avg       0.99      0.99      0.99       571
weighted avg       0.99      0.99      0.99       571


=== Epoch 13/15 ===




Train loss: 0.0134  acc: 0.9951
Val   loss: 0.0319  acc: 0.9877

Val classification report:
              precision    recall  f1-score   support

      glioma       0.99      0.99      0.99       128
  meningioma       0.97      0.99      0.98       129
     notumor       0.99      1.00      0.99       149
   pituitary       1.00      0.97      0.98       165

    accuracy                           0.99       571
   macro avg       0.99      0.99      0.99       571
weighted avg       0.99      0.99      0.99       571


=== Epoch 14/15 ===




Train loss: 0.0097  acc: 0.9975
Val   loss: 0.0182  acc: 0.9930

Val classification report:
              precision    recall  f1-score   support

      glioma       1.00      0.98      0.99       128
  meningioma       0.98      1.00      0.99       129
     notumor       0.99      1.00      0.99       149
   pituitary       1.00      0.99      0.99       165

    accuracy                           0.99       571
   macro avg       0.99      0.99      0.99       571
weighted avg       0.99      0.99      0.99       571


=== Epoch 15/15 ===




Train loss: 0.0101  acc: 0.9969
Val   loss: 0.0164  acc: 0.9947

Val classification report:
              precision    recall  f1-score   support

      glioma       1.00      0.99      1.00       128
  meningioma       0.99      1.00      1.00       129
     notumor       0.99      1.00      0.99       149
   pituitary       1.00      0.99      0.99       165

    accuracy                           0.99       571
   macro avg       0.99      1.00      0.99       571
weighted avg       0.99      0.99      0.99       571


Running final evaluation on test set...


                                                     

Test loss: 0.0217  acc: 0.9954
Test classification report:
              precision    recall  f1-score   support

      glioma       1.00      0.99      0.99       300
  meningioma       0.98      1.00      0.99       306
     notumor       1.00      1.00      1.00       405
   pituitary       1.00      0.99      0.99       300

    accuracy                           1.00      1311
   macro avg       1.00      1.00      1.00      1311
weighted avg       1.00      1.00      1.00      1311



