<a href="https://colab.research.google.com/github/Adnane-Ahroum/BrainTumorPipeline/blob/main/classificationfinal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Brain Tumor Classification

This notebook has been modified to:
1. Add Weights & Biases (wandb) integration for experiment tracking
2. Properly load MATLAB data files from the repository

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import random
from tqdm import tqdm
import scipy.io as sio
import wandb

import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torch.optim import Adam

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

import cv2
import PIL
from PIL import Image

## Data Loading - Fixed to work with repository files

In [None]:
def load_matlab_data(file_path):
    """Load an image array and a flat label vector from a MATLAB .mat file.

    The file is read with ``scipy.io.loadmat`` and searched for the first
    matching entry in a list of common image keys and a list of common label
    keys.

    Args:
        file_path: Path to the .mat file. ``.mat`` is appended when missing.
    Returns:
        images: Numpy array stored under the first matching image key.
        labels: 1-D numpy array of labels.
    Raises:
        FileNotFoundError: If the (possibly extended) path is not a file.
        ValueError: If no image key or no label key is present.
    """
    # Make sure the file_path has .mat extension
    if not file_path.endswith('.mat'):
        file_path = file_path + '.mat'

    # Verify that the file exists
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"MATLAB file not found: {file_path}")

    mat_data = sio.loadmat(file_path)

    # Print available keys to debug
    print(f"Available keys in {file_path}: {list(mat_data.keys())}")

    # Common keys in MATLAB files - try different possibilities
    possible_image_keys = ['images', 'image', 'Images', 'Image', 'data', 'Data', 'X', 'features']
    possible_label_keys = ['labels', 'label', 'Labels', 'Label', 'y', 'classes', 'Categories']

    # First candidate key that is actually present, or None
    image_key = next((key for key in possible_image_keys if key in mat_data), None)
    label_key = next((key for key in possible_label_keys if key in mat_data), None)

    images = None
    if image_key is not None:
        images = mat_data[image_key]
        print(f"Found images under key: {image_key}")

    labels = None
    if label_key is not None:
        labels = mat_data[label_key]
        print(f"Found labels under key: {label_key}")

    if images is None or labels is None:
        raise ValueError("Could not find image or label data in the MATLAB file")

    # loadmat returns MATLAB vectors as 2-D arrays: a 1xN row vector has
    # shape (1, N). Only treat the labels as "multi-column" when there is more
    # than one row as well; otherwise slicing [:, 0] would keep a single label.
    if labels.ndim > 1 and labels.shape[0] > 1 and labels.shape[1] > 1:
        print("Warning: Labels have multiple columns, using first column")
        labels = labels[:, 0]

    return images, labels.ravel()  # Ensure labels are flattened

In [None]:
# Start the Weights & Biases run. The hyperparameters stored in `config`
# are read back later in the notebook through `wandb.config`.
wandb.init(
    project="brain-tumor-classification",
    name="classification-experiment",
    config=dict(
        learning_rate=0.001,
        epochs=30,
        batch_size=16,
        model="ResNet50",
    ),
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33madnaneahroum69[0m ([33madnaneahroum69-al-akhawayn-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Colab only: mount Google Drive at /content/drive so that the dataset
# folders referenced further down are reachable from the notebook.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [7]:
# Colab only: manual upload widget, an alternative to reading from Drive.
# NOTE(review): the recorded run of this cell was interrupted and `uploaded`
# is not used by the later cells shown here; consider removing this cell.
from google.colab import files
uploaded = files.upload()        # choose your dataset.zip


KeyboardInterrupt: 

In [9]:

# Dataset folders on the mounted Google Drive.
# NOTE(review): the same two paths are assigned again in the data-loading cell.
train_dir = '/content/drive/MyDrive/DATASETCLASSIFICATION/Training'
test_dir  = '/content/drive/MyDrive/DATASETCLASSIFICATION/Testing'


## LOADING THE DATA FROM MATLAB FILES


In [5]:
!pip install mat73

Collecting mat73
  Downloading mat73-0.65-py3-none-any.whl.metadata (3.6 kB)
Downloading mat73-0.65-py3-none-any.whl (19 kB)
Installing collected packages: mat73
Successfully installed mat73-0.65


In [12]:

import os
import scipy.io as sio
import numpy as np
from sklearn.model_selection import train_test_split

# Optional: install mat73 (pip install mat73) for v7.3 files
import mat73

# Tumor categories; the integer label of a class is its position in this list
CLASSES = ['glioma', 'pituitary', 'meningioma']
CLASS_TO_LABEL = dict(zip(CLASSES, range(len(CLASSES))))

def load_mat_data(path, data_key='cjdata'):
    """Return the entry stored under ``data_key`` in a single .mat file.

    ``scipy.io.loadmat`` is tried first; when it raises NotImplementedError
    (v7.3 files) the file is read with ``mat73`` instead.

    Raises:
        KeyError: If ``data_key`` is absent from the loaded contents.
    """
    def _select(contents, note=""):
        # Same lookup for both readers; `note` marks the v7.3 fallback path.
        if data_key not in contents:
            raise KeyError(f"Key '{data_key}' not found in {path}{note}")
        return contents[data_key]

    try:
        return _select(sio.loadmat(path))
    except NotImplementedError:
        # v7.3 file, fallback to mat73
        return _select(mat73.loadmat(path), " (v7.3)")


def load_data_for_three_tumors(root_dir, data_key='cjdata'):
    """Load one item per .mat file from the class subfolders of ``root_dir``.

    For every class in ``CLASSES`` the folder ``root_dir/<class>`` is scanned
    for ``.mat`` files; each file is read with ``load_mat_data`` and paired
    with the class's integer label from ``CLASS_TO_LABEL``. Files that fail to
    load are reported with a printed message and skipped.

    Args:
        root_dir: Directory containing one subfolder per class.
        data_key: Key passed through to ``load_mat_data``.
    Returns:
        images: Numpy array with one entry per successfully loaded file.
        labels: 1-D int64 numpy array of class indices, aligned with images.
    Raises:
        FileNotFoundError: If a class subfolder is missing.

    NOTE(review): each entry is exactly what ``load_mat_data`` returns for
    ``data_key``. The recorded output shape ``(N,)`` suggests these are not
    plain, equally sized 2-D image arrays -- confirm whether an image field
    must be extracted before the items are handed to the dataset class.
    """
    images, labels = [], []
    for cls in CLASSES:
        subfolder = os.path.join(root_dir, cls)
        if not os.path.isdir(subfolder):
            raise FileNotFoundError(f"Missing folder: {subfolder}")
        # os.listdir returns entries in arbitrary order; sorting makes the
        # sample order (and therefore any seeded split) reproducible.
        for fname in sorted(os.listdir(subfolder)):
            if not fname.endswith('.mat'):
                continue
            path = os.path.join(subfolder, fname)
            try:
                img = load_mat_data(path, data_key)
            except Exception as e:
                # Best effort: report the broken file and keep going
                print(f"Error loading {fname}: {e}")
                continue
            images.append(img)
            labels.append(CLASS_TO_LABEL[cls])

    try:
        image_array = np.array(images)
    except ValueError:
        # Recent NumPy refuses to build an array from differently shaped
        # items; fall back to a 1-D object array holding each item as is.
        image_array = np.empty(len(images), dtype=object)
        for position, item in enumerate(images):
            image_array[position] = item

    # Fixed integer width so the label dtype does not depend on the platform
    return image_array, np.array(labels, dtype=np.int64)

# Dataset folders on the mounted Google Drive
train_dir = '/content/drive/MyDrive/DATASETCLASSIFICATION/Training'
test_dir  = '/content/drive/MyDrive/DATASETCLASSIFICATION/Testing'

# Read both folders, reporting the array shapes as we go
X_train, y_train = load_data_for_three_tumors(root_dir=train_dir)
print(f"Training: {X_train.shape}, Labels: {y_train.shape}")

X_test, y_test = load_data_for_three_tumors(root_dir=test_dir)
print(f"Testing:  {X_test.shape}, Labels: {y_test.shape}")

# Hold out 20% of the training samples for validation (seeded shuffle)
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    shuffle=True,
    random_state=42,
    test_size=0.2,
)
print(f"After split → Train: {X_train.shape}, Val: {X_val.shape}")


Training: (2452,), Labels: (2452,)
Testing:  (612,), Labels: (612,)
After split → Train: (1961,), Val: (491,)


In [1]:
class BrainTumorDataset(Dataset):
    """Dataset that turns raw image arrays into transformed RGB samples.

    Args:
        images: Indexable collection of image arrays.
        labels: Indexable collection of labels aligned with ``images``.
        transform: Optional callable applied to the PIL image.

    ``__getitem__`` returns ``(image, label)`` where ``image`` is the
    transformed PIL image (or the PIL image itself when no transform is set).
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    @staticmethod
    def _to_uint8(image):
        """Return ``image`` as uint8, rescaling non-uint8 data to 0..255.

        A plain ``astype('uint8')`` wraps values outside 0..255 (and truncates
        floats), which corrupts images stored with a wider range. uint8 input
        is returned unchanged.
        """
        image = np.asarray(image)
        if image.dtype == np.uint8 or image.size == 0:
            return image.astype(np.uint8)
        image = image.astype(np.float64)
        lowest, highest = image.min(), image.max()
        if highest > lowest:
            image = (image - lowest) / (highest - lowest) * 255.0
        else:
            # Constant image: nothing to stretch, just keep it in range
            image = np.clip(image, 0, 255)
        return np.rint(image).astype(np.uint8)

    def __getitem__(self, idx):
        image = self._to_uint8(self.images[idx])
        label = self.labels[idx]

        # Drop a trailing singleton channel so (H, W, 1) is treated as grayscale
        if image.ndim == 3 and image.shape[-1] == 1:
            image = image[..., 0]

        # Replicate grayscale into three channels to get an RGB image
        if image.ndim == 2:
            image = np.stack([image] * 3, axis=-1)

        # Convert to PIL Image for transformations
        image = Image.fromarray(image)

        if self.transform:
            image = self.transform(image)

        return image, label

# Image pipelines: both resize to 224x224, convert to a tensor and normalize
# with the same per-channel mean/std; only training adds random augmentation.
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=10),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

val_transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Wrap the arrays in datasets, then batch them
train_dataset = BrainTumorDataset(images=X_train, labels=y_train, transform=train_transform)
val_dataset = BrainTumorDataset(images=X_val, labels=y_val, transform=val_transform)

# Batch size comes from the wandb run configuration
batch_size = wandb.config.batch_size
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size)

NameError: name 'Dataset' is not defined

## MODEL DEFINITION


In [None]:
class BrainTumorClassifier(nn.Module):
    """ResNet50 backbone with a small fully connected classification head.

    Args:
        num_classes: Number of output logits produced by the final layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Load ImageNet-pretrained ResNet50. Newer torchvision releases
        # deprecate `pretrained=True` in favour of an explicit weights enum;
        # use it when available and fall back for older versions.
        weights_enum = getattr(models, "ResNet50_Weights", None)
        if weights_enum is not None:
            self.resnet = models.resnet50(weights=weights_enum.IMAGENET1K_V1)
        else:
            self.resnet = models.resnet50(pretrained=True)

        # Freeze every parameter tensor except the last ten.
        # NOTE(review): the slice is taken before `fc` is replaced below, so
        # the ten trainable tensors presumably include the original fc
        # weight/bias that are discarded -- confirm this is the intended cut.
        for param in list(self.resnet.parameters())[:-10]:
            param.requires_grad = False

        # Replace the final fully connected layer with a new trainable head
        num_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(
            nn.Linear(num_features, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return the logits produced by the modified ResNet for batch ``x``."""
        return self.resnet(x)

# Determine number of classes from the training labels.
# (`labels` is not defined at notebook level, so the previous expression
# raised NameError on a fresh kernel.)
num_classes = len(np.unique(y_train))
print(f"Number of classes: {num_classes}")

# Initialize the model on the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model = BrainTumorClassifier(num_classes).to(device)

# Define loss function and optimizer; the learning rate comes from the wandb config
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=wandb.config.learning_rate)

## Training Function with wandb Integration

In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    """Train ``model`` and log per-epoch metrics to Weights & Biases.

    Each epoch runs one pass over ``train_loader`` with gradient updates and
    one pass over ``val_loader`` without gradients. Loss, accuracy and a
    confusion-matrix image for the validation pass are logged with
    ``wandb.log``. Whenever validation accuracy improves, the state dict is
    written to ``best_brain_tumor_classifier.pth`` and passed to ``wandb.save``.

    Args:
        model: Network to train; batches are moved to the device of its parameters.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.
        num_epochs: Number of epochs to run.
    Returns:
        The same ``model`` object in its state after the final epoch (this is
        not necessarily the best checkpoint saved to disk).
    """
    # Use the device the model lives on instead of relying on a notebook global
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    # Start below any reachable accuracy so the first epoch always writes a checkpoint
    best_val_acc = float("-inf")

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        train_correct = 0
        train_total = 0

        # Training phase
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            inputs = inputs.to(run_device)
            labels = labels.to(run_device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so the epoch value is a per-sample mean
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()

        train_loss = running_loss / train_total
        train_acc = train_correct / train_total

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        val_loss = val_loss / val_total
        val_acc = val_correct / val_total

        # Draw the confusion matrix on an explicit figure so that logging and
        # closing do not depend on pyplot's implicit "current figure"
        conf_matrix = confusion_matrix(all_labels, all_preds)
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_title('Confusion Matrix')
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')

        # Log to wandb
        wandb.log({
            'epoch': epoch + 1,
            'train_loss': train_loss,
            'train_accuracy': train_acc,
            'val_loss': val_loss,
            'val_accuracy': val_acc,
            'confusion_matrix': wandb.Image(fig)
        })

        plt.close(fig)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

        # Save the best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'best_brain_tumor_classifier.pth')
            wandb.save('best_brain_tumor_classifier.pth')
            print(f"Saved best model with validation accuracy: {val_acc:.4f}")

    return model

## TRAIN THE MODEL

In [None]:
# Run training for the number of epochs stored in the wandb configuration
num_epochs = wandb.config.epochs
trained_model = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)

## EVALUATE THE MODEL

In [None]:
# Load the best checkpoint; map_location lets a GPU-trained checkpoint be
# restored on whatever device this session is using
model.load_state_dict(torch.load('best_brain_tumor_classifier.pth', map_location=device))
model.eval()

# Evaluate on validation set
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Calculate and display metrics
accuracy = accuracy_score(all_labels, all_preds)
class_report = classification_report(all_labels, all_preds)
# Same report as a table: one row per class / average, metrics as columns
report_df = pd.DataFrame(
    classification_report(all_labels, all_preds, output_dict=True)
).T
conf_mat = confusion_matrix(all_labels, all_preds)

print(f"Final Validation Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(class_report)

# Plot confusion matrix on an explicit figure so it can still be logged
# after it has been displayed
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Final Confusion Matrix')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')

# Log final metrics to wandb before plt.show(): with the inline backend,
# showing closes the current figure, so wandb.Image(plt) afterwards would
# pick up a new, empty figure.
wandb.log({
    'final_accuracy': accuracy,
    'final_confusion_matrix': wandb.Image(fig),
    'classification_report': wandb.Table(
        columns=["Class", "Precision", "Recall", "F1-Score", "Support"],
        # Series.values is an array property, not a method; tolist() yields the row's numbers
        data=[[name, *row.tolist()] for name, row in report_df.iterrows()]
    )
})

plt.show()
plt.close(fig)

# Finish the wandb run
wandb.finish()