<a href="https://colab.research.google.com/github/RiyadhBenhallou/npaic-25/blob/main/brain_tumor_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Brain Tumor Segmentation Competition

This notebook contains the complete pipeline for the Brain Tumor Segmentation competition.

**Steps:**
1. Install Dependencies
2. Setup Config & Utils
3. Define Dataset & Model
4. Train Model
5. Run Inference on Public Test

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# 1. Install Dependencies
!pip install segmentation-models-pytorch albumentations

Collecting segmentation-models-pytorch
  Downloading segmentation_models_pytorch-0.5.0-py3-none-any.whl.metadata (17 kB)
Downloading segmentation_models_pytorch-0.5.0-py3-none-any.whl (154 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.8/154.8 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: segmentation-models-pytorch
Successfully installed segmentation-models-pytorch-0.5.0


In [None]:
# 2. Unzip Data (Adjust paths if you uploaded differently)
# !unzip -q train.zip
# !unzip -q publictest.zip

In [3]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader, Dataset as BaseDataset
from sklearn.model_selection import train_test_split
import segmentation_models_pytorch as smp
import albumentations as albu
import warnings

warnings.filterwarnings("ignore")

In [4]:
# 3. Configuration
DATA_DIR = '/content/drive/MyDrive/BBA AI Challenge 2025/train'
IMAGES_DIR = os.path.join(DATA_DIR, 'images')
MASKS_DIR = os.path.join(DATA_DIR, 'masks')
TEST_IMGS_DIR = './content/drive/MyDrive/BBA AI Challenge 2025/public_test/images'
OUTPUT_DIR = '/content/drive/MyDrive/BBA AI Challenge 2025/public_test/masks'

ENCODER = 'resnet34'
ENCODER_WEIGHTS = 'imagenet'
CLASSES = ['tumor']
ACTIVATION = 'sigmoid'
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

BATCH_SIZE = 8
EPOCHS = 20
LR = 0.0001
NUM_WORKERS = 0

In [5]:
def get_training_augmentation():
    train_transform = [
        albu.HorizontalFlip(p=0.5),
        albu.VerticalFlip(p=0.5),
        albu.ShiftScaleRotate(scale_limit=0.1, rotate_limit=15, shift_limit=0.1, p=0.5, border_mode=0),
        albu.PadIfNeeded(min_height=512, min_width=512, always_apply=True, border_mode=0),
        albu.RandomCrop(height=512, width=512, always_apply=True),
        albu.GaussNoise(p=0.2),
        albu.Perspective(p=0.5),
        albu.OneOf(
            [
                albu.CLAHE(p=1),
                albu.RandomBrightnessContrast(p=1),
            ],
            p=0.9,
        ),
        albu.OneOf(
            [
                albu.Sharpen(p=1),
                albu.Blur(blur_limit=3, p=1),
                albu.MotionBlur(blur_limit=3, p=1),
            ],
            p=0.9,
        ),
        albu.OneOf(
            [
                albu.RandomBrightnessContrast(p=1),
                albu.HueSaturationValue(p=1),
            ],
            p=0.9,
        ),
    ]
    return albu.Compose(train_transform)

def get_validation_augmentation():
    # Standardize size
    test_transform = [
        albu.PadIfNeeded(512, 512)
    ]
    return albu.Compose(test_transform)

def to_tensor(x, **kwargs):
    return x.transpose(2, 0, 1).astype('float32')

def get_preprocessing(preprocessing_fn):
    _transform = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=to_tensor, mask=to_tensor),
    ]
    return albu.Compose(_transform)

In [6]:
# 5. Dataset Class

class BrainTumorDataset(BaseDataset):
    def __init__(
            self,
            images_dir,
            masks_dir,
            image_ids=None,
            augmentation=None,
            preprocessing=None,
    ):
        self.ids = image_ids if image_ids is not None else os.listdir(images_dir)
        self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]
        self.masks_fps = [os.path.join(masks_dir, image_id) for image_id in self.ids]

        self.augmentation = augmentation
        self.preprocessing = preprocessing

    def __getitem__(self, i):
        image = cv2.imread(self.images_fps[i])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        mask = cv2.imread(self.masks_fps[i], 0)
        # Binary mask > 127 is tumor
        mask = np.where(mask > 127, 1.0, 0.0)
        mask = np.expand_dims(mask, axis=-1)

        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        return image, mask

    def __len__(self):
        return len(self.ids)

In [7]:
# 6. Model & Loop Helpers

def get_custom_model():
    model = smp.Unet(
        encoder_name=ENCODER,
        encoder_weights=ENCODER_WEIGHTS,
        classes=1,
        activation=ACTIVATION,
    )
    return model

def train_epoch(model, loss, metrics, optimizer, loader, device):
    model.train()
    epoch_loss = 0
    epoch_iou = 0
    epoch_f1 = 0
    epoch_acc = 0

    for i, (images, masks) in enumerate(loader):
        images = images.to(device)
        masks = masks.to(device)

        optimizer.zero_grad()
        prediction = model(images)
        loss_value = loss(prediction, masks)
        loss_value.backward()
        optimizer.step()

        epoch_loss += loss_value.item()

        tp, fp, fn, tn = smp.metrics.get_stats(prediction, masks.long(), mode='binary', threshold=0.5)
        iou = smp.metrics.iou_score(tp, fp, fn, tn, reduction="micro")
        f1 = smp.metrics.f1_score(tp, fp, fn, tn, reduction="micro")
        acc = smp.metrics.accuracy(tp, fp, fn, tn, reduction="micro")

        epoch_iou += iou
        epoch_f1 += f1
        epoch_acc += acc

    return epoch_loss / len(loader), epoch_iou / len(loader), epoch_f1 / len(loader), epoch_acc / len(loader)

def valid_epoch(model, loss, metrics, loader, device):
    model.eval()
    epoch_loss = 0
    epoch_iou = 0
    epoch_f1 = 0
    epoch_acc = 0

    with torch.no_grad():
        for images, masks in loader:
            images = images.to(device)
            masks = masks.to(device)

            prediction = model(images)
            loss_value = loss(prediction, masks)

            epoch_loss += loss_value.item()

            tp, fp, fn, tn = smp.metrics.get_stats(prediction, masks.long(), mode='binary', threshold=0.5)
            iou = smp.metrics.iou_score(tp, fp, fn, tn, reduction="micro")
            f1 = smp.metrics.f1_score(tp, fp, fn, tn, reduction="micro")
            acc = smp.metrics.accuracy(tp, fp, fn, tn, reduction="micro")

            epoch_iou += iou
            epoch_f1 += f1
            epoch_acc += acc

    return epoch_loss / len(loader), epoch_iou / len(loader), epoch_f1 / len(loader), epoch_acc / len(loader)

In [None]:
# 7. Train
print(f"Using device: {DEVICE}")

preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

all_files = os.listdir(IMAGES_DIR)
train_ids, valid_ids = train_test_split(all_files, test_size=0.2, random_state=42)
print(f"Train size: {len(train_ids)}, Valid size: {len(valid_ids)}")

train_dataset = BrainTumorDataset(
    IMAGES_DIR,
    MASKS_DIR,
    image_ids=train_ids,
    augmentation=get_training_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
)

valid_dataset = BrainTumorDataset(
    IMAGES_DIR,
    MASKS_DIR,
    image_ids=valid_ids,
    augmentation=get_validation_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

model = get_custom_model()
model.to(DEVICE)

loss = smp.losses.DiceLoss(mode='binary')
optimizer = torch.optim.Adam([dict(params=model.parameters(), lr=LR)])
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1, eta_min=1e-5, last_epoch=-1)

max_score = 0

print("Starting Training...")
for i in range(EPOCHS):
    print(f'\nEpoch: {i+1}/{EPOCHS} ({(i+1)/EPOCHS*100:.1f}%)')
    train_logs = train_epoch(model, loss, [], optimizer, train_loader, DEVICE)
    valid_logs = valid_epoch(model, loss, [], valid_loader, DEVICE)

    print(f"Train - Loss: {train_logs[0]:.4f}, IoU: {train_logs[1]:.4f}, F1: {train_logs[2]:.4f}, Acc: {train_logs[3]:.4f}")
    print(f"Valid - Loss: {valid_logs[0]:.4f}, IoU: {valid_logs[1]:.4f}, F1: {valid_logs[2]:.4f}, Acc: {valid_logs[3]:.4f}")

    scheduler.step()

    if valid_logs[1] > max_score:
        max_score = valid_logs[1]
        torch.save(model, './best_model.pth')
        print('Model saved!')

Using device: cuda
Train size: 1960, Valid size: 491
Starting Training...

Epoch: 1/20 (5.0%)
Train - Loss: 0.9513, IoU: 0.0719, F1: 0.1323, Acc: 0.6934
Valid - Loss: 0.9596, IoU: 0.1956, F1: 0.3239, Acc: 0.9366
Model saved!

Epoch: 2/20 (10.0%)
Train - Loss: 0.9479, IoU: 0.1416, F1: 0.2451, Acc: 0.8792
Valid - Loss: 0.9579, IoU: 0.2008, F1: 0.3301, Acc: 0.9351
Model saved!

Epoch: 3/20 (15.0%)
Train - Loss: 0.9457, IoU: 0.1829, F1: 0.3050, Acc: 0.9098
Valid - Loss: 0.9574, IoU: 0.2262, F1: 0.3638, Acc: 0.9447
Model saved!

Epoch: 4/20 (20.0%)
Train - Loss: 0.9446, IoU: 0.1856, F1: 0.3081, Acc: 0.9088
Valid - Loss: 0.9566, IoU: 0.2548, F1: 0.4017, Acc: 0.9533
Model saved!

Epoch: 5/20 (25.0%)
Train - Loss: 0.9444, IoU: 0.2071, F1: 0.3376, Acc: 0.9219
Valid - Loss: 0.9565, IoU: 0.2554, F1: 0.4020, Acc: 0.9528
Model saved!

Epoch: 6/20 (30.0%)
Train - Loss: 0.9442, IoU: 0.2288, F1: 0.3654, Acc: 0.9299
Valid - Loss: 0.9564, IoU: 0.2900, F1: 0.4431, Acc: 0.9598
Model saved!

Epoch: 7/20 (3

In [None]:
# 8. Inference
class TestDataset(Dataset):  # Re-define to capture notebook scope (preprocessing_fn)
    def __init__(self, images_dir):
        self.ids = os.listdir(images_dir)
        self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]

        preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)
        self.preprocessing = self.get_preprocessing(preprocessing_fn)

    def get_preprocessing(self, preprocessing_fn):
        _transform = [
            albu.PadIfNeeded(512, 512),
            albu.Lambda(image=preprocessing_fn),
            albu.Lambda(image=to_tensor),
        ]
        return albu.Compose(_transform)

    def __getitem__(self, i):
        image = cv2.imread(self.images_fps[i])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.preprocessing:
            sample = self.preprocessing(image=image)
            image = sample['image']
        return image, self.ids[i]
    def __len__(self):
        return len(self.ids)

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

print("Loading Best Model...")
best_model = torch.load('./best_model.pth', map_location=DEVICE)
best_model.eval()

test_dataset = TestDataset(TEST_IMGS_DIR)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

print("Generating Predictions...")
with torch.no_grad():
    for images, ids in test_loader:
        images = images.to(DEVICE)

        # TTA Horizontal Flip
        pr_mask = best_model(images)
        images_flip = torch.flip(images, dims=[3])
        pr_mask_flip = best_model(images_flip)
        pr_mask_flip = torch.flip(pr_mask_flip, dims=[3])
        pr_mask = (pr_mask + pr_mask_flip) / 2.0

        pr_mask = (pr_mask > 0.5).float().cpu().numpy().squeeze()

        for i, image_id in enumerate(ids):
            mask = pr_mask[i]
            mask = (mask * 255).astype(np.uint8)
            # Resize if necessary (if input was padding, we might need to crop, but 512 is target)
            if mask.shape != (512, 512):
                 mask = cv2.resize(mask, (512, 512), interpolation=cv2.INTER_NEAREST)
            cv2.imwrite(os.path.join(OUTPUT_DIR, image_id), mask)

print("Done!")

!zip -r predictions.zip predictions