# 2D Convolutional Network for Brain Tumor Segmentation

In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes so edits to imported project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

import matplotlib.pyplot as plt
# neural imaging
import nibabel as nib
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# Add the directory two levels up to the import path so the project-local
# `utils` package can be found; this must run before any `utils` import.
sys.path.append("../../")
import utils.hpc
# Imported explicitly because later cells call utils.training.* and
# utils.visualizations.* by their fully qualified names.
import utils.training
import utils.visualizations
from utils.datasets import BraTSDataset_2D
from utils.losses import CombinedLoss
from utils.models import UNet

# kagglehub is only needed when the dataset has to be downloaded locally.
if not utils.hpc.running_on_hpc():
    import kagglehub

## Configuration

In [3]:
# Training configuration
NUM_EPOCHS = 10
# Tag used for the checkpoint filename below and as the model name when the
# training history is logged -- change for each model!!
# Set to 'v1_2d' because this notebook trains the 2D U-Net; the previous
# 'v1_3d' tag would have filed this run's checkpoint/log under a 3D model's name.
MODEL_NAME = 'v1_2d'
MODEL_SAVE_PATH = f'checkpoints/{MODEL_NAME}.pth'
BATCH_SIZE = 64
NUM_WORKERS = 1  # worker count handed to every DataLoader
# Forwarded as `slice_range` to the dataset class; presumably the range of
# slice indices kept per volume -- TODO confirm against BraTSDataset_2D.
SLICE_RANGE = (22, 122)
N_SLICES = 5 # for 2.5D model

## Download the BraTS dataset

In [4]:
# Resolve the directory that holds the BraTS training cases.
if not utils.hpc.running_on_hpc():
    # Local run: fetch the latest version of the dataset through kagglehub
    # and point at the training folder inside the downloaded tree.
    path = kagglehub.dataset_download("awsaf49/brats20-dataset-training-validation")
    print("Path to dataset files:", path)
    TRAIN_DATASET_PATH = path + "/BraTS2020_TrainingData/MICCAI_BraTS2020_TrainingData/"
else:
    # HPC run: the project helper supplies the dataset location
    # (by its name it stages the data in RAM first -- not verifiable from here).
    TRAIN_DATASET_PATH = utils.hpc.load_dataset_into_ram()

## Split patients and create the datasets and data loaders

In [5]:
# Every sub-directory of the training folder is treated as one patient.
# Sorting makes the seeded splits below independent of directory listing order.
all_patients = sorted(
    filter(
        lambda name: os.path.isdir(os.path.join(TRAIN_DATASET_PATH, name)),
        os.listdir(TRAIN_DATASET_PATH),
    )
)

# Patient-level 70/15/15 split in two stages: hold out 30% of the patients...
train_patients, temp_patients = train_test_split(all_patients, test_size=0.30, random_state=42)
# ...then cut the hold-out in half to obtain validation and test sets.
val_patients, test_patients = train_test_split(temp_patients, test_size=0.50, random_state=42)

print(f"Train: {len(train_patients)} patients")
print(f"Val: {len(val_patients)} patients")
print(f"Test: {len(test_patients)} patients")


# One 2D dataset per split, all built with the same slice range.
train_dataset, val_dataset, test_dataset = (
    BraTSDataset_2D(patients, TRAIN_DATASET_PATH, slice_range=SLICE_RANGE)
    for patients in (train_patients, val_patients, test_patients)
)

# The loaders share batch size and worker count; only the training loader shuffles.
loader_kwargs = {"batch_size": BATCH_SIZE, "num_workers": NUM_WORKERS}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

Train: 258 patients
Val: 55 patients
Test: 56 patients


In [6]:
# Pick the compute backend in priority order: Apple MPS, then CUDA, then CPU.
# The checks short-circuit, so CUDA is only probed when MPS is unavailable.
device_name = (
    'mps' if torch.backends.mps.is_available()
    else 'cuda' if torch.cuda.is_available()
    else 'cpu'
)
device = torch.device(device_name)

print(f"Using device: {device}")

Using device: cuda


In [7]:
# Build the U-Net with 4 input channels and 4 output classes, then move it to
# the selected device.
model = UNet(n_channels=4, n_classes=4)
model = model.to(device)

# Segmentation loss: the project's CombinedLoss with its default settings
# (which terms it combines is defined in utils.losses, not shown here).
criterion = CombinedLoss()

# Adam over all model parameters with a fixed initial learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Halve the learning rate once the monitored value has failed to decrease for
# more than 5 consecutive scheduler steps.
# NOTE(review): mode='min' is only correct if the training loop steps the
# scheduler with a loss; if it steps with the dice metric, this should be
# mode='max' -- confirm against utils.training.train_loop.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=5,
    factor=0.5,
)

n_params = sum(p.numel() for p in model.parameters())
print(f"Model has {n_params} parameters")

Model has 31044292 parameters


## Training Loop

In [8]:
def save_history_fn(history):
    """Write a training history to JSON together with this run's configuration.

    Thin wrapper around ``utils.visualizations.history_to_json`` that pins the
    run metadata (model name, batch size, slice range, ...) so the training
    loop can call it with only the history. The notebook globals are looked up
    at call time, so the values in effect when the callback runs are recorded.

    Args:
        history: Training history object; forwarded unchanged.

    Returns:
        Whatever ``history_to_json`` returns (the recorded notebook output
        suggests the path of the written file -- TODO confirm).
    """
    return utils.visualizations.history_to_json(
        history=history,
        model_name=MODEL_NAME,
        save_dir='logs',
        # Configuration parameters
        batch_size=BATCH_SIZE,
        slice_range=SLICE_RANGE,
        num_epochs=NUM_EPOCHS,
        optimizer='Adam',
        loss_function='CombinedLoss',
        model_type='UNet',
        model_parameters=sum(p.numel() for p in model.parameters()),
        dataset_type='2D',
        # Keep in sync with the early_stopping_patience given to the training loop.
        early_stopping_patience=10,
        augmentations=False,
        device=str(device),
    )

In [None]:
# Run training loop
# Make sure the checkpoint folder exists before training starts: how
# train_loop writes the best model is not visible here, and torch-style
# writers do not create missing parent directories, so a missing folder could
# otherwise abort the run at the first checkpoint.
checkpoint_dir = os.path.dirname(MODEL_SAVE_PATH)
if checkpoint_dir:  # empty when MODEL_SAVE_PATH has no directory component
    os.makedirs(checkpoint_dir, exist_ok=True)

# Train with the objects built in the cells above; the returned history is
# consumed by the plotting and JSON-export cells that follow.
history = utils.training.train_loop(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=NUM_EPOCHS,
    device=device,
    primary_metric='dice',
    scheduler=scheduler,
    save_best_model=True,
    model_save_path=MODEL_SAVE_PATH,
    early_stopping_patience=10,
    save_history_fn=save_history_fn
)

Epoch 1/10


Training:   9%|▊         | 35/404 [01:48<18:50,  3.06s/it, loss=0.9705]

In [None]:
# Plot the training history; save_path tells the helper where the figure
# should go (presumably it writes the PNG there -- TODO confirm).
history_plot_path = f'assets/{MODEL_NAME}/detailed_training_history.png'
# Create the per-model assets folder up front, since figure writers such as
# matplotlib's savefig do not create missing parent directories.
os.makedirs(os.path.dirname(history_plot_path), exist_ok=True)
utils.visualizations.plot_detailed_training_history(
    history=history,
    save_path=history_plot_path
)

NameError: name 'plot_detailed_training_history' is not defined

In [None]:
# Save history to JSON
# Reuse the save_history_fn callback defined before the training cell rather
# than repeating its argument list, so the recorded run configuration lives in
# exactly one place. The call's return value is displayed as the cell output.
save_history_fn(history)

✓ Saved training history to logs/v1_2d_history.json


'logs/v1_2d_history.json'