In [None]:
# %% [markdown]
# # LongiTumorSense Model Training
# **Training on MU-Glioma-Post Dataset**
# - Segmentation: nnUNet
# - Classification: 3D DenseNet
# - Survival: CoxPH Model

In [None]:
!pip install monai torch torchvision nnunet pyradiomics lifelines pydicom nibabel wandb -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m276.6/276.6 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.5/34.5 MB[0m [31m53.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.5/34.5 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.3/156.3 kB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... 

In [None]:
import nibabel as nib
import numpy  as np
from sklearn.model_selection import train_test_split
import torch
import os
import monai
from monai.data import Dataset ,DataLoader
from monai.transforms import ( Compose , LoadImaged , EnsureChannelFirstd, ScaleIntensityd,RandRotated,RandFlipd,RandZoomd,ToTensord)
from monai.networks.nets import DenseNet121,Unet
from monai.metrics import DiceMetric
from monai.losses import DiceLoss, FocalLoss
import wandb
import pandas as pd
from lifelines import CoxPHFitter


In [None]:
import torch

In [None]:
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device.")


Using cpu device.


In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


**Data Clearner and saved into the drive**

In [None]:
import os
import shutil


raw_root = "/content/drive/My Drive/MU-Glioma-Post"

output_root="/content/drive/My Drive/clean_data"



imagesTr = os.path.join(output_root, "imagesTr")
labelsTr = os.path.join(output_root, "labelsTr")
os.makedirs(os.path.join(output_root, "imagesTs"), exist_ok=True)
os.makedirs(imagesTr, exist_ok=True)
os.makedirs(labelsTr, exist_ok=True)

print("Raw dataset path:", raw_root)
print("nnU-Net dataset path:", output_root)


progress_file = os.path.join(output_root, "converted_cases.txt")

if os.path.exists(progress_file):
    with open(progress_file, "r") as f:
        converted_cases = set(line.strip() for line in f)
else:
    converted_cases = set()
print(f"Found {len(converted_cases)} cases already processed.")

Raw dataset path: /content/drive/My Drive/MU-Glioma-Post
nnU-Net dataset path: /content/drive/My Drive/clean_data
Found 4 cases already processed.


In [None]:
def is_nifti(fname):
  return fname.endswith(".nii") or fname.endswith(".nii.gz")

In [None]:
mod_priority=[
      't1c','t1gd','t1ce',  # contrast-enhanced T1 variants
    't1n','t1',           # native T1
    'flair','t2f','t2flair','t2w','t2' # T2 /flair variants
]

In [None]:
def file_priority(fname):
  lf=fname.lower()
  for i,k in enumerate(mod_priority):
    if k in lf:
      return i
  return len(mod_priority) + hash(lf) % 1000

In [None]:
import re
import os
import shutil
from tqdm import tqdm
canonical_modalities = None




skipped = []
new_cases_count = 0





total_timepoints = sum(
    1 for p in sorted(os.listdir(raw_root))
    if os.path.isdir(os.path.join(raw_root, p))
    for tp in sorted(os.listdir(os.path.join(raw_root, p)))
    if os.path.isdir(os.path.join(raw_root, p, tp))
)







with tqdm(total=total_timepoints, desc="Processing cases") as pbar:
    for patient_id in sorted(os.listdir(raw_root)):
        patient_path = os.path.join(raw_root, patient_id)
        if not os.path.isdir(patient_path):
            pbar.update(1)
            continue






        for tp in sorted(os.listdir(patient_path)):
            tp_path = os.path.join(patient_path, tp)
            if not os.path.isdir(tp_path):
                pbar.update(1)
                continue



            tp_clean = re.sub(r"\s+", "_", tp)
            tp_clean = re.sub(r"[^A-Za-z0-9_-]", "_", tp_clean)
            case_id = f"{patient_id}_{tp_clean}"



            if case_id in converted_cases:
                pbar.update(1)
                continue


            files = [f for f in os.listdir(tp_path) if is_nifti(f)]
            if not files:
                skipped.append((patient_id, tp, "no nifti files"))
                pbar.update(1)
                continue



            label_candidates = [f for f in files if any(x in f.lower() for x in ["mask", "tumor", "seg", "label"])]
            if len(label_candidates) == 0:
                skipped.append((patient_id, tp, "no label found"))
                pbar.update(1)
                continue


            label_file = label_candidates[0]



            image_files = [f for f in files if f != label_file]
            if len(image_files) == 0:
                skipped.append((patient_id, tp, "no image files"))
                pbar.update(1)
                continue



            image_files_sorted = sorted(image_files, key=file_priority)
            if canonical_modalities is None:
                canonical_modalities = image_files_sorted.copy()
                print("\nDetected modality order (from first sample)")
                for idx, nm in enumerate(canonical_modalities):
                    print(f"{idx}: {nm}")
                print("If this order is wrong adjust mod_priority list in the script.")


            else:
                if len(image_files_sorted) != len(canonical_modalities):
                    skipped.append(
                        (patient_id, tp, f"modality count mismatch {len(image_files_sorted)} vs {len(canonical_modalities)}")
                    )
                    pbar.update(1)
                    continue



            for i, fname in enumerate(image_files_sorted):
                src = os.path.join(tp_path, fname)
                destination = os.path.join(imagesTr, f"{case_id}_{i:04d}.nii.gz")
                shutil.copy(src, destination)

            shutil.copy2(os.path.join(tp_path, label_file), os.path.join(labelsTr, f"{case_id}.nii.gz"))



            converted_cases.add(case_id)
            with open(progress_file, "a") as f:
                f.write(case_id + "\n")

            new_cases_count += 1
            pbar.update(1)

print(f"\nConversion finished. {len(converted_cases)} total cases processed so far.")
if skipped:
    print(f"{len(skipped)} timepoints skipped (see sample):")
    for s in skipped[:10]:
        print(" ", s)

print(f"imagesTr files: {len(os.listdir(imagesTr))}, labelsTr files: {len(os.listdir(labelsTr))}")
print(f"Newly processed this run: {new_cases_count}")

**Get clean data from drive into local colab for further processing**

In [None]:
from tqdm import tqdm
import os
import shutil


drive_clean_path = "/content/drive/MyDrive/clean_data"
local_clean_path = "/content/clean_data_local"

os.makedirs(local_clean_path,exist_ok=True)


all_files=[]

for root,dirs,files in os.walk(drive_clean_path):
    for file in files:
      source_file=os.path.join(root,file)
      relative_path=os.path.relpath(source_file,drive_clean_path)
      destination_file=os.path.join(local_clean_path,relative_path)
      all_files.append((source_file, destination_file))


remaining_files=[]
for source_file,destination_file in all_files:
    if os.path.exists(destination_file)and os.path.getsize(destination_file)==os.path.getsize(source_file):
       continue
    remaining_files.append((source_file,destination_file))

for source_file,destination_file in tqdm(remaining_files, desc="copying files", unit="files"):
    os.makedirs(os.path.dirname(destination_file),exist_ok=True)
    shutil.copy2(source_file , destination_file)

print(f"copy complete!{len(all_files)-len(remaining_files)} files already exists,{len(remaining_files)} new files copied")
print(" Clean dataset loaded from Drive.")


copying files: 100%|██████████| 2972/2972 [13:06<00:00,  3.78files/s]

copy complete!0 files already exists,2972 new files copied
 Clean dataset loaded from Drive.





**This is for checking the length of file for each imageTr and labelTr**

In [None]:
imagesTr_path=os.path.join(local_clean_path,"imagesTr")
labelsTr_path=os.path.join(local_clean_path,"labelsTr")

length_imageTr=len([f for f in os.listdir(imagesTr_path) if os.path.isfile(os.path.join(imagesTr_path,f))])
length_labelsTr=len([f for f in os.listdir(labelsTr_path) if os.path.isfile(os.path.join(labelsTr_path,f))])

print(f" imageTr files:{length_imageTr}")
print(f" labelsTr files:{length_labelsTr}")

 imageTr files:2376
 labelsTr files:594


**After Disconnect:**

In [None]:
!pip install monai torch torchvision nnunet pyradiomics lifelines pydicom nibabel wandb -q

**Install a Python package directly from its GitHub source code, not from the normal package store (PyPI).”**

**This function loads an MRI file, converts it to a NumPy array, and scales all values to between 0 and 1 for easier analysis.**

In [None]:
!pip install git+https://github.com/MIC-DKFZ/nnUNet.git

In [None]:
import nibabel as nib
import numpy  as np

def load_and_preprocess(patient_path):
    img = nib.load(patient_path)
    data = img.get_fdata()
    data = (data - np.min(data)) / (np.max(data) - np.min(data))
    return data

**Renames the file into nnuNet naming style**

**Copy and rename image files**

**Copy and rename label files**


In [None]:
import os
import re
import shutil
from glob import glob
from tqdm import tqdm

source_images = "/content/drive/MyDrive/clean_data/imagesTr"
source_labels = "/content/drive/MyDrive/clean_data/labelsTr"

destination_imagesTr = "/content/clean_data_local/imagesTr"
destination_labelsTr = "/content/clean_data_local/labelsTr"

os.makedirs(destination_imagesTr, exist_ok=True)
os.makedirs(destination_labelsTr, exist_ok=True)

image_files = glob(os.path.join(source_images, "*.nii.gz"))
label_files = glob(os.path.join(source_labels, "*.nii.gz"))



print(f"Copying {len(image_files)} image files...")


for scan_path in tqdm(image_files, desc="Images copied", unit="file"):
    filename = os.path.basename(scan_path)
    match = re.match(r"(PatientID_\d+)_Timepoint_(\d+)_(\d{4})\.nii\.gz", filename)
    if match:
        patient_id, timepoint, modality_idx = match.groups()
        case_id = f"{patient_id}_Timepoint_{timepoint}"
        destination_path = os.path.join(destination_imagesTr, f"{case_id}_{modality_idx}.nii.gz")
        if scan_path != destination_path:
            shutil.copy(scan_path, destination_path)

print(f"Copying {len(label_files)} label files...")


for label_path in tqdm(label_files, desc="Labels copied", unit="file"):
    filename = os.path.basename(label_path)
    match = re.match(r"(PatientID_\d+)_Timepoint_(\d+)\.nii\.gz", filename)
    if match:
        patient_id, timepoint = match.groups()
        case_id = f"{patient_id}_Timepoint_{timepoint}"
        dst_path = os.path.join(destination_labelsTr, f"{case_id}.nii.gz")
        if label_path != dst_path:
            shutil.copy(label_path, dst_path)

print(f"Total copied {len(os.listdir(destination_imagesTr))} scans to {destination_imagesTr}")
print(f"Total copied {len(os.listdir(destination_labelsTr))} labels to {destination_labelsTr}")


Copying 2376 image files...


Images copied: 100%|██████████| 2376/2376 [06:36<00:00,  6.00file/s]


Copying 594 label files...


Labels copied: 100%|██████████| 594/594 [02:21<00:00,  4.19file/s]

Total copied 2376 scans to /content/clean_data_local/imagesTr
Total copied 594 labels to /content/clean_data_local/labelsTr





In [None]:
imagesTr_path = "/content/clean_data_local/imagesTr"
labelsTr_path = "/content/clean_data_local/labelsTr"


def clean_zero_byte_files(folder_path):
     deleted_files=[]
     for root,_, files in os.walk(folder_path):
         for file in files:
             file_path=os.path.join(root,file)
             if os.path.getsize(file_path)==0:
                print(f"Deleting 0-byte file : {file_path}")
                os.remove(file_path)
                deleted_files.append(file_path)
     return deleted_files# Clean both images and labels
deleted_images = clean_zero_byte_files(imagesTr_path)
deleted_labels = clean_zero_byte_files(labelsTr_path)

print(f"\nDeleted {len(deleted_images)} image files and {len(deleted_labels)} label files.\n")



Deleted 0 image files and 0 label files.



In [None]:
import os

file_path = "/content/clean_data_local/imagesTr/PatientID_0021_Timepoint_6_0001.nii.gz"

if os.path.exists(file_path):
    if os.path.getsize(file_path) == 0:
        os.remove(file_path)
        print("Deleted 0-byte file:", file_path)
    else:
        print("File is not empty, skipping deletion.")
else:
    print("File not found.")


File not found.


**This code creates a dataset.json file that describes your medical imaging dataset for nnU-Net.**

In [None]:
import os
import json
import re


output_root = "/content/clean_data_local"
imagesTr_path = os.path.join(output_root, "imagesTr")
labelsTr_path = os.path.join(output_root, "labelsTr")


num_cases = len([f for f in os.listdir(labelsTr_path) if f.endswith(".nii.gz")])


first_case_files = sorted([f for f in os.listdir(imagesTr_path) if f.endswith(".nii.gz")])
modality_count = len(set([re.search(r'_(\d{4})\.nii\.gz$', f).group(1) for f in first_case_files]))


dataset_json = {
    "name": "MU-Glioma-Post",
    "description": "Post-operative glioma segmentation",
    "reference": "Your reference here",
    "licence": "Your license here",
    "release": "1.0",
    "modality": {str(i): f"MRI_modality_{i}" for i in range(modality_count)},
    "labels": {
        "0": "background",
        "1": "tumor"
    },
    "numTraining": num_cases,
    "file_ending": ".nii.gz"
}


with open(os.path.join(output_root, "dataset.json"), 'w') as f:
    json.dump(dataset_json, f, indent=4)

print(f"dataset.json created at: {os.path.join(output_root, 'dataset.json')}")
print(json.dumps(dataset_json, indent=4))


dataset.json created at: /content/clean_data_local/dataset.json
{
    "name": "MU-Glioma-Post",
    "description": "Post-operative glioma segmentation",
    "reference": "Your reference here",
    "licence": "Your license here",
    "release": "1.0",
    "modality": {
        "0": "MRI_modality_0",
        "1": "MRI_modality_1",
        "2": "MRI_modality_2",
        "3": "MRI_modality_3"
    },
    "labels": {
        "0": "background",
        "1": "tumor"
    },
    "numTraining": 593,
    "file_ending": ".nii.gz"
}


In [None]:
import wandb

# Print your default W&B username (entity)
print("Your W&B username:", wandb.Api().default_entity)

# Alternative: Check after login
wandb.login()
print("Logged in as:", wandb.Api().default_entity)

In [None]:
wandb.init(project="LongiTumorSense",entity="numl-f21-35629-numl")

In [None]:
import os
from sklearn.model_selection import train_test_split

def prepare_dataset(imagesTr, labelsTr, test_size=0.2):

    image_files = [f for f in os.listdir(imagesTr) if f.endswith(".nii.gz")]
    case_ids = sorted(list(set("_".join(f.split("_")[:-1]) for f in image_files)))

    print(f"Found {len(case_ids)} unique cases.")

    # Split into train and test
    train_cases, test_cases = train_test_split(case_ids, test_size=test_size, random_state=42)

    missing_labels = []

    def build_file_list(cases):
        file_list = []
        for case_id in cases:
            # Build list of all 4 modalities for this case
            modalities = [
                os.path.join(imagesTr, f"{case_id}_0000.nii.gz"),  # FLAIR
                os.path.join(imagesTr, f"{case_id}_0001.nii.gz"),  # T1
                os.path.join(imagesTr, f"{case_id}_0002.nii.gz"),  # T1ce
                os.path.join(imagesTr, f"{case_id}_0003.nii.gz")   # T2
            ]
            label_path = os.path.join(labelsTr, f"{case_id}.nii.gz")

            if not os.path.exists(label_path):
                missing_labels.append(case_id)
                continue

            file_list.append({
                "image": modalities,
                "label": label_path,
                "name": case_id
            })
        return file_list

    train_files = build_file_list(train_cases)
    test_files = build_file_list(test_cases)

    print(f"Length of training dataset: {len(train_files)}")
    print(f"Length of validation dataset: {len(test_files)}")

    if missing_labels:
        print(f"Missing labels for {len(missing_labels)} cases: {missing_labels[:10]}{'...' if len(missing_labels) > 10 else ''}")

    return train_files, test_files


In [None]:

train_files, test_files = prepare_dataset(
    "/content/clean_data_local/imagesTr",
    "/content/clean_data_local/labelsTr"
)


Found 594 unique cases.
Length of training dataset: 474
Length of validation dataset: 119
Missing labels for 1 cases: ['PatientID_0275_Timepoint_6']


In [None]:
from monai.transforms import LoadImaged
from monai.data import Dataset as MonaiDataset,DataLoader
class SafeDataset(MonaiDataset):
    def __getitem__(self, index):
        item = self.data[index]
        try:
            if self.transform is not None:
                item= self.transform(item)
            return item

        except (FileNotFoundError, nib.filebasedimages.ImageFileError, RuntimeError)  as e:
              image=item.get("image","unknown")
              print(f"Skipping sample at index: {index}:{image} ({e})")
              return None
        except Exception as e:
             image= item.get("image","unknown")
             print(f"Skipping sample at index:  {index} due to unexpected error: {image} ({e})")
             return None

In [None]:
from torch.utils.data._utils.collate import default_collate
def collate_skip_none(batch):
    batch = [item for item in batch if item is not None]
    if not batch:
        return None
    return default_collate(batch)

In [None]:
from monai.transforms import Compose

transform_basic=Compose([
    LoadImaged(keys=["image"],allow_missing_keys=True),
    EnsureChannelFirstd(keys=["image"]),
    ToTensord(keys=["image"])

])

In [136]:
batch_size=4
train_dataset_basic=SafeDataset(data=train_files,transform=transform_basic)
train_loader_basic=DataLoader(train_dataset_basic,batch_size=batch_size, collate_fn=collate_skip_none, shuffle=True)
batch_shape=next(iter(train_loader_basic))["image"].shape
print("Getting batches of shape:",batch_shape)

Getting batches of shape: torch.Size([4, 4, 240, 240, 155])


In [141]:
# def get_mean_std(dataset_loader_basic,resume_file="resume_index.txt"):
#   """ Computes the mean and std of image data.
#   Input :a DataLoader producing tesnors of shpae [batch_size, channesl , pixels_x, pixel_y]
#   Output: the mean of each channel as a tensor , the standard deviaton of each channel as a tensor
#   formatted as a tuple ( means[channels], std[channels])
#   """
#   start_index=0
#   if resume_file is not None and os.path.exists(resume_file): # Added check for None
#      with open(resume_file , "r") as file:
#           start_index=int(file.read().strip() or 0)
#      print(f"Resuming from batch index {start_index}....")

#   channels_sum,channels_squared_sum, num_batches=0,0,0

#   for  idx,batch_shape in enumerate(tqdm(dataset_loader_basic, desc="Computing mean and std", leave=False)):
#        if idx < start_index:
#           continue
#        try:

#           data = batch_shape["image"]
#           channels_sum += torch.mean(data,dim=[0,2,3])
#           channels_squared_sum+=torch.mean(data**2,dim=[0,2,3])
#           num_batches+=1

#           if resume_file is not None: # Only write to file if resume_file is not None
#             with open(resume_file,"w") as file:
#               file.write(str(idx +1))

#        except FileNotFoundError as e:
#           print(f"Skipping missing file in batch{idx}:{e}")
#        except Exception as e:
#           print(f"Error processing batch {idx}: {e}")

#   if num_batches == 0:
#     raise ValueError("No valid images found in the dataset.")

#   mean=channels_sum/num_batches
#   std=(channels_squared_sum/num_batches-mean**2)**0.5

#   return mean,std

**check original file in drive**

In [144]:
import torch
from tqdm import tqdm

def get_mean_std(dataset_loader_basic, resume_file=None):
    """
    Compute per-channel mean and std from a DataLoader that yields dicts with key 'image'.

    Args:
        loader: PyTorch DataLoader returning batches with ["image"] tensors of shape [B, C, H, W] or [B, C, D, H, W]
        resume_file (str or None): Optional file to store the last processed batch index for resuming.

    Returns:
        mean (torch.Tensor): Per-channel mean values.
        std (torch.Tensor): Per-channel standard deviations.
    """
    start_index = 0
    if resume_file is not None and os.path.exists(resume_file):
        with open(resume_file, "r") as f:
            start_index = int(f.read().strip() or 0)
        print(f"Resuming from batch index {start_index}...")

    channels_sum = 0
    channels_squared_sum = 0
    num_batches = 0

    for idx, batch in enumerate(tqdm(dataset_loader_basic, desc="Computing mean/std", unit="batch")):
        if idx < start_index:
            continue
        try:
            data = batch["image"]
            data = data.float()


            dims = list(range(0, data.ndim))
            dims.remove(1)
            channels_sum += data.mean(dim=dims)
            channels_squared_sum += (data ** 2).mean(dim=dims)
            num_batches += 1

            if resume_file is not None:
                with open(resume_file, "w") as f:
                    f.write(str(idx + 1))

        except Exception as e:
            print(f"Error processing batch {idx}: {e}")

    if num_batches == 0:
        raise ValueError("No valid images found in the dataset.")

    mean = channels_sum / num_batches
    std = torch.sqrt(channels_squared_sum / num_batches - mean ** 2)

    return mean, std




In [145]:

mean, std = get_mean_std(train_loader_basic,resume_file=None)
print("Mean:", mean)
print("Std:", std)

Computing mean/std:  60%|█████▉    | 71/119 [06:30<06:03,  7.57s/batch]

Skipping sample at index: 261:['/content/clean_data_local/imagesTr/PatientID_0021_Timepoint_6_0000.nii.gz', '/content/clean_data_local/imagesTr/PatientID_0021_Timepoint_6_0001.nii.gz', '/content/clean_data_local/imagesTr/PatientID_0021_Timepoint_6_0002.nii.gz', '/content/clean_data_local/imagesTr/PatientID_0021_Timepoint_6_0003.nii.gz'] (applying transform <monai.transforms.io.dictionary.LoadImaged object at 0x7a4f0afb27d0>)


Computing mean/std: 100%|██████████| 119/119 [10:40<00:00,  5.38s/batch]

Mean: metatensor([48.0662, 42.7708, 38.8622, 70.4219])
Std: metatensor([140.8673, 117.6777, 106.3751, 187.3814])





In [149]:
from monai.transforms import Compose, LoadImaged, EnsureChannelFirstd, ScaleIntensityd, NormalizeIntensityd, RandRotated, RandFlipd, RandZoomd, ToTensord

train_transforms = Compose([
    LoadImaged(keys=["image", "label"]),
    EnsureChannelFirstd(keys=["image", "label"]),
    ScaleIntensityd(keys=["image"]),
    NormalizeIntensityd(keys=["image"], subtrahend=mean.tolist(), divisor=std.tolist()),
    RandRotated(keys=["image", "label"], range_x=0.3, prob=0.5),
    RandFlipd(keys=["image", "label"], prob=0.5),
    RandZoomd(keys=["image", "label"], min_zoom=0.9, max_zoom=1.1, prob=0.5),
    ToTensord(keys=["image", "label"]),
])

test_transforms = Compose([
    LoadImaged(keys=["image", "label"]),
    EnsureChannelFirstd(keys=["image", "label"]),
    ScaleIntensityd(keys=["image"]),
    NormalizeIntensityd(keys=["image"], subtrahend=mean.tolist(), divisor=std.tolist()),
    ToTensord(keys=["image", "label"])
])


In [150]:
train_dataset_norm =SafeDataset(data=train_files,transform=train_transforms)
dataset_loader_norm=DataLoader(train_dataset_norm,batch_size=batch_size, collate_fn=collate_skip_none)
batch_shape=next(iter(dataset_loader_norm))["image"].shape
print("Getting batches of shape:",batch_shape)


Skipping sample at index: 0:['/content/clean_data_local/imagesTr/PatientID_0059_Timepoint_2_0000.nii.gz', '/content/clean_data_local/imagesTr/PatientID_0059_Timepoint_2_0001.nii.gz', '/content/clean_data_local/imagesTr/PatientID_0059_Timepoint_2_0002.nii.gz', '/content/clean_data_local/imagesTr/PatientID_0059_Timepoint_2_0003.nii.gz'] (applying transform <monai.transforms.intensity.dictionary.NormalizeIntensityd object at 0x7a4f00920c10>)
Skipping sample at index: 1:['/content/clean_data_local/imagesTr/PatientID_0252_Timepoint_1_0000.nii.gz', '/content/clean_data_local/imagesTr/PatientID_0252_Timepoint_1_0001.nii.gz', '/content/clean_data_local/imagesTr/PatientID_0252_Timepoint_1_0002.nii.gz', '/content/clean_data_local/imagesTr/PatientID_0252_Timepoint_1_0003.nii.gz'] (applying transform <monai.transforms.intensity.dictionary.NormalizeIntensityd object at 0x7a4f00920c10>)
Skipping sample at index: 2:['/content/clean_data_local/imagesTr/PatientID_0046_Timepoint_1_0000.nii.gz', '/conten

TypeError: 'NoneType' object is not subscriptable

In [None]:
batch_size=2
test_dataset_norm=Dataset.ImageFolder(data=test_files,transform=val_transforms)
dataset_loader_test_norm=DataLoader(test_dataset_norm,batch_size=batch_size,shuffle=False)
batch_shape=next(iter(dataset_loader_test_norm))["image"].shape
print("Getting batches of shape:",batch_shape)
print(type(test_dataset_norm))

In [None]:

norm_mean, norm_std = get_mean_std(dataset_loader_norm)

print(f"Mean: {norm_mean}")
print(f"Standard deviation: {norm_std}")


In [None]:
norm_mean, norm_std = get_mean_std(dataset_loader_test_norm)

print(f"Mean: {norm_mean}")
print(f"Standard deviation: {norm_std}")

In [None]:
train_loader=DataLoader(train_ds,batch_size=4,shuffle=True)
val_loader=DataLoader(val_ds,batch_size=2,shuffle=False)
print(type(train_loader))
print(type(val_loader))

<class 'monai.data.dataloader.DataLoader'>
<class 'monai.data.dataloader.DataLoader'>


In [None]:
train_files = random_split(train_dataset_norm ,[0.8])

length_train = len(train_dataset_norm)
length_dataset = len(train_dataset_norm)
percent_train = np.round(100 * length_train / length_dataset, 2)

print(f"Train data is {percent_train}% of full data")


In [None]:
test_files = random_split(test_dataset_norm,[0.2])
length_test = len(val_dataset)
length_dataset = len(test_dataset_norm)
percent_test = np.round(100 * length_test / length_dataset, 2)
print(f"Our Test data is {percent_test}% of full data")

In [None]:
import torch


**# Convert dataset to nnUNet format**

In [None]:
import os
os.environ['nnUNet_raw_data_base'] = '/content/clean_data_local/nnUNet_raw_data'
os.environ['nnUNet_preprocessed'] = '/content/nnUNet_preprocessed'
os.environ['RESULTS_FOLDER'] = '/content/nnUNet_results'


os.makedirs('/content/clean_data_local/nnUNet_raw_data', exist_ok=True)
os.makedirs('/content/nnUNet_preprocessed', exist_ok=True)
os.makedirs('/content/nnUNet_results', exist_ok=True)

print("nnUNet_raw_data_base =", os.environ['nnUNet_raw_data_base'])
print("nnUNet_preprocessed =", os.environ['nnUNet_preprocessed'])
print("RESULTS_FOLDER =", os.environ['RESULTS_FOLDER'])

In [None]:
!mkdir -p /content/clean_data_local/nnUNet_raw_data/Task001_Glioma

In [None]:
!mv /content/clean_data_local/imagesTr /content/clean_data_local/nnUNet_raw_data/Task001_Glioma/
!mv /content/clean_data_local/labelsTr /content/clean_data_local/nnUNet_raw_data/Task001_Glioma/
!mv /content/clean_data_local/dataset.json /content/clean_data_local/nnUNet_raw_data/Task001_Glioma/


In [None]:
!rm -rf /content/nnUNet_preprocessed/*


In [None]:
import os
os.environ['nnUNet_raw_data_base'] = '/content/clean_data_local'



In [None]:
import json

task_ids_path = "/content/clean_data_local/nnunet_task_ids.json"

task_ids = {
    "1": "Task001_Glioma"
}

with open(task_ids_path, "w") as f:
    json.dump(task_ids, f)

print(f"Created {task_ids_path}")


Created /content/clean_data_local/nnunet_task_ids.json


In [None]:
!nnUNet_plan_and_preprocess -t 1 --verify_dataset_integrity









Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet

Traceback (most recent call last):
  File "/usr/local/bin/nnUNet_plan_and_preprocess", line 8, in <module>
    sys.exit(main())
             ^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nnunet/experiment_planning/nnUNet_plan_and_preprocess.py", line 105, in main
    verify_dataset_integrity(join(nnUNet_raw_data, task_name))
  File "/usr/local/lib/python3.11/dist-packages/nnunet/preprocessing/sanity_checks.py", line 105, in verify_dataset_integrity
    training_cases = dataset['training']
                     ~~~~~~~^^^^^^^^^^^^
KeyError: 'training'


In [None]:
!nnUNet_train 3d_fullres nnUNetTrainerV2 Task001_Glioma 0 --npz

/bin/bash: line 1: nnUNet_train: command not found
