In [2]:
# Import required packages
import os
import shutil
from collections import OrderedDict

import numpy as np
import torch

from batchgenerators.utilities.file_and_folder_operations import *
import SimpleITK as sitk

In [3]:
# Check whether GPU-accelerated computing is available.
# The assertion stops the notebook right here when PyTorch cannot see a CUDA device.
assert torch.cuda.is_available() # if this fails, enable a GPU for the runtime before continuing

In [56]:
# install nnunet
# !pip install nnunet

In [57]:
# check if nnunet can be imported
import nnunet

# Setting up nnU-Net's folder structure and environment variables
nnU-Net expects a certain folder structure and a set of environment variables.

Roughly, they tell nnU-Net:
1. Where to look for stuff
2. Where to put stuff

For more information about this please check: https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/setting_up_paths.md

In [58]:
def make_if_dont_exist(folder_path, overwrite=False):
    """
    Make sure a folder exists, optionally recreating it from scratch.

    input:
    folder_path : path of the folder which needs to be created
    overwrite : (default: False) if True, an already existing folder is
                deleted together with its contents and created again empty

    A short status message is printed for every outcome
    (created / exists / overwritten).
    """
    if not os.path.exists(folder_path):
        # Nothing at this path yet: create the folder (and missing parents).
        os.makedirs(folder_path)
        print(f"{folder_path} created!")
        return

    if overwrite:
        # Wipe the existing folder and start again with an empty one.
        print(f"{folder_path} overwritten")
        shutil.rmtree(folder_path)
        os.makedirs(folder_path)
        return

    # Folder is already present and must be kept as it is.
    print(f"{folder_path} exists.")

##  Set environment Variables and creating folders

In [59]:
# Set the nnU-Net environment variables and create the folders they point to.
# Maybe move the path of the preprocessed data directly onto "content" - this may be significantly faster!
print("Current Working Directory {}".format(os.getcwd()))
mount_dir = "/home/workstation04/GP_MMMAI/Notebooks/Vanilla_nnUNet"

# Environment variable name -> folder below mount_dir
path_dict = {
    "nnUNet_raw_data_base": os.path.join(mount_dir, "nnUNet_raw"),
    "nnUNet_preprocessed": os.path.join(mount_dir, "nnUNet_preprocessed"),
    "RESULTS_FOLDER": os.path.join(mount_dir, "nnUNet_Results_Folder"),
}

# Write all paths to the environment in one go
os.environ.update(path_dict)

# Read every variable back to check it, then make sure its folder exists
for env_var, path in path_dict.items():
    value_in_env = os.environ.get(env_var)
    if value_in_env != path:
        print("Error:")
        print("Environment Variable {} is not set correctly!".format(env_var))
        print("Should be {}".format(path))
        print("Variable is {}".format(value_in_env))
    make_if_dont_exist(path, overwrite=False)

print("If No Error Occured Continue Forward. =)")

Current Working Directory /home/workstation04/GP_MMMAI/Notebooks/Vanilla_nnUNet
/home/workstation04/GP_MMMAI/Notebooks/Vanilla_nnUNet/nnUNet_raw created!
/home/workstation04/GP_MMMAI/Notebooks/Vanilla_nnUNet/nnUNet_preprocessed created!
/home/workstation04/GP_MMMAI/Notebooks/Vanilla_nnUNet/nnUNet_Results_Folder exists.
If No Error Occured Continue Forward. =)


In [3]:
# extracting the contents of the "BraTS2021_Training_Data.tar"
# !tar -xvf  "/home/workstation04/GP_MMMAI/BraTSData/BraTS2021_Training_Data.tar" -C "/home/workstation04/GP_MMMAI/Brats"

./
./.DS_Store
./BraTS2021_00000/
./BraTS2021_00000/BraTS2021_00000_flair.nii.gz
./BraTS2021_00000/BraTS2021_00000_seg.nii.gz
./BraTS2021_00000/BraTS2021_00000_t1.nii.gz
./BraTS2021_00000/BraTS2021_00000_t1ce.nii.gz
./BraTS2021_00000/BraTS2021_00000_t2.nii.gz
./BraTS2021_00002/
./BraTS2021_00002/BraTS2021_00002_flair.nii.gz
./BraTS2021_00002/BraTS2021_00002_seg.nii.gz
./BraTS2021_00002/BraTS2021_00002_t1.nii.gz
./BraTS2021_00002/BraTS2021_00002_t1ce.nii.gz
./BraTS2021_00002/BraTS2021_00002_t2.nii.gz
./BraTS2021_00003/
./BraTS2021_00003/BraTS2021_00003_flair.nii.gz
./BraTS2021_00003/BraTS2021_00003_seg.nii.gz
./BraTS2021_00003/BraTS2021_00003_t1.nii.gz
./BraTS2021_00003/BraTS2021_00003_t1ce.nii.gz
./BraTS2021_00003/BraTS2021_00003_t2.nii.gz
./BraTS2021_00005/
./BraTS2021_00005/BraTS2021_00005_flair.nii.gz
./BraTS2021_00005/BraTS2021_00005_seg.nii.gz
./BraTS2021_00005/BraTS2021_00005_t1.nii.gz
./BraTS2021_00005/BraTS2021_00005_t1ce.nii.gz
./BraTS2021_00005/BraTS2021_00005_t2.nii.gz
./Bra

In [6]:
# counting the number of folders in Brats
# !ls "/home/workstation04/GP_MMMAI/Brats" | wc -l

1251


In [60]:
from nnunet.paths import nnUNet_raw_data

In [61]:
def copy_BraTS_segmentation_and_convert_labels(in_file, out_file):
    """
    Write a copy of a BraTS segmentation with consecutive label values.

    Use this for segmentation files only. nnUNet wants the labels to be
    continuous; BraTS uses 0, 1, 2, 4, which is turned into 0, 1, 2, 3
    (only label 4 changes its value).

    in_file  : path of the segmentation image to read
    out_file : path the relabelled image is written to

    Raises RuntimeError('unexpected label') when the input contains a value
    other than 0, 1, 2 or 4; the offending value is printed first.
    """
    source_img = sitk.ReadImage(in_file)
    source_arr = sitk.GetArrayFromImage(source_img)

    # Refuse anything that is not a known BraTS label before relabelling.
    allowed_labels = (0, 1, 2, 4)
    for value in np.unique(source_arr):
        if value in allowed_labels:
            continue
        print(value)
        raise RuntimeError('unexpected label')

    # Start from all-background and paint each label with its new value.
    relabelled = np.zeros_like(source_arr)
    for old_label, new_label in ((4, 3), (1, 1), (2, 2)):
        relabelled[source_arr == old_label] = new_label

    # Copy the image information of the source onto the result before writing.
    result_img = sitk.GetImageFromArray(relabelled)
    result_img.CopyInformation(source_img)
    sitk.WriteImage(result_img, out_file)

In [62]:
# create json file
#  specify data properties that the model can understand
if __name__ == "__main__":
    # Convert the downloaded BraTS 2021 cases into the nnU-Net raw-data layout
    # (imagesTr / labelsTr) and write the accompanying dataset.json.
    #
    # REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION!

    # Name of the task folder; the numeric id (501) is the one the later
    # nnUNet_plan_and_preprocess / nnUNet_train cells refer to.
    task_name = "Task501_BraTS2021"

    # Path to the unzipped data.
    # Path to unzipped data should be in the created Raw data folder.
    # downloaded_data_dir = "/home/workstation04/GP_MMMAI/Brats"
    downloaded_data_dir = "/home/workstation04/GP_MMMAI/All_Data"

    target_base = join(nnUNet_raw_data, task_name)
    target_imagesTr = join(target_base, "imagesTr")
    target_imagesVal = join(target_base, "imagesVal")
    target_imagesTs = join(target_base, "imagesTs")
    target_labelsTr = join(target_base, "labelsTr")

    maybe_mkdir_p(target_imagesTr)
    maybe_mkdir_p(target_imagesVal)
    maybe_mkdir_p(target_imagesTs)
    maybe_mkdir_p(target_labelsTr)

    patient_names = []

    # Every sub-directory of the download folder is treated as one case; the
    # directory name doubles as the file-name prefix of its five images.
    for p in subdirs(downloaded_data_dir, join=False):
        patdir = join(downloaded_data_dir, p)
        patient_name = p
        patient_names.append(patient_name)
        t1 = join(patdir, p + "_t1.nii.gz")
        t1c = join(patdir, p + "_t1ce.nii.gz")
        t2 = join(patdir, p + "_t2.nii.gz")
        flair = join(patdir, p + "_flair.nii.gz")
        seg = join(patdir, p + "_seg.nii.gz")

        # Stop at the first incomplete case; the message is the case name.
        assert all([
            isfile(t1),
            isfile(t1c),
            isfile(t2),
            isfile(flair),
            isfile(seg)
        ]), "%s" % patient_name

        # Change the naming convention: each modality becomes
        # <case>_<4-digit modality index>.nii.gz, with the index matching
        # json_dict['modality'] below.
        shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz"))
        shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz"))
        shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz"))
        shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz"))

        # The segmentation is not copied verbatim: label 4 is rewritten to 3.
        copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz"))

    json_dict = OrderedDict()
    json_dict['name'] = "BraTS2021"
    json_dict['description'] = "nothing"
    json_dict['tensorImageSize'] = "4D"
    json_dict['reference'] = "see BraTS2021"
    json_dict['licence'] = "see BraTS2021 license"
    json_dict['release'] = "0.0"
    json_dict['modality'] = {
        "0": "T1",
        "1": "T1ce",
        "2": "T2",
        "3": "FLAIR"
    }

    # BraTS labels: GD-enhancing tumor (ET - label 4, stored as label 3 here),
    # the peritumoral edema (ED - label 2), and the necrotic and non-enhancing
    # tumor core (NCR/NET - label 1).
    # copy_BraTS_segmentation_and_convert_labels keeps 1 -> 1 and 2 -> 2, so the
    # names must follow that order: 1 is the non-enhancing core, 2 is edema.
    json_dict['labels'] = {
        "0": "background",
        "1": "non-enhancing",
        "2": "edema",
        "3": "enhancing",
    }
    json_dict['numTraining'] = len(patient_names)
    json_dict['numTest'] = 0
    json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in
                             patient_names]
    json_dict['test'] = []

    save_json(json_dict, join(target_base, "dataset.json"))



## Extracting Rule Based Parameters
This preprocesses the dataset to allow fast training and saves the result into the "nnUNet_preprocessed" folder.
In addition, the rule-based parameters are extracted in the planning step.

In [63]:
# Preprocessing: run nnU-Net planning and preprocessing for task 501 (-t 501).
# --verify_dataset_integrity asks the tool to check the dataset as well
# (the "Verifying training set" lines in the output come from this step).
!nnUNet_plan_and_preprocess -t 501 --verify_dataset_integrity



Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet

Verifying training set
checking case BraTS2021_00000
checking case BraTS2021_00002
checking case BraTS2021_00003
checking case BraTS2021_00005
checking case BraTS2021_00006
checking case BraTS2021_00008
checking case BraTS2021_00009
checking case BraTS2021_00011
checking case BraTS2021_00012
checking case BraTS2021_00014
checking case BraTS2021_00016
checking case BraTS2021_00017
checking case BraTS2021_00018
checking case BraTS2021_00019
checking case BraTS2021_00020
checking case BraTS2021_00021
checking case BraTS2021_00022
checking case BraTS2021_00024
checking case BraTS2021_00025
checking case BraTS2021_00026
checking case B

### To specify the validation and training samples manually (make your own split)
You need to create a pkl file (`splits_final.pkl`) containing a list of dicts whose length equals the number of splits, and put it in the task's preprocessed folder, nnUNet_preprocessed/TaskXXX_NAME (here: nnUNet_preprocessed/Task501_BraTS2021).
- example:
```
In [7]: print(splits[0])
{'train': ['la_003', 'la_004', 'la_005', 'la_009', 'la_010', 'la_011', 'la_014', 'la_017', 'la_018', 'la_019', 'la_020', 'la_022', 'la_023', 'la_026', 'la_029', 'la_030'],
'val': ['la_007', 'la_016', 'la_021', 'la_024']}
```

https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/manual_data_splits.md

In [64]:
# split the data into validation and trainig sets manually
import ast

def read_list_from_txt(file_path):
    """
    Read a text file and evaluate its content as a Python literal.

    file_path : path of a text file holding a single Python literal,
                e.g. "['case_a', 'case_b']"

    Returns whatever ast.literal_eval produces for the file content.
    """
    with open(file_path, 'r') as handle:
        raw_text = handle.read()
    # literal_eval only accepts literals, so no arbitrary code is executed
    return ast.literal_eval(raw_text)


# json.dump is called below, so import json here explicitly instead of relying
# on a later cell (or a wildcard import) to have brought it into scope.
import json

# Load training and validation filenames from text files
training_filenames_list = read_list_from_txt('/home/workstation04/GP_MMMAI/Notebooks/Vanilla_nnUNet/training.txt')
validation_filenames_list = read_list_from_txt('/home/workstation04/GP_MMMAI/Notebooks/Vanilla_nnUNet/validation.txt')

# Number of split entries to write. Every entry holds the SAME train/val
# lists, so each fold index selects the identical manual split.
num_folds = 5

# Build the split structure: one {"train": [...], "val": [...]} dict per fold
data = [
    {"train": training_filenames_list, "val": validation_filenames_list}
    for _ in range(num_folds)
]

# Save to JSON file
with open('/home/workstation04/GP_MMMAI/Notebooks/Vanilla_nnUNet/nnUNet_preprocessed/Task501_BraTS2021/splits_final.json', 'w') as json_file:
    json.dump(data, json_file, indent=2)


In [65]:
# convert json to pkl 

import pickle
import json
import numpy as np

# Read the split definitions back from the JSON file
splits_json_path = '/home/workstation04/GP_MMMAI/Notebooks/Vanilla_nnUNet/nnUNet_preprocessed/Task501_BraTS2021/splits_final.json'
with open(splits_json_path, 'r') as json_file:
    json_data = json.load(json_file)

# Convert lists to NumPy arrays
def convert_list(obj):
    """
    Convert lists inside `obj` to NumPy arrays.

    obj : any object

    Returns
    - np.array(obj) when obj is a list,
    - a new dict with the same keys and converted values when obj is a dict,
    - obj itself for everything else.
    """
    # A split entry is a {"train": [...], "val": [...]} dict. Without this
    # branch the dict is returned untouched and its name lists never become
    # arrays, even though that is the whole purpose of the conversion.
    if isinstance(obj, dict):
        return {key: convert_list(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return np.array(obj)
    return obj

# Run convert_list over the top-level elements of the loaded JSON data:
# over every item of a list, over every value of a dict, and not at all
# for any other type (the data is then pickled unchanged).
if isinstance(json_data, list):
    converted_data = list(map(convert_list, json_data))
elif isinstance(json_data, dict):
    converted_data = {name: convert_list(entry) for name, entry in json_data.items()}
else:
    converted_data = json_data

# Write the result next to the JSON file as splits_final.pkl
with open('/home/workstation04/GP_MMMAI/Notebooks/Vanilla_nnUNet/nnUNet_preprocessed/Task501_BraTS2021/splits_final.pkl', 'wb') as pkl_file:
    pickle.dump(converted_data, pkl_file)


### To save a checkpoint after every epoch, change the checkpoint interval (presumably the `save_every` setting) in this file:
/home/workstation04/anaconda3/lib/python3.9/site-packages/nnunet/training/network_training/network_trainer.py

In [66]:
# Train the 3d_fullres nnU-Net with trainer nnUNetTrainerV2 on task 501, cross-validation split (fold) 0.
# Positional argument order: network, network_trainer, task, fold.
# NOTE(review): -c presumably continues a previously started training - confirm with `nnUNet_train -h`.
!nnUNet_train 3d_fullres nnUNetTrainerV2 501 0 -c



Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet

###############################################
I am running the following nnUNet: 3d_fullres
My trainer class is:  <class 'nnunet.training.network_training.nnUNetTrainerV2.nnUNetTrainerV2'>
For that I will be using the following configuration:
num_classes:  3
modalities:  {0: 'T1', 1: 'T1ce', 2: 'T2', 3: 'FLAIR'}
use_mask_for_norm OrderedDict([(0, True), (1, True), (2, True), (3, True)])
keep_only_largest_region None
min_region_size_per_class None
min_size_per_class None
normalization_schemes OrderedDict([(0, 'nonCT'), (1, 'nonCT'), (2, 'nonCT'), (3, 'nonCT')])
stages...

stage:  0
{'batch_size': 2, 'num_pool_per_axis': [5, 5, 5]

In [80]:
# Print the command-line usage of nnUNet_train (available flags and positional arguments)
!nnUNet_train -h



Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet

usage: nnUNet_train [-h] [-val] [-c] [-p P] [--use_compressed_data]
                    [--deterministic] [--npz] [--find_lr] [--valbest] [--fp32]
                    [--val_folder VAL_FOLDER] [--disable_saving]
                    [--disable_postprocessing_on_folds]
                    [--disable_validation_inference] [--val_disable_overwrite]
                    [--disable_next_stage_pred]
                    [-pretrained_weights PRETRAINED_WEIGHTS]
                    network network_trainer task fold

positional arguments:
  network
  network_trainer
  task                  can be task name or task id
  fold                  0