In [52]:
import json
import os
from glob import glob
from glob2 import glob
from subprocess import call
import time

import nibabel
import numpy as np
from joblib import Parallel, delayed


def load_nifty(directory, example_id, suffix):
    return nibabel.load(glob(f'{directory}/{example_id}/**/{suffix}.nii.gz')[0])


def load_channels(d, example_id):
    return [load_nifty(d, example_id, suffix) for suffix in ["FLAIR", "T1", "T1GD", "T2"]]


def get_data(nifty, dtype="int16"):
    if dtype == "int16":
        data = np.abs(nifty.get_fdata().astype(np.int16))
        data[data == -32768] = 0
        return data
    return nifty.get_fdata().astype(np.uint8)


def prepare_nifty(sub, d, d_out):
    flair, t1, t1ce, t2 = load_channels(d, sub)
    affine, header = flair.affine, flair.header
    vol = np.stack([get_data(flair), get_data(t1), get_data(t1ce), get_data(t2)], axis=-1)
    vol = nibabel.nifti1.Nifti1Image(vol, affine, header=header)
#     print('l')
#     print( os.path.join(d_out, sub + ".nii.gz"))
    nibabel.save(vol, os.path.join(d_out, sub + ".nii.gz"))
#     print(glob(f'{d}/{sub}/**/MASK.nii.gz')[0])
    if os.path.exists(glob(f'{d}/{sub}/**/MASK.nii.gz')[0]):
        seg = load_nifty(d, sub, "MASK")
        affine, header = seg.affine, seg.header
        vol = get_data(seg, "unit8")
#         vol[vol == 4] = 3
        seg = nibabel.nifti1.Nifti1Image(vol, affine, header=header)
#         print(os.path.join(d_out, sub + "_seg.nii.gz"))
        nibabel.save(seg, os.path.join(d_out, sub + "_seg.nii.gz"))


def prepare_dirs(d_out, train):
    img_path, lbl_path = os.path.join(d_out, "images"), os.path.join(d_out, "labels")
    call(f"mkdir {img_path}", shell=True)
    if train:
        call(f"mkdir {lbl_path}", shell=True)
    dirs = glob(os.path.join(d_out, "*"))
    for d in dirs:
        if '.nii.gz' in d:
                if "FLAIR" in d or "T1" in d or "T1GD" in d or "T2_to_SRI" in d:
                    continue
                if "_seg" in d:
                    call(f"mv {d} {lbl_path}", shell=True)
                else:
                    call(f"mv {d} {img_path}", shell=True)
                
#         call(f"rm -rf {d}", shell=True)
         

def prepare_dataset_json(d_out, train):
    images, labels = glob(os.path.join(d_out, "images", "*")), glob(os.path.join(d_out, "labels", "*"))
    images = sorted([img.replace(d_out + "/", "") for img in images])
    labels = sorted([lbl.replace(d_out + "/", "") for lbl in labels])
    
    modality = {"0": "FLAIR", "1": "T1", "2": "T1CE", "3": "T2"}
    labels_dict = {"0": "background", "1": "wt"}
    if train:
        key = "training"
        data_pairs = [{"image": img, "label": lbl} for (img, lbl) in zip(images, labels)]
    else:
        key = "test"
        data_pairs = [{"image": img} for img in images]

    dataset = {
        "labels": labels_dict,
        "modality": modality,
        key: data_pairs,
    }

    with open(os.path.join(d_out, "dataset.json"), "w") as outfile:
        json.dump(dataset, outfile)


def run_parallel(func, args):
    return Parallel(n_jobs=os.cpu_count())(delayed(func)(arg) for arg in args)


def prepare_dataset(data, train):
    print(f"Preparing BraTS21 dataset from: {data}")
    d_out = os.path.join( '/', data.split("/")[1], data.split("/")[2]  + '_train')
    if not os.path.exists(d_out):
        call(f"mkdir {d_out}", shell=True)
    start = time.time()
#     run_parallel(prepare_nifty, sorted(glob(os.path.join(data, "*"))))
    for i,sub in enumerate(tab.Subject):
        if tab.iloc[i].Observer != 'AUTO':
            prepare_nifty(sub, data, d_out)
    prepare_dirs(d_out, train)
    prepare_dataset_json(d_out, train)
    end = time.time()
    print(f"Preparing time: {(end - start):.2f}")


In [53]:
prepare_dataset('/data/EGD', True)

Preparing BraTS21 dataset from: /data/EGD


mkdir: cannot create directory ‘/data/EGD_train/images’: File exists
mkdir: cannot create directory ‘/data/EGD_train/labels’: File exists


Preparing time: 1456.61


In [6]:
import pandas as pd
tab = pd.read_excel('/data/EGD/metadata/Segmentation_source.xlsx')

In [18]:
tab.Observer.unique()

array(['OBS1', 'AUTO', 'OBS2', 'OBS3', 'OBS4'], dtype=object)

In [54]:
!python3 ../preprocess.py --data /data --task 14 --ohe --exec_mode training --results /data

Preprocessing /data/EGD_train
Pre-processing time: 1135.59


In [1]:
!nvidia-smi

Sat Jan  1 15:26:14 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.80.02    Driver Version: 450.80.02    CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-PCIE...  On   | 00000000:21:01.0 Off |                    0 |
| N/A   28C    P0    24W / 250W |      0MiB / 32510MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-PCIE...  On   | 00000000:21:02.0 Off |                    0 |
| N/A   50C    P0   166W / 250W |  15425MiB / 32510MiB |    100%      Default |
|       

In [2]:
! cd /results/egd_results/fold-0 && ls

checkpoints  logs.json


In [None]:
! export CUDA_VISIBLE_DEVICES=1 &&  python ../main.py --data /data/14_3d --results /results/egd_results/fold-0 --resume_training --ckpt_path /results/egd_results/fold-0/checkpoints --deep_supervision --depth 6 --filters 64 96 128 192 256 384 512 --min_fmap 2 --scheduler --learning_rate 0.0003 --epochs 150 --nfolds 3 --fold 0 --amp --gpus 1 --task 14 --save_ckpt

Global seed set to 1
249 training, 125 validation, 374 test examples
Filters: [64, 96, 128, 192, 256, 384, 512],
Kernels: [[3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3]]
Strides: [[1, 1, 1], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2]]
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name  | Type    | Params
----------------------------------
0 | model | DynUNet | 50.8 M
1 | loss  | Loss    | 0     
2 | dice  | Dice    | 0     
----------------------------------
50.8 M    Trainable params
0         Non-trainable params
50.8 M    Total params
203.360   Total estimated model params size (MB)
  rank_zero_deprecation(
Restored states from the checkpoint file at /results/egd_results/fold-0/checkpoints/last.ckpt
Epoch 77:  50%|█████████▌         | 125/250 [12:00<12:00,  5.76s/it, loss=0.121]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|

In [60]:
!cd /results/egd_results && ls

fold-0
