In [54]:
import os
import numpy as np
import glob
import torch
import torchio as tio
from sklearn.model_selection import KFold
from tqdm import tqdm

In [87]:

def get_split(data, idx):
    """Select the elements of ``data`` at positions ``idx``.

    ``data`` is converted to a NumPy array so that ``idx`` may be any NumPy
    index (for example the index arrays yielded by ``KFold.split``). The
    selection is returned as a plain Python list of the array's elements.
    """
    selected = np.array(data)[idx]
    return list(selected)


def load_data(path, files_pattern):
    """Return the sorted list of paths inside ``path`` matching ``files_pattern``.

    Args:
        path: Directory to search.
        files_pattern: Glob pattern relative to ``path`` (e.g. ``"*_x.npy"``).

    Returns:
        Lexicographically sorted list of matching paths (empty if none match).
    """
    # Imported locally under a private alias: a later cell of this notebook runs
    # ``from glob import glob``, which rebinds the global name ``glob`` to a
    # function and would make ``glob.glob`` raise AttributeError here.
    import glob as _glob

    return sorted(_glob.glob(os.path.join(path, files_pattern)))


def get_kfold_splitter(nfolds):
    """Build the shuffled ``KFold`` splitter used throughout this notebook.

    The random state is fixed (12345) so that repeated calls with the same
    ``nfolds`` produce the same fold assignment.
    """
    splitter_options = {"n_splits": nfolds, "shuffle": True, "random_state": 12345}
    return KFold(**splitter_options)


def get_test_fnames(args, data_path, meta=None):
    """Return the image files (and optional metadata) to run inference on.

    All ``*_x.npy`` files under ``data_path`` are returned unless
    ``args.exec_mode`` is ``"predict"`` and ``data_path`` contains ``"val"``;
    in that case only the validation part of fold ``args.fold`` is kept, and
    ``meta`` (when given) is restricted to the same indices.

    Returns:
        Tuple ``(test_imgs, meta)``.
    """
    kfold = get_kfold_splitter(args.nfolds)
    test_imgs = load_data(data_path, "*_x.npy")

    restrict_to_val_fold = args.exec_mode == "predict" and "val" in data_path
    if not restrict_to_val_fold:
        return test_imgs, meta

    fold_splits = list(kfold.split(test_imgs))
    _, val_idx = fold_splits[args.fold]
    test_imgs = sorted(get_split(test_imgs, val_idx))
    if meta is not None:
        meta = sorted(get_split(meta, val_idx))
    return test_imgs, meta

In [110]:
# Cross-validation configuration for this run: number of folds, the fold being
# processed, and the directory holding the preprocessed *.npy files.
data_path = '/data/private_data/schw/33_3d'
nfolds = 3
fold = 2
# Same splitter (shuffled, random_state=12345) as the helper defined above.
kfold = get_kfold_splitter(nfolds)


In [114]:
# Collect every preprocessed file type found under data_path.
imgs = load_data(data_path, "*_x.npy")
lbls = load_data(data_path, "*_y.npy")
meta = load_data(data_path, "*_meta.npy")
orig_lbl = load_data(data_path, "*_orig_lbl.npy")

# Index arrays of the selected fold.
fold_splits = list(kfold.split(imgs))
train_idx, val_idx = fold_splits[fold]

# Original-resolution labels and metadata are only kept for the validation part.
orig_lbl = get_split(orig_lbl, val_idx)
meta = get_split(meta, val_idx)

train_imgs = get_split(imgs, train_idx)
train_lbls = get_split(lbls, train_idx)
val_imgs_2 = get_split(imgs, val_idx)
val_lbls_2 = get_split(lbls, val_idx)

In [115]:
# Reduce the validation label paths of this fold to bare file stems
# (file name up to the first dot). The source list is `val_lbls_2` from the
# split cell above; `val_lbls` is not defined anywhere in this notebook.
val_lbls_2 = [os.path.basename(each).split('.')[0] for each in val_lbls_2]

In [40]:
# import json
# import mpu.io

# data = {'fold_0': val_lbls_0,
#         'fold_1': val_lbls_1,
#         'fold_2': val_lbls_2,
#        }
# print(json.dumps(data, indent=4))


# mpu.io.write('schw_seed.json', data)

In [144]:
data_path = '/data_anvar/public_datasets/preproc_study/gbm/3a_atlas'

# Subject folder name = training file name without its last 6 characters
# (the "_x.npy" suffix matched when the files were listed).
subject_ids = [npy_path.split('/')[-1][:-6] for npy_path in train_imgs]

t1_paths = [os.path.join(data_path, sid, 'T1.nii.gz') for sid in subject_ids]
t2_paths = [os.path.join(data_path, sid, 'T2.nii.gz') for sid in subject_ids]
t1c_paths = [os.path.join(data_path, sid, 'CT1.nii.gz') for sid in subject_ids]
flair_paths = [os.path.join(data_path, sid, 'FLAIR.nii.gz') for sid in subject_ids]

# Train one set of histogram landmarks per modality on the training subjects
# only; each call also writes the landmarks to its output_path.
landmarks_by_modality = {}
for modality, paths in (
    ('t1', t1_paths),
    ('t2', t2_paths),
    ('t1c', t1c_paths),
    ('flair', flair_paths),
):
    landmarks_by_modality[modality] = tio.HistogramStandardization.train(
        paths,
        output_path=f'/home/polina/DeepLearningExamples/PyTorch/Segmentation/nnUNet/notebooks/{modality}_landmarks_fold2.npy',
    )

landmarks_t1_fold2 = landmarks_by_modality['t1']
landmarks_t2_fold2 = landmarks_by_modality['t2']
landmarks_t1c_fold2 = landmarks_by_modality['t1c']
landmarks_flair_fold2 = landmarks_by_modality['flair']

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 68/68 [00:35<00:00,  1.94it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 68/68 [00:34<00:00,  1.97it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 68/68 [00:35<00:00,  1.93it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 68/68 [00:38<00:00,  1.78it/s]


In [145]:
# Map each image name used in the tio.Subject objects to its trained landmarks
# and build the standardisation transform from that mapping.
landmarks_dict = dict(
    t1=landmarks_t1_fold2,
    t2=landmarks_t2_fold2,
    t1c=landmarks_t1c_fold2,
    flair=landmarks_flair_fold2,
)
histogram_transform = tio.HistogramStandardization(landmarks_dict)

In [146]:
# One tio.Subject per training case: four scalar modalities plus the label map,
# all read from the subject's folder under data_path.
subjects = []
for npy_path in train_imgs:
    subject_dir = os.path.join(data_path, npy_path.split('/')[-1][:-6])
    subjects.append(
        tio.Subject(
            t1=tio.ScalarImage(os.path.join(subject_dir, 'T1.nii.gz')),
            t2=tio.ScalarImage(os.path.join(subject_dir, 'T2.nii.gz')),
            t1c=tio.ScalarImage(os.path.join(subject_dir, 'CT1.nii.gz')),
            flair=tio.ScalarImage(os.path.join(subject_dir, 'FLAIR.nii.gz')),
            label=tio.LabelMap(os.path.join(subject_dir, 'CT1_SEG.nii.gz')),
        )
    )
dataset = tio.SubjectsDataset(subjects)

In [149]:

out_path = '/data/private_data/6_histogram_fold2'

# (image name on the subject, file name written to the output folder)
output_files = (
    ('t1', 'T1.nii.gz'),
    ('t2', 'T2.nii.gz'),
    ('t1c', 'CT1.nii.gz'),
    ('flair', 'FLAIR.nii.gz'),
    ('label', 'CT1_SEG.nii.gz'),
)

# Apply histogram standardisation to every training subject and save the
# result under out_path/<subject folder name>/.
for sample in tqdm(dataset):
    transformed = histogram_transform(sample)
    # All images of a subject were loaded from the same folder, so the parent
    # folder of the T1 path identifies the subject.
    subject_id = os.path.basename(os.path.dirname(str(transformed.t1.path)))
    subject_dir = os.path.join(out_path, subject_id)
    # makedirs also creates out_path itself on a fresh machine and does not
    # fail when the folder already exists (os.mkdir did neither).
    os.makedirs(subject_dir, exist_ok=True)
    for image_name, file_name in output_files:
        getattr(transformed, image_name).save(os.path.join(subject_dir, file_name))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 68/68 [12:26<00:00, 10.97s/it]


In [41]:
import shutil

out_path = '/data/private_data/6_histogram_fold2'
data_path = '/data_anvar/public_datasets/preproc_study/gbm/3a_atlas'

# Copy the validation subjects of this fold unchanged from data_path to
# out_path. The list is `val_imgs_2` from the split cell; `val_imgs` is not
# defined anywhere in this notebook.
for each in val_imgs_2:
    sub = each.split('/')[-1][:-6]
    print(sub)
    sub_in = os.path.join(data_path, sub)
    sub_out = os.path.join(out_path, sub)
    if not os.path.exists(sub_out):
        # copytree instead of a shell `cp -r`: no quoting problems with unusual
        # paths, and a missing source folder raises instead of being ignored.
        # symlinks=True keeps symbolic links as links.
        shutil.copytree(sub_in, sub_out, symlinks=True)


In [7]:
import json
import os
from glob import glob
from glob2 import glob
from subprocess import call
import time

import nibabel
import numpy as np
from joblib import Parallel, delayed


def load_nifty(directory, example_id, suffix):
    """Load ``<directory>/<suffix>.nii.gz`` with nibabel.

    ``example_id`` is accepted but not used when building the file name.
    """
    nifti_path = "{}/{}.nii.gz".format(directory, suffix)
    return nibabel.load(nifti_path)


def load_channels(d, example_id):
    """Load the four modality volumes of one subject folder.

    Returns a list of nibabel images in the fixed order FLAIR, T1, CT1, T2.
    """
    channels = []
    for modality in ("FLAIR", "T1", "CT1", "T2"):
        channels.append(load_nifty(d, example_id, modality))
    return channels


def get_data(nifty, dtype="int16"):
    """Return the voxel array of ``nifty`` cast to an integer type.

    With ``dtype == "int16"`` (the default) the data is cast to int16 and its
    absolute value is taken; entries that are still -32768 afterwards (the one
    int16 value whose absolute value does not fit) are set to 0.
    Any other ``dtype`` value returns the data cast to uint8.
    """
    volume = nifty.get_fdata()
    if dtype != "int16":
        return volume.astype(np.uint8)

    magnitudes = np.abs(volume.astype(np.int16))
    magnitudes[magnitudes == -32768] = 0
    return magnitudes


def prepare_nifty(d, d_out):
    """Merge one subject's modalities into a single NIfTI and export its mask.

    The FLAIR, T1, CT1 and T2 volumes found in ``d`` are converted with
    ``get_data`` and stacked along a new last axis, then saved as
    ``<d_out>/<subject>.nii.gz`` using the FLAIR affine and header. If
    ``<d>/CT1_SEG.nii.gz`` exists it is converted to uint8 and saved as
    ``<d_out>/<subject>_seg.nii.gz`` with its own affine and header.

    Args:
        d: Path of the subject folder; its last component is the subject name.
        d_out: Existing output directory.
    """
    # normpath drops a trailing slash, which would otherwise give an empty name.
    sub = os.path.basename(os.path.normpath(d))
    flair, t1, t1ce, t2 = load_channels(d, sub)
    affine, header = flair.affine, flair.header
    vol = np.stack([get_data(flair), get_data(t1), get_data(t1ce), get_data(t2)], axis=-1)
    vol = nibabel.nifti1.Nifti1Image(vol, affine, header=header)
    nibabel.save(vol, os.path.join(d_out, sub + ".nii.gz"))

    if os.path.exists(f'{d}/CT1_SEG.nii.gz'):
        seg = load_nifty(d, sub, "CT1_SEG")
        affine, header = seg.affine, seg.header
        # Was spelled "unit8", which only worked because get_data treats every
        # value other than "int16" as a request for uint8.
        vol = get_data(seg, "uint8")
        # Label remapping is intentionally left disabled; the previous variant was:
        #     vol[vol == 4] = 3
        #     mask = np.zeros(vol.shape, dtype=np.uint8)
        #     mask[vol == 2] = 1
        #     mask[vol == 3] = 2
        #     mask[vol == 1] = 3
        seg = nibabel.nifti1.Nifti1Image(vol, affine, header=header)
        nibabel.save(seg, os.path.join(d_out, sub + "_seg.nii.gz"))


def prepare_dirs(d_out, train):
    """Sort the NIfTI files lying directly in ``d_out`` into sub-folders.

    Files whose name contains ``.nii.gz`` are moved to ``<d_out>/labels`` when
    the name contains ``_seg`` and to ``<d_out>/images`` otherwise. Files whose
    name contains a raw modality tag (FLAIR, T1, CT1, T2) are left in place,
    as is everything that is not a ``.nii.gz`` file.

    Args:
        d_out: Directory produced by ``prepare_nifty``.
        train: When true the ``labels`` folder is created up front.
    """
    img_path, lbl_path = os.path.join(d_out, "images"), os.path.join(d_out, "labels")
    # Pure-Python replacements for `mkdir`/`mv` shell calls: safe for paths with
    # spaces or shell metacharacters and silent when the folders already exist.
    os.makedirs(img_path, exist_ok=True)
    if train:
        os.makedirs(lbl_path, exist_ok=True)

    modality_tags = ("FLAIR", "T1", "CT1", "T2")
    for d in glob(os.path.join(d_out, "*")):
        # Decide on the file name only, so that a tag or "_seg" appearing in
        # the directory part of the path cannot affect the outcome.
        name = os.path.basename(d)
        if ".nii.gz" not in name:
            continue
        if any(tag in name for tag in modality_tags):
            continue
        if "_seg" in name:
            # Guarantees a directory target even when train is false; moving
            # onto a missing "labels" path would rename the file to "labels".
            os.makedirs(lbl_path, exist_ok=True)
            target_dir = lbl_path
        else:
            target_dir = img_path
        # os.replace overwrites an existing file, matching `mv` on re-runs.
        os.replace(d, os.path.join(target_dir, name))
         

def prepare_dataset_json(d_out, train):
    """Write ``<d_out>/dataset.json`` describing the prepared dataset.

    The file lists the label names, the modality names and, under the key
    ``"training"`` (image/label pairs) or ``"test"`` (images only), the files
    found in ``<d_out>/images`` and ``<d_out>/labels``. File paths are stored
    relative to ``d_out`` and sorted; pairs are formed by position.
    """
    prefix = d_out + "/"

    def _relative_listing(subfolder):
        # Sorted entries of d_out/<subfolder>, with the d_out prefix removed.
        found = glob(os.path.join(d_out, subfolder, "*"))
        return sorted(entry.replace(prefix, "") for entry in found)

    images = _relative_listing("images")
    labels = _relative_listing("labels")

    if train:
        key = "training"
        data_pairs = [dict(image=img, label=lbl) for img, lbl in zip(images, labels)]
    else:
        key = "test"
        data_pairs = [dict(image=img) for img in images]

    dataset = {
        "labels": {"0": "background", "1": "edema", "2": "tumor core", "3": "enhancing tumour"},
        "modality": {"0": "FLAIR", "1": "T1", "2": "T1CE", "3": "T2"},
        key: data_pairs,
    }

    json_path = os.path.join(d_out, "dataset.json")
    with open(json_path, "w") as outfile:
        json.dump(dataset, outfile)


def run_parallel(func, args):
    """Call ``func`` once per element of ``args`` through joblib.

    Uses ``os.cpu_count()`` jobs and returns whatever the ``Parallel`` call
    returns.
    """
    worker_count = os.cpu_count()
    executor = Parallel(n_jobs=worker_count)
    return executor(delayed(func)(item) for item in args)


def prepare_dataset(data, train, out):
    """Convert a folder of per-subject NIfTI folders into the prepared layout.

    Every entry of ``data`` that contains ``CT1_SEG.nii.gz`` is processed with
    ``prepare_nifty``; the results in ``out`` are then sorted into sub-folders
    by ``prepare_dirs`` and described by ``prepare_dataset_json``. The elapsed
    time is printed at the end.

    Args:
        data: Directory whose entries are subject folders.
        train: Forwarded to ``prepare_dirs`` and ``prepare_dataset_json``.
        out: Output directory; created (with parents) if it does not exist.
    """
    print(f"Preparing GBM dataset from: {data}")
    d_out = out
    # Replaces a shell `mkdir`, which failed silently when a parent directory
    # was missing and broke on paths containing spaces.
    os.makedirs(d_out, exist_ok=True)
    start = time.time()
    # The joblib-based variant is currently disabled:
    #     run_parallel(prepare_nifty, sorted(glob(os.path.join(data, "*"))))
    for each in sorted(glob(os.path.join(data, "*"))):
        # Subjects without a segmentation are skipped, whatever `train` is.
        if os.path.exists(os.path.join(each, "CT1_SEG.nii.gz")):
            prepare_nifty(each, d_out)
    prepare_dirs(d_out, train)
    prepare_dataset_json(d_out, train)
    end = time.time()
    print(f"Preparing time: {(end - start):.2f}")


In [8]:
# Build the prepared dataset (images/, labels/, dataset.json) for fold 2 in training mode.
# NOTE(review): the standardised volumes were written above to
# '/data/private_data/6_histogram_fold2' (no underscore before the fold number),
# while this call reads from '.../6_hist/6_hist_fold_2' — confirm the data was
# moved there before this cell is re-run.
prepare_dataset('/data_anvar/public_datasets/preproc_study/gbm/6_hist/6_hist_fold_2',True, '/data/private_data/6_histogram_fold_2' )
# prepare_dataset('/data_anvar/public_datasets/preproc_study/gbm/7a_resamp',True, '/data/private_data/7a_resample')

Preparing GBM dataset from: /data_anvar/public_datasets/preproc_study/gbm/6_hist/6_hist_fold_2
Preparing time: 337.99


In [9]:
# Run the preprocessing script for task 31.2 in training mode; per the logged
# output below it processes /data/private_data/6_histogram_fold_2.
!python3 ../preprocess.py --data /data/private_data --task 31.2 --ohe --exec_mode training --results /data/private_data/

Preprocessing /data/private_data/6_histogram_fold_2
Pre-processing time: 41.77


In [34]:
# Sanity check: number of entries in the fold-0 histogram-standardised folder.
len(os.listdir('/data_anvar/public_datasets/preproc_study/gbm/6_hist/6_hist_fold_0'))

102

In [36]:
# Sanity check: number of files in the 31.0_3d folder
# (presumably the preprocess.py output for task 31.0 — TODO confirm).
len(os.listdir('/data/private_data/31.0_3d'))

409

In [37]:
# Sanity check: number of files in the 31.1_3d folder
# (presumably the preprocess.py output for task 31.1 — TODO confirm).
len(os.listdir('/data/private_data/31.1_3d'))

409

In [38]:
# !cd /data/private_data/31.2_3d/ && ls

In [105]:
import torch
# Load the last fold-0 training checkpoint to inspect its contents.
# NOTE: torch.load unpickles the file — only use it on trusted checkpoints.
# NOTE(review): a later listing of this results folder shows a `fold-0`
# sub-folder, and the fold-1 checkpoint is read from `.../fold-1/checkpoints/` —
# confirm this path is still valid.
check = torch.load('/results/gbm_results/6_histogram_fold_0/checkpoints/last.ckpt')

In [106]:
# Epoch counter stored in the loaded checkpoint.
check['epoch']

89

In [1]:
# List the contents of the histogram-standardised data folder.
!cd /data_anvar/public_datasets/preproc_study/gbm/6_hist && ls

6_hist_brain_masks  6_hist_fold_0  6_hist_fold_1  6_hist_fold_2


In [23]:
# List the contents of the fold-0 training results folder.
!cd /results/gbm_results/6_histogram_fold_0 && ls

fold-0


In [38]:
# Sanity check: number of entries in the fold-0 folder under `6_hist/6_histogram`.
# NOTE(review): another cell counts `6_hist/6_hist_fold_0` (no `6_histogram`
# level) — confirm which location is current.
len(os.listdir('/data_anvar/public_datasets/preproc_study/gbm/6_hist/6_histogram/6_hist_fold_0'))

102

In [49]:
import torch 
# Load the last fold-1 training checkpoint to inspect its contents.
# NOTE: torch.load unpickles the file — only use it on trusted checkpoints.
ch = torch.load('/results/gbm_results/6_histogram_fold_1/fold-1/checkpoints/last.ckpt')

In [50]:
# Print the top-level keys stored in the checkpoint, one per line.
for key in ch.keys():
    print(key)

epoch
global_step
pytorch-lightning_version
state_dict
callbacks
optimizer_states
lr_schedulers
native_amp_scaling_state
hparams_name
hyper_parameters


In [52]:
# Hyper-parameters saved with the checkpoint (includes the training `args`).
ch['hyper_parameters']

{'args': Namespace(affinity='socket_unique_contiguous', amp=True, batch_size=2, benchmark=False, blend='gaussian', brats=True, ckpt_path=None, data='/data/private_data/31.1_3d', data2d_dim=3, deep_supervision=True, deep_supr_num=2, depth=6, dim=3, epochs=100, exec_mode='train', filters=[64, 96, 128, 192, 256, 384, 512], focal=False, fold=1, gpus=1, gradient_clip_val=0, invert_resampled_y=False, learning_rate=0.0003, logname=None, min_fmap=2, momentum=0.99, more_chn=False, negative_slope=0.01, nfolds=3, no_back_in_output=False, norm='instance', num_workers=8, nvol=4, optimizer='adam', overlap=0.5, oversampling=0.4, patience=100, profile=False, res_block=False, results='/results/gbm_results/6_histogram_fold_1', resume_training=False, save_ckpt=True, save_preds=False, scheduler=True, seed=1, skip_first_n_eval=0, sync_batchnorm=False, task='31.1', test_batches=0, train_batches=0, tta=False, val_batch_size=4, warmup=5, weight_decay=0.0001),
 'triton': False,
 'data_dir': None}

In [31]:
# Run prediction for fold 1 of 3 on GPU 1 with AMP and TTA, saving the predictions.
# NOTE(review): this uses the `epoch=7-dice_mean=46.59` checkpoint and the logged
# run below was interrupted (^C) — confirm the checkpoint choice and re-run.
!export CUDA_VISIBLE_DEVICES=1 && python ../main.py --exec_mode predict --task 31.1_3d --data /data/private_data/31.1_3d --dim 3 --fold 1 --nfolds 3 --ckpt_path /results/gbm_results/6_histogram_fold_1/fold-1/checkpoints/epoch=7-dice_mean=46.59.ckpt --results /results/gbm_infer/6_histogram_fold_1 --amp --tta --save_preds

Global seed set to 1
0 training, 0 validation, 102 test examples
Filters: [64, 96, 128, 192, 256, 384, 512],
Kernels: [[3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3], [3, 3, 3]]
Strides: [[1, 1, 1], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2]]
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]
Testing:   8%|██▋                               | 8/102 [02:19<25:42, 16.41s/it]^C


In [29]:
# List the datasets available under /data/private_data.
!cd /data/private_data && ls

20_3d  31.0_3d	 6_histogram_fold_0  gbm_2a_interp
22_3d  31.1_3d	 6_histogram_fold_1  gbm_3a_atlas_train
27_3d  34_3d	 6_histogram_fold_2  schw
2b_n4  3a_susan  bgpd_1_reg	     self_supervision


In [28]:
# Run the preprocessing script for task 31.0 in training mode; per the logged
# output below it processes /data/private_data/6_histogram_fold_0.
!python3 ../preprocess.py --data /data/private_data/ --task 31.0 --ohe --exec_mode training --results /data/private_data/

Preprocessing /data/private_data/6_histogram_fold_0
Pre-processing time: 71.26


In [None]:
# Template of the fold-1 prediction command. It expands the shell variables
# `task` and `name`, which must be set before it is run.
# NOTE(review): as written this is plain shell, not Python — it needs a leading
# `!` or a `%%bash` cell to be executed from the notebook.
export CUDA_VISIBLE_DEVICES=0 && python ../main.py --exec_mode predict --task ${task}_3d --data /data/private_data/${task}_3d --dim 3 --fold 1 --nfolds 3 --ckpt_path /results/gbm_results/$name/fold-1/checkpoints/best*.ckpt --results /results/gbm_infer/$name --amp --tta --save_preds