In [1]:
import os
import pickle as pkl
import shutil
import subprocess
from glob import glob
from pathlib import Path

from nnunet.dataset_conversion.utils import generate_dataset_json

from tomoSegmentPipeline.utils import setup

def _join_tomo_ids(tomo_names):
    """Strip the 'tomo' substring from each name, sort the results as strings and join them with '-'."""
    return '-'.join(sorted(name.replace('tomo', '') for name in tomo_names))


# Tomograms whose patches are used for training.
train_tomos = ['tomo02', 'tomo03', 'tomo17', 'tomo32', 'tomo10']
concat_train_ids = _join_tomo_ids(train_tomos)

# Tomograms held back for testing.
test_tomos = ['tomo38', 'tomo04']
concat_test_ids = _join_tomo_ids(test_tomos)

# Both locations are read from the environment; a missing variable raises KeyError.
raw_data_base_path, preprocessed_path = (
    os.environ[var_name]
    for var_name in ('nnUNet_raw_data_base', 'nnUNet_preprocessed')
)



Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet



# Build 3d_fullres_large configuration

In [17]:
# Disabled one-off step, kept for reference: it would rename every file found under
# Task002_Heart's imagesTr/ and imagesTs/ so that '.nii.gz' becomes '_0000.nii.gz'.
# NOTE(review): presumably this adds the nnU-Net modality suffix - confirm before re-enabling.
# imagesTr_paths = glob('/mnt/home/icb/jeronimo.carvajal/Thesis/data/nnUnet/raw_data_base/nnUNet_raw_data/Task002_Heart/imagesTr/*')
# imagesTs_paths = glob('/mnt/home/icb/jeronimo.carvajal/Thesis/data/nnUnet/raw_data_base/nnUNet_raw_data/Task002_Heart/imagesTs/*')

# for i in imagesTr_paths+imagesTs_paths:
#     new_name = i.replace('.nii.gz', '_0000.nii.gz')
#     os.rename(i, new_name)

In [21]:
from batchgenerators.utilities.file_and_folder_operations import *
# Load the 3D plans, triple the batch size of the highest-numbered stage and
# store the result under a new file name (the source plans file is not modified).
plans = load_pickle('../data/nnUnet/preprocessed/Task002_Heart/nnUNetPlansv2.1_plans_3D.pkl')
stage = max(plans['plans_per_stage'])
stage_plan = plans['plans_per_stage'][stage]
stage_plan['batch_size'] = stage_plan['batch_size'] * 3
save_pickle(plans, '../data/nnUnet/preprocessed/Task002_Heart/nnUNetPlansv2.1_bs3x_plans_3D.pkl')

# Train tomograms

In [27]:
# Paths of the training patches and of their label files.
tomo_files, label_files = setup.get_paths(train_tomos, 'rawCET')

task_no = 778
task_name = 'rawCETBaseline'
task_path = os.path.join(raw_data_base_path, "nnUNet_raw_data/Task%03d_%s" % (task_no, task_name))

# Sub-folders of the task: training images, training labels and test images.
task_root = Path(task_path)
imagesTr_path = task_root / "imagesTr"
labelsTr_path = task_root / "labelsTr"
imagesTs_path = task_root / "imagesTs"  # test set

# Create whatever is missing; folders that already exist are left alone.
for task_dir in (imagesTr_path, labelsTr_path, imagesTs_path):
    task_dir.mkdir(parents=True, exist_ok=True)

In [21]:
task_path

'/home/haicu/jeronimo.carvajal/Thesis/data/nnUnet/raw_data_base/nnUNet_raw_data/Task777_rawCETBaseline'

In [22]:
# Copy the training patches and their labels into the task folders.
# shutil.copy replaces the former `cp` shell call: paths with spaces or shell
# metacharacters are handled correctly, and a failed copy raises an exception
# instead of being silently ignored.
# zip() would silently drop unmatched entries, so check the pairing first.
assert len(tomo_files) == len(label_files), "number of training patches and label files differ"
for tomo_file, label_file in zip(tomo_files, label_files):
    shutil.copy(tomo_file, imagesTr_path)
    shutil.copy(label_file, labelsTr_path)

# Test tomograms: only the images are copied; the label paths are kept in
# `label_test_files` but are not used in this cell.
tomo_test_files, label_test_files = setup.get_paths(test_tomos, 'rawCET')

for tomo_file in tomo_test_files:
    shutil.copy(tomo_file, imagesTs_path)

In [23]:
# Generate the task's dataset.json from the training and test image folders.
dataset_json_file = os.path.join(task_path, 'dataset.json')
label_names = {0: 'background', 1: 'membrane', 2: 'noise'}

generate_dataset_json(
    dataset_json_file,
    imagesTr_dir=imagesTr_path.as_posix(),
    imagesTs_dir=imagesTs_path.as_posix(),
    modalities=['Normalized'],
    labels=label_names,
    dataset_name='Raw Cryo Electron Tomogram Patches',
)

In [None]:
# Run the nnUNet_plan_and_preprocess command for this task with dataset
# verification enabled. check=True makes a non-zero exit status raise
# CalledProcessError, so a failed run stops the notebook instead of letting
# later cells continue. Passing the arguments as a list avoids going through
# a shell.
subprocess.run(
    ['nnUNet_plan_and_preprocess', '-t', '%03d' % task_no, '--verify_dataset_integrity'],
    check=True,
)

# Fold setup for Baseline model

Each fold holds out all the patches of one tomogram as the validation set, and a different tomogram is held out in each fold.

In [28]:
# Patch identifier = file name of the training patch without the '_0000.nii.gz' suffix.
patch_names = [os.path.basename(x).replace('_0000.nii.gz', '') for x in tomo_files]

# One fold per training tomogram: every patch whose name starts with that
# tomogram's name goes to 'val', all remaining patches go to 'train'.
split_final = []

for tomo in train_tomos:
    train_patches = [x for x in patch_names if not x.startswith(tomo)]
    val_patches = [x for x in patch_names if x.startswith(tomo)]
    fold_dict = {'train': train_patches, 'val': val_patches}
    split_final.append(fold_dict)

task_preprocessed_path = os.path.join(preprocessed_path, "Task%03d_%s" % (task_no, task_name))
p = os.path.join(task_preprocessed_path, "splits_final.pkl")

# Write through a context manager so the file is flushed and closed before it
# is read again (the previous bare open() never closed its handle).
with open(p, "wb") as splits_file:
    pkl.dump(split_final, splits_file)

In [19]:
# Read the splits file back from disk and display its contents as a check.
with open(p, 'rb') as splits_file:
    data = pkl.load(splits_file)
data

[{'train': ['tomo03_patch000',
   'tomo03_patch002',
   'tomo03_patch004',
   'tomo03_patch006',
   'tomo03_patch008',
   'tomo03_patch010',
   'tomo03_patch020',
   'tomo03_patch021',
   'tomo03_patch022',
   'tomo03_patch030',
   'tomo03_patch031',
   'tomo03_patch032',
   'tomo10_patch001',
   'tomo10_patch002',
   'tomo10_patch003',
   'tomo10_patch009',
   'tomo10_patch010',
   'tomo10_patch011',
   'tomo10_patch020',
   'tomo10_patch030',
   'tomo17_patch000',
   'tomo17_patch001',
   'tomo17_patch002',
   'tomo17_patch006',
   'tomo17_patch008',
   'tomo17_patch009',
   'tomo17_patch010',
   'tomo17_patch014',
   'tomo17_patch015',
   'tomo17_patch030',
   'tomo32_patch000',
   'tomo32_patch002',
   'tomo32_patch004',
   'tomo32_patch006',
   'tomo32_patch008',
   'tomo32_patch010',
   'tomo32_patch012',
   'tomo32_patch014',
   'tomo32_patch020',
   'tomo32_patch030',
   'tomo32_patch031'],
  'val': ['tomo02_patch000',
   'tomo02_patch001',
   'tomo02_patch002',
   'tomo02_patc