## Split train and valid

In [10]:
import os
import glob

In [36]:
# Root directory that holds the task folders.
base = '/home/ubuntu/kimin/atlas_2022/ATLAS_pipline/data'

# Task folder and its imagesTr / labelsTr subfolders.
task_name = 'Task500_ATLAS'
target_base = os.path.join(base, task_name)
imagesTr_path = os.path.join(target_base, "imagesTr")
labelsTr_path = os.path.join(target_base, "labelsTr")

In [97]:
from collections import Counter

# Every training volume, in deterministic (sorted) order.
train_pathes = sorted(glob.glob(os.path.join(imagesTr_path, '*.nii.gz')))

# The first six characters of each file name are used as the grouping prefix.
prefix_list = [path.split('/')[-1][:6] for path in train_pathes]

# Counter keeps keys in order of first appearance, so the per-prefix counts
# follow the order in which the prefixes occur in the sorted path list.
cnt_prefix_list = Counter(prefix_list)
train_cnt_list = [cnt_prefix_list[key] for key in cnt_prefix_list]
print("train 데이터의 총합: ", sum(train_cnt_list))
print("prefix 별 train 개수: ", train_cnt_list)


train 데이터의 총합:  655
prefix 별 train 개수:  [38, 12, 15, 37, 18, 111, 26, 29, 5, 12, 8, 6, 7, 8, 11, 16, 13, 5, 37, 24, 8, 49, 2, 45, 18, 2, 2, 7, 25, 23, 12, 7, 17]


In [100]:
# One validation case per ten files of a prefix (integer division), but never
# fewer than one per prefix.
val_cnt_list = [max(n_files // 10, 1) for n_files in train_cnt_list]

print("valid 데이터의 총합: ", sum(val_cnt_list))
print("prefix 별 valid 개수: ", val_cnt_list)

valid 데이터의 총합:  60
prefix 별 valid 개수:  [3, 1, 1, 3, 1, 11, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 1, 4, 1, 4, 1, 1, 1, 1, 2, 2, 1, 1, 1]


In [101]:
def flag_train_val(count, val_cnt):
    """Return 'valid' while count has not exceeded val_cnt, otherwise 'train'."""
    return 'valid' if count <= val_cnt else 'train'

In [102]:
train_pathes = sorted(glob.glob(os.path.join(imagesTr_path, '*.nii.gz')))

# Group the case names (file name without the 7-character '.nii.gz') by their
# six-character prefix. A dict keeps insertion order, so the groups appear in
# order of first occurrence in the sorted path list, which is the order in
# which the per-prefix counts behind val_cnt_list were collected.
names_by_prefix = {}
for train_path in train_pathes:
    file_name = train_path.split('/')[-1]
    names_by_prefix.setdefault(file_name[:6], []).append(file_name[:-7])

# Groups and validation counts are paired positionally, so they must line up.
if len(names_by_prefix) != len(val_cnt_list):
    raise ValueError(
        "found %d prefixes but val_cnt_list has %d entries"
        % (len(names_by_prefix), len(val_cnt_list))
    )

# The first val_cnt cases of every prefix go to validation, the rest to
# training. Pairing by position avoids deriving an index from the last digit
# of the prefix, which maps e.g. 'sub010' and 'sub011' to the wrong entries.
target_imagesTr = []
target_imagesVal = []
for group_names, val_cnt in zip(names_by_prefix.values(), val_cnt_list):
    target_imagesVal.extend(group_names[:val_cnt])
    target_imagesTr.extend(group_names[val_cnt:])

In [103]:
# Report the size of each split and their combined total.
num_train_cases = len(target_imagesTr)
num_val_cases = len(target_imagesVal)
print("imagesTr 개수: ", num_train_cases)
print("imagesVal 개수: ", num_val_cases)
print("총합: ", num_train_cases + num_val_cases)

imagesTr 개수:  595
imagesVal 개수:  60
총합:  655


## Make json file

In [9]:
import os
import glob
import random

base = '/home/ubuntu/kimin/atlas_2022/ATLAS_pipline/data'

task_name = 'Task505_BRATS'
target_base = os.path.join(base, task_name, 'imagesTr')

train_pathes = sorted(glob.glob(os.path.join(target_base, '*.nii.gz')))
# Case name = file name without its directory and without the 7-character '.nii.gz'.
train_pathes_name = [path[:-7].split('/')[-1] for path in train_pathes]

# Fixed seed so the shuffle, and therefore the split, is reproducible.
random.seed(2022)
random.shuffle(train_pathes_name)

# The last 125 shuffled cases form the validation split; the rest is training.
imagesVal_dir = train_pathes_name[-125:]
imagesTr_dir = train_pathes_name[:-125]

In [10]:
import json
from typing import List

from typing import Tuple
import numpy as np

In [11]:
def subfiles(folder: str, join: bool = True, prefix: str = None, suffix: str = None, sort: bool = True) -> List[str]:
    """
    List the regular files directly inside folder.

    :param folder: directory to scan (not recursive)
    :param join: if True, return each entry joined with folder; otherwise return the bare file name
    :param prefix: if given, keep only file names starting with it
    :param suffix: if given, keep only file names ending with it
    :param sort: if True, sort the result before returning it
    :return: list of matching files
    """
    matches = []
    for entry in os.listdir(folder):
        full_path = os.path.join(folder, entry)
        # Directories and other non-file entries are skipped.
        if not os.path.isfile(full_path):
            continue
        if prefix is not None and not entry.startswith(prefix):
            continue
        if suffix is not None and not entry.endswith(suffix):
            continue
        matches.append(full_path if join else entry)
    if sort:
        matches.sort()
    return matches

def save_json(obj, file: str, indent: int = 4, sort_keys: bool = True) -> None:
    """
    Write obj to file as JSON.

    :param obj: JSON-serializable object
    :param file: path of the file to write (overwritten if it exists)
    :param indent: indentation passed to the JSON encoder
    :param sort_keys: whether dictionary keys are written in sorted order
    :raises TypeError: if obj cannot be serialized; the target file is left untouched in that case
    """
    # Serialize before opening the file: opening with 'w' truncates it, so a
    # failure halfway through encoding would otherwise destroy an existing file.
    text = json.dumps(obj, sort_keys=sort_keys, indent=indent)
    with open(file, 'w') as f:
        f.write(text)

def get_identifiers_from_splitted_files(folder: str):
    """
    Return the unique identifiers of the '.nii.gz' files directly inside folder.

    An identifier is the file name with its last 7 characters ('.nii.gz') removed.
    """
    file_names = subfiles(folder, suffix='.nii.gz', join=False)
    stems = [file_name[:-7] for file_name in file_names]
    return np.unique(stems)

def generate_dataset_json(output_file: str, imagesTr_dir: List, imagesVal_dir: List, modalities: Tuple,
                          labels: dict, dataset_name: str, sort_keys=True, license: str = "hands off!", dataset_description: str = "",
                          dataset_reference="", dataset_release='0.0'):
    """
    Build the dataset description and write it to output_file as JSON.

    :param output_file: full path of the JSON file to write. A warning is printed when it does not end with
    "dataset.json", but the file is written anyway
    :param imagesTr_dir: identifiers of the training cases. Each identifier is expanded to
    "./imagesTr/<id>.nii.gz" and "./labelsTr/<id>.nii.gz"
    :param imagesVal_dir: identifiers of the validation cases, expanded the same way (validation entries also
    point into imagesTr / labelsTr). Their number is stored under 'numTest'
    :param modalities: modality names, stored under their position as a string key ('0', '1', ...).
    Example: ('T1', 'T2', 'FLAIR')
    :param labels: dict mapping label IDs to label names; the IDs are stored as string keys.
    Example: {0: 'background', 1: 'edema', 2: 'enhancing tumor'}
    :param dataset_name: stored under 'name'
    :param sort_keys: whether the keys of the written JSON are sorted
    :param license: stored under 'licence'
    :param dataset_description: stored under 'description'
    :param dataset_reference: stored under 'reference'
    :param dataset_release: stored under 'release'
    :return: None
    """
    def case_entries(identifiers):
        # One image/label path pair per case identifier.
        return [{'image': "./imagesTr/%s.nii.gz" % case_id, "label": "./labelsTr/%s.nii.gz" % case_id}
                for case_id in identifiers]

    json_dict = {
        'name': dataset_name,
        'description': dataset_description,
        'tensorImageSize': "4D",
        'reference': dataset_reference,
        'licence': license,
        'release': dataset_release,
        'modality': {str(i): modalities[i] for i in range(len(modalities))},
        'labels': {str(label_id): label_name for label_id, label_name in labels.items()},
        'numTraining': len(imagesTr_dir),
        'numTest': len(imagesVal_dir),
        'training': case_entries(imagesTr_dir),
        'validation': case_entries(imagesVal_dir),
    }

    if not output_file.endswith("dataset.json"):
        print("WARNING: output file name is not dataset.json! This may be intentional or not. You decide. "
              "Proceeding anyways...")
    save_json(json_dict, output_file, sort_keys=sort_keys)

In [15]:
# Root directory that holds the task folders.
base = '/home/ubuntu/kimin/atlas_2022/ATLAS_pipline/data'

# From here on target_base is the task folder itself (no 'imagesTr' component).
task_name = 'Task505_BRATS'
target_base = os.path.join(base, task_name)

In [16]:
# Write dataset.json into the task folder: one modality ('T1') and two labels
# (0 = background, 1 = 'Core Tumor'), using the train/validation name lists.
generate_dataset_json(
    output_file=os.path.join(target_base, 'dataset.json'),
    imagesTr_dir=imagesTr_dir,
    imagesVal_dir=imagesVal_dir,
    modalities=('T1',),
    labels={0: 'background', 1: 'Core Tumor'},
    dataset_name=task_name,
    license='hands off!'
)

In [34]:
# Sorted list of every '.nii.gz' file in the task's labelsTr folder; the slice
# previews the first five paths.
label_path = sorted(glob.glob(os.path.join(target_base, 'labelsTr' ,'*.nii.gz')))
label_path[:5]

['/home/ubuntu/kimin/atlas_2022/ATLAS_pipline/data/Task505_BRATS/labelsTr/BraTS2021_00000.nii.gz',
 '/home/ubuntu/kimin/atlas_2022/ATLAS_pipline/data/Task505_BRATS/labelsTr/BraTS2021_00002.nii.gz',
 '/home/ubuntu/kimin/atlas_2022/ATLAS_pipline/data/Task505_BRATS/labelsTr/BraTS2021_00003.nii.gz',
 '/home/ubuntu/kimin/atlas_2022/ATLAS_pipline/data/Task505_BRATS/labelsTr/BraTS2021_00005.nii.gz',
 '/home/ubuntu/kimin/atlas_2022/ATLAS_pipline/data/Task505_BRATS/labelsTr/BraTS2021_00006.nii.gz']

In [35]:
import ants 

for label_file in label_path:
    source_image = ants.image_read(label_file)
    voxels = source_image.numpy()
    # Label values 2 and 4 are set to 0; value 1 is kept unchanged.
    voxels[(voxels == 2) | (voxels == 4)] = 0

    # Rebuild an image from the edited array, reusing the source geometry.
    relabelled = ants.from_numpy(
        voxels,
        origin=source_image.origin[:3],
        spacing=source_image.spacing[:3],
        direction=source_image.direction[:3],
    )
    # The result overwrites the label file it was read from.
    ants.image_write(relabelled, label_file)

# Json 파일 수정

In [2]:
import json

In [3]:
def save_json(obj, file: str, indent: int = 4, sort_keys: bool = True) -> None:
    """
    Write obj to file as JSON.

    :param obj: JSON-serializable object
    :param file: path of the file to write (overwritten if it exists)
    :param indent: indentation passed to the JSON encoder
    :param sort_keys: whether dictionary keys are written in sorted order
    :raises TypeError: if obj cannot be serialized; the target file is left untouched in that case
    """
    # Serialize before opening the file: opening with 'w' truncates it, so a
    # failure halfway through encoding would otherwise destroy an existing file.
    text = json.dumps(obj, sort_keys=sort_keys, indent=indent)
    with open(file, 'w') as f:
        f.write(text)

def read_json(file: str) -> dict:
    """
    Load and return the content of a JSON file.

    :param file: path of the JSON file to read
    :return: the decoded JSON content. Annotated as dict because the callers in this notebook index the
    result by key; json.load itself returns whatever the top-level JSON value is
    """
    # JSON text is UTF-8 by specification, so do not rely on the locale default.
    with open(file, 'r', encoding='utf-8') as f:
        return json.load(f)

In [5]:
# Load the existing dataset description, then display its 'test' entry.
json_dict = read_json('/home/ubuntu/kimin/atlas_2022/ATLAS_pipline/dataset_501.json')
json_dict['test']

{'description': '',
 'labels': {'0': 'background', '1': 'label'},
 'licence': 'hands off!',
 'modality': {'0': 't1w'},
 'name': 'Task501_ATLAS',
 'numTest': 121,
 'numTraining': 534,
 'reference': '',
 'release': '0.0',
 'tensorImageSize': '4D',
 'test': ['./imagesTs/sub001_003.nii.gz',
  './imagesTs/sub001_017.nii.gz',
  './imagesTs/sub001_025.nii.gz',
  './imagesTs/sub001_026.nii.gz',
  './imagesTs/sub001_027.nii.gz',
  './imagesTs/sub001_032.nii.gz',
  './imagesTs/sub001_033.nii.gz',
  './imagesTs/sub002_005.nii.gz',
  './imagesTs/sub002_008.nii.gz',
  './imagesTs/sub003_004.nii.gz',
  './imagesTs/sub003_006.nii.gz',
  './imagesTs/sub003_010.nii.gz',
  './imagesTs/sub004_007.nii.gz',
  './imagesTs/sub004_009.nii.gz',
  './imagesTs/sub004_017.nii.gz',
  './imagesTs/sub004_019.nii.gz',
  './imagesTs/sub004_030.nii.gz',
  './imagesTs/sub004_033.nii.gz',
  './imagesTs/sub004_036.nii.gz',
  './imagesTs/sub005_046.nii.gz',
  './imagesTs/sub005_068.nii.gz',
  './imagesTs/sub005_081.nii.gz'

In [7]:
# Move the 'test' entry to the 'validation' key. pop() removes 'test', so
# running this cell a second time raises KeyError.
json_dict['validation'] = json_dict.pop('test')

In [8]:
json_dict

{'description': '',
 'labels': {'0': 'background', '1': 'label'},
 'licence': 'hands off!',
 'modality': {'0': 't1w'},
 'name': 'Task501_ATLAS',
 'numTest': 121,
 'numTraining': 534,
 'reference': '',
 'release': '0.0',
 'tensorImageSize': '4D',
 'training': [{'image': './imagesTr/sub001_001.nii.gz',
   'label': './labelsTr/sub001_001.nii.gz'},
  {'image': './imagesTr/sub001_002.nii.gz',
   'label': './labelsTr/sub001_002.nii.gz'},
  {'image': './imagesTr/sub001_004.nii.gz',
   'label': './labelsTr/sub001_004.nii.gz'},
  {'image': './imagesTr/sub001_005.nii.gz',
   'label': './labelsTr/sub001_005.nii.gz'},
  {'image': './imagesTr/sub001_006.nii.gz',
   'label': './labelsTr/sub001_006.nii.gz'},
  {'image': './imagesTr/sub001_007.nii.gz',
   'label': './labelsTr/sub001_007.nii.gz'},
  {'image': './imagesTr/sub001_008.nii.gz',
   'label': './labelsTr/sub001_008.nii.gz'},
  {'image': './imagesTr/sub001_009.nii.gz',
   'label': './labelsTr/sub001_009.nii.gz'},
  {'image': './imagesTr/sub001

In [11]:
# Sanity check of the renaming rule on one path: insert '_0000' right before
# the 7-character '.nii.gz' extension.
p_t = './imagesTr/sub001_001.nii.gz'
p_t[:-7] + '_0000'+ p_t[-7:]

'./imagesTr/sub001_001_0000.nii.gz'

In [22]:
# Insert '_0000' in front of the 7-character '.nii.gz' extension of both the
# image and the label path of every training entry.
# NOTE(review): the label paths receive the '_0000' suffix as well -- confirm
# that the files in labelsTr are really named that way.
new_data = [
    {
        'image': entry['image'][:-7] + '_0000' + entry['image'][-7:],
        'label': entry['label'][:-7] + '_0000' + entry['label'][-7:],
    }
    for entry in json_dict['training']
]
new_data[:5]

[{'image': './imagesTr/sub001_001_0000.nii.gz',
  'label': './labelsTr/sub001_001_0000.nii.gz'},
 {'image': './imagesTr/sub001_002_0000.nii.gz',
  'label': './labelsTr/sub001_002_0000.nii.gz'},
 {'image': './imagesTr/sub001_004_0000.nii.gz',
  'label': './labelsTr/sub001_004_0000.nii.gz'},
 {'image': './imagesTr/sub001_005_0000.nii.gz',
  'label': './labelsTr/sub001_005_0000.nii.gz'},
 {'image': './imagesTr/sub001_006_0000.nii.gz',
  'label': './labelsTr/sub001_006_0000.nii.gz'}]

In [14]:
json_dict['training'][:5]

[{'image': './imagesTr/sub001_001.nii.gz',
  'label': './labelsTr/sub001_001.nii.gz'},
 {'image': './imagesTr/sub001_002.nii.gz',
  'label': './labelsTr/sub001_002.nii.gz'},
 {'image': './imagesTr/sub001_004.nii.gz',
  'label': './labelsTr/sub001_004.nii.gz'},
 {'image': './imagesTr/sub001_005.nii.gz',
  'label': './labelsTr/sub001_005.nii.gz'},
 {'image': './imagesTr/sub001_006.nii.gz',
  'label': './labelsTr/sub001_006.nii.gz'}]

In [26]:
# Sanity check of the validation rule on one path: drop the 11-character
# './imagesTs/' prefix and the '.nii.gz' extension, then rebuild the path
# under './imagesTr/' with '_0000' inserted before the extension.
p_t = './imagesTs/sub049_028.nii.gz'
'./imagesTr/' + p_t[:-7][11:] + '_0000'+ p_t[-7:]

'./imagesTr/sub049_028_0000.nii.gz'

In [27]:
def _as_training_style_entry(path):
    # Strip the 11-character './imagesTs/' prefix and the 7-character
    # '.nii.gz' extension, then rebuild image/label paths with '_0000'.
    stem = path[11:-7]
    extension = path[-7:]
    return {
        'image': './imagesTr/' + stem + '_0000' + extension,
        'label': './labelsTr/' + stem + '_0000' + extension,
    }

# Each validation entry is still a plain path string at this point.
val_new_data = [_as_training_style_entry(path) for path in json_dict['validation']]
val_new_data[:5]

[{'image': './imagesTr/sub001_003_0000.nii.gz',
  'label': './labelsTr/sub001_003_0000.nii.gz'},
 {'image': './imagesTr/sub001_017_0000.nii.gz',
  'label': './labelsTr/sub001_017_0000.nii.gz'},
 {'image': './imagesTr/sub001_025_0000.nii.gz',
  'label': './labelsTr/sub001_025_0000.nii.gz'},
 {'image': './imagesTr/sub001_026_0000.nii.gz',
  'label': './labelsTr/sub001_026_0000.nii.gz'},
 {'image': './imagesTr/sub001_027_0000.nii.gz',
  'label': './labelsTr/sub001_027_0000.nii.gz'}]

In [28]:
# Swap in the rewritten training/validation entries and replace the label and
# modality names. 'numTraining' and 'numTest' are not modified here.
json_dict['training'] = new_data
json_dict['validation'] = val_new_data
json_dict["labels"] = {
        "0": "background",
        "1": "Stroke Lesion"
    }
json_dict["modality"] = {
        "0": "T1"
    }
json_dict

{'description': '',
 'labels': {'0': 'background', '1': 'Stroke Lesion'},
 'licence': 'hands off!',
 'modality': {'0': 'T1'},
 'name': 'Task501_ATLAS',
 'numTest': 121,
 'numTraining': 534,
 'reference': '',
 'release': '0.0',
 'tensorImageSize': '4D',
 'training': [{'image': './imagesTr/sub001_001_0000.nii.gz',
   'label': './labelsTr/sub001_001_0000.nii.gz'},
  {'image': './imagesTr/sub001_002_0000.nii.gz',
   'label': './labelsTr/sub001_002_0000.nii.gz'},
  {'image': './imagesTr/sub001_004_0000.nii.gz',
   'label': './labelsTr/sub001_004_0000.nii.gz'},
  {'image': './imagesTr/sub001_005_0000.nii.gz',
   'label': './labelsTr/sub001_005_0000.nii.gz'},
  {'image': './imagesTr/sub001_006_0000.nii.gz',
   'label': './labelsTr/sub001_006_0000.nii.gz'},
  {'image': './imagesTr/sub001_007_0000.nii.gz',
   'label': './labelsTr/sub001_007_0000.nii.gz'},
  {'image': './imagesTr/sub001_008_0000.nii.gz',
   'label': './labelsTr/sub001_008_0000.nii.gz'},
  {'image': './imagesTr/sub001_009_0000.ni

In [29]:
# Destination of the updated dataset description.
output_file = '/home/ubuntu/kimin/atlas_2022/ATLAS_pipline/data/Task500_ATLAS/dataset_501.json'

# output_file is already a complete path, so it is passed as is. Wrapping it
# in a single-argument os.path.join() changes nothing and only makes this cell
# depend on `os`, which this part of the notebook does not import.
save_json(json_dict, output_file, sort_keys=True)