In [None]:
from pathlib import Path
import shutil
import os
import pandas as pd
import json

In [None]:
# Input: the raw training data sits two directory levels above the current
# working directory.
data_dir = 'data/HNTSMRG24_train'
proj_dir = Path.cwd().parent.parent
src_dir = proj_dir / data_dir

# Output: dataset root plus its image and label subfolders (relative paths).
dst_dir = 'Dataset003_HNTSMRG'
dst_dir_samples = f'{dst_dir}/imagesTr/'
dst_dir_masks = f'{dst_dir}/labelsTr/'

# Create the root first, then both subfolders; folders that already exist are kept.
for folder in (dst_dir, dst_dir_samples, dst_dir_masks):
    os.makedirs(folder, exist_ok=True)

In [None]:
# Display the resolved source directory as a quick sanity check of the path.
src_dir

In [None]:
# Registry of file lists, filled per time point ('preRT', 'midRT') by the next cell.
sets_raw = dict()

In [None]:
# Collect the T2 volumes and the masks for both time points.
#
# * The glob patterns use forward slashes: pathlib accepts them on every
#   platform, whereas backslash patterns only match on Windows and rely on
#   invalid string escapes such as '\p' and '\m'.
# * The results are sorted because glob() yields paths in arbitrary order,
#   while the following cells pair samples[i] with masks[i] purely by position.
pre_samples = sorted(str(p) for p in src_dir.glob('*/preRT/*T2*'))
pre_masks = sorted(str(p) for p in src_dir.glob('*/preRT/*mask*'))
sets_raw['preRT'] = {'samples': pre_samples,
                     'masks': pre_masks}

# The midRT patterns additionally require 'midRT' in the file name and the
# name to end in 'T2.nii*' / 'mask.nii*'.
mid_samples = sorted(str(p) for p in src_dir.glob('*/midRT/*midRT*T2.nii*'))
mid_masks = sorted(str(p) for p in src_dir.glob('*/midRT/*midRT*mask.nii*'))
sets_raw['midRT'] = {'samples': mid_samples,
                     'masks': mid_masks}

In [None]:
# Copy every preRT sample/mask pair into the dataset folders. Cases are
# numbered from 000 in list order; the sample file gets a '_0000' suffix.
dataset = sets_raw['preRT']
for case_idx, pair in enumerate(zip(dataset['samples'], dataset['masks'])):
    src_image, src_label = pair
    image_target = f'{dst_dir_samples}/HNTSMRG_{case_idx:03d}_0000.nii.gz'
    label_target = f'{dst_dir_masks}/HNTSMRG_{case_idx:03d}.nii.gz'

    shutil.copyfile(src_image, image_target)
    shutil.copyfile(src_label, label_target)

In [None]:
# Copy every midRT sample/mask pair; their case numbers continue right after
# the preRT cases.
# The offset is read straight from sets_raw['preRT'] rather than from
# `dataset`: `dataset` is rebound to the midRT set below, so re-running this
# cell would otherwise compute the offset from the wrong list.
n = len(sets_raw['preRT']['samples'])
dataset = sets_raw['midRT']
for i, (sample, mask) in enumerate(zip(dataset['samples'], dataset['masks'])):
    dst_sample = f'{dst_dir_samples}/HNTSMRG_{i+n:03d}_0000.nii.gz'
    dst_mask = f'{dst_dir_masks}/HNTSMRG_{i+n:03d}.nii.gz'

    shutil.copyfile(sample, dst_sample)
    shutil.copyfile(mask, dst_mask)

In [None]:
dataset_p = sets_raw['preRT']
dataset_m = sets_raw['midRT']


def _file_stem(path):
    """Return the file name of `path` up to its first dot.

    Both '\\' and '/' are treated as separators, so the result is the same
    whether the path string was produced on Windows or on a POSIX system
    (splitting on '\\' alone returned the whole path on POSIX).
    """
    return path.replace('\\', '/').rsplit('/', 1)[-1].split('.')[0]


# One row per position: stems of the preRT sample, preRT mask, midRT sample
# and midRT mask that share the same list index.
data = [[_file_stem(path) for path in row]
        for row in zip(dataset_p['samples'], dataset_p['masks'],
                       dataset_m['samples'], dataset_m['masks'])]

In [None]:
# Total number of cases (preRT + midRT samples). Computed from scratch rather
# than with `n += ...`, so re-running this cell cannot inflate the count.
n = len(sets_raw['preRT']['samples']) + len(sets_raw['midRT']['samples'])

In [None]:
# Tabulate the rows collected in `data` (one row per entry, default integer
# column labels) for inspection.
df = pd.DataFrame(data)

In [None]:
# Dataset description that is serialised to dataset.json further down.
# Keys are inserted in the order they should appear in the file.
data_dict = {}
data_dict["channel_names"] = {"0": "T2"}
data_dict["labels"] = dict(background=0, GTVp=1, GTVn=2)
data_dict["numTraining"] = n
data_dict["file_ending"] = ".nii.gz"
data_dict["overwrite_image_reader_writer"] = "SimpleITKIO"

In [None]:
# Write the dataset description into the dataset root, indented by 4 spaces.
dataset_json_text = json.dumps(data_dict, indent=4)
with open(f'{dst_dir}/dataset.json', 'w') as json_file:
    json_file.write(dataset_json_text)

# Splits file

# 5 fold

In [None]:
# Case IDs in the order the files were copied: preRT cases first, midRT after.
f0 = [f'HNTSMRG_{i:03d}' for i in range(n)]
# Split at the actual number of preRT cases instead of a hard-coded 150, so
# the boundary stays correct if the number of collected cases changes.
n_pre = len(sets_raw['preRT']['samples'])
f0_pre = f0[:n_pre]
f0_mid = f0[n_pre:]

In [None]:
# Fold configuration: 5 folds over the preRT cases. The fold size is rounded
# down, so any remainder cases are not assigned to a fold.
nsplits = 5
nsamples = len(f0_pre)
split_size = nsamples // nsplits

In [None]:
# Cut the preRT and midRT case IDs into `nsplits` consecutive chunks of
# `split_size` cases each.
splits_pre, splits_mid = [], []
for fold in range(nsplits):
    start, stop = fold * split_size, (fold + 1) * split_size
    splits_pre.append(f0_pre[start:stop])
    splits_mid.append(f0_mid[start:stop])

# For each fold: validate on that fold's preRT chunk and train on all other
# preRT chunks followed by all other midRT chunks. The midRT chunk with the
# held-out index is left out of both lists.
new_splits = []
for held_out in range(nsplits):
    train_cases = []
    for chunks in (splits_pre, splits_mid):
        for fold, chunk in enumerate(chunks):
            if fold != held_out:
                train_cases.extend(chunk)
    new_splits.append({'train': train_cases,
                       'val': splits_pre[held_out]})

In [None]:
# Folder that receives the split file for the dataset named by `dst_dir`.
json_dir = 'json_splits/{}/'.format(dst_dir)
os.makedirs(json_dir, exist_ok=True)

In [None]:
# Store the fold definitions as splits_final.json, indented by 4 spaces.
splits_json_text = json.dumps(new_splits, indent=4)
with open(f'{json_dir}/splits_final.json', 'w') as json_file:
    json_file.write(splits_json_text)

# 15 fold

In [None]:
# Case IDs in the order the files were copied: preRT cases first, midRT after.
f0 = [f'HNTSMRG_{i:03d}' for i in range(n)]
# Split at the actual number of preRT cases instead of a hard-coded 150, so
# the boundary stays correct if the number of collected cases changes.
n_pre = len(sets_raw['preRT']['samples'])
f0_pre = f0[:n_pre]
f0_mid = f0[n_pre:]

In [None]:
# Fold configuration: 15 folds over the preRT cases. The fold size is rounded
# down, so any remainder cases are not assigned to a fold.
nsplits = 15
nsamples = len(f0_pre)
split_size = nsamples // nsplits

In [None]:
# Consecutive chunks of `split_size` case IDs, one chunk per fold, built
# separately for the preRT and the midRT cases.
bounds = [(k * split_size, (k + 1) * split_size) for k in range(nsplits)]
splits_pre = [f0_pre[lo:hi] for lo, hi in bounds]
splits_mid = [f0_mid[lo:hi] for lo, hi in bounds]

# Fold k validates on preRT chunk k and trains on every other preRT chunk
# followed by every other midRT chunk; midRT chunk k is used in neither list.
new_splits = [
    {'train': [case
               for pool in (splits_pre, splits_mid)
               for j, split in enumerate(pool)
               if j != fold
               for case in split],
     'val': splits_pre[fold]}
    for fold in range(nsplits)
]

In [None]:
# Dataset name used for the output folder of the 15-fold split file.
dst_dirn = 'Dataset004_HNTSMRG'

In [None]:
# Folder that receives the split file for the dataset named by `dst_dirn`.
json_dir = 'json_splits/{}/'.format(dst_dirn)
os.makedirs(json_dir, exist_ok=True)

In [None]:
# Store the fold definitions as splits_final.json, indented by 4 spaces.
splits_json_text = json.dumps(new_splits, indent=4)
with open(f'{json_dir}/splits_final.json', 'w') as json_file:
    json_file.write(splits_json_text)

In [None]:
# Display the folder the last split file was written to.
json_dir