In [2]:
import numpy as np
from tqdm import tqdm
import nibabel
import os

RAW_TRAIN_DATA_DIR = os.path.join(os.curdir, "dataset", "raw", "Train")
RAW_TRAIN_LABEL_DIR = os.path.join(os.curdir, "dataset", "raw", "Train", "Labels")

TRAIN_DATASET_DIR = os.path.join(os.curdir, "dataset", "train")
VALIDATION_DATASET_DIR = os.path.join(os.curdir, "dataset", "validation")


def get_splited_dataset_filenames(raw_train_dir, train_ratio):
    train_names = [name for name in sorted(os.listdir(raw_train_dir))]
    boundary = int(len(train_names) * train_ratio)
    train_filenames = []
    for filename in tqdm(train_names[:boundary]):
        _, ext = os.path.splitext(filename)
        if "hdr" in ext:
            train_filenames.append(filename)
    val_filenames = []
    for filename in tqdm(train_names[boundary:]):
        _, ext = os.path.splitext(filename)
        if "hdr" in ext:
            val_filenames.append(filename)
    return train_filenames, val_filenames


train_paths, val_paths = get_splited_dataset_filenames(RAW_TRAIN_DATA_DIR, 0.8)
print(len(train_paths))
print(train_paths)
print(len(val_paths))
print(val_paths)


100%|██████████| 40/40 [00:00<00:00, 172960.99it/s]
100%|██████████| 11/11 [00:00<00:00, 92090.51it/s]

20
['HFH_001.hdr', 'HFH_002.hdr', 'HFH_003.hdr', 'HFH_004.hdr', 'HFH_005.hdr', 'HFH_006.hdr', 'HFH_007.hdr', 'HFH_008.hdr', 'HFH_009.hdr', 'HFH_010.hdr', 'HFH_011.hdr', 'HFH_012.hdr', 'HFH_013.hdr', 'HFH_014.hdr', 'HFH_015.hdr', 'HFH_016.hdr', 'HFH_017.hdr', 'HFH_018.hdr', 'HFH_019.hdr', 'HFH_020.hdr']
5
['HFH_021.hdr', 'HFH_022.hdr', 'HFH_023.hdr', 'HFH_024.hdr', 'HFH_025.hdr']





In [2]:
from PIL import Image

def init_trainset(filenames, raw_dataset_dir, raw_label_dir, train_data_dir):
    for filename in tqdm(sorted(filenames)):
        name, ext = os.path.splitext(filename)
        if "hdr" in ext:
            label_image = nibabel.load(os.path.join(raw_label_dir, name + "_Hipp_Labels" + ext)).get_fdata()
            label_image = label_image.squeeze()
            indices = []
            for index in range(label_image.shape[1]):
                sliced_label_image = label_image[:, index, ...]
                sliced_label_image = sliced_label_image.astype(np.int32)
                pil_label_image = Image.fromarray(sliced_label_image).resize(
                    (256, 256), Image.Resampling.NEAREST
                )
                scaled_label_image = np.array(pil_label_image)
                if (scaled_label_image != 0).any():
                    indices.append(index)
                    dense_label_image = np.zeros((256, 256, 3))
                    dense_label_image[scaled_label_image == 0, 0] = 1
                    dense_label_image[scaled_label_image == 1, 1] = 1
                    dense_label_image[scaled_label_image == 2, 2] = 1
                    new_names = name.split("_")[:2]
                    new_filename = f"{new_names[0]}_{new_names[1]}_{index:>03d}.npy"
                    new_path = os.path.join(train_data_dir, "label", new_filename)
                    with open(new_path, "wb") as f:
                        np.save(f, dense_label_image)

            data_image = nibabel.load(
                os.path.join(raw_dataset_dir, filename)
            ).get_fdata()
            data_image = data_image.squeeze()
            for index in indices:
                sliced_data_image = data_image[:, index, ...]
                sliced_data_image = sliced_data_image.astype(np.int32)
                pil_data_image = Image.fromarray(sliced_data_image).resize(
                    (256, 256), Image.Resampling.NEAREST
                )
                scaled_data_image = np.array(pil_data_image)
                str_index = str(index)
                new_filename = f"{name}_{index:>03d}.npy"
                new_path = os.path.join(train_data_dir, "data", new_filename)
                with open(new_path, "wb") as f:
                    np.save(f, scaled_data_image)

all_filenames = get_splited_dataset_filenames(RAW_TRAIN_DATA_DIR, 1.0)[0]
init_trainset(
    all_filenames,
    RAW_TRAIN_DATA_DIR,
    RAW_TRAIN_LABEL_DIR,
    TRAIN_DATASET_DIR,
)


100%|██████████| 51/51 [00:00<00:00, 351247.13it/s]
0it [00:00, ?it/s]
100%|██████████| 25/25 [00:06<00:00,  3.79it/s]


In [4]:
def init_validationset(filenames, raw_dataset_dir, raw_label_dir, validation_data_dir):
    for filename in tqdm(sorted(filenames)):
        name, ext = os.path.splitext(filename)
        if "hdr" in ext:
            image = nibabel.load(os.path.join(raw_dataset_dir, filename)).get_fdata()
            image = image.squeeze()

            label_image = nibabel.load(
                os.path.join(raw_label_dir, name + "_Hipp_Labels" + ext)
            ).get_fdata()
            label_image = label_image.squeeze()

            for index in range(image.shape[1]):
                sliced_image = image[:, index, ...]
                sliced_image = sliced_image.astype(np.int32)
                pil_image = Image.fromarray(sliced_image).resize(
                    (256, 256), Image.Resampling.NEAREST
                )
                scaled_image = np.array(pil_image)
                str_index = str(index)
                new_filename = f"{name}_{index:>03d}.npy"
                new_path = os.path.join(validation_data_dir, "data", new_filename)
                with open(new_path, "wb") as f:
                    np.save(f, scaled_image)

                sliced_label_image = label_image[:, index, ...]
                sliced_label_image = sliced_label_image.astype(np.int32)
                pil_label_image = Image.fromarray(sliced_label_image).resize(
                    (256, 256), Image.Resampling.NEAREST
                )
                scaled_label_image = np.array(pil_label_image)
                dense_label_image = np.zeros((256, 256, 3))
                dense_label_image[scaled_label_image == 0, 0] = 1
                dense_label_image[scaled_label_image == 1, 1] = 1
                dense_label_image[scaled_label_image == 2, 2] = 1
                new_names = name.split("_")[:2]
                new_filename = f"{new_names[0]}_{new_names[1]}_{index:>03d}.npy"
                new_path = os.path.join(validation_data_dir, "label", new_filename)
                with open(new_path, "wb") as f:
                    np.save(f, dense_label_image)


init_validationset(
    all_filenames, RAW_TRAIN_DATA_DIR, RAW_TRAIN_LABEL_DIR, VALIDATION_DATASET_DIR
)


100%|██████████| 25/25 [00:33<00:00,  1.35s/it]


In [5]:
from PIL import Image

RAW_TEST_DATA_DIR = os.path.join(os.curdir, "dataset", "raw", "Test")
TEST_DATA_DIR = os.path.join(os.curdir, "dataset", "test")


def init_testset(filenames, raw_dataset_dir, test_data_dir):
    for filename in tqdm(sorted(filenames)):
        name, ext = os.path.splitext(filename)
        if "hdr" in ext:
            image = nibabel.load(os.path.join(raw_dataset_dir, filename)).get_fdata()
            image = image.squeeze()

            for index in range(image.shape[1]):
                sliced_image = image[:, index, ...]
                sliced_image = sliced_image.astype(np.int32)
                pil_image = Image.fromarray(sliced_image).resize(
                    (256, 256), Image.Resampling.NEAREST
                )
                scaled_image = np.array(pil_image)
                new_filename = f"{name}_{index:>03d}.npy"
                new_path = os.path.join(test_data_dir, new_filename)
                with open(new_path, "wb") as f:
                    np.save(f, scaled_image)


test_filenames, _ = get_splited_dataset_filenames(RAW_TEST_DATA_DIR, 1)

init_testset(test_filenames, RAW_TEST_DATA_DIR, TEST_DATA_DIR)


100%|██████████| 50/50 [00:00<00:00, 251758.94it/s]
0it [00:00, ?it/s]
100%|██████████| 25/25 [00:07<00:00,  3.15it/s]
