In [1]:
import pandas as pd

In [3]:
# Build the train/test split over case numbers 0..130 and persist it as CSV.
# Cases 27..47 (inclusive) are held out for testing; all others are training.
all_cases = range(131)
test_index = [case for case in all_cases if 27 <= case <= 47]
train_index = [case for case in all_cases if not 27 <= case <= 47]

# No validation split is generated here; the string stays an empty placeholder.
valid_index_str = ""

# Each list is serialised as space-terminated numbers, e.g. "0 1 2 ".
train_index_str = "".join(str(case) + " " for case in train_index)
test_index_str = "".join(str(case) + " " for case in test_index)

# One row per split, with the serialised indices in the single "index" column.
index_df = pd.DataFrame(index=["train", "test"], columns=["index"])
index_df.loc["train", "index"] = train_index_str
index_df.loc["test", "index"] = test_index_str
index_df.to_csv("./index.csv")

In [1]:
import os
import SimpleITK as sitk
import numpy as np
from scipy import ndimage

In [11]:
def _make_resampled_image(array, reference, xy_down_scale, z_down_scale):
    """Wrap ``array`` in a SimpleITK image whose geometry is derived from ``reference``."""
    image = sitk.GetImageFromArray(array)
    image.SetDirection(reference.GetDirection())
    # NOTE(review): the origin is copied unchanged even though leading slices
    # may have been cropped away - confirm whether downstream code relies on
    # physical coordinates.
    image.SetOrigin(reference.GetOrigin())
    spacing = reference.GetSpacing()
    # In-plane spacing grows by exactly 1 / xy_down_scale. No int() truncation,
    # so scales such as 0.4 or values above 1 yield a correct, non-zero spacing.
    image.SetSpacing((spacing[0] / xy_down_scale,
                      spacing[1] / xy_down_scale,
                      float(z_down_scale)))
    return image


def Preprocess(ct, ct_array, seg, seg_array, index, upper=200, lower=-200, xy_down_scale=0.5, z_down_scale=1.0,
               expand_slice=20, min_slice=10):
    """Clip, resample and crop one CT volume together with its segmentation.

    Steps: clamp CT intensities to ``[lower, upper]``; resample both arrays
    (cubic spline for the CT, nearest neighbour for the labels); keep only the
    slices along the first axis that contain labelled voxels, padded by
    ``expand_slice`` slices on each side; wrap the results in new images.

    Args:
        ct: Image the CT array came from; supplies spacing, direction, origin.
        ct_array: CT voxel array; the first axis is treated as the slice axis.
            It is not modified.
        seg: Image the segmentation array came from.
        seg_array: Label array with the same layout as ``ct_array``.
        index: Case identifier, used only in the skip messages.
        upper: Upper intensity clamp.
        lower: Lower intensity clamp.
        xy_down_scale: Zoom factor applied to the two in-plane axes.
        z_down_scale: Slice spacing assigned to the output images; the slice
            axis is zoomed by ``ct.GetSpacing()[-1] / z_down_scale``.
        expand_slice: Extra slices kept before and after the labelled range.
        min_slice: Minimum number of slices the cropped volume must have.

    Returns:
        ``(new_ct, new_seg)``, or ``(None, None)`` when the resampled
        segmentation has no labelled voxel or the crop has fewer than
        ``min_slice`` slices.
    """
    # Work on a copy so the caller's array is not clipped as a side effect.
    ct_array = ct_array.copy()
    ct_array[ct_array > upper] = upper
    ct_array[ct_array < lower] = lower

    # Same zoom for both arrays so that they stay voxel-aligned.
    zoom_factors = (ct.GetSpacing()[-1] / z_down_scale, xy_down_scale, xy_down_scale)
    ct_array = ndimage.zoom(ct_array, zoom_factors, order=3)
    seg_array = ndimage.zoom(seg_array, zoom_factors, order=0)

    # Indices of the slices that contain at least one labelled voxel.
    labelled_slices = np.flatnonzero(np.any(seg_array, axis=(1, 2)))
    if labelled_slices.size == 0:
        # Without this guard, indexing the first/last labelled slice raises
        # IndexError and aborts the caller's whole loop.
        print('Empty segmentation, give up the sample:',
              "segmentation-" + str(index) + ".nii")
        return None, None

    # Pad the labelled range and clamp it to the volume bounds.
    start_slice = max(int(labelled_slices[0]) - expand_slice, 0)
    end_slice = min(int(labelled_slices[-1]) + expand_slice, seg_array.shape[0] - 1)

    if end_slice - start_slice + 1 < min_slice:
        print('Too little slice，give up the sample:',
              "segmentation-" + str(index) + ".nii")
        return None, None

    ct_array = ct_array[start_slice:end_slice + 1, :, :]
    seg_array = seg_array[start_slice:end_slice + 1, :, :]

    # Each output takes its geometry from its own source image; the seg image
    # no longer borrows the CT's y-spacing.
    new_ct = _make_resampled_image(ct_array, ct, xy_down_scale, z_down_scale)
    new_seg = _make_resampled_image(seg_array, seg, xy_down_scale, z_down_scale)
    return new_ct, new_seg

In [12]:
def GenerateNewData(index, ct_raw_path, seg_raw_path):
    """Load one raw CT/segmentation pair and pass it through ``Preprocess``.

    The files are ``volume-<index>.nii`` under ``ct_raw_path`` and
    ``segmentation-<index>.nii`` under ``seg_raw_path``. The preprocessing
    settings (``upper``, ``lower``, ``xy_down_scale``, ``z_down_scale``,
    ``expand_slice``, ``min_slice``) are read from module-level names, so they
    must be defined before this function is called.

    Args:
        index: Case identifier as a string.
        ct_raw_path: Directory holding the raw CT volumes.
        seg_raw_path: Directory holding the raw segmentations.

    Returns:
        Whatever ``Preprocess`` returns, or ``(None, None)`` when either input
        file is missing (the index is printed in that case).
    """
    volume_file = os.path.join(ct_raw_path, "volume-" + index + ".nii")
    label_file = os.path.join(seg_raw_path, "segmentation-" + index + ".nii")

    # Both files are required; report the case and skip it otherwise.
    if not (os.path.exists(volume_file) and os.path.exists(label_file)):
        print(index)
        return None, None

    ct = sitk.ReadImage(volume_file, sitk.sitkInt16)
    seg = sitk.ReadImage(label_file, sitk.sitkInt8)

    return Preprocess(
        ct,
        sitk.GetArrayFromImage(ct),
        seg,
        sitk.GetArrayFromImage(seg),
        index,
        upper=upper,
        lower=lower,
        xy_down_scale=xy_down_scale,
        z_down_scale=z_down_scale,
        expand_slice=expand_slice,
        min_slice=min_slice,
    )

In [13]:
# Output tree for the preprocessed volumes:
#   ./preprocessed_data/{train,test}/{ct,seg}
prepro_path = "./preprocessed_data"

ct_train_path = os.path.join(prepro_path, "train", "ct")
seg_train_path = os.path.join(prepro_path, "train", "seg")
ct_test_path = os.path.join(prepro_path, "test", "ct")
seg_test_path = os.path.join(prepro_path, "test", "seg")

# exist_ok=True keeps the cell safe to re-run and avoids the race between a
# separate existence check and the directory creation.
for _out_dir in (prepro_path, ct_train_path, seg_train_path, ct_test_path, seg_test_path):
    os.makedirs(_out_dir, exist_ok=True)

In [14]:
# Directories holding the raw volumes and their segmentations.
ct_raw_path = "./raw_data/ct"
seg_raw_path = "./raw_data/seg"

# Preprocessing settings; GenerateNewData reads these module-level names.
upper, lower = 200, -200
xy_down_scale, z_down_scale = 0.5, 1.0
expand_slice, min_slice = 6, 10

# The split is stored as space-separated case numbers, one string per split.
train_index = index_df.loc["train", "index"].strip().split(" ")
test_index = index_df.loc["test", "index"].strip().split(" ")

# Process the training cases first, then the test cases, writing each
# surviving CT/segmentation pair to its split's output directories.
for split_indices, ct_out_dir, seg_out_dir in (
    (train_index, ct_train_path, seg_train_path),
    (test_index, ct_test_path, seg_test_path),
):
    for index in split_indices:
        new_ct, new_seg = GenerateNewData(index, ct_raw_path, seg_raw_path)
        # Cases that were skipped come back as None and are not written.
        if new_ct is not None and new_seg is not None:
            sitk.WriteImage(new_ct, os.path.join(ct_out_dir, index + ".nii"))
            sitk.WriteImage(new_seg, os.path.join(seg_out_dir, index + ".nii"))