In [49]:
import torchio as tio
import numpy as np
import pandas as pd
from tqdm import tqdm
import glob


# CT scan volumes (NIfTI), sorted so that they line up with the masks below.
image_paths = glob.glob("../data/preprocessed/img/*.nii")
image_paths.sort()

# Primary tumor (GTV) segmentation masks, sorted the same way.
label_paths = glob.glob('../data/preprocessed/seg/*GTV*.nii')
label_paths.sort()

# Clinical table; the export loop reads "Survival.time" and
# "deadstatus.event" from it.
df = pd.read_csv("../data/lung-01.csv")

# Preprocessing pipeline, applied to each subject in the order listed.
# NOTE(review): the per-step descriptions follow the torchio documentation;
# confirm against the installed torchio version.
preprocessing_steps = [
    # Reorient the volumes to the canonical orientation.
    tio.ToCanonical(),
    # Resample onto the grid of the subject's "gtv" image.
    tio.Resample("gtv"),
    # Resample to a spacing of 1 (isotropic).
    tio.Resample(1),
    # Crop or pad to a fixed (220, 120, 60) shape, using the "gtv" mask
    # to position the window.
    tio.CropOrPad((220, 120, 60), mask_name='gtv'),
    # Z-normalize intensities; the statistics mask is derived by
    # ZNormalization.mean.
    tio.ZNormalization(masking_method=tio.ZNormalization.mean),
]
trans = tio.Compose(preprocessing_steps)

# Build one torchio Subject per (CT scan, GTV mask) pair and remember the
# identifier of each subject at the same position in `indexs`.
subjects = []
indexs = []

# Scans and masks are paired purely by their position in the two sorted
# lists. zip() would silently stop at the shorter list, and a single missing
# or extra file would shift every following pair, so refuse to continue when
# the counts differ.
if len(image_paths) != len(label_paths):
    raise ValueError(
        f"Found {len(image_paths)} CT scans but {len(label_paths)} GTV masks; "
        "they cannot be paired by sorted order."
    )

print("Gathering paths...")
# zip() has no length, so pass the total explicitly to get a real progress bar.
for image_path, label_path in tqdm(zip(image_paths, label_paths),
                                   total=len(image_paths)):
    subject = tio.Subject(
        ctscan=tio.ScalarImage(image_path),
        gtv=tio.LabelMap(label_path),
    )
    subjects.append(subject)
    # The identifier is the 9 characters that precede the 4-character
    # ".nii" extension at the end of the path.
    indexs.append(image_path[-13:-4])

dataset = tio.SubjectsDataset(subjects)

print("Done !")

print('Dataset size:', len(dataset), 'subjects')

# Preprocess every selected subject and save it, together with its survival
# data, as one .npy file per subject under ../data/dataset/.

# Subjects whose numeric identifier is below this value are skipped.
# NOTE(review): presumably these were exported by an earlier run - confirm.
first_patient_number = 144

print("Creating dataset...")
# `indexs` and `dataset` hold the same subjects in the same order, so the
# position of an identifier is also the position of its subject.
for position, i in tqdm(enumerate(indexs), total=len(indexs)):
    # Identifiers are 9 characters long, so i[6:] is the trailing
    # three-digit patient number.
    patient_number = int(i[6:])
    if patient_number < first_patient_number:
        # Decide before touching `dataset`, so that skipped subjects are
        # never fetched from it.
        continue

    # Clinical rows are looked up by position: patient N is row N - 1.
    # NOTE(review): assumes the CSV is ordered by patient number without
    # gaps - confirm.
    row = df.iloc[patient_number - 1]
    survival_time = row["Survival.time"]
    status_event = row["deadstatus.event"]

    subject = trans(dataset[position])
    t = subject.ctscan.data.squeeze().detach().numpy()
    m = subject.gtv.data.squeeze().detach().numpy()

    # Object array: [image, mask, survival time, event status]. Reading it
    # back requires np.load(..., allow_pickle=True).
    np.save("../data/dataset/"+i, np.array([t, m, survival_time, status_event], dtype=object))

print("Done !")

421it [00:00, 9585.86it/s]
0it [00:00, ?it/s]

Gathering paths...
Done !
Dataset size: 421 subjects
Creating dataset...


421it [15:29,  2.21s/it]

Done !



