In [1]:
import os
import numpy as np
import pandas as pd
import SimpleITK as sitk
import pylidc

In [2]:
def masks_build(suid, hu_a, min_annotations=3, min_votes=1):
    """Build a boolean nodule mask for one scan from its pylidc annotations.

    Every annotation belonging to a sufficiently large annotation cluster
    casts one vote per voxel it covers; voxels with enough votes are marked
    True in the returned mask.

    Args:
        suid: Series instance UID of the scan to look up through pylidc.
        hu_a: Array whose shape defines the shape of the returned mask; its
            values are not read. Assumed to be indexed (slice, row, col), as
            the bounding boxes are reordered to that layout - TODO confirm
            against the caller's volume orientation.
        min_annotations: Clusters with fewer annotations than this are
            ignored (default 3, the previously hard-coded value).
        min_votes: Minimum number of annotations that must cover a voxel for
            it to be set in the mask (default 1, the previously hard-coded
            value).

    Returns:
        Boolean array with the same shape as ``hu_a``.

    Raises:
        KeyError: If no scan with series instance UID ``suid`` is found.
    """
    # Fetch only the requested series instead of loading every Scan row on
    # each call just to pick one of them out of a dict.
    scan = (
        pylidc.query(pylidc.Scan)
        .filter(pylidc.Scan.series_instance_uid == suid)
        .first()
    )
    if scan is None:
        # Same exception type the former dict lookup raised for unknown UIDs.
        raise KeyError(suid)

    # A small unsigned counter is ample for per-voxel vote counts and is far
    # lighter than a volume-sized int64 array.
    votes = np.zeros(hu_a.shape, dtype=np.uint16)

    for cluster in scan.cluster_annotations():
        if len(cluster) < min_annotations:
            continue
        for ann in cluster:
            # Row 0 holds the box minima and row 1 the maxima; rolling the
            # columns moves the last axis to the front so the order matches
            # the transposed mask below.
            bounds = np.roll(ann.bbox_matrix().T, shift=1, axis=1)
            start = bounds[0]
            stop = bounds[1] + 1  # maxima are treated as inclusive
            region = tuple(slice(lo, hi) for lo, hi in zip(start, stop))
            # Bring the mask's last axis to the front to match `region`.
            votes[region] += np.transpose(ann.boolean_mask(), (2, 0, 1))

    return votes >= min_votes

In [4]:
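# Split each scan into 5-slice windows: the two slices on either side are the input, the middle slice is the target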

def split_data(ct_scan, save_dir, filename_prefix, index_array):
    """Export 5-slice windows (4 context slices + 1 target) around given slices.

    For every usable index ``i`` in ``index_array`` a directory
    ``<save_dir>/<filename_prefix>_<i>`` is created containing:

    * ``train.npy``      - slices i-2, i-1, i+1, i+2 of the clamped scan
    * ``val.npy``        - slice i of the clamped scan
    * ``train_mask.npy`` - the same four neighbour slices of the mask
    * ``val_mask.npy``   - slice i of the mask

    Args:
        ct_scan: 3-D array indexed by slice along axis 0. It is not modified.
        save_dir: Directory under which the per-slice folders are created.
        filename_prefix: Scan identifier; used both as the folder-name prefix
            and as the UID passed to ``masks_build``.
        index_array: Iterable of centre-slice indices. Indices without two
            neighbours on each side are skipped with a printed notice instead
            of wrapping around to the other end of the volume or raising
            IndexError.
    """
    num_slices = ct_scan.shape[0]

    # masks_build only uses the volume's shape, so no integer copy is made.
    masks = masks_build(filename_prefix, ct_scan)

    # Clamp intensities to [-1000, 1000] on a copy so the caller's array is
    # left untouched.
    clipped = np.clip(ct_scan, -1000, 1000)

    for i in index_array:
        # A window needs slices i-2 .. i+2 to exist. Negative offsets would
        # otherwise silently index from the end of the volume.
        if not 2 <= i < num_slices - 2:
            print(
                f"Skipping slice {i} of {filename_prefix}: "
                f"no two neighbours on each side (scan has {num_slices} slices)"
            )
            continue

        neighbours = [i - 2, i - 1, i + 1, i + 2]

        save_path = os.path.join(save_dir, f"{filename_prefix}_{i}")
        os.makedirs(save_path, exist_ok=True)

        # Neighbouring slices are the input, the middle slice is the label.
        np.save(os.path.join(save_path, "train_mask.npy"), masks[neighbours])
        np.save(os.path.join(save_path, "val_mask.npy"), masks[i])
        np.save(os.path.join(save_path, "train.npy"), clipped[neighbours])
        np.save(os.path.join(save_path, "val.npy"), clipped[i])

def main(read_root="../Luna16_data/Luna16_img",
         save_root="../Luna16_data/split4_mask_data_nodule",
         nodule_csv="z4_all_nodule.csv"):
    """Export slice windows for every scan listed in the nodule CSV.

    Walks each sub-directory of ``read_root``, and for every ``.mhd`` file
    whose base name appears in the CSV's ``UID`` column, loads the volume and
    hands it to ``split_data`` together with that UID's list of ``NDX``
    values.

    Args:
        read_root: Directory whose sub-directories contain the ``.mhd`` files.
        save_root: Output directory; created if it does not exist.
        nodule_csv: CSV file with at least the columns ``UID`` and ``NDX``.
    """
    df_nodules = pd.read_csv(nodule_csv)

    # Map each UID to the list of its NDX values.
    uid_ndx_dict = df_nodules.groupby('UID')['NDX'].agg(list).to_dict()

    os.makedirs(save_root, exist_ok=True)

    # Sorted so progress output and processing order are reproducible.
    for sub_dir in sorted(os.listdir(read_root)):
        sub_dir_path = os.path.join(read_root, sub_dir)
        # Stray files next to the subset folders would make listdir() raise.
        if not os.path.isdir(sub_dir_path):
            continue
        print(f"Processing {sub_dir}...")

        for file_name in sorted(os.listdir(sub_dir_path)):
            uid, ext = os.path.splitext(file_name)
            if ext != ".mhd" or uid not in uid_ndx_dict:
                continue

            image = sitk.ReadImage(os.path.join(sub_dir_path, file_name))
            ct_scan = sitk.GetArrayFromImage(image)

            # Extract and save the slice windows for this scan.
            split_data(ct_scan, save_root, uid, uid_ndx_dict[uid])

# Entry point: run the full export. Inside a notebook kernel __name__ is
# "__main__", so executing this cell starts processing immediately.
if __name__ == "__main__":
    main()

Processing subset0...
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Processing subset1...
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Processing subset2...
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Processing subset3...
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Processing subset4...
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Processing subset5...
Processing subset6...
Failed to reduce all groups to <= 4 Annotations.
Some nodules may be close and must be grouped manually.
Failed