In [56]:
import os 
import glob 
import csv 

import SimpleITK as sitk

# Column names assigned, in order, to the fields of each non-comment row of a
# .fcsv file (passed as `fieldnames` to csv.DictReader below). Only "x", "y"
# and "z" are read by this notebook.
HEADER = ["id", "x", "y", "z", "ow", "ox", "oy", "oz", "vis", "sel", "lock", "label", "desc", "associatedNodeID"]

In [57]:
# Directory that is searched for the SC fiducial (.fcsv) files.
BASE_PATH = "/data/shastra1/Data_zara/SC_Zara"
# Every *.fcsv file directly inside BASE_PATH. glob does not sort its result,
# so the order of this list (and of the records built from it) is arbitrary.
all_files = glob.glob(os.path.join(BASE_PATH, "*.fcsv"))
print(all_files)

['/data/shastra1/Data_zara/SC_Zara/SC_001.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_032.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_002.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_038.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_003.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_005.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_006.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_007.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_039.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_008.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_009.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_010.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_011.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_040.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_012.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_013.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_041.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_014.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_015.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_016.fcsv', '/data/shastra1/Data_zara/SC_Zara/SC_04

In [59]:
def get_image_path(csv_file_path):
    """Derive the image (.nrrd) path that belongs to a fiducial file path.

    The returned name always ends in " .nrrd" -- note the space in front of
    the extension; it is part of the string this function produces.

    * Path containing a space: everything from the first space onward is
      discarded and " .nrrd" is appended to what remains.
    * Path without a space: ".fcsv" is replaced by " .nrrd".

    The space test looks at the whole path string, not only the file name.
    """
    if " " in csv_file_path:
        # Keep only the part in front of the first space.
        leading_part = csv_file_path.split(" ")[0]
        return leading_part + " .nrrd"
    return csv_file_path.replace(".fcsv", " .nrrd")

def get_seg_file_path(csv_file_path):
    """Derive the segmentation (.nrrd) path that belongs to a fiducial file path.

    * Path without a space: ".fcsv" is replaced by "_segment.nrrd".
    * Path of the form "<base> <idx>.fcsv": the result is
      "<base>_segment<idx + 1>.nrrd", i.e. the integer after the space is
      incremented by one.

    Raises:
        ValueError: if the path (with ".fcsv" removed) does not split into
            exactly two parts on spaces, or the second part is not an integer.
    """
    if " " in csv_file_path:
        without_ext = csv_file_path.replace(".fcsv", "")
        stem, fiducial_idx = without_ext.split(" ")
        # The segment number is one higher than the number in the file name.
        return stem + "_segment" + str(int(fiducial_idx) + 1) + ".nrrd"
    return csv_file_path.replace(".fcsv", "_segment.nrrd")

# Build one record per fiducial (.fcsv) file in `all_files`: the x/y/z of the
# FIRST data row of the file, the derived image and segmentation paths, and a
# constant label of 1.
sc_nodules = []
for file_path in all_files:
    nodule_id = os.path.basename(file_path)
    # Read inside a context manager so the handle is closed once the row has
    # been parsed (a bare open() here would leak one handle per file).
    with open(file_path) as fcsv_file:
        # Lines starting with '#' are dropped; the remaining rows are parsed
        # with HEADER as the column names.
        # NOTE(review): the discarded '#' lines may carry metadata such as the
        # coordinate system of the points -- confirm that x/y/z taken here use
        # the same convention as the centroids computed from segmentations.
        reader = csv.DictReader(
            (line for line in fcsv_file if not line.startswith("#")),
            fieldnames=HEADER,
        )
        fiducial = next(reader, None)
    if fiducial is None:
        # Fail with a message naming the file instead of a bare StopIteration.
        raise ValueError(f"no fiducial row found in {file_path}")
    # Coordinates are kept as the strings read from the file.
    nodule_dict = {
        k: fiducial[k] for k in ["x", "y", "z"]
    }
    nodule_dict["nodule_id"] = nodule_id.replace(".fcsv", "").replace(" ", "_")
    nodule_dict["image_file"] = get_image_path(file_path)
    # For these two cases every space is stripped from the derived image path
    # (presumably their image files are named without the space -- verify on disk).
    if "SC_028" in nodule_dict["image_file"] or "SC_128" in nodule_dict["image_file"]:
        nodule_dict["image_file"] = nodule_dict["image_file"].replace(" ", "")
    nodule_dict["seg_file"] = get_seg_file_path(file_path)
    nodule_dict["label"] = 1
    sc_nodules.append(nodule_dict)


In [60]:
# NOTE: BASE_PATH and all_files are rebound here, replacing the values that
# pointed at the .fcsv files. Re-running the earlier .fcsv loop after this cell
# would therefore iterate over segmentation files instead.
BASE_PATH = "/data/shastra1/Data_zara/NSCLC_Zara/"
# Every file directly inside BASE_PATH whose name contains "segment" and ends in "nrrd".
all_files = glob.glob(os.path.join(BASE_PATH, "*segment*nrrd"))

def get_center_from_seg(seg_file_path):
    """Return the centroid of the value-1 region of a segmentation file.

    The file is read with SimpleITK, thresholded so that only voxels whose
    value is exactly 1 are selected, and the centroid of label 1 in that mask
    is taken from a LabelShapeStatisticsImageFilter.

    Args:
        seg_file_path: path of the segmentation image to read.

    Returns:
        Whatever ``GetCentroid(1)`` reports for the mask (the caller unpacks
        it into three values; presumably physical-space coordinates -- confirm
        against the SimpleITK documentation).
    """
    label_image = sitk.ReadImage(seg_file_path)
    # lower == upper == 1 selects exactly the voxels equal to 1.
    # NOTE(review): GetCentroid(1) below relies on the threshold marking
    # selected voxels with the value 1 (its default inside value -- confirm).
    mask = sitk.BinaryThreshold(label_image, lowerThreshold=1, upperThreshold=1)
    shape_stats = sitk.LabelShapeStatisticsImageFilter()
    shape_stats.Execute(mask)
    return shape_stats.GetCentroid(1)

def get_nodule_id(seg_file_path):
    """Build a nodule id from a segmentation file path.

    The file name is split at "segment"; the id is the part in front of it
    followed by the segment number that comes right after it, e.g.
    ".../NSCLC_001_segment12.nrrd" -> "NSCLC_001_12".

    The whole run of leading digits is used. Taking only the first character
    would map "segment1" and "segment10".."segment19" to the same id.

    When no digit follows "segment", the first character of the remainder is
    appended instead, so ".../X_segment.nrrd" yields "X_.".
    NOTE(review): that fallback keeps the ids this function has always
    produced for such names; decide whether the trailing "." is wanted.

    Raises:
        ValueError: if "segment" does not occur exactly once in the file name.
        IndexError: if nothing follows "segment" in the file name.
    """
    pre, suff = os.path.basename(seg_file_path).split("segment")
    # Collect the full leading digit run of the remainder.
    digits = ""
    for ch in suff:
        if ch not in "0123456789":
            break
        digits += ch
    return pre + (digits or suff[0])


# One record per segmentation file in `all_files`: the centroid of the
# segmentation, the paths of the segmentation and its image, an id derived
# from the file name, and a constant label of 0.
nsclc_nodules = []

for seg_file_path in all_files:
    cx, cy, cz = get_center_from_seg(seg_file_path)
    # Image path = segmentation path with its last "_"-separated piece dropped.
    image_stem = "_".join(seg_file_path.split("_")[:-1])
    nsclc_nodules.append(
        dict(
            x=cx,
            y=cy,
            z=cz,
            label=0,
            seg_file=seg_file_path,
            image_file=image_stem + ".nrrd",
            nodule_id=get_nodule_id(seg_file_path),
        )
    )


In [61]:
# Combined record list: the label-1 records first, then the label-0 records.
all_nodules = [*sc_nodules, *nsclc_nodules]

In [62]:
# Check before opening the output: opening in "w" mode truncates the file, so
# failing afterwards would leave an empty CSV behind.
if not all_nodules:
    raise ValueError("all_nodules is empty; nothing to write")

# Column order follows the keys of the first record.
fieldnames = list(all_nodules[0].keys())

# newline="" is what the csv module asks for on files it writes to, so that
# its own line terminators are not translated a second time.
with open("/data/kaplinsp/zara_dataset.csv", "w", newline="") as fp:
    writer = csv.DictWriter(fp, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(all_nodules)

# Trailing expression so the notebook actually displays the record count
# (as a statement in the middle of the cell it showed nothing).
len(all_nodules)