In [1]:
import settings
import helpers
import SimpleITK  # conda install -c https://conda.anaconda.org/simpleitk SimpleITK
import numpy
import pandas
import ntpath
import cv2  # conda install -c https://conda.anaconda.org/menpo opencv3
import shutil
import random
import math
import multiprocessing
from bs4 import BeautifulSoup #  conda install beautifulsoup4, coda install lxml
import os
import glob



('User: ', 'mahui')


In [5]:
def find_mhd_file(patient_id):
    """Locate the raw ``.mhd`` file whose path contains ``patient_id``.

    The ``train_subsetNN`` folders under ``settings.LUNA16_RAW_SRC_DIR`` are
    searched in ascending order, starting at
    ``settings.LUNA_SUBSET_START_INDEX`` and stopping before 15.  The test is
    a plain substring match against the whole globbed path.

    :param patient_id: text to look for in the candidate paths.
    :return: the first matching path, or None when no subset contains one.
    """
    for subset_no in range(settings.LUNA_SUBSET_START_INDEX, 15):
        subset_dir = settings.LUNA16_RAW_SRC_DIR + "train_subset" + str(subset_no).zfill(2) + "/"
        hits = [mhd_path for mhd_path in glob.glob(subset_dir + "*.mhd") if patient_id in mhd_path]
        if hits:
            return hits[0]
    return None



def normalize(image):
    """Map intensities from the [-1000, 400] window linearly onto [0, 1].

    -1000 becomes 0 and 400 becomes 1; values that land outside that range
    are clamped.  The argument is not modified because the arithmetic
    allocates a new array.

    :param image: numpy array of intensities (the caller in this file passes
        one slice of a scan; presumably Hounsfield units -- TODO confirm).
    :return: new array with the rescaled, clamped values.
    """
    lower_bound, upper_bound = -1000.0, 400.0
    window_width = upper_bound - lower_bound
    scaled = (image - lower_bound) / window_width
    # Clamp both tails; the two masks can never select the same element.
    scaled[scaled < 0] = 0.
    scaled[scaled > 1] = 1.
    return scaled


def process_image(src_path):
    """Extract one scan into per-slice PNG images plus lung masks.

    The volume at ``src_path`` is read with SimpleITK, passed through
    ``helpers.rescale_patient_images`` with ``settings.TARGET_VOXEL_MM`` and
    written slice by slice into
    ``settings.LUNA16_EXTRACTED_IMAGE_DIR + patient_id + "/"``:

    * ``img_NNNN_i.png`` -- the slice after ``normalize``, multiplied by 255
    * ``img_NNNN_m.png`` -- the mask returned by
      ``helpers.get_segmented_lungs``, multiplied by 255

    Origin, direction, spacing and the rescale factor are only printed.

    :param src_path: path of the ``.mhd`` file; its basename without the
        extension is used as the patient id and output folder name.
    :return: None
    """
    patient_id = ntpath.basename(src_path).replace(".mhd", "")
    print("Patient: ", patient_id)

    dst_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + patient_id + "/"
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    print("Img array: ", img_array.shape)

    origin = numpy.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)

    direction = numpy.array(itk_img.GetDirection())  # flattened direction matrix (9 values for these scans)
    print("Direction: ", direction)

    spacing = numpy.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
    print("Spacing (x,y,z): ", spacing)
    rescale = spacing / settings.TARGET_VOXEL_MM
    print("Rescale: ", rescale)

    img_array = helpers.rescale_patient_images(img_array, spacing, settings.TARGET_VOXEL_MM)

    for i in range(img_array.shape[0]):
        img = img_array[i]
        # The helper works on a copy so the slice saved below is the
        # unsegmented one.  Only the mask is kept: the segmented image used
        # to be collected in a list that nothing ever read, which held the
        # whole volume in memory a second time.
        _, mask = helpers.get_segmented_lungs(img.copy())
        img = normalize(img)
        slice_prefix = dst_dir + "img_" + str(i).rjust(4, '0')
        cv2.imwrite(slice_prefix + "_i.png", img * 255)
        cv2.imwrite(slice_prefix + "_m.png", mask * 255)

def process_luna_candidates_patient(src_path, patient_id, pos_annos_manual=None):
    """Write the negative (class 0) LUNA16 candidates of one patient to a CSV.

    Candidates are read from
    ``resources/luna16_annotations/candidates_V2.csv`` and filtered to this
    patient and to ``class == 0``.  Each candidate's world coordinate is
    shifted by the scan origin and divided by ``settings.TARGET_VOXEL_MM``.
    A candidate is dropped when its distance to a positive annotation is
    smaller than that annotation's diameter plus a margin (64 for the LIDC
    annotations, 72 for the manual ones).  The remaining candidates are
    written to ``<labels dir>/<patient_id>_candidates_luna.csv`` with the
    coordinates expressed as fractions of the extracted volume size.

    :param src_path: path of the patient's ``.mhd`` file (used for origin,
        spacing and direction).
    :param patient_id: series uid; selects the candidate rows, the
        ``<patient_id>_annos_pos_lidc.csv`` input and the output file name.
    :param pos_annos_manual: optional DataFrame of extra positive
        annotations with fractional columns ``x``, ``y``, ``z`` and ``d``.
        The body used to read this name without ever defining it, which
        raised NameError on the first candidate; it is now an explicit
        argument and the manual check is skipped when it is None.
    :return: None
    :raises AssertionError: when the direction values do not sum to 3 after
        the x/y flips have been applied.
    """
    dst_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR + "/_labels/"
    # Ensure the label directory exists before anything is read from it
    # (the check used to run only after the read below had already succeeded).
    if not os.path.exists(dst_dir):
        os.mkdir(dst_dir)
    df_pos_annos = pandas.read_csv(dst_dir + patient_id + "_annos_pos_lidc.csv")

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    print("Img array: ", img_array.shape)
    print("Pos annos: ", len(df_pos_annos))

    origin = numpy.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm)
    print("Origin (x,y,z): ", origin)
    spacing = numpy.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
    print("Spacing (x,y,z): ", spacing)
    rescale = spacing / settings.TARGET_VOXEL_MM
    print("Rescale: ", rescale)

    direction = numpy.array(itk_img.GetDirection())  # flattened direction matrix; [0] is the x term, [4] the y term
    print("Direction: ", direction)
    flip_direction_x = False
    flip_direction_y = False
    # A -1 on the diagonal means that axis is mirrored: negate the origin
    # here and the candidate coordinate later so both agree.
    if round(direction[0]) == -1:
        origin[0] *= -1
        direction[0] = 1
        flip_direction_x = True
        print("Swappint x origin")
    if round(direction[4]) == -1:
        origin[1] *= -1
        direction[4] = 1
        flip_direction_y = True
        print("Swappint y origin")
    print("Direction: ", direction)
    assert abs(sum(direction) - 3) < 0.01

    src_df = pandas.read_csv("resources/luna16_annotations/" + "candidates_V2.csv")
    src_df = src_df[src_df["seriesuid"] == patient_id]
    src_df = src_df[src_df["class"] == 0]
    patient_imgs = helpers.load_patient_images(patient_id, settings.LUNA16_EXTRACTED_IMAGE_DIR, "*_i.png")

    # Volume extents; the original indexed shape[2] for x, shape[1] for y
    # and shape[0] for z.
    size_z, size_y, size_x = patient_imgs.shape
    size_xyz = patient_imgs.swapaxes(0, 2).shape

    # The LIDC annotations do not depend on the candidate, so scale them to
    # volume coordinates once instead of once per candidate row.
    lidc_zones = []
    for _, row in df_pos_annos.iterrows():
        lidc_zones.append((
            row["coord_x"] * size_x,
            row["coord_y"] * size_y,
            row["coord_z"] * size_z,
            row["diameter"] * size_x + 64,  # make sure we have a big margin
        ))

    candidate_diameter = 6
    candidate_list = []

    for _, candidate_row in src_df.iterrows():
        node_x = candidate_row["coordX"]
        if flip_direction_x:
            node_x *= -1
        node_y = candidate_row["coordY"]
        if flip_direction_y:
            node_y *= -1
        node_z = candidate_row["coordZ"]

        center_float = numpy.array([node_x, node_y, node_z])
        center_float_rescaled = (center_float - origin) / settings.TARGET_VOXEL_MM
        center_float_percent = center_float_rescaled / size_xyz
        coord_x = center_float_rescaled[0]
        coord_y = center_float_rescaled[1]
        coord_z = center_float_rescaled[2]

        ok = True

        for pos_coord_x, pos_coord_y, pos_coord_z, min_dist in lidc_zones:
            dist = math.sqrt(math.pow(pos_coord_x - coord_x, 2) + math.pow(pos_coord_y - coord_y, 2) + math.pow(pos_coord_z - coord_z, 2))
            if dist < min_dist:
                ok = False
                print("################### Too close", (coord_x, coord_y, coord_z))
                break

        if pos_annos_manual is not None and ok:
            for _, row in pos_annos_manual.iterrows():
                pos_coord_x = row["x"] * size_x
                pos_coord_y = row["y"] * size_y
                pos_coord_z = row["z"] * size_z
                diameter = row["d"] * size_x
                print((pos_coord_x, pos_coord_y, pos_coord_z))
                print(center_float_rescaled)
                dist = math.sqrt(math.pow(pos_coord_x - coord_x, 2) + math.pow(pos_coord_y - coord_y, 2) + math.pow(pos_coord_z - coord_z, 2))
                if dist < (diameter + 72):  #  make sure we have a big margin
                    ok = False
                    print("################### Too close", center_float_rescaled)
                    break

        if not ok:
            continue

        # float() matters on Python 2: int / int floors, which stored a
        # diameter of 0 for every candidate.
        # NOTE(review): this divides by the z extent while the annotation
        # diameters above are scaled by the x extent -- confirm which is intended.
        diameter_percent = float(candidate_diameter) / size_z
        candidate_list.append([
            len(candidate_list),
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(diameter_percent, 4),
            0,
        ])

    df_candidates = pandas.DataFrame(candidate_list, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "malscore"])
    df_candidates.to_csv(dst_dir + patient_id + "_candidates_luna.csv", index=False)


def process_images(delete_existing=False, only_process_patient=None):
    """Run ``process_image`` over the ``.mhd`` files of every raw subset.

    Subset folders ``train_subsetNN`` under ``settings.LUNA16_RAW_SRC_DIR``
    are visited from ``settings.LUNA_SUBSET_START_INDEX`` up to (not
    including) 15.

    :param delete_existing: when true, remove
        ``settings.LUNA16_EXTRACTED_IMAGE_DIR`` (if present) before starting.
    :param only_process_patient: when None, each subset is processed by a
        pool of 6 worker processes.  Otherwise the files are handled one by
        one in this process and only paths containing this text are
        processed.
    :return: None
    """
    extracted_dir = settings.LUNA16_EXTRACTED_IMAGE_DIR
    if delete_existing and os.path.exists(extracted_dir):
        print("Removing old stuff..")
        shutil.rmtree(extracted_dir)

    # Check the two directories independently: "_labels/" used to be created
    # only when the parent directory was missing as well.
    for required_dir in (extracted_dir, extracted_dir + "_labels/"):
        if not os.path.exists(required_dir):
            os.mkdir(required_dir)

    for subject_no in range(settings.LUNA_SUBSET_START_INDEX, 15):
        src_dir = settings.LUNA16_RAW_SRC_DIR  + "train_subset" + str(subject_no).zfill(2) + "/"
        src_paths = glob.glob(src_dir + "*.mhd")

        if only_process_patient is None:
            if not src_paths:
                continue  # nothing to do, do not start workers for it
            pool = multiprocessing.Pool(6)
            try:
                pool.map(process_image, src_paths)
            finally:
                # map() has returned or raised, so no result is pending.
                # Release the workers now; the pool of each subset used to be
                # left alive, adding 6 more processes per iteration.
                pool.terminate()
                pool.join()
        else:
            for src_path in src_paths:
                print(src_path)
                if only_process_patient not in src_path:
                    continue
                process_image(src_path)




def process_luna_candidates_patients(only_patient_id=None):
    """Run ``process_luna_candidates_patient`` for the scans in every subset.

    Subset folders ``train_subsetNN`` under ``settings.LUNA16_RAW_SRC_DIR``
    are visited from ``settings.LUNA_SUBSET_START_INDEX`` up to (not
    including) 15.  The patient id is the ``.mhd`` basename without its
    extension.

    :param only_patient_id: when given, only the scan whose patient id equals
        this value is processed; None processes all of them.
    :return: None
    """
    for subset_no in range(settings.LUNA_SUBSET_START_INDEX, 15):
        subset_dir = settings.LUNA16_RAW_SRC_DIR  + "train_subset" + str(subset_no).zfill(2) + "/"
        mhd_paths = glob.glob(subset_dir + "*.mhd")
        # patient_index counts every file of the subset, skipped ones included.
        for patient_index, mhd_path in enumerate(mhd_paths):
            patient_id = ntpath.basename(mhd_path).replace(".mhd", "")
            selected = only_patient_id is None or patient_id == only_patient_id
            if selected:
                print("Patient: ", patient_index, " ", patient_id)
                process_luna_candidates_patient(mhd_path, patient_id)

In [None]:
# Extract PNG slices and lung masks for every .mhd scan in the subset folders.
# With the default arguments nothing is deleted first and each subset is
# handed to a multiprocessing pool.
process_images()

('Patient: ', 'LKDS-00001')
('Patient: ', 'LKDS-00013')
('Patient: ', 'LKDS-00004')
('Patient: ', 'LKDS-00007')
('Patient: ', 'LKDS-00016')
('Patient: ', 'LKDS-00020')
('Img array: ', (221, 512, 512))
('Origin (x,y,z): ', array([-170.5, -170. ,   -5. ]))
('Direction: ', array([ 1.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  1.]))
('Spacing (x,y,z): ', array([ 0.66406202,  0.66406202,  1.25      ]))
('Rescale: ', array([ 0.66406202,  0.66406202,  1.25      ]))
('Img array: ', (281, 512, 512))
('Origin (x,y,z): ', array([-207.5, -206.5,   49.5]))
('Direction: ', array([ 1.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  1.]))
('Spacing (x,y,z): ', array([ 0.80664098,  0.80664098,  1.25      ]))
('Rescale: ', array([ 0.80664098,  0.80664098,  1.25      ]))
('Img array: ', (325, 512, 512))
('Origin (x,y,z): ', array([-174.,  -12.,   23.]))
('Direction: ', array([ 1.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  1.]))
('Spacing (x,y,z): ', array([ 0.68359399,  0.68359399,  1.        ]))
('Rescale: ', array([ 0.683593

('Rescale: ', array([ 0.68359399,  0.68359399,  1.        ]))
('Patient: ', 'LKDS-00051')
('Img array: ', (336, 512, 512))
('Origin (x,y,z): ', array([-171. ,  -10. ,  299.2]))
('Direction: ', array([ 1.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  1.]))
('Spacing (x,y,z): ', array([ 0.68359399,  0.68359399,  1.        ]))
('Rescale: ', array([ 0.68359399,  0.68359399,  1.        ]))
('Img array: ', (461, 512, 512))
('Origin (x,y,z): ', array([-164., -180.,   37.]))
('Direction: ', array([ 1.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  1.]))
('Spacing (x,y,z): ', array([ 0.703125,  0.703125,  0.625   ]))
('Rescale: ', array([ 0.703125,  0.703125,  0.625   ]))
('Patient: ', 'LKDS-00053')
('Img array: ', (291, 512, 512))
('Origin (x,y,z): ', array([-178. ,  -25. ,  261.8]))
('Direction: ', array([ 1.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  1.]))
('Spacing (x,y,z): ', array([ 0.68359399,  0.68359399,  1.        ]))
('Rescale: ', array([ 0.68359399,  0.68359399,  1.        ]))
('Patient: ', 'LKDS-00042')
('

Process PoolWorker-63:
Process PoolWorker-64:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/mahui/anaconda/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
  File "/Users/mahui/anaconda/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
    self.run()
  File "/Users/mahui/anaconda/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/mahui/anaconda/lib/python2.7/multiprocessing/process.py", line 114, in run
  File "/Users/mahui/anaconda/lib/python2.7/multiprocessing/pool.py", line 102, in worker
    task = get()
    self._target(*self._args, **self._kwargs)
Process PoolWorker-62:
  File "/Users/mahui/anaconda/lib/python2.7/multiprocessing/queues.py", line 376, in get
  File "/Users/mahui/anaconda/lib/python2.7/multiprocessing/pool.py", line 102, in worker
Traceback (most recent call last):
    racquire()
  File "/Users/mahui/anaconda

In [None]:
# Write the negative-candidate CSV for each patient; only_patient_id=None
# means no patient is filtered out.
process_luna_candidates_patients(only_patient_id=None)

