In [1]:
from utils.imports import *

Using TensorFlow backend.


In [2]:
def find_mhd_file(data_path,patient_id):
    """Locate the first ``.mhd`` file whose path contains ``patient_id``.

    Directories matching ``<data_path>*_subsetNN/`` are searched for NN running
    from the module-level ``LUNA_SUBSET_START_INDEX`` up to 14, in that order.
    The match is a plain substring test against the whole globbed path.

    Returns the matching path, or ``None`` when no subset contains one.
    """
    # One glob pattern per subset directory, produced lazily in scan order.
    subset_patterns = (
        data_path + "*_subset" + str(subset_no).zfill(2) + "/" + "*.mhd"
        for subset_no in range(LUNA_SUBSET_START_INDEX, 15)
    )
    mhd_candidates = (
        mhd_path
        for pattern in subset_patterns
        for mhd_path in glob.glob(pattern)
    )
    # next() stops at the first hit, so later subsets are not globbed needlessly.
    return next((mhd_path for mhd_path in mhd_candidates if patient_id in mhd_path), None)


def normalize(image, min_bound=-1000.0, max_bound=400.0):
    """Linearly map intensities from ``[min_bound, max_bound]`` onto ``[0, 1]``.

    Values at or below ``min_bound`` become 0 and values at or above
    ``max_bound`` become 1. The defaults reproduce the window that used to be
    hard-coded here (-1000 .. 400), so existing ``normalize(image)`` calls
    behave exactly as before.

    Args:
        image: numpy array of raw intensities. It is not modified; the
            subtraction below allocates a new array.
        min_bound: intensity mapped to 0.
        max_bound: intensity mapped to 1; must be greater than ``min_bound``.

    Returns:
        A new array of the same shape with values clamped to ``[0, 1]``.

    Raises:
        ValueError: if ``max_bound`` is not strictly greater than ``min_bound``
            (the scaling would divide by zero or invert the range).
    """
    if max_bound <= min_bound:
        raise ValueError(
            "max_bound (%r) must be greater than min_bound (%r)" % (max_bound, min_bound)
        )
    scaled = (image - min_bound) / (max_bound - min_bound)
    # Clamp in place on the freshly allocated result, never on the caller's array.
    scaled[scaled > 1] = 1.
    scaled[scaled < 0] = 0.
    return scaled


def process_image(src_path):
    """Convert one ``.mhd`` scan into per-slice PNG files.

    The scan is read with SimpleITK, resampled through
    ``helpers.rescale_patient_images`` using the scan's voxel spacing and the
    module-level ``TARGET_VOXEL_MM``, and then written slice by slice into
    ``pic_path + <patient_id> + "/"``:

    * ``img_NNNN_i.png`` - the slice after ``normalize`` scaled by 255
    * ``img_NNNN_m.png`` - the mask returned by ``helpers.get_segmented_lungs``
      scaled by 255

    Args:
        src_path: path to the ``.mhd`` header; the file name without the
            extension is used as the patient id.

    Returns:
        None. The function is used for its side effect of writing files.
    """
    import os  # local import: this block did not previously reference ``os``

    patient_id = ntpath.basename(src_path).replace(".mhd", "")
    dst_dir = pic_path + patient_id + "/"
    # cv2.imwrite signals failure through its return value instead of raising,
    # so without this a missing directory would silently drop every slice.
    # exist_ok keeps concurrent workers from tripping over each other.
    os.makedirs(dst_dir, exist_ok=True)

    itk_img = SimpleITK.ReadImage(src_path)
    img_array = SimpleITK.GetArrayFromImage(itk_img)
    spacing = numpy.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
    img_array = helpers.rescale_patient_images(img_array, spacing, TARGET_VOXEL_MM)

    for i in range(img_array.shape[0]):
        img = img_array[i]
        # A copy is passed so the segmentation helper cannot alter the slice
        # that is normalised and saved below; only the mask is kept.
        _, mask = helpers.get_segmented_lungs(img.copy())
        img = normalize(img)
        slice_prefix = dst_dir + "img_" + str(i).rjust(4, '0')
        cv2.imwrite(slice_prefix + "_i.png", img * 255)
        cv2.imwrite(slice_prefix + "_m.png", mask * 255)


def process_pos_annotations_patient(src_path, label_path,csv_path, patient_id):
    """Write one patient's positive annotations as fractional volume coordinates.

    Rows of ``csv_path + "annotations.csv"`` whose ``seriesuid`` equals
    ``patient_id`` are converted from world coordinates (mm) into fractions of
    the image stack loaded by ``helpers.load_patient_images`` from the
    module-level ``pic_path``. They are saved to
    ``label_path + patient_id + "_annos_pos.csv"`` with the columns
    ``anno_index, coord_x, coord_y, coord_z, diameter, malscore``
    (``malscore`` is always 1).

    Args:
        src_path: path to the patient's ``.mhd`` file (read for origin,
            spacing and direction only).
        label_path: directory prefix for the output CSV.
        csv_path: directory prefix containing ``annotations.csv``.
        patient_id: series uid used to filter annotations and name the output.

    Returns:
        ``[patient_id, spacing_x, spacing_y, spacing_z]``.

    Raises:
        AssertionError: if the direction entries do not sum to ~3 after the
            x/y flips have been normalised.
    """
    df_node = pandas.read_csv(csv_path + "annotations.csv")
    df_patient = df_node[df_node["seriesuid"] == patient_id]

    itk_img = SimpleITK.ReadImage(src_path)
    origin = numpy.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm)
    spacing = numpy.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
    direction = numpy.array(itk_img.GetDirection())  # 9 values; indices 0 and 4 are the x and y axis entries

    # A -1 entry means the axis is stored mirrored. Negate the origin component
    # here and the annotation coordinate below so offsets are taken as if the
    # axis were positively oriented.
    flip_direction_x = False
    flip_direction_y = False
    if round(direction[0]) == -1:
        origin[0] *= -1
        direction[0] = 1
        flip_direction_x = True
    if round(direction[4]) == -1:
        origin[1] *= -1
        direction[4] = 1
        flip_direction_y = True
    # After normalising the flips, anything other than a total of ~3 is unsupported.
    assert abs(sum(direction) - 3) < 0.01

    # assumes helpers.load_patient_images returns a (z, y, x) stack - TODO confirm
    patient_imgs = helpers.load_patient_images(patient_id, pic_path, "*_i.png")

    pos_annos = []
    for anno_index, (_, annotation) in enumerate(df_patient.iterrows()):
        node_x = annotation["coordX"]
        if flip_direction_x:
            node_x = -node_x
        node_y = annotation["coordY"]
        if flip_direction_y:
            node_y = -node_y
        node_z = annotation["coordZ"]
        diam_mm = annotation["diameter_mm"]

        center_float = numpy.array([node_x, node_y, node_z])
        # Offset from the origin in mm -> voxel units of the rescaled volume.
        center_float_rescaled = (center_float - origin) / TARGET_VOXEL_MM
        # swapaxes(0, 2) reverses the stack's axis order to line up with x,y,z.
        center_float_percent = center_float_rescaled / patient_imgs.swapaxes(0, 2).shape
        diameter_pixels = diam_mm / TARGET_VOXEL_MM
        diameter_percent = diameter_pixels / float(patient_imgs.shape[1])

        pos_annos.append([
            anno_index,
            round(center_float_percent[0], 4),
            round(center_float_percent[1], 4),
            round(center_float_percent[2], 4),
            round(diameter_percent, 4),
            1,
        ])

    df_annos = pandas.DataFrame(pos_annos, columns=["anno_index", "coord_x", "coord_y", "coord_z", "diameter", "malscore"])
    df_annos.to_csv(label_path + patient_id + "_annos_pos.csv", index=False)
    return [patient_id, spacing[0], spacing[1], spacing[2]]



def process_images(delete_existing=False, only_process_patient=None, num_workers=6):
    """Run ``process_image`` over every ``.mhd`` file in the selected subsets.

    Subset directories ``<data_path>*_subsetNN/`` are visited for NN from the
    module-level ``LUNA_SUBSET_START_INDEX`` up to 14.

    Args:
        delete_existing: when true and ``pic_path`` exists, remove that whole
            directory tree before processing.
        only_process_patient: ``None`` processes every scan of a subset in
            parallel. Otherwise only paths containing this substring are
            processed, serially, and every globbed path is printed first.
        num_workers: size of the worker pool used in the parallel mode
            (default 6, the previously hard-coded value).

    Returns:
        None.
    """
    if delete_existing and os.path.exists(pic_path):
        print("Removing old stuff..")
        shutil.rmtree(pic_path)

    for subject_no in range(LUNA_SUBSET_START_INDEX, 15):
        src_dir = data_path + "*_subset" + str(subject_no).zfill(2) + "/"
        src_paths = glob.glob(src_dir + "*.mhd")

        if only_process_patient is None:
            if not src_paths:
                continue  # nothing to do; avoid spawning idle workers
            # The pool used to be created once per subset and never released,
            # leaking worker processes. pool.map has collected all results
            # before the with-block exits, so tearing the workers down is safe.
            with multiprocessing.Pool(num_workers) as pool:
                pool.map(process_image, src_paths)
        else:
            for src_path in src_paths:
                print(src_path)
                if only_process_patient not in src_path:
                    continue
                process_image(src_path)


def process_pos_annotations_patient2(only_patient=None):
    """Generate the positive-annotation CSV for every scan in the selected subsets.

    Subset directories ``<data_path>*_subsetNN/`` are visited for NN from the
    module-level ``LUNA_SUBSET_START_INDEX`` up to 14. For each ``.mhd`` file,
    ``process_pos_annotations_patient`` is called with the module-level
    ``label_path`` and ``csv_path``.

    Args:
        only_patient: optional substring; when given, only paths containing it
            are processed. The default ``None`` processes every scan, which is
            what the previous hard-coded local did.

    Returns:
        None. Progress is printed as ``<running index>  patient:  <patient_id>``.
    """
    candidate_index = 0
    for subject_no in range(LUNA_SUBSET_START_INDEX, 15):
        src_dir = data_path  + "*_subset" + str(subject_no).zfill(2) +  "/"
        print(src_dir)
        for src_path in glob.glob(src_dir + "*.mhd"):
            if only_patient is not None and only_patient not in src_path:
                continue
            patient_id = ntpath.basename(src_path).replace(".mhd", "")
            print(candidate_index, " patient: ", patient_id)
            process_pos_annotations_patient(src_path, label_path,csv_path, patient_id)
            # Counts processed scans only; filtered-out paths do not advance it.
            candidate_index += 1

In [5]:
# Select the training split. The functions defined above read data_path and
# pic_path as module-level globals, and process_pos_annotations_patient2 passes
# label_path and csv_path on as arguments, so these must be set before any
# processing cell runs.
# NOTE(review): PATH is not defined in the visible cells - presumably it comes
# from utils.imports; verify.
data_path = PATH['src_train']
pic_path = PATH['pic_train']
label_path = PATH['label_train']
csv_path = PATH['annotations_train']

#### LKDS-00395, LKDS-00731 and LKDS-00983 have 768*768 slices and raise an error
#### LKDS-00434 is faulty (empty folder); LKDS-00984 has no folder at all
#### Processing hangs when subset06 finishes

In [20]:
# First subset index visited by every range(LUNA_SUBSET_START_INDEX, 15) loop in
# the functions above; 0 makes them cover subsets 00 through 14.
LUNA_SUBSET_START_INDEX = 0

In [None]:
# None selects the multiprocessing branch of process_images (every scan of a
# subset goes through pool.map). A patient-id substring would instead process
# only the matching files, one at a time.
only_process_patient = None
process_images(delete_existing=False, only_process_patient=only_process_patient)

In [16]:
# Restrict subsequent runs to subset 14 only: range(14, 15) yields a single index.
# NOTE(review): this rebinds a global that another cell sets to 0, so results
# depend on which of the two cells was executed last.
LUNA_SUBSET_START_INDEX = 14

In [17]:
# Write <patient_id>_annos_pos.csv under label_path for every scan found in the
# currently selected subsets of data_path.
process_pos_annotations_patient2()

/Volumes/solo/ali/Data/train/*_subset14/
0  patient:  LKDS-00985
Img array:  (310, 512, 512)
Annos:  1
Origin (x,y,z):  [ -1.69500000e+02   5.00000000e-01  -5.15100000e+02]
Spacing (x,y,z):  [ 0.66601598  0.66601598  1.        ]
Rescale:  [ 0.66601598  0.66601598  1.        ]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Node org (x,y,z,diam):  (52.99, 199.38, -471.1, 4.16)
Node tra (x,y,z,diam):  (334.0, 299.0, 44.0)
Node sca (x,y,z,diam):  (222.48839613999999, 198.88401880000001, 44.000024400000029)
1  patient:  LKDS-00989
Img array:  (294, 512, 512)
Annos:  1
Origin (x,y,z):  [-184.   -17.   224.4]
Spacing (x,y,z):  [ 0.68359399  0.68359399  1.        ]
Rescale:  [ 0.68359399  0.68359399  1.        ]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Node org (x,y,z,diam):  (-91.89, 164.79, 347.4, 10.77)
Node tra (x,y,z,diam):  (135.0, 266.0, 123.0)
Node sca (x,y,z,diam):  (92.1132

## Revision plan
### 1. Read each file once and run both processing steps automatically
### 2. Debug the abnormal files
### 3. Skip errors automatically

In [21]:
# Switch to the test split by rebinding the same module-level names that the
# processing functions read (data_path, pic_path) or receive as arguments
# (label_path, csv_path). Everything run after this cell operates on test data.
data_path = PATH['src_test']
pic_path = PATH['pic_test']
label_path = PATH['label_test']
csv_path = PATH['annotations_test']

#### LKDS-00186

In [22]:
# Same invocation as for the training split; it operates on whichever
# data_path / pic_path are currently bound. None selects the multiprocessing
# branch of process_images.
only_process_patient = None
process_images(delete_existing=False, only_process_patient=only_process_patient)

Patient:  LKDS-00063
Patient:  LKDS-00027
Patient:  LKDS-00033
Patient:  LKDS-00056
Patient:  LKDS-00046
Patient:  LKDS-00012
Img array:  (226, 512, 512)
Origin (x,y,z):  [-200.5  -194.    -17.75]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Spacing (x,y,z):  [ 0.75781202  0.75781202  1.25      ]
Rescale:  [ 0.75781202  0.75781202  1.25      ]
Img array:  (293, 512, 512)
Origin (x,y,z):  [-215.5  -195.     25.25]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Spacing (x,y,z):  [ 0.76171899  0.76171899  1.25      ]
Rescale:  [ 0.76171899  0.76171899  1.25      ]
Img array:  (264, 512, 512)
Origin (x,y,z):  [-211.621 -358.121  -99.4  ]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Spacing (x,y,z):  [ 0.75781298  0.75781298  1.        ]
Rescale:  [ 0.75781298  0.75781298  1.        ]
Img array:  (409, 512, 512)
Origin (x,y,z):  [-173.157 -308.657 -156.5  ]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Spacing (x,y,z):  [ 0.68554699  0.68554699  0.69999999]
Rescale:  [ 0.68

Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Spacing (x,y,z):  [ 0.61914098  0.61914098  1.        ]
Rescale:  [ 0.61914098  0.61914098  1.        ]
Patient:  LKDS-00209
Img array:  (320, 768, 768)
Origin (x,y,z):  [-176.    -4.   475.2]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Spacing (x,y,z):  [ 0.45572901  0.45572901  1.        ]
Img array:  (300, 512, 512)
Rescale:  [ 0.45572901  0.45572901  1.        ]
Origin (x,y,z):  [-173.    -5.  -546.4]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Spacing (x,y,z):  [ 0.68359399  0.68359399  1.        ]
Rescale:  [ 0.68359399  0.68359399  1.        ]
Img array:  (709, 512, 512)
Origin (x,y,z):  [-234.3 -250.    -1.5]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Spacing (x,y,z):  [ 0.97656202  0.97656202  0.625     ]
Rescale:  [ 0.97656202  0.97656202  0.625     ]
Img array:  (817, 512, 512)
Origin (x,y,z):  [-179.2 -186.    60. ]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Spacing (x,y,z):  [ 0.72656202  0.7265620

ValueError: bad axis2 argument to swapaxes

In [23]:
# Write <patient_id>_annos_pos.csv under the currently bound label_path for every
# scan in the selected subsets; relies on the *_i.png slices already being in pic_path.
process_pos_annotations_patient2()

/Volumes/solo/ali/Data/test/*_subset00/
0  patient:  LKDS-00012
Img array:  (264, 512, 512)
Annos:  6
Origin (x,y,z):  [-211.621 -358.121  -99.4  ]
Spacing (x,y,z):  [ 0.75781298  0.75781298  1.        ]
Rescale:  [ 0.75781298  0.75781298  1.        ]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Node org (x,y,z,diam):  (-35.93, -135.24, -87.9, 10.39)
Node tra (x,y,z,diam):  (232.0, 294.0, 12.0)
Node sca (x,y,z,diam):  (175.69398815160002, 222.87769124099998, 11.5)
Node org (x,y,z,diam):  (-83.69, -108.2, -82.63, 10.41)
Node tra (x,y,z,diam):  (169.0, 330.0, 17.0)
Node sca (x,y,z,diam):  (127.92719909540001, 249.92219115799998, 16.766101689999999)
Node org (x,y,z,diam):  (-91.62, -112.62, -58.4, 6.59)
Node tra (x,y,z,diam):  (158.0, 324.0, 41.0)
Node sca (x,y,z,diam):  (120.00007550740001, 245.50015447599998, 41.000000000000007)
Node org (x,y,z,diam):  (-99.05, -196.06, 50.6, 7.97)
Node tra (x,y,z,diam):  (149.0, 214.0, 150.0)
Node 

Img array:  (445, 512, 512)
Annos:  12
Origin (x,y,z):  [-200.   -200.     30.25]
Spacing (x,y,z):  [ 0.78125  0.78125  0.625  ]
Rescale:  [ 0.78125  0.78125  0.625  ]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Node org (x,y,z,diam):  (-79.38, 43.0, 82.27, 7.26)
Node tra (x,y,z,diam):  (154.0, 311.0, 83.0)
Node sca (x,y,z,diam):  (120.625, 243.0, 52.023381294999993)
Node org (x,y,z,diam):  (66.0, -19.0, 83.77, 6.59)
Node tra (x,y,z,diam):  (340.0, 232.0, 86.0)
Node sca (x,y,z,diam):  (266.0, 181.0, 53.524055755399999)
Node org (x,y,z,diam):  (-81.0, 45.5, 93.28, 7.26)
Node tra (x,y,z,diam):  (152.0, 314.0, 101.0)
Node sca (x,y,z,diam):  (119.0, 245.5, 63.028327338099999)
Node org (x,y,z,diam):  (78.93, -39.4, 151.51, 13.68)
Node tra (x,y,z,diam):  (357.0, 206.0, 194.0)
Node sca (x,y,z,diam):  (278.93134329999998, 160.6007463, 121.25897601599999)
Node org (x,y,z,diam):  (-77.5, 78.5, 187.32, 6.87)
Node tra (x,y,z,diam):  (157.0, 3

Img array:  (343, 512, 512)
Annos:  12
Origin (x,y,z):  [-195.621 -312.121  495.5  ]
Spacing (x,y,z):  [ 0.75781298  0.75781298  1.        ]
Rescale:  [ 0.75781298  0.75781298  1.        ]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Node org (x,y,z,diam):  (-27.23, -135.24, 520.3, 10.48)
Node tra (x,y,z,diam):  (222.0, 233.0, 25.0)
Node sca (x,y,z,diam):  (168.38650725350001, 176.877391596, 24.796019900000033)
Node org (x,y,z,diam):  (69.88, -128.12, 522.5, 7.82)
Node tra (x,y,z,diam):  (350.0, 243.0, 27.0)
Node sca (x,y,z,diam):  (265.50016706010001, 184.00011577799998, 27.0)
Node org (x,y,z,diam):  (125.38, -167.62, 729.5, 6.59)
Node tra (x,y,z,diam):  (424.0, 191.0, 234.0)
Node sca (x,y,z,diam):  (321.00020198200002, 144.50009092399998, 234.0)
Node org (x,y,z,diam):  (105.88, -82.82, 742.57, 9.69)
Node tra (x,y,z,diam):  (398.0, 303.0, 247.0)
Node sca (x,y,z,diam):  (301.50018971200001, 229.29636277969999, 247.07142859999999)
N

Node org (x,y,z,diam):  (-48.99, -140.03, 376.41, 13.73)
Node tra (x,y,z,diam):  (183.0, 251.0, 12.0)
Node sca (x,y,z,diam):  (135.14005270999999, 186.097992691, 6.9069932819999735)
Node org (x,y,z,diam):  (-72.44, -131.91, 379.54, 16.22)
Node tra (x,y,z,diam):  (151.0, 262.0, 17.0)
Node sca (x,y,z,diam):  (111.6918065385, 194.221730866, 10.040499795000017)
Node org (x,y,z,diam):  (79.79, -136.69, 387.27, 20.59)
Node tra (x,y,z,diam):  (357.0, 256.0, 30.0)
Node sca (x,y,z,diam):  (263.917650794, 189.43878887700001, 17.768248814000003)
Node org (x,y,z,diam):  (-44.63, -152.02, 388.54, 11.87)
Node tra (x,y,z,diam):  (188.0, 235.0, 32.0)
Node sca (x,y,z,diam):  (139.49936120390001, 174.11191588200001, 19.038479080999991)
Node org (x,y,z,diam):  (35.31, -101.08, 428.42, 7.19)
Node tra (x,y,z,diam):  (296.0, 304.0, 98.0)
Node sca (x,y,z,diam):  (219.43579138440001, 225.051173371, 58.917484859000012)
Node org (x,y,z,diam):  (61.87, -73.13, 442.4, 6.2)
Node tra (x,y,z,diam):  (332.0, 342.0, 1

Img array:  (286, 512, 512)
Annos:  6
Origin (x,y,z):  [-190.635 -351.635 -181.7  ]
Spacing (x,y,z):  [ 0.73046899  0.73046899  1.        ]
Rescale:  [ 0.73046899  0.73046899  1.        ]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Node org (x,y,z,diam):  (-72.71, -222.05, -49.7, 10.23)
Node tra (x,y,z,diam):  (161.0, 177.0, 132.0)
Node sca (x,y,z,diam):  (117.92860989079999, 129.58039939400001, 132.0)
Node org (x,y,z,diam):  (-77.02, -233.12, -47.83, 7.66)
Node tra (x,y,z,diam):  (156.0, 162.0, 134.0)
Node sca (x,y,z,diam):  (113.61705838359998, 118.51067698099999, 133.87234039999998)
Node org (x,y,z,diam):  (96.88, -194.79, 9.3, 12.87)
Node tra (x,y,z,diam):  (394.0, 215.0, 191.0)
Node sca (x,y,z,diam):  (287.51130464109997, 156.847584794, 191.0)
Node org (x,y,z,diam):  (39.22, -110.24, 46.58, 11.5)
Node tra (x,y,z,diam):  (315.0, 330.0, 228.0)
Node sca (x,y,z,diam):  (229.8507651212, 241.39531088899997, 228.27979919999999)
Node

Img array:  (265, 512, 512)
Annos:  4
Origin (x,y,z):  [-133.711 -297.211 -105.3  ]
Spacing (x,y,z):  [ 0.578125  0.578125  1.      ]
Rescale:  [ 0.578125  0.578125  1.      ]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Node org (x,y,z,diam):  (71.83, -123.5, -83.62, 9.37)
Node tra (x,y,z,diam):  (356.0, 300.0, 22.0)
Node sca (x,y,z,diam):  (205.5382831, 173.71229700000001, 21.679814390000004)
Node org (x,y,z,diam):  (67.16, -131.84, -80.3, 7.26)
Node tra (x,y,z,diam):  (347.0, 286.0, 25.0)
Node sca (x,y,z,diam):  (200.875, 165.375, 25.0)
Node org (x,y,z,diam):  (-65.71, -167.21, 38.7, 6.2)
Node tra (x,y,z,diam):  (118.0, 225.0, 144.0)
Node sca (x,y,z,diam):  (68.000000000000014, 130.00000000000003, 144.0)
Node org (x,y,z,diam):  (85.85, -118.67, 106.43, 13.39)
Node tra (x,y,z,diam):  (380.0, 309.0, 212.0)
Node sca (x,y,z,diam):  (219.56130570000002, 178.53662420000001, 211.73089170000003)
23  patient:  LKDS-00155
Img array:  (345

Img array:  (300, 512, 512)
Annos:  9
Origin (x,y,z):  [-163.   -29.   149.7]
Spacing (x,y,z):  [ 0.68359399  0.68359399  1.        ]
Rescale:  [ 0.68359399  0.68359399  1.        ]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Direction:  [ 1.  0.  0.  0.  1.  0.  0.  0.  1.]
Node org (x,y,z,diam):  (-51.12, 202.63, 197.7, 7.26)
Node tra (x,y,z,diam):  (164.0, 339.0, 48.0)
Node sca (x,y,z,diam):  (111.8750390189, 231.625080784, 48.0)
Node org (x,y,z,diam):  (57.0, 114.0, 218.7, 6.2)
Node tra (x,y,z,diam):  (322.0, 209.0, 69.0)
Node sca (x,y,z,diam):  (220.0000767299, 143.00004987400001, 69.0)
Node org (x,y,z,diam):  (-20.08, 108.98, 222.97, 9.05)
Node tra (x,y,z,diam):  (209.0, 202.0, 73.0)
Node sca (x,y,z,diam):  (142.92015294660001, 137.97942952299999, 73.268041240000002)
Node org (x,y,z,diam):  (-39.96, 208.52, 259.75, 8.61)
Node tra (x,y,z,diam):  (180.0, 347.0, 110.0)
Node sca (x,y,z,diam):  (123.03896511260001, 237.51505293900001, 110.05089820000001)
Node org (x,y,z,diam):  

ValueError: need at least one array to concatenate