In [1]:
from utils import *
from path import *

In [2]:
working_path = PATH['output_train']
file_list=glob(working_path+"images_*.npy")

In [3]:
for img_file in tqdm(file_list):    
    imgs_to_process = np.load(img_file).astype(np.float64)

    for i in range(len(imgs_to_process)):
        img = imgs_to_process[i]
        mean = np.mean(img)
        std = np.std(img)
        img = img - mean
        img = img / std
        middle = img[100:400, 100:400]
        mean = np.mean(middle)
        max_im = np.max(img)
        min_im = np.min(img)

        img[img == max_im] = mean
        img[img == min_im] = mean

        kmeans = KMeans(n_clusters=2).fit(np.reshape(middle, [np.prod(middle.shape), 1]))
        centers = sorted(kmeans.cluster_centers_.flatten())
        threshold = np.mean(centers)
        thresh_img = np.where(img < threshold, 1.0, 0.0)  # threshold the image

        eroded = morphology.erosion(thresh_img, np.ones([4, 4]))
        dilation = morphology.dilation(eroded, np.ones([10, 10]))

        labels = measure.label(dilation)
        label_vals = np.unique(labels)
        regions = measure.regionprops(labels)
        good_labels = []
        for prop in regions:
            B = prop.bbox
            if B[2] - B[0] < 475 and B[3] - B[1] < 475 and B[0] > 40 and B[2] < 472:
                good_labels.append(prop.label)
        mask = np.ndarray([img.shape[0], img.shape[1]], dtype=np.int8)
        mask[:] = 0
        for N in good_labels:
            mask = mask + np.where(labels == N, 1, 0)
        mask = morphology.dilation(mask, np.ones([10, 10]))  # one last dilation
        imgs_to_process[i] = mask
    np.save(img_file.replace("images", "lungmask"), imgs_to_process)

file_list = glob(working_path + "lungmask_*.npy")
out_images = []  # final set of images
out_nodemasks = []  # final set of nodemasks
for fname in tqdm(file_list):
    #print "working on file ", fname
    imgs_to_process = np.load(fname.replace("lungmask", "images"))
    masks = np.load(fname)
    node_masks = np.load(fname.replace("lungmask", "masks"))
    for i in range(len(imgs_to_process)):
        mask = masks[i]
        node_mask = node_masks[i]
        img = imgs_to_process[i]
        new_size = [512, 512]  
        img = mask * img  

        new_mean = np.mean(img[mask > 0])
        new_std = np.std(img[mask > 0])

        labels = measure.label(mask)
        regions = measure.regionprops(labels)
        min_row = 512
        max_row = 0
        min_col = 512
        max_col = 0
        for prop in regions:
            B = prop.bbox
            if min_row > B[0]:
                min_row = B[0]
            if min_col > B[1]:
                min_col = B[1]
            if max_row < B[2]:
                max_row = B[2]
            if max_col < B[3]:
                max_col = B[3]
        width = max_col - min_col
        height = max_row - min_row
        if width > height:
            max_row = min_row + width
        else:
            max_col = min_col + height

        img = img[min_row:max_row, min_col:max_col]
        mask = mask[min_row:max_row, min_col:max_col]
        if max_row - min_row < 5 or max_col - min_col < 5:  
            pass
        else:
            mean = np.mean(img)
            img = img - mean
            min = np.min(img)
            max = np.max(img)
            img = img / (max - min)
            new_img = resize(img, [512, 512])
            new_node_mask = resize(node_mask[min_row:max_row, min_col:max_col], [512, 512])
            out_images.append(new_img)
            out_nodemasks.append(new_node_mask)

num_images = len(out_images)
final_images = np.ndarray([num_images, 1, 512, 512], dtype=np.float32)
final_masks = np.ndarray([num_images, 1, 512, 512], dtype=np.float32)
for i in range(num_images):
    final_images[i, 0] = out_images[i]
    final_masks[i, 0] = out_nodemasks[i]

np.save(working_path + "trainImages.npy", final_images)
np.save(working_path + "trainMasks.npy", final_masks)

100%|██████████| 975/975 [12:44<00:00,  1.33it/s]
  warn("The default mode, 'constant', will be changed to 'reflect' in "
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 975/975 [02:18<00:00,  7.76it/s]
