In [1]:
from google.colab import drive

drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import numpy as np
from skimage import morphology
from skimage import measure
from sklearn.cluster import KMeans
from skimage.transform import resize
from glob import glob
import matplotlib.pyplot as plt

In [3]:
main_path = 'gdrive/My Drive/luna/'
working_path = 'gdrive/My Drive/luna/output/'


In [4]:

file_list = glob(working_path + 'masks_*.npy')
out_images = []  # final set of images
out_nodemasks = []  # final set of nodemasks
for fname in file_list:
    print("working on file ", fname)
    imgs_to_process = np.load(fname.replace("lungmask", "images"))
    masks = np.load(fname)
    node_masks = np.load(fname.replace("lungmask", "masks"))
    for i in range(len(imgs_to_process)):
        mask = masks[i]
        node_mask = node_masks[i]
        img = imgs_to_process[i]
        new_size = [512, 512]  # we're scaling back up to the original size of the image
        img = mask * img  # apply lung mask
        #
        # renormalizing the masked image (in the mask region)
        #
        new_mean = np.mean(img[mask > 0])
        new_std = np.std(img[mask > 0])
        #
        #  Pulling the background color up to the lower end
        #  of the pixel range for the lungs
        #
        old_min = np.min(img)  # background color
        img[img == old_min] = new_mean - 1.2 * new_std  # resetting backgound color
        img = img - new_mean
        img = img / new_std
        # make image bounding box  (min row, min col, max row, max col)
        labels = measure.label(mask)
        regions = measure.regionprops(labels)
        #
        # Finding the global min and max row over all regions
        #
        min_row = 512
        max_row = 0
        min_col = 512
        max_col = 0
        for prop in regions:
            B = prop.bbox
            if min_row > B[0]:
                min_row = B[0]
            if min_col > B[1]:
                min_col = B[1]
            if max_row < B[2]:
                max_row = B[2]
            if max_col < B[3]:
                max_col = B[3]
        width = max_col - min_col
        height = max_row - min_row
        if width > height:
            max_row = min_row + width
        else:
            max_col = min_col + height
        #
        # cropping the image down to the bounding box for all regions
        # (there's probably an skimage command that can do this in one line)
        #
        img = img[min_row:max_row, min_col:max_col]
        mask = mask[min_row:max_row, min_col:max_col]
        if max_row - min_row < 5 or max_col - min_col < 5:  # skipping all images with no god regions
            pass
        else:
            # moving range to -1 to 1 to accomodate the resize function
            mean = np.mean(img)
            img = img - mean
            min = np.min(img)
            max = np.max(img)
            img = img / (max - min)
            new_img = resize(img, [512, 512])
            new_node_mask = resize(node_mask[min_row:max_row, min_col:max_col], [512, 512])
            out_images.append(new_img)
            out_nodemasks.append(new_node_mask)

num_images = len(out_images)
#
#  Writing out images and masks as 1 channel arrays for input into network
#
final_images = np.ndarray([num_images, 1, 512, 512], dtype=np.float32)
final_masks = np.ndarray([num_images, 1, 512, 512], dtype=np.float32)
for i in range(num_images):
    final_images[i, 0] = out_images[i]
    final_masks[i, 0] = out_nodemasks[i]

rand_i = np.random.choice(range(num_images), size=num_images, replace=False)
test_i = int(0.1*num_images)

np.save(main_path+"trainImages.npy",final_images[rand_i[test_i:]])
np.save(main_path+"trainMasks.npy",final_masks[rand_i[test_i:]])

np.save(main_path+"testImages.npy",final_images[rand_i[:test_i]])
np.save(main_path+"testMasks.npy",final_masks[rand_i[:test_i]])
print('process done...')

working on file  gdrive/My Drive/luna/output/masks_0002_0025.npy
working on file  gdrive/My Drive/luna/output/masks_0001_0023.npy
working on file  gdrive/My Drive/luna/output/masks_0002_0026.npy
working on file  gdrive/My Drive/luna/output/masks_0003_0028.npy
working on file  gdrive/My Drive/luna/output/masks_0005_0086.npy
working on file  gdrive/My Drive/luna/output/masks_0007_0098.npy
working on file  gdrive/My Drive/luna/output/masks_0008_0103.npy
working on file  gdrive/My Drive/luna/output/masks_0010_0117.npy
working on file  gdrive/My Drive/luna/output/masks_0009_0110.npy
working on file  gdrive/My Drive/luna/output/masks_0011_0132.npy
working on file  gdrive/My Drive/luna/output/masks_0011_0130.npy
working on file  gdrive/My Drive/luna/output/masks_0011_0131.npy
working on file  gdrive/My Drive/luna/output/masks_0011_0129.npy
working on file  gdrive/My Drive/luna/output/masks_0012_0135.npy
working on file  gdrive/My Drive/luna/output/masks_0012_0136.npy
working on file  gdrive/M

In [5]:

imgs_train = np.load(main_path + "trainImages.npy").astype(np.float32)
imgs_mask_train = np.load(main_path + "trainMasks.npy").astype(np.float32)

In [6]:
imgs_train.shape

(144, 1, 512, 512)