# LUNA Lung Segmentation ROI


# Dependency Descriptions
1. **skimage.morphology**: Morphological operations, e.g., opening or skeletonization.
2. **skimage.measure**: Measurement of image properties, e.g., similarity and contours.
3. **sklearn.cluster.KMeans**: Clusters data by trying to separate samples into n groups of equal variance, minimizing a criterion known as the inertia, or within-cluster sum-of-squares.
4. **skimage.transform.resize**: Resizes an image to a given output shape; it is part of `skimage.transform`, which provides geometric and other transforms, e.g., rotation or the Radon transform.

In [1]:
import numpy as np
from skimage import morphology
from skimage import measure
from sklearn.cluster import KMeans
from skimage.transform import resize
from glob import glob

In [2]:
# Directory that is searched for the saved image arrays.
WORKING_PATH = "../../../../output/build-simple-model/"
# Every "images_*.npy" file found in WORKING_PATH.
# glob() returns its matches in arbitrary, filesystem-dependent order;
# sorting makes the list -- and therefore the file the demonstration ends
# up using -- the same on every run and every machine.
FILE_LIST = sorted(glob(WORKING_PATH + "images_*.npy"))

In [3]:
# Run the program through a single set of images for demonstration.
# The loop itself only prints each matched path. Python keeps the loop
# variable bound after the loop finishes, so `img_file` afterwards refers to
# the last path in FILE_LIST -- that is the file the next cell loads.
# NOTE(review): if FILE_LIST is empty, `img_file` is never bound and the
# next cell will raise NameError.
for img_file in FILE_LIST:
    print(img_file)

../../../../output/build-simple-model\images_0001_0023.npy


In [4]:
    # Load the array stored in `img_file` and convert it to float64
    # (astype returns a new array of the requested dtype).
    imgs_to_process = np.load(img_file).astype(np.float64)
    # Show which file was loaded, followed by the raw values of the stack.
    print("on image", img_file)
    print(imgs_to_process)

on image ../../../../output/build-simple-model\images_0001_0023.npy
[[[-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  ..., 
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]]

 [[-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  ..., 
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]]

 [[-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  ..., 
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  [-2048. -2048. -2048. ..., -2048. -2048. -2048.]
  [-2048. -2048. -2048. ..., -2048. -

In [5]:
    # Walk the first axis of the stack and print each slice index.
    # Only 3 images are in the numpy array (one for each of the 3 layers).
    for i in range(imgs_to_process.shape[0]):
        print(i)

0
1
2


In [6]:
# For demonstration, everything that follows works on a single image: i = 0.
i = 0
# `img` is one two-dimensional numpy array taken from the stack.
img = imgs_to_process[i]

# Standardize the pixel values: subtract the mean, then divide by the
# standard deviation. A standardized variable has been rescaled to have a
# mean of zero and a standard deviation of one.
mean, std = img.mean(), img.std()
# The arithmetic builds a new array, so the slice stored in
# imgs_to_process is left unmodified.
img = (img - mean) / std

In [7]:
# Find the average pixel value near the lungs, used to renormalize
# washed-out images.
# `middle` is the window of rows 100-399 and columns 100-399. It is a basic
# slice, i.e. a view that shares memory with `img`, so the in-place write at
# the end of this cell is visible through `middle` as well.
middle = img[100:400, 100:400]
mean = middle.mean()  # replaces the whole-image mean with the mean of the window

# Largest and smallest values in the current standardized pixel spectrum.
# NOTE(review): `max` and `min` shadow the Python builtins for the rest of
# the session; the names are kept because other cells may read them.
max, min = img.max(), img.min()

# To improve threshold finding, move the underflow and overflow of the pixel
# spectrum: every pixel sitting at either extreme is set to the window mean,
# so the extreme outliers do not distort the thresholding that follows.
img[(img == max) | (img == min)] = mean

In [8]:
# Use KMeans to separate foreground (radio-opaque tissue) from background
# (radio-transparent tissue, i.e. the lungs). The fit uses only the centre of
# the image, to avoid the non-tissue parts of the image as much as possible.
#
# KMeans finds clusters in data: it initializes 2 centroids, then repeats two
# steps until convergence -- assign every point to its nearest centroid, then
# move each centroid to the mean of the points assigned to it.

# Reshape gives the array a new shape without changing its data: here the
# window becomes one long column with a single value per row.
pixel_column = middle.reshape(-1, 1)
kmeans = KMeans(n_clusters=2).fit(pixel_column)  # cluster the values in the middle

# Each centroid is a single value because each pixel is a single value.
# Sorting gives the two centroids in ascending order.
centers = sorted(kmeans.cluster_centers_.flatten())
# Averaging the two centroids gives a dividing threshold between the clusters.
threshold = np.mean(centers)
# Pixels below the threshold become 1 and all others 0
# (thus segmenting lungs as 1, non-lungs as 0).
thresh_img = np.where(img < threshold, 1.0, 0.0)

# Show every intermediate result of the steps above.
print(middle.shape)
print(np.prod(middle.shape))  # product of the dimensions = number of pixels in the window
print(pixel_column)
print(kmeans)  # at this point KMeans has already been fitted on the middle array
print(kmeans.cluster_centers_)
print(kmeans.cluster_centers_.flatten())
print(centers)
print(threshold)
print(thresh_img)

(300, 300)
90000
[[-0.25934945]
 [-0.25557844]
 [-0.26060646]
 ..., 
 [ 0.98885633]
 [ 1.01776744]
 [ 0.91846406]]
KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=2, n_init=10, n_jobs=1, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)
[[ 1.05730228]
 [-0.01335521]]
[ 1.05730228 -0.01335521]
[-0.013355206424837673, 1.0573022779300885]
0.521973535753
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
