# Import Statements

In [28]:
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.ndimage import median_filter
import cv2 as cv


# import images
image_dir = 'Input Images/'
export_dir = 'Output Images/'
cat_dir = 'Category Images/'

# Analysis Modules

In [2]:
class Cat_Generator:
    
    def __init__(self, cat_dir): 
        self.cat_dict = self.cat_generator(cat_dir)
        
    #takes a dictionary key corresponding to an image and returns a tuple with the average RGB values in the image
    def avg_rgb(self, cat_image):
        r = np.median(cat_image[:,:,0].ravel()).squeeze()
        g = np.median(cat_image[:,:,1].ravel()).squeeze()
        b = np.median(cat_image[:,:,2].ravel()).squeeze()
    
        return (r,g,b)
    
    #takes a directory containing category images and returns key:value pairs for each category/image
    def cat_generator(self, cat_dir):
        cat_dict = {}
        color_placeholder = (0,0,0)
        for file in os.listdir(cat_dir): 
            cat_image = np.asarray(Image.open(cat_dir + file))
            cat_rgb = self.avg_rgb(cat_image)
            cat_dict[os.path.basename(file)] = [cat_rgb, color_placeholder]
        
        return cat_dict
    
    #allows a user to set the color replacement rgb values associated with each category
    def cat_color(self, cat_key, rgb):
        self.cat_dict[cat_key][1] = rgb
        
    #allow access to the dictionary
    def d(self):
        return self.cat_dict

In [29]:
class Histology_CAT: 
    
    def __init__(self, image_dir, export_dir, cat_dict = None):
        
        self.export_dir = export_dir #directory where processed images should be deposited
        self.cat_dict = cat_dict #dictionary where each key:value pair corresponds to a classification category
        
        
        #generate dictionary of images from the directory of images
        self.image_dict = {}
        for file in os.listdir(image_dir): 
            self.image_dict[os.path.basename(file)] = np.asarray(Image.open(image_dir + file))
            self.image_key = os.path.basename(file) #filename of image currently being processed (gets reassigned during analysis)
            
        #generate indexed list of keys from category dictionary (cat_dict)
        if cat_dict is not None:
            self.cat_keys = []
            for key in self.cat_dict:
                self.cat_keys.append(key)
            
        self.tissue_rgb = None
        self.tissue_mask = None
        self.implant_rgb = None
        self.implant_mask = None
        
            
     
    #process all images in the image directory
    def kowalski(self, krnl_sz):
    
        for image_key in self.image_dict:
            self.image_key = image_key
            
            #kmeans clustering to separate into tissue and non-tissue clusters
            kmeans_arr, centers = self.kmeans_quant(self.image_dict[image_key], n_cluster = 2) 
            self.tissue_rgb = centers[np.argmin(np.sum(centers, axis = 1))] #identify RGB color of the tissue (the darker color)
            print(self.tissue_rgb, self.tissue_rgb.astype('uint8'))
            print(np.unique(kmeans_arr))
            
            #median filtering for smoothing 
            medfilt_arr = self.med_filter(kmeans_arr, krnl_sz)
            print(np.unique(medfilt_arr))
            self.tissue_mask = self.cat_mask(medfilt_arr, self.tissue_rgb)
            print(np.sum(self.tissue_mask))
            
            #overwrite tissue pixels with rgb (255, 255, 255) to match empty space
            notissue_arr = self.img_overwrite(self.image_dict[image_key], self.tissue_mask, (255,255,255))
            
            #kmeans clustering to separate overwritten image into empty space and implant cluster
            kmeans_arr, centers = self.kmeans_quant(notissue_arr, n_cluster = 2)
            self.implant_rgb = centers[np.argmin(np.sum(centers, axis = 1))] #identify RGB color of the implant (the darker color)
            
            #median filtering for smoothing 
            medfilt_arr = self.med_filter(kmeans_arr, krnl_sz + 2)
            self.implant_mask = self.cat_mask(medfilt_arr, self.implant_rgb)
            
            #assemble final image by overwriting an all-white array with tissue and implant pixels
            white_img = np.ones(self.image_dict[image_key].shape)*255
            tissue_overwrite = self.img_overwrite(white_img, self.tissue_mask, (120,81,169)) #set tissue to purple
            implant_overwrite = self.img_overwrite(tissue_overwrite, self.implant_mask, (0,0,0)) #set implant to black
     
        return "All done, Skipper!"
    
    
    # Scatter-plot visualization in RGB space of the input image distribution
    def visualize_dist(self, image_key):
        r = self.image_dict[image_key][:,:,0].ravel()
        g = self.image_dict[image_key][:,:,1].ravel()
        b = self.image_dict[image_key][:,:,2].ravel()
        
        fig = plt.figure(figsize = (20,10))
        ax = fig.add_subplot(projection='3d')
        ax.scatter(r,g,b, marker = '.')
        plt.show()
       
    
    #perform K Means clustering to quantize RGB colors in the image 
    def kmeans_quant(self, img_array, n_cluster):
        
        flat_arr = img_array.reshape((-1,3))
        clustering = KMeans(n_clusters=n_cluster, random_state=42).fit(flat_arr)
        labels = clustering.labels_
        centers = clustering.cluster_centers_ 
        quantized_arr = centers[labels].reshape(img_array.shape).astype('uint8')
       
        #Save image after application of K Means clustering quantization
        quantized_img = Image.fromarray(quantized_arr, mode = 'RGB') 
        quantized_img.save(self.export_dir + str(n_cluster) + " quanta "+ self.image_key, "JPEG")
    
        return quantized_arr, centers
    
    
    #perform median filtering to smooth local blemishes while keeping edges
    def med_filter(self, image_arr, krnl_sz):
        filtered_arr = cv.medianBlur(image_arr, krnl_sz)
        #filtered_arr = median_filter(image_arr, size = (krnl_sz, krnl_sz, 3))
        
        #Save image after application of median filtering
        filtered_img = Image.fromarray(filtered_arr, mode = 'RGB') 
        filtered_img.save(self.export_dir + str(krnl_sz) + " pixel median filter "+ self.image_key, "JPEG")
        
        return filtered_arr
    
    
    #given a pixel's category color (derived from K-means cluster center rgb), return boolean list of all corresponding pixels
    def cat_mask(self, image_arr, cat_color):
    
        bool_array = (image_arr == cat_color) #each data element is checked against cat_color
        bool_array = np.sum(bool_array, axis = 2) #sum boolean array of rgb values at each pixel
        bool_array = (bool_array == 3) # if r+g+b matches, the above sum is 3 and this pixel should be set to True
        
        return bool_array.ravel()
    
    
    def img_overwrite(self, image_arr, label_array, rgb_color):
    
        overwrite_arr = image_arr.copy() #to prevent 'assignment destination is read-only' error
        overwrite_arr.reshape(-1,3)[label_array] = [rgb_color]
        
        #Save image after application of naive categorical recoloring
        overwrite_img = Image.fromarray(overwrite_arr, mode = 'RGB') 
        overwrite_img.save(self.export_dir + str(rgb_color) + " overwrite " + self.image_key, "JPEG")
        
        return overwrite_arr
        
        
    #given an image and a dictionary of categories, classify each pixel's category and then replace RGB values to match category RGB
    def naive_recolor(self, image_arr):
        #get stack of RGB differences for each category
        cat_stack = []
        for cat_key in self.cat_dict:
            cat_stack.append(self.diff(image_arr, cat_key))
        cat_stack = np.asarray(cat_stack)
        
        #split stack & recolor each pixel
        recolored_arr = []
        pxl_stack = np.split(cat_stack, np.shape(cat_stack)[1], axis = 1)
        for pxl in pxl_stack:
            cat_index = np.argmin(tuple(pxl.squeeze()))  
            recolored_arr.append(self.cat_dict[self.cat_keys[cat_index]][1])
        recolored_arr = np.asarray(recolored_arr)
        
        #reshape 1D array into 2D image
        pxl_x, pxl_y, pxl_z = np.shape(self.image_dict[self.image_key])
        recolored_arr = np.reshape(recolored_arr, (pxl_x, pxl_y, 3))
        
        #Save image after application of naive categorical recoloring
        recolored_img = Image.fromarray(recolored_arr, mode = 'RGB') 
        recolored_img.save(self.export_dir + " naive categorical recoloring "+ self.image_key, "JPEG")
        
        return recolored_arr
        
  
    #returns a flattened array of the euclidian distance in RGB space of each pixel in the histology image compared to the average RGB of a given category
    def diff(self, image_arr, cat_key):
   
        r_diff = np.square(image_arr[:,:,0].ravel() - self.cat_dict[cat_key][0][0])
        g_diff = np.square(image_arr[:,:,1].ravel() - self.cat_dict[cat_key][0][1])
        b_diff = np.square(image_arr[:,:,2].ravel() - self.cat_dict[cat_key][0][2])
    
        return np.sqrt(r_diff + g_diff + b_diff) 

# Kowalski, Analysis!

In [30]:
categorizer = Histology_CAT(image_dir, export_dir)
print(categorizer.kowalski(krnl_sz = 5))

[144.50289788  89.70675937 100.1001255 ] [144  89 100]
[ 89 100 144 202 203 222]
[ 89 100 144 202 203 222]
0
All done, Skipper!


In [30]:
cat_dict = Cat_Generator(cat_dir)
#cat_dict.cat_color("implant.jpg",(0,0,0)) #black (implant)
#cat_dict.cat_color("implant1.jpg",(0,0,0)) #black (implant)
cat_dict.cat_color("empty.jpg",(255,255,255)) #white (empty)
#cat_dict.cat_color("tissue.jpg",(120,81,169)) #purple (tissue)
#cat_dict.cat_color("tissue1.jpg",(120,81,169)) #purple (tissue)

categorizer = Histology_CAT(image_dir, export_dir, cat_dict.d())
for file in os.listdir(image_dir): 
    img_arr = np.asarray(Image.open(image_dir + file))

kmeans, labels = categorizer.kmeans_quant(2)
#md_filt = categorizer.med_filter(naive_arr, 1)

In [9]:
cat_color = (0,10,20)
image_arr = [(0,0,0), (0,0,0), (0,0,0), (0,10,20), (0,10,0), (0,0,20), (0,0,0), (0,0,0)]
image_arr = np.asarray(image_arr).reshape(2,4,3)

bool_array = (image_arr == cat_color)
bool_array = np.sum(bool_array, axis = 2)
bool_array = (bool_array == 3)

print(type(cat_color))

print(image_arr)
print()
image_arr.reshape(-1,3)[bool_array.ravel()] = [(255,255,255)]
print(image_arr)

<class 'tuple'>
[[[ 0  0  0]
  [ 0  0  0]
  [ 0  0  0]
  [ 0 10 20]]

 [[ 0 10  0]
  [ 0  0 20]
  [ 0  0  0]
  [ 0  0  0]]]

[[[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  [255 255 255]]

 [[  0  10   0]
  [  0   0  20]
  [  0   0   0]
  [  0   0   0]]]


[[[255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]]

 [[255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]]

 [[255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]]

 [[255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]]

 [[255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]]

 [[255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  [255. 255. 255.]
  

In [11]:
cat_dict = Cat_Generator(cat_dir)
cat_dict.cat_color("implant.jpg",(0,0,0)) #black (implant)
cat_dict.cat_color("implant1.jpg",(0,0,0)) #black (implant)
cat_dict.cat_color("empty.jpg",(255,255,255)) #white (empty)
cat_dict.cat_color("tissue.jpg",(120,81,169)) #purple (tissue)
cat_dict.cat_color("tissue1.jpg",(120,81,169)) #purple (tissue)

categorizer = Histology_CAT(image_dir, export_dir, cat_dict.d())
print(categorizer.kowalski(n_cluster = 10, krnl_sz = 5))

All done, Skipper!


SyntaxError: invalid syntax (<ipython-input-17-8e0d0cb9d61e>, line 1)