# Filter background from D&F images.
Use the A3 tumor because  
* the tumor is Y-neg
* the image is mostly tumor cells (hardly any stroma)
* the image is mostly blue, with hardly any green.

Use this to generate training examples of Y-neg patches.

In [1]:
import time
import os
import glob
import cv2 # OpenCV-Python
from PIL import Image
Image.MAX_IMAGE_PIXELS = None
import numpy as np
import json
DIR_IMAGES_RAW = "/home/jrm/Martinez/images/raw/"
DIR_IMAGES_OUT = "/home/jrm/Martinez/images/temp/"
PATTERN_IMAGES_RAW = "*.DF1.*.tif"
PATCH_SIZE=224  # matches VGG
IMAGE_FILENAME='A3.DF1.26.tif'

In [2]:
def get_image_names(path,pattern):
    #RAW_IMAGE_NAMES = os.listdir(DIR_IMAGES_RAW)
    paths = glob.glob(path+pattern)
    names = [os.path.basename(x) for x in paths]
    return names
FILENAMES_IMAGES_RAW = get_image_names(DIR_IMAGES_RAW,PATTERN_IMAGES_RAW)
#FILENAMES_IMAGES_RAW

In [3]:
def load_pixel_array(path,filename,verbose=False):
    im = Image.open(path+filename)
    ima = np.array(im)   # convert to numpy
    if verbose:
        print(filename, ima.size, ima.shape)
    return ima
pixel_array = load_pixel_array(DIR_IMAGES_RAW,IMAGE_FILENAME,True)

A3.DF1.26.tif 479598264 (12513, 12776, 3)


In [4]:
avg = np.average(pixel_array,axis=(0,1))
std = np.std(pixel_array,axis=(0,1))
print("Average",avg,"STD",std)
print("Example pixel:",pixel_array[0,0])

Average [ 0.         10.75710886 34.77616833] STD [ 0.          5.89739359 40.8880172 ]
Example pixel: [0 9 3]


In [5]:
PATCH_THRESHOLD=PATCH_SIZE*PATCH_SIZE/2
IM_WIDTH = pixel_array.shape[0]
IM_HEIGHT = pixel_array.shape[1]

In [6]:
# TO DO: Create an iterator class and dispense with the globals.
W=0
H=0
def get_next_patch(pixels): # assume square
    global W,H
    if W+PATCH_SIZE>IM_WIDTH:
        H += PATCH_SIZE
        W = 0
    if H+PATCH_SIZE>IM_HEIGHT:
        patch = None
    if W+PATCH_SIZE<=IM_WIDTH and H+PATCH_SIZE<=IM_HEIGHT:
        patch = pixels[W:W+PATCH_SIZE, H:H+PATCH_SIZE]
        W += PATCH_SIZE
    return patch

In [7]:
def pixel_to_heatmap(green,blue):   # TO DO: this is slow, need a hash function
    bins=[10,20,30,40,50,60,70,80,90,256]
    gbin=None
    bbin=None
    for bin in range(0,10):
        if gbin is None and green<=bins[bin]:
            gbin=bin
        if bbin is None and blue<=bins[bin]:
            bbin=bin
        if gbin is not None and bbin is not None:
            return gbin,bbin
    return gbin,bbin
def accumulate_pixels(imary,verbose=False):  # TO DO: nested for loop is very slow
    heatmap=np.zeros( (10,10), dtype=np.int32)
    if verbose:
        print("accumulate",imary.shape)
    nrows,ncols,nchannel=imary.shape
    for row in range(0,nrows):
        for col in range(0,ncols):
            pixel = imary[row,col]
            #red = pixel[0]
            green = pixel[1]
            blue = pixel[2]
            gbin,bbin = pixel_to_heatmap(green,blue)
            heatmap[gbin,bbin] += 1
    return heatmap

In [8]:
W=0
H=0
start = time.time()
patch = get_next_patch(pixel_array)
hm = accumulate_pixels(patch)
end = time.time()
print(end - start)
print(hm)

0.19380474090576172
[[29764     0     0     0     0     0     0     0     0     0]
 [20347     5     0     0     0     0     0     0     0     0]
 [   36     1     0     0     0     0     0     0     0     0]
 [   15     0     0     0     0     0     0     0     0     0]
 [    3     0     0     0     0     0     0     0     0     0]
 [    2     0     0     0     0     0     0     0     0     0]
 [    1     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    2     0     0     0     0     0     0     0     0     0]]


In [9]:
start = time.time()
if False:
    hm = accumulate_pixels(pixel_array)
    print(hm)
end = time.time()
print(end - start)


3.910064697265625e-05


In [12]:
W=0
H=0
BACKGROUND_MAX=np.array( [20,20,20] )
GREEN_MIN=np.array( [0,40,0] )
BLUE_MIN=np.array( [0,0,40] )
num_back_patches = 0
num_green_patches = 0
num_blue_patches = 0
num_patches = 0

if True:
    patch = get_next_patch(pixel_array)
    while patch is not None:
        num_patches += 1
        num_background_pixels = np.sum(np.all(patch<BACKGROUND_MAX,axis=2))
        if num_background_pixels>=PATCH_THRESHOLD:
            num_back_patches += 1
        else:
            num_green_pixels = np.sum(np.all(patch>=GREEN_MIN,axis=2))
            if num_green_pixels>=PATCH_THRESHOLD:
                num_green_patches += 1
                if num_green_patches <3:
                    pic = Image.fromarray(patch)
                    name = "GREEN{}.tif".format(num_blue_patches)
                    pic.save(DIR_IMAGES_OUT+name)
                    hm = accumulate_pixels(patch)
                    print(hm,name)        
            else:
                num_blue_pixels = np.sum(np.all(patch>=BLUE_MIN,axis=2))
                if num_blue_pixels>=PATCH_THRESHOLD:
                    num_blue_patches += 1
                    if num_blue_patches in [250,256,260]:
                    #if num_blue_patches in [23,200,297,256,272,376,411,447,611,612,631,632,633,634,706,789]:
                        pic = Image.fromarray(patch)
                        name = "BLUE{}.tif".format(num_blue_patches)
                        pic.save(DIR_IMAGES_OUT+name)
                        hm = accumulate_pixels(patch)
                        print(hm,name)        
                    
        patch = get_next_patch(pixel_array)
    print("back=",num_back_patches,"green=",num_green_patches,"blue=",num_blue_patches,"total=",num_patches)
    print("back=",num_back_patches/num_patches,"green=",num_green_patches/num_patches,"blue=",num_blue_patches/num_patches)

[[   0    0  795 7315 6904 3442 1097  224   57   22]
 [   0    0   13 1591 3611 4885 4409 4026 3166 8517]
 [   0    0    0    1    0    0    0    0    5   95]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    1    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]] BLUE250.tif
[[   0    0    2    0    0    0    0    0    0    0]
 [   0  160 9815 2734  607  248  146   83   38   50]
 [   0    1 3018 1502 1092  530  186  112  101  682]
 [   0    0  287 1775 1656 1805  466  225  232 1819]
 [   0    0    1  840 1316  795  425  271  216 1592]
 [   0    0    0  128 1624  323   91  115   90  748]
 [   0    0    0   15 1029  598   88   33   34  230]
 [   0    0    0    0  266 1027  132   31   13   49]
 [   0    0    0    0   51 1078  

In notebook 004, we saved the blue ones accidentally.  
By visual inspection, these were problematic.  
Contains green: 23, 200, 207, 256!, 272, 376, 411, 447, 611, 612, 631-4, 706, 789.  
Contains solid blue with letters (scale bar): 891-2, 908-9, 914-15.  
Looks black: 916-8, 921-3, 930-1.  

In [11]:
start = time.time()
patch = get_next_patch(pixel_array)
while patch is not None:
    num_patches += 1
    print(num_patches,end=" ")
    patch = get_next_patch(pixel_array)

end = time.time()
print(end - start)


4.6253204345703125e-05
