# Filter background from D&F images.
Use the A5 tumor because  
* the tumor is Y-neg
* the image is mostly tumor cells (hardly any stroma)
* the image is mostly blue, with hardly any green.

Eventually, use this to generate training examples of Y-neg patches.  
For now, just create a histogram of color intensities.  
We accidentally saved all the blue ones.

In [35]:
import time
import os
import glob
import cv2 # OpenCV-Python
from PIL import Image
Image.MAX_IMAGE_PIXELS = None
import numpy as np
import json
DIR_IMAGES_RAW = "/home/jrm/Martinez/images/raw/"
PATTERN_IMAGES_RAW = "*.DF1.*.tif"
PATCH_SIZE=224  # matches VGG
IMAGE_FILENAME='A3.DF1.26.tif'

In [2]:
def get_image_names(path,pattern):
    #RAW_IMAGE_NAMES = os.listdir(DIR_IMAGES_RAW)
    paths = glob.glob(path+pattern)
    names = [os.path.basename(x) for x in paths]
    return names
FILENAMES_IMAGES_RAW = get_image_names(DIR_IMAGES_RAW,PATTERN_IMAGES_RAW)
#FILENAMES_IMAGES_RAW

In [3]:
def load_pixel_array(path,filename,verbose=False):
    im = Image.open(path+filename)
    ima = np.array(im)   # convert to numpy
    if verbose:
        print(filename, ima.size, ima.shape)
    return ima
pixel_array = load_pixel_array(DIR_IMAGES_RAW,IMAGE_FILENAME,True)

A3.DF1.26.tif 479598264 (12513, 12776, 3)


In [4]:
avg = np.average(pixel_array,axis=(0,1))
std = np.std(pixel_array,axis=(0,1))
print("Average",avg,"STD",std)
print("Example pixel:",pixel_array[0,0])

Average [ 0.         10.75710886 34.77616833] STD [ 0.          5.89739359 40.8880172 ]
Example pixel: [0 9 3]


In [5]:
PATCH_THRESHOLD=PATCH_SIZE*PATCH_SIZE/2
IM_WIDTH = pixel_array.shape[0]
IM_HEIGHT = pixel_array.shape[1]

In [25]:
# TO DO: Create an iterator class and dispense with the globals.
W=0
H=0
def get_next_patch(pixels): # assume square
    global W,H
    if W+PATCH_SIZE>IM_WIDTH:
        H += PATCH_SIZE
        W = 0
    if H+PATCH_SIZE>IM_HEIGHT:
        patch = None
    if W+PATCH_SIZE<=IM_WIDTH and H+PATCH_SIZE<=IM_HEIGHT:
        patch = pixels[W:W+PATCH_SIZE, H:H+PATCH_SIZE]
        W += PATCH_SIZE
    return patch

In [32]:
def pixel_to_heatmap(green,blue):
    bins=[10,20,30,40,50,60,70,80,90,256]
    gbin=None
    bbin=None
    for bin in range(0,10):
        if gbin is None and green<=bins[bin]:
            gbin=bin
        if bbin is None and blue<=bins[bin]:
            bbin=bin
        if gbin is not None and bbin is not None:
            return gbin,bbin
    return gbin,bbin
def accumulate_pixels(imary):
    heatmap=np.zeros( (10,10), dtype=np.int32)
    print(imary.shape)
    nrows,ncols,nchannel=imary.shape
    for row in range(0,nrows):
        for col in range(0,ncols):
            pixel = imary[row,col]
            #red = pixel[0]
            green = pixel[1]
            blue = pixel[2]
            gbin,bbin = pixel_to_heatmap(green,blue)
            heatmap[gbin,bbin] += 1
    return heatmap

In [36]:
start = time.time()
patch = get_next_patch(pixel_array)
hm = accumulate_pixels(patch)
end = time.time()
print(end - start)
print(hm)

(224, 224, 3)
0.26656675338745117
[[ 1275 11089     0     0     0     0     0     0     0     0]
 [ 3362 34446     1     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     1     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    0     0     0     0     0     0     0     0     0     0]
 [    1     1     0     0     0     0     0     0     0     0]]


In [37]:
start = time.time()
hm = accumulate_pixels(pixel_array)
end = time.time()
print(end - start)
print(hm)


(12513, 12776, 3)
940.5925676822662
[[53128265 15832024 10922967  6582095  3659251  1892986   935756   514109
    294985   939314]
 [ 5199273  6095700  7194744  8245878  7126409  5283068  3816284  2873102
   2266562  8699916]
 [    2649   196621   257839   446240   615857   544327   418785   332082
    304153  3397483]
 [     696     8332    94640    70620   148409   152162   116319    75870
     54559   517624]
 [     310      592    10471    19598    32738    44592    38627    32612
     23353   127749]
 [     181      161     1135     5011    11887    19486    16084    13322
      9304    44251]
 [     105       95      464     1697     6081     9302     9037     7493
      6150    20884]
 [      56       45      176      581     2787     5210     5490     4328
      3555    12468]
 [      24       33       91      232      794     4681     2640     2270
      2144     9544]
 [     336      180      217      278      476     3059     7187     5173
      3672    17634]]


In [None]:

BACKGROUND_MAX=np.array( [20,20,20] )
GREEN_MIN=np.array( [0,40,0] )
BLUE_MIN=np.array( [0,0,40] )
num_back_patches = 0
num_green_patches = 0
num_blue_patches = 0
num_patches = 0

patch = get_next_patch(pixel_array)
while patch is not None:
    num_patches += 1
    num_background_pixels = np.sum(np.all(patch<BACKGROUND_MAX,axis=2))
    if num_background_pixels>=PATCH_THRESHOLD:
        num_back_patches += 1
    else:
        num_green_pixels = np.sum(np.all(patch>=GREEN_MIN,axis=2))
        if num_green_pixels>=PATCH_THRESHOLD:
            num_green_patches += 1
        else:
            num_blue_pixels = np.sum(np.all(patch>=BLUE_MIN,axis=2))
            if num_blue_pixels>=PATCH_THRESHOLD:
                num_blue_patches += 1
                pic = Image.fromarray(patch)
                name = "BLUE{}.tif".format(num_blue_patches)
                pic.save(name)
    patch = get_next_patch(pixel_array)
print("back=",num_back_patches,"green=",num_green_patches,"blue=",num_blue_patches,"total=",num_patches)
print("back=",num_back_patches/num_patches,"green=",num_green_patches/num_patches,"blue=",num_blue_patches/num_patches)