# Train a CNN on the color-based training set.
Classify patches from all Y-neg tumors based on color.  
Choose patches that are clearly green or blue.  
Save patches to the cache directory.  

In [1]:
import time
import os
import glob
import cv2 # OpenCV-Python
from PIL import Image
Image.MAX_IMAGE_PIXELS = None
import numpy as np
import json
DIR_IMAGES_RAW = "/home/jrm/Martinez/images/raw/"
DIR_IMAGES_OUT = "/home/jrm/Martinez/images/cache/"
PATTERN_IMAGES_RAW = "*.DF1.*.tif"
PATCH_SIZE=224  # matches VGG

In [2]:
DF_Ypos = ['B7.','B15','D1.','D5.','E7.','E9.','F9.','G3.','H13','I1.','I5.','I13']
DF_Yneg = ['A3.','A5.','B13','C1.','C11','D3.','E5.','F3.','F7','F11','F13','F15','G15','H1.','H3.','H7.','H15']

In [3]:
def get_image_names(path,pattern):
    #RAW_IMAGE_NAMES = os.listdir(DIR_IMAGES_RAW)
    paths = glob.glob(path+pattern)
    names = [os.path.basename(x) for x in paths]
    return names
FILENAMES_IMAGES_RAW = get_image_names(DIR_IMAGES_RAW,PATTERN_IMAGES_RAW)
#FILENAMES_IMAGES_RAW

In [8]:

class patch_maker:
    def __init__(self):
        self.path=""
        self.w=0
        self.h=0
        self.im_width = 0
        self.im_height = 0
        self.patch_size = 10 # scalar, assumed square for now
        self.pixel_array = None
        self.VMOD = 10   # simple rule for now: 1 of 10 images used for validation
        self.vmod = 0
    def set_input_path(self,path):
        self.path=path
    def set_patch_size(self,scalar):
        self.patch_size = scalar
    def load_pixel_array(self,filename,verbose=False):
        self.w=0
        self.h=0
        im = Image.open(self.path+filename)
        ima = np.array(im)   # convert to numpy
        self.im_width = ima.shape[0]
        self.im_height = ima.shape[1]    
        if verbose:
            print(filename, ima.size, ima.shape)
        self.pixel_array = ima
        return ima
    def get_next_patch(self): 
        IM_WIDTH = self.im_width
        IM_HEIGHT = self.im_height
        PIXEL_ARRAY = self.pixel_array
        patch = None
        if self.w+self.patch_size > IM_WIDTH:
            self.h += self.patch_size
            self.w = 0
        if self.w+self.patch_size <= IM_WIDTH and self.h+self.patch_size <= IM_HEIGHT:
            patch = PIXEL_ARRAY[self.w:self.w+self.patch_size, self.h:self.h+self.patch_size]
            self.w += self.patch_size
        return patch
    def get_train_or_valid(self):
        if self.vmod >= self.VMOD:
            tname='valid'
            self.vmod = 0
        else:
            tname='train'
            self.vmod += 1
        return tname
    def save_patch (self, prefix, pnum, color, patch):
        tname = self.get_train_or_valid()
        ext = ".tif"
        path=DIR_IMAGES_OUT+tname+'/'+str(color)+'/'+prefix+'.'+str(pnum)+ext
        im = Image.fromarray(patch)
        im.save(path)


## Create directory of patch files for training
Creates over 6K files.  
Uses about 1GB of disk.  

In [10]:
NO_GREEN_INTENSITY=np.array( [0,35,0] )
NO_BLUE_INTENSITY=np.array( [0,0,30] )
pm = patch_maker()
pm.set_input_path(DIR_IMAGES_RAW)
pm.set_patch_size(PATCH_SIZE)
BLUE=1
GREEN=0
for filename in FILENAMES_IMAGES_RAW:
    prefix = filename[0:3]
    pnum = 0;
    if prefix in DF_Yneg:
        print(filename,end=", ")
        pixels = pm.load_pixel_array(filename)
        patch = pm.get_next_patch()
        while patch is not None:
            num_green_pixels = np.sum(np.all(patch >= NO_GREEN_INTENSITY,axis=2))
            num_blue_pixels = np.sum(np.all(patch >= NO_BLUE_INTENSITY,axis=2))
            if num_blue_pixels>=10000 and num_blue_pixels>num_green_pixels*20:
                pnum += 1
                pm.save_patch(prefix,pnum,BLUE,patch)
            elif num_green_pixels>=15000 and num_green_pixels>num_blue_pixels*2:
                pnum += 1
                pm.save_patch(prefix,pnum,GREEN,patch)
            patch = pm.get_next_patch()
print()

F15.DF1.135.tif, H3.DF1.27.tif, C11.DF1.96.tif, C1.DF1.01.tif, F11.DF1.94.tif, H7.DF1.62.tif, A3.DF1.26.tif, A5.DF1.45.tif, H15.DF1.132.tif, G15.DF1.133.tif, F3.DF1.22.tif, B13.DF1.115.tif, D3.DF1.20.tif, E5.DF1.42.tif, H1.DF1.07.tif, 
