# Create patch files

Problem: first round of prediction said blue or green for many black files.  
Explanation: we were planning to filter black patches by color, 
but apparently we're no good at it. The CNN might do better.  
Solution: create a three-way classifier.  

Like DF_Filter.011, create subdirectories of train and valid for each class.  
Like DF_Filter.018, use slightly condensed code.  

Create exemplar patch files of blue/green/black.   
These are the training and validation sets.  
Also create mixed patch files for all other patches.  
These will be clasified by the trained model.  

In [1]:
import time
import os
import glob
from PIL import Image
Image.MAX_IMAGE_PIXELS = None
import numpy as np
import json
import tensorflow as tf
from tensorflow import keras
import keras.layers as kl
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
DIR_IMAGES_RAW = "/home/jrm/Martinez/images/raw/"
DIR_IMAGES_TRAIN = "/home/jrm/Martinez/images/training/train/"
DIR_IMAGES_VALID = "/home/jrm/Martinez/images/training/valid/"
DIR_IMAGES_UNKNOWN = "/home/jrm/Martinez/images/patches/"
PATTERN_IMAGES_RAW = "*.DF1.*.tif"
PATCH_SIZE=224  # matches VGG
IMAGE_SIZE = (PATCH_SIZE,PATCH_SIZE)
DF_Ypos = ['B7.','B15','D1.','D5.','E7.','E9.','F9.','G3.','H13','I1.','I5.','I13']
DF_Yneg = ['A3.','A5.','B13','C1.','C11','D3.','E5.','F3.','F7','F11','F13','F15','G15','H1.','H3.','H7.','H15']

In [3]:
def get_image_names(path,pattern):
    paths = glob.glob(path+pattern)
    names = [os.path.basename(x) for x in paths]
    return names
FILENAMES_IMAGES_RAW = get_image_names(DIR_IMAGES_RAW,PATTERN_IMAGES_RAW)

In [4]:
class patch_maker:
    def __init__(self):
        self.path=""
        self.w=0
        self.h=0
        self.im_width = 0
        self.im_height = 0
        self.patch_size = 10 # scalar, assumed square for now
        self.pixel_array = None
        self.output_dir = ""
        self.output_format='.jpg'
    def set_input_path(self,path):
        self.path=path
    def set_output_path(self,path):
        self.output_dir = path
    def set_patch_size(self,scalar):
        self.patch_size = scalar
    def load_pixel_array(self,filename,verbose=False):
        self.w=0
        self.h=0
        im = Image.open(self.path+filename)
        ima = np.array(im)   # convert to numpy
        self.im_width = ima.shape[0]
        self.im_height = ima.shape[1]    
        if verbose:
            print(filename, ima.size, ima.shape)
        self.pixel_array = ima
        return ima
    def get_next_patch(self): 
        IM_WIDTH = self.im_width
        IM_HEIGHT = self.im_height
        PIXEL_ARRAY = self.pixel_array
        patch = None
        if self.w+self.patch_size > IM_WIDTH:
            self.h += self.patch_size
            self.w = 0
        if self.w+self.patch_size <= IM_WIDTH and self.h+self.patch_size <= IM_HEIGHT:
            patch = PIXEL_ARRAY[self.w:self.w+self.patch_size, self.h:self.h+self.patch_size]
            self.w += self.patch_size
        return patch
    def save_patch (self, prefix, pnum, patch):
        ext = self.output_format
        path= self.output_dir+prefix+'.'+str(pnum)+ext
        im =  Image.fromarray(patch)
        im.save(path)

In [5]:
# These are not mutually exclusive categories
MIN_GREEN_INTENSITY=np.array( [0,35,0] )
MIN_BLUE_INTENSITY =np.array( [0,0,30] )
MAX_BLACK_INTENSITY=np.array( [10,10,10] )

pm = patch_maker()
pm.set_input_path(DIR_IMAGES_RAW)
pm.set_patch_size(PATCH_SIZE)
pm.set_output_path(DIR_IMAGES_TRAIN)  # later, move 20% of files to VALID
file_totals={'black':0,'blue':0,'green':0,'mixed':0}
for filename in FILENAMES_IMAGES_RAW:
    prefix = filename[0:3]
    file_counts={'black':0,'blue':0,'green':0,'mixed':0}
    if prefix in DF_Yneg:
        if prefix[2] == '.':  # we have prefixes like F1. and F13
            prefix=prefix[0:2]+'_'   # avoid the extra period
        print(filename,end=" ")
        pixels = pm.load_pixel_array(filename)
        patch = pm.get_next_patch()
        while patch is not None:
            num_green_pixels = np.sum(np.all(patch >= MIN_GREEN_INTENSITY,axis=2))
            num_blue_pixels  = np.sum(np.all(patch >= MIN_BLUE_INTENSITY,axis=2))
            num_black_pixels = np.sum(np.all(patch <= MAX_BLACK_INTENSITY,axis=2))
            if num_black_pixels>=30000 and num_blue_pixels<=1000 and num_green_pixels<1000:
                file_counts['black'] += 1
                pm.set_output_path(DIR_IMAGES_TRAIN)
                pm.save_patch('black/'+prefix,file_counts['black'],patch)
            elif num_blue_pixels>=10000 and num_blue_pixels>num_green_pixels*20:
                file_counts['blue'] += 1
                pm.set_output_path(DIR_IMAGES_TRAIN)
                pm.save_patch('blue/'+prefix,file_counts['blue'],patch)
            elif num_green_pixels>=15000 and num_green_pixels>num_blue_pixels*2:
                file_counts['green'] += 1
                pm.set_output_path(DIR_IMAGES_TRAIN)
                pm.save_patch('green/'+prefix,file_counts['green'],patch)
            else:
                file_counts['mixed'] += 1
                pm.set_output_path(DIR_IMAGES_UNKNOWN)
                pm.save_patch(prefix,file_counts['mixed'],patch)
            patch = pm.get_next_patch()
        print(file_counts)
        file_totals['black'] += file_counts['black']
        file_totals['blue'] += file_counts['blue']
        file_totals['green'] += file_counts['green']
        file_totals['mixed'] += file_counts['mixed']
print('Totals',file_totals)

F15.DF1.135.tif {'black': 1413, 'blue': 262, 'green': 34, 'mixed': 1426}
H3.DF1.27.tif {'black': 888, 'blue': 30, 'green': 110, 'mixed': 2107}
C11.DF1.96.tif {'black': 760, 'blue': 16, 'green': 292, 'mixed': 2067}
C1.DF1.01.tif {'black': 292, 'blue': 293, 'green': 69, 'mixed': 2481}
F11.DF1.94.tif {'black': 1401, 'blue': 375, 'green': 54, 'mixed': 1305}
H7.DF1.62.tif {'black': 1361, 'blue': 228, 'green': 178, 'mixed': 1368}
A3.DF1.26.tif {'black': 970, 'blue': 1482, 'green': 1, 'mixed': 682}
A5.DF1.45.tif {'black': 0, 'blue': 6, 'green': 454, 'mixed': 2675}
H15.DF1.132.tif {'black': 0, 'blue': 0, 'green': 66, 'mixed': 3069}
G15.DF1.133.tif {'black': 12, 'blue': 0, 'green': 239, 'mixed': 2884}
F3.DF1.22.tif {'black': 659, 'blue': 41, 'green': 430, 'mixed': 2005}
B13.DF1.115.tif {'black': 1256, 'blue': 407, 'green': 70, 'mixed': 1402}
D3.DF1.20.tif {'black': 469, 'blue': 4, 'green': 325, 'mixed': 2337}
E5.DF1.42.tif {'black': 956, 'blue': 266, 'green': 31, 'mixed': 1882}
H1.DF1.07.tif {'

In [6]:
# Move a portion of the train files into the valid directory.
from random import sample
VALIDATION_PORTION = 0.20
for color in ('blue','green','black'):
    fromdir = DIR_IMAGES_TRAIN+"/"+color+"/"
    todir   = DIR_IMAGES_VALID+"/"+color+"/"
    files = os.listdir(fromdir)
    total = len(files)
    selected = sample(files, int(VALIDATION_PORTION*total))
    for vfile in selected:
        os.rename(fromdir+vfile,todir+vfile)
print("Done")

Done
