# Make patches from center of HE images
Make training set: patches from Y-pos vs Y-neg images.  
Populate training subdirectories named Ypos or Yneg.  
(This notebook is a bug fix to HE_Filter.025)   
Of DF raw tiff images, there are 29 files, 458 MB each, 13 GB total.  
Of DF jpg center patches, there are 61K files, 262 MB total.  
Of HE raw tiff images, there are 29 files, 272 - 402 MB each, 8.5 GB total.  
Of HE jpg center patches, there are 39K files, 408 MB total.  

In [1]:
import time
import os
import glob
from PIL import Image
Image.MAX_IMAGE_PIXELS = None
import numpy as np
import json
import tensorflow as tf
from tensorflow import keras
import keras.layers as kl
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
DIR_IMAGES_RAW = "/home/jrm/Martinez/images/raw/"
DIR_IMAGES_TRAIN = "/home/jrm/Martinez/images/HEcenterpatch/train/"
DIR_IMAGES_VALID = "/home/jrm/Martinez/images/HEcenterpatch/valid/"
PATTERN_IMAGES_RAW = "*.HE1.*.tif"
PATCH_SIZE=224  # matches VGG
IMAGE_SIZE = (PATCH_SIZE,PATCH_SIZE)
# Classification based on visual inspection of DAPI+FLUORO images.
# These 3-character strings are unique filename prefixes.
DF_Ypos = ['B7.','B15','D1.','D5.','E7.','E9.','F9.','G3.','H13','I1.','I5.','I13']
DF_Yneg = ['A3.','A5.','B13','C1.','C11','D3.','E5.','F3.','F7.','F11','F13','F15','G15','H1.','H3.','H7.','H15']

In [3]:
def get_image_names(path,pattern):
    paths = glob.glob(path+pattern)
    names = [os.path.basename(x) for x in paths]
    return names
FILENAMES_IMAGES_RAW = get_image_names(DIR_IMAGES_RAW,PATTERN_IMAGES_RAW)

In [4]:
class patch_maker:
    def __init__(self):
        self.path=""
        self.w=0
        self.h=0
        self.im_width = 0
        self.im_height = 0
        self.patch_size = 10 # scalar, assumed square for now
        self.pixel_array = None
        self.output_dir = ""
        self.output_format='.jpg'
        self.center=(0.0,0.0)
        self.radial_threshold = 0.0
        self.BORDER = 200
    def set_input_path(self,path):
        self.path=path
    def set_output_path(self,path):
        self.output_dir = path
    def set_patch_size(self,scalar):
        self.patch_size = scalar
    def load_pixel_array(self,filename,verbose=False):
        self.w=0
        self.h=0
        im = Image.open(self.path+filename)
        ima = np.array(im)   # convert to numpy
        self.im_width = ima.shape[1]
        self.im_height = ima.shape[0]
        self.center=(self.im_width/2,self.im_height/2)
        self.radial_threshold = min(self.im_width,self.im_height)/2-self.BORDER
        if verbose:
            print(filename, ima.size, ima.shape)
        self.pixel_array = ima
        return ima
    def radius(self,x,y):
        return np.sqrt((x-self.center[0])**2+(y-self.center[1])**2)
    def get_next_patch(self): 
        IM_WIDTH = self.im_width
        IM_HEIGHT = self.im_height
        PIXEL_ARRAY = self.pixel_array
        patch = None
        if self.w+self.patch_size > IM_WIDTH:
            self.h += self.patch_size
            self.w = 0
        if self.w+self.patch_size <= IM_WIDTH and self.h+self.patch_size <= IM_HEIGHT:
            patch = PIXEL_ARRAY[self.h:self.h+self.patch_size, self.w:self.w+self.patch_size]
            self.w += self.patch_size
        return patch
    def get_next_center_patch(self):
        patch = self.get_next_patch()
        while (patch is not None):
            radius1 = self.radius(self.w,self.h)
            radius2 = self.radius(self.w,self.h+self.patch_size)
            radius3 = self.radius(self.w+self.patch_size,self.h)
            radius4 = self.radius(self.w+self.patch_size,self.h+self.patch_size)
            max_radius=max(radius1,radius2,radius3,radius4)
            if max_radius < self.radial_threshold:
                #print("Good",self.w,self.h)
                return patch
            #print("Bad",self.w,self.h)
            patch = self.get_next_patch()
        return None
    def save_patch (self, prefix, pnum, patch):
        ext = self.output_format
        path= self.output_dir+prefix+'.'+str(pnum)+ext
        im =  Image.fromarray(patch)
        im.save(path)

In [5]:
pm = patch_maker()
pm.set_input_path(DIR_IMAGES_RAW)
pm.set_patch_size(PATCH_SIZE)
pm.set_output_path(DIR_IMAGES_TRAIN)  # later, move 20% of files to VALID
success = True
for filename in FILENAMES_IMAGES_RAW:
    print(filename,end=" ")  # so user sees we're working on this file
    prefix = filename[0:3]
    if prefix in DF_Yneg:
        phenotype = 'Yneg'
    elif prefix in DF_Ypos:
        phenotype = 'Ypos'
    else:
        print ("ERROR: Unrecognizable filename!")
        success = False
        break
    if prefix[2] == '.':  # distinguish prefixes like F1. from F13
        prefix=prefix[0:2]+"_"  
    pixels = pm.load_pixel_array(filename)
    print(prefix,phenotype,pixels.shape,end=" ")
    patch = pm.get_next_center_patch()
    patch_count = 0
    while patch is not None:
        patch_count += 1
        pm.save_patch(phenotype+'/'+prefix,patch_count,patch)
        patch = pm.get_next_center_patch()
    print(patch_count)
if success:
    print("Done")

I1.HE1.09.tif I1_ Ypos (10439, 9446, 3) 1202
C1.HE1.01.tif C1_ Yneg (10641, 9851, 3) 1313
B7.HE1.60.tif B7_ Ypos (10236, 10034, 3) 1363
D5.HE1.43.tif D5_ Ypos (10256, 10054, 3) 1370
H15.HE1.132.tif H15 Yneg (10641, 9851, 3) 1313
G3.HE1.25.tif G3_ Ypos (10662, 9223, 3) 1143
F15.HE1.135.tif F15 Yneg (10236, 10257, 3) 1421
E9.HE1.75.tif E9_ Ypos (10844, 10236, 3) 1425
H13.HE1.114.tif H13 Ypos (11047, 9831, 3) 1309
F9.HE1.80.tif F9_ Ypos (10844, 10439, 3) 1489
I5.HE1.48.tif I5_ Ypos (10439, 10459, 3) 1491
B15.HE1.134.tif B15 Ypos (10236, 10054, 3) 1371
D1.HE1.02.tif D1_ Ypos (10459, 10257, 3) 1434
B13.HE1.115.tif B13 Yneg (10641, 10439, 3) 1488
D3.HE1.20.tif D3_ Yneg (10256, 10034, 3) 1366
F3.HE1.22.tif F3_ Yneg (10641, 10034, 3) 1367
I13.HE1.117.tif I13 Ypos (10439, 10236, 3) 1431
G15.HE1.133.tif G15 Yneg (10662, 9649, 3) 1256
A5.HE1.45.tif A5_ Yneg (10256, 10054, 3) 1370
H7.HE1.62.tif H7_ Yneg (10641, 10034, 3) 1367
E7.HE1.64.tif E7_ Ypos (9831, 9426, 3) 1195
H3.HE1.27.tif H3_ Yneg (1084

In [6]:
# Move a portion of the train files into the valid directory.
if success:
    from random import sample
    VALIDATION_PORTION = 0.20
    for phenotype in ('Ypos','Yneg'):
        fromdir = DIR_IMAGES_TRAIN+"/"+phenotype+"/"
        todir   = DIR_IMAGES_VALID+"/"+phenotype+"/"
        files = os.listdir(fromdir)
        total = len(files)
        selected = sample(files, int(VALIDATION_PORTION*total))
        for vfile in selected:
            os.rename(fromdir+vfile,todir+vfile)
    print("Done")

Done
