# Make patches from center of DF images
Make training set: patches from Y-pos vs Y-neg images.  
Populate training subdirectories named Ypos or Yneg.  
(This notebook is a modification of HE_centerpatch.025)  

In [1]:
import time
import os
import glob
from PIL import Image
Image.MAX_IMAGE_PIXELS = None
import numpy as np
import json
import tensorflow as tf
from tensorflow import keras
import keras.layers as kl
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Where the raw images live and where the generated patches are written.
DIR_IMAGES_RAW = "/home/jrm/Martinez/images/raw/"
DIR_IMAGES_TRAIN = "/home/jrm/Martinez/images/DFcenterpatch/train/"
DIR_IMAGES_VALID = "/home/jrm/Martinez/images/DFcenterpatch/valid/"
# Glob pattern selecting which raw image files are processed.
PATTERN_IMAGES_RAW = "*.DF1.*.tif"

# Patches are square, PATCH_SIZE pixels on each side.
PATCH_SIZE = 224  # matches VGG
IMAGE_SIZE = (PATCH_SIZE,) * 2

# Classification based on visual inspection of DAPI+FLUORO images.
# Every entry is a 3-character string that is a unique filename prefix.
DF_Ypos = [
    'B7.', 'B15', 'D1.', 'D5.', 'E7.', 'E9.',
    'F9.', 'G3.', 'H13', 'I1.', 'I5.', 'I13',
]
DF_Yneg = [
    'A3.', 'A5.', 'B13', 'C1.', 'C11', 'D3.',
    'E5.', 'F3.', 'F7.', 'F11', 'F13', 'F15',
    'G15', 'H1.', 'H3.', 'H7.', 'H15',
]

In [3]:
def get_image_names(path,pattern):
    """Return the sorted base names of all files matching ``path+pattern``.

    Args:
        path: Directory prefix. It is concatenated directly with ``pattern``
            (no separator is inserted), so it must end with a path separator.
        pattern: Glob pattern for the file names, e.g. ``"*.tif"``.

    Returns:
        A list of file base names (directory part stripped). The list is
        sorted because glob.glob returns matches in arbitrary order, and a
        stable order keeps repeated runs of the notebook reproducible.
    """
    paths = glob.glob(path+pattern)
    return sorted(os.path.basename(x) for x in paths)
# Base names of the files under DIR_IMAGES_RAW that match PATTERN_IMAGES_RAW.
FILENAMES_IMAGES_RAW = get_image_names(DIR_IMAGES_RAW,PATTERN_IMAGES_RAW)

In [4]:
class patch_maker:
    """Cut a loaded image into non-overlapping square patches.

    Patches are produced left to right, top to bottom on a grid whose cell
    size is ``patch_size``. ``get_next_patch`` yields every grid cell that
    fits inside the image; ``get_next_center_patch`` yields only those cells
    whose four corners all lie closer than ``radial_threshold`` to the image
    center.

    Typical use: call set_input_path / set_output_path / set_patch_size,
    then load_pixel_array(filename), then call get_next_center_patch()
    until it returns None, passing each patch to save_patch().
    """
    def __init__(self):
        self.path = ""               # string prepended to the filename when loading
        self.w = 0                   # x offset (column) of the next grid cell
        self.h = 0                   # y offset (row) of the next grid cell
        self.im_width = 0            # width of the loaded image in pixels
        self.im_height = 0           # height of the loaded image in pixels
        self.patch_size = 10         # scalar, assumed square for now
        self.pixel_array = None      # numpy array holding the loaded image
        self.output_dir = ""         # string prepended to saved patch names
        self.output_format = '.jpg'  # extension appended to saved patch names
        self.center = (0.0, 0.0)     # (x, y) of the image center
        self.radial_threshold = 0.0  # patch corners must be closer than this to center
        self.BORDER = 200            # margin subtracted from half the smaller image side

    def set_input_path(self,path):
        """Set the string that is prepended to filenames in load_pixel_array."""
        self.path = path

    def set_output_path(self,path):
        """Set the string that is prepended to patch names in save_patch."""
        self.output_dir = path

    def set_patch_size(self,scalar):
        """Set the side length, in pixels, of the square patches."""
        self.patch_size = scalar

    def load_pixel_array(self,filename,verbose=False):
        """Load an image as a numpy array and reset the patch cursor.

        Args:
            filename: Name appended to the input path to locate the image.
            verbose: When True, print the filename, array size and shape.

        Returns:
            The numpy array of the image (also stored in ``pixel_array``).
        """
        self.w = 0
        self.h = 0
        # The context manager closes the file handle once the pixel data has
        # been copied into the numpy array.
        with Image.open(self.path+filename) as im:
            ima = np.array(im)   # convert to numpy
        self.im_width = ima.shape[1]
        self.im_height = ima.shape[0]
        self.center = (self.im_width/2, self.im_height/2)
        # Radius of the accepted circle: half the smaller side, less BORDER.
        self.radial_threshold = min(self.im_width, self.im_height)/2 - self.BORDER
        if verbose:
            print(filename, ima.size, ima.shape)
        self.pixel_array = ima
        return ima

    def radius(self,x,y):
        """Return the Euclidean distance from point (x, y) to the image center."""
        return np.sqrt((x-self.center[0])**2 + (y-self.center[1])**2)

    def _next_patch_and_origin(self):
        """Advance the grid cursor; return (patch, x0, y0) or (None, None, None)."""
        size = self.patch_size
        # Wrap to the start of the next row when the current row is exhausted.
        if self.w + size > self.im_width:
            self.h += size
            self.w = 0
        if self.w + size <= self.im_width and self.h + size <= self.im_height:
            x0, y0 = self.w, self.h
            patch = self.pixel_array[y0:y0+size, x0:x0+size]
            self.w += size
            return patch, x0, y0
        return None, None, None

    def get_next_patch(self):
        """Return the next grid patch, or None when the image is exhausted."""
        patch, _, _ = self._next_patch_and_origin()
        return patch

    def get_next_center_patch(self):
        """Return the next grid patch that lies inside the central circle.

        A patch qualifies when all four of its corners are strictly closer
        than ``radial_threshold`` to the image center. Patches that do not
        qualify are skipped. Returns None when the image is exhausted.
        """
        size = self.patch_size
        patch, x0, y0 = self._next_patch_and_origin()
        while patch is not None:
            # Test the corners of the patch being returned. The cursor
            # (self.w, self.h) has already moved on to the following cell,
            # so it must not be used for this check.
            corners = ((x0, y0), (x0, y0+size), (x0+size, y0), (x0+size, y0+size))
            max_radius = max(self.radius(x, y) for x, y in corners)
            if max_radius < self.radial_threshold:
                return patch
            patch, x0, y0 = self._next_patch_and_origin()
        return None

    def save_patch (self, prefix, pnum, patch):
        """Save a patch to ``output_dir + prefix + '.' + str(pnum) + output_format``.

        Args:
            prefix: Name prefix for the file; may contain a subdirectory.
            pnum: Patch number, used as the second component of the file name.
            patch: Pixel array of the patch, converted with Image.fromarray.
        """
        ext = self.output_format
        path = self.output_dir+prefix+'.'+str(pnum)+ext
        im = Image.fromarray(patch)
        im.save(path)

In [5]:
# One patch maker is configured once and reused for every raw image.
pm = patch_maker()
pm.set_input_path(DIR_IMAGES_RAW)
pm.set_output_path(DIR_IMAGES_TRAIN)  # later, move 20% of files to VALID
pm.set_patch_size(PATCH_SIZE)

success = True
for filename in FILENAMES_IMAGES_RAW:
    print(filename,end=" ")  # so user sees we're working on this file
    prefix = filename[:3]
    # The 3-character prefix decides the label; an unknown prefix aborts the run.
    is_negative = prefix in DF_Yneg
    if not is_negative and prefix not in DF_Ypos:
        print ("ERROR: Unrecognizable filename!")
        success = False
        break
    phenotype = 'Yneg' if is_negative else 'Ypos'
    # Swap a trailing dot for an underscore to distinguish
    # prefixes like F1. from F13 in the output file names.
    if prefix[2] == '.':
        prefix = prefix[:2]+"_"
    pixels = pm.load_pixel_array(filename)
    print(prefix,phenotype,pixels.shape,end=" ")
    # Save every central patch, numbering the files from 1.
    patch_count = 0
    while True:
        patch = pm.get_next_center_patch()
        if patch is None:
            break
        patch_count += 1
        pm.save_patch(phenotype+'/'+prefix,patch_count,patch)
    print(patch_count)
if success:
    print("Done")

F15.DF1.135.tif F15 Yneg (12513, 12776, 3) 2187
D5.DF1.43.tif D5_ Ypos (12513, 12776, 3) 2187
H3.DF1.27.tif H3_ Yneg (12513, 12776, 3) 2187
C11.DF1.96.tif C11 Yneg (12513, 12776, 3) 2187
H13.DF1.114.tif H13 Ypos (12513, 12776, 3) 2187
I1.DF1.09.tif I1_ Ypos (12513, 12776, 3) 2187
G3.DF1.25.tif G3_ Ypos (12513, 12776, 3) 2187
C1.DF1.01.tif C1_ Yneg (12513, 12776, 3) 2187
F11.DF1.94.tif F11 Yneg (12513, 12776, 3) 2187
F9.DF1.80.tif F9_ Ypos (12513, 12776, 3) 2187
H7.DF1.62.tif H7_ Yneg (12513, 12776, 3) 2187
A3.DF1.26.tif A3_ Yneg (12513, 12776, 3) 2187
A5.DF1.45.tif A5_ Yneg (12513, 12776, 3) 2187
H15.DF1.132.tif H15 Yneg (12513, 12776, 3) 2187
D1.DF1.02.tif D1_ Ypos (12513, 12776, 3) 2187
G15.DF1.133.tif G15 Yneg (12513, 12776, 3) 2187
B15.DF1.134.tif B15 Ypos (12513, 12776, 3) 2187
I5.DF1.48.tif I5_ Ypos (12513, 12776, 3) 2187
E9.DF1.75.tif E9_ Ypos (12513, 12776, 3) 2187
I13.DF1.117.tif I13 Ypos (12513, 12776, 3) 2187
F3.DF1.22.tif F3_ Yneg (12513, 12776, 3) 2187
B7.DF1.60.tif B7_ Yp

In [6]:
# Move a portion of the train files into the valid directory.
# NOTE: this cell is not idempotent -- every run moves another
# VALIDATION_PORTION of whatever is still in the train directories.
if success:
    from random import Random
    VALIDATION_PORTION = 0.20
    VALIDATION_SEED = 42  # fixed seed so the same files are chosen on every fresh run
    rng = Random(VALIDATION_SEED)
    for phenotype in ('Ypos','Yneg'):
        fromdir = os.path.join(DIR_IMAGES_TRAIN, phenotype)
        todir = os.path.join(DIR_IMAGES_VALID, phenotype)
        # os.rename fails if the destination directory does not exist.
        os.makedirs(todir, exist_ok=True)
        # os.listdir returns entries in arbitrary order; sort so that the
        # seeded sample picks the same files regardless of filesystem order.
        files = sorted(os.listdir(fromdir))
        total = len(files)
        selected = rng.sample(files, int(VALIDATION_PORTION*total))
        for vfile in selected:
            os.rename(os.path.join(fromdir, vfile), os.path.join(todir, vfile))
    print("Done")

Done
