# Introduction/Imports
This notebook was used to generate positive and negative samples to train the network. For positive samples, the notebook uses tkinter to cycle through images from a directory and give a human a chance to click in the locations where the mark is detectable. The program draws a rectangle on the image indicating the sample it could save for that location. With confirmation from the human user, the program saves the sample.

The program creates a directory for each image and stores all of the samples for one image in the same directory. If images with the same file name are processed, the program will overwrite existing results, so use caution! Negative samples are loaded directly from samples selected by the FFT method in earlier work.

Before getting started, customize the values defined in the cell below.

In [42]:
import os
# ---- User-configurable settings read by the cells below ----
# input_dir--the directory where training images are stored; the sampling
# GUI reads images from <input_dir>/<mark>/<image index>.jpg
input_dir = os.path.join('..', 'Images', 'OtolithImages')
# output_dir--the directory where the selected samples should be stored;
# samples are pickled to <output_dir>/<mark>/<image index>/<sample index>.p
output_dir = 'SampleDatabase'
# n_training_samples--the number of images in each class to pull training
# samples from (image indexes 0 .. n_training_samples - 1 are visited)
n_training_samples = 30
# mark_list--the list of marks to be considered; each entry is also used as
# a sub-directory name under input_dir and output_dir
mark_list = ['3,5H10', '1,6H', '6,2H', '4n,2n,2H']
# user_cutoff--the maximum number of samples to be drawn from a single image;
# the pruning cell uses it as the starting per-image cutoff
user_cutoff = 34
# sample_size--the shape of the samples to draw (height, width)
sample_size = [800, 240]
# n_none_samples--the number of none samples to draw per image
n_none_samples = 120


To generate the positive samples, run the imports and the mclass cell. Then define the mark class and image indexes you want to consider, and run the next cell. Click where you want a sample to be taken, then tell the program whether or not to save the rectangular sample it draws. Press enter to move to the next image.

In [10]:
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import tkinter as tk
from tkinter import messagebox
from PIL import ImageTk, Image
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
import sys
import imutils
import pickle
from OtolithAnalysis import feature_functions
from OtolithAnalysis import fft

In [25]:
class mclass:
    """Tk window for hand-picking positive samples from a single image.

    The image ``<input_dir>/<mark_type>/<im_ind>.jpg`` is shown scaled down
    by ``scale_ratio``.  Each left click proposes a sample centred on the
    click: the rotation (out of a fixed set of angles) with the highest FFT
    score is chosen, the corresponding rectangle is drawn on the canvas, and
    the user is asked whether to save it.  Saved samples are pickled to
    ``<output_dir>/<mark_type>/<im_ind>/<sample index>.p``.  Pressing Return
    closes the window.

    Relies on the notebook-level settings ``input_dir``, ``output_dir`` and
    ``sample_size``.

    Attributes set for the caller:
        break_loop: True when the user answered "No" to "Proceed to the
            next image?".
        sample_ind: number of samples saved so far for this image.
    """

    # Collection for an image stops once this many samples have been saved.
    max_samples = 35

    def __init__(self, window, mark_type, im_ind):
        """Build the window for image ``im_ind`` of class ``mark_type``.

        Args:
            window: the ``tk.Tk`` root that hosts the canvas.
            mark_type: name of the mark class (sub-directory name).
            im_ind: index of the image within the mark class.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        self.window = window
        self.window.geometry("1300x900")
        self.sample_ind = 0
        self.break_loop = False
        self.mark_type = mark_type
        self.im_ind = im_ind
        # Displayed image is 1/scale_ratio of the full-resolution image.
        self.scale_ratio = 5
        self.xz = np.linspace(0, sample_size[0] - 1, sample_size[0])
        self.se = np.array(sample_size)
        # Display size; assumes 6000x4000 source images.
        self.fsize = (int(6000/self.scale_ratio),
                      int(4000/self.scale_ratio))
        self.fname = os.path.join(input_dir, mark_type,
                                  str(im_ind) + '.jpg')
        self.out_dir = os.path.join(output_dir, mark_type, str(im_ind))
        self.wave_range = range(12, 21)
        # Rotation (degrees) chosen for the most recent click.
        self.angle_save = 0
        self.img = cv.imread(self.fname, cv.IMREAD_GRAYSCALE)
        if self.img is None:
            # cv.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Could not read image: " + self.fname)
        self._update_title()
        self.plot(mark_type)
        self.window.bind('<Return>', self.quit)
        self.window.bind("<Button-1>", self.make_sample)

    def _update_title(self):
        """Show the current image and sample indexes in the title bar."""
        self.window.winfo_toplevel().title(
            "Image index " + str(self.im_ind) +
            ', Sample index ' + str(self.sample_ind))

    def plot(self, mark_type):
        """Draw the scaled-down image on a canvas inside the window."""
        im = Image.open(self.fname)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        im.thumbnail(self.fsize, Image.LANCZOS)
        img = ImageTk.PhotoImage(im)
        canvas = tk.Canvas(self.window, width=self.fsize[0],
                           height=self.fsize[1], bg='black')
        canvas.pack()
        canvas.create_image(0, 0, image=img, anchor='nw')
        # Keep a reference so the PhotoImage is not garbage collected.
        self.imgpil = img
        self.canvas = canvas

    def outline_corners(self, x, y):
        """Return the canvas corners of the sample rectangle centred at (x, y).

        The rectangle has the displayed sample size (``sample_size`` divided
        by ``scale_ratio``) and is rotated by ``self.angle_save`` degrees.

        Returns:
            (xs, ys): two length-4 arrays with the corner coordinates, in
            drawing order.
        """
        half_h, half_w = self.se / self.scale_ratio / 2
        # angle_save is in degrees; np.sin/np.cos expect radians.
        theta = np.deg2rad(self.angle_save)
        s, c = np.sin(theta), np.cos(theta)
        v_sign = np.array([1, 1, -1, -1])
        u_sign = np.array([-1, 1, 1, -1])
        xs = x - v_sign * half_h * s + u_sign * half_w * c
        ys = y + v_sign * half_h * c + u_sign * half_w * s
        return xs, ys

    def _rotated_sample(self, coords, angle):
        """Cut the sample centred at canvas ``coords`` rotated by ``angle``."""
        diag, xl, xh, yl, yh = self.img_corners(coords)
        imtemp = imutils.rotate(self.img[yl:yh, xl:xh], angle)
        xlow, xhigh, ylow, yhigh = feature_functions.high_low(
            diag, diag,
            int(self.se[1]/2),
            int(self.se[0]/2),
            2 * np.array([diag, diag]))
        return imtemp[ylow:yhigh, xlow:xhigh]

    def make_sample(self, event):
        """Handle a left click: propose a sample and save it on confirmation."""
        if event.widget is not self.canvas:
            # Clicks outside the canvas carry window-relative coordinates
            # that do not map onto the image.
            return
        coords = np.array([event.x, event.y])
        _, xl, xh, yl, yh = self.img_corners(coords)
        height, width = self.img.shape[:2]
        if xl < 0 or yl < 0 or xh > width or yh > height:
            # Negative or overflowing slice bounds would yield a wrong or
            # empty crop, so reject clicks too close to the border.
            messagebox.showwarning(
                "Too close to edge",
                "The sample would extend past the image border.")
            return
        self.opt_angle(coords)
        xs, ys = self.outline_corners(event.x, event.y)
        for ind in range(4):
            nxt = (ind + 1) % 4
            self.canvas.create_line(xs[ind], ys[ind], xs[nxt], ys[nxt])
        if not messagebox.askyesno("Save?", "Save this sample?"):
            return
        sample = self._rotated_sample(coords, self.angle_save)
        os.makedirs(self.out_dir, exist_ok=True)
        with open(os.path.join(self.out_dir,
                               str(self.sample_ind) + '.p'),
                  'wb') as f:
            pickle.dump(sample, f)
        self.sample_ind += 1
        if self.sample_ind >= self.max_samples:
            # quit() destroys the window, so it must not be touched afterwards.
            self.quit()
            return
        self._update_title()

    def opt_angle(self, coords):
        """Store in ``self.angle_save`` the rotation with the best FFT score.

        Nine evenly spaced angles from -90 to 90 degrees are tried; the
        angle stays 0 when no candidate scores above zero.
        """
        best_score = 0
        best_angle = 0
        for angle in np.linspace(-90, 90, 9):
            score = fft.fft_score(self._rotated_sample(coords, angle),
                                  self.xz, self.se,
                                  wave_range=self.wave_range, std_var=False)
            if score > best_score:
                best_angle = angle
                best_score = score
        self.angle_save = best_angle

    def img_corners(self, coords):
        """Return the full-resolution square that contains any rotated sample.

        Args:
            coords: (x, y) click position in canvas pixels.

        Returns:
            (diag, xl, xh, yl, yh): half the sample diagonal, then the
            x and y bounds of the square centred on the click.
        """
        coords = self.scale_ratio * coords
        diag = int(np.sqrt(self.se[0]**2 + self.se[1]**2) / 2)
        xl = coords[0] - diag
        xh = coords[0] + diag
        yl = coords[1] - diag
        yh = coords[1] + diag
        return diag, xl, xh, yl, yh

    def quit(self, event=None):
        """Ask whether to continue, record the answer and close the window."""
        if not messagebox.askyesno("Next image?", "Proceed to the next image?"):
            self.break_loop = True
        self.window.quit()
        self.window.destroy()

In [26]:
# Show every training image of every mark class in its own window.
# Answering "No" to "Proceed to the next image?" sets break_loop; that must
# end the whole session, not just the current mark class, so the flag is
# propagated to the outer loop as well.
stop_requested = False
for mark_type in mark_list:
    for im_ind in range(n_training_samples):
        window = tk.Tk()
        start = mclass(window, mark_type, im_ind)
        start.window.mainloop()
        if start.break_loop:
            stop_requested = True
            break
    if stop_requested:
        break

In [45]:
# Renumber the sample files of every image so that their names run
# 0.p, 1.p, ... with no gaps, keeping the original numeric order.
for mark in mark_list:
    for im_ind in range(n_training_samples):
        sample_dir = os.path.join(output_dir, mark, str(im_ind))
        names = os.listdir(sample_dir)
        # The number in front of the first '.' is the old sample index.
        old_inds = np.array([int(name.split('.')[0]) for name in names],
                            dtype=int)
        ordered = np.array(names)[np.argsort(old_inds)]
        for new_ind, old_name in enumerate(ordered):
            os.rename(os.path.join(sample_dir, old_name),
                      os.path.join(sample_dir, str(new_ind) + '.p'))

In [65]:
def count_samples(mark):
    """Return the total number of files stored for ``mark``.

    Sums the directory sizes of
    ``<output_dir>/<mark>/<im_ind>`` for every training image index.
    """
    mark_dir = os.path.join(output_dir, mark)
    return sum(len(os.listdir(os.path.join(mark_dir, str(im_ind))))
               for im_ind in range(n_training_samples))


# Report the per-class totals.
for mark in mark_list:
    print(mark, count_samples(mark))

3,5H10 894
1,6H 847
6,2H 898
4n,2n,2H 827


In [53]:
# Trim each class down to at most 900 samples: repeatedly lower the
# per-image cutoff and, in every image directory holding more files than
# the cutoff, delete the sample whose index equals the cutoff.
for mark in mark_list:
    image_dirs = [os.path.join(output_dir, mark, str(im_ind))
                  for im_ind in range(n_training_samples)]
    cutoff = user_cutoff
    while count_samples(mark) > 900:
        doomed = str(cutoff) + '.p'
        for dirname in image_dirs:
            if len(os.listdir(dirname)) <= cutoff:
                continue
            os.remove(os.path.join(dirname, doomed))
        cutoff -= 1

# None samples
The following two code cells randomly select unmarked samples from the images in the "none" subdirectory of input_dir and store them under a "None" subdirectory of output_dir.

In [39]:
# Build, for every "none" image, a table of random sample placements.
# Column 0 is an x position, column 1 a y position and column 2 a rotation
# angle in degrees; column 3 is left at zero.
# NOTE(review): the exact column meaning is defined by
# feature_functions.extract_samples_3 -- confirm against that function.
nsamps = n_none_samples  # was the undefined name n_none_samps
n_img = n_training_samples
# Half-diagonal of a sample (plus one pixel): keeping centres at least this
# far from the border lets a sample fit at any rotation.
diag = int(np.sqrt((sample_size[1] / 2)**2 + (sample_size[0] / 2)**2)) + 1
s_tab = []
for im_ind in range(n_img):
    placements = np.zeros([nsamps, 4])
    placements[:, 2] = np.random.randint(-90, 90, size=nsamps, dtype=int)
    # Bounds assume 6000x4000 source images.
    placements[:, 0] = np.random.randint(diag, 6000-diag, size=nsamps, dtype=int)
    placements[:, 1] = np.random.randint(diag, 4000-diag, size=nsamps, dtype=int)
    s_tab.append(placements)

In [None]:
# Cut the randomly placed "none" samples out of every image and pickle them
# to <output_dir>/None/<image index>/<sample index>.p
none_root = os.path.join(output_dir, 'None')
os.makedirs(none_root, exist_ok=True)
for im_ind in range(n_training_samples):
    print(im_ind)
    # NOTE(review): input images are read from 'none' (lower case) while the
    # output directory is 'None'; confirm the input directory name on
    # case-sensitive file systems.
    img_path = os.path.join(input_dir, 'none', str(im_ind) + '.jpg')
    img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
    if img is None:
        # cv.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("Could not read image: " + img_path)
    im_out_dir = os.path.join(none_root, str(im_ind))
    os.makedirs(im_out_dir, exist_ok=True)
    samples = feature_functions.extract_samples_3(img, s_tab[im_ind],
                                                  np.array(sample_size))
    for s_ind, samp in enumerate(samples):
        fpath = os.path.join(im_out_dir, str(s_ind) + '.p')
        with open(fpath, 'wb') as f:
            pickle.dump(samp, f)