In [None]:
# import packages
import numpy as np
import time
#import pandas as pd
import os
import os.path
import cv2
from PIL import Image
from keras.preprocessing import image

In [None]:
# look up the working directory of the running kernel process
# (note: this is the process's working directory, not necessarily the folder of this file)
current_directory = os.getcwd()

# make that directory the working directory; relative paths resolve against it
os.chdir(current_directory)

# show which directory is in use
print(current_directory)

In [None]:
# --- input / output locations ---
# folder with the source pictures (one sub-folder per label)
imgloc = '00 Data/pictures/'
# folder that receives the generated images
saveimgloc = '00 Data/generated/'

# --- preprocessing switches ---
# True -> images are converted to grayscale
set_img_grayscale = True
# True -> images are thresholded to black/white;
# binarize_min is the threshold and binarize_max the value given to pixels above it
binarize = False
binarize_min = 135
binarize_max = 255

# --- amount of generated data ---
# number of batches drawn from the data generator for each label
loops = 250

In [None]:
# define a function that reads images from a location and uses the image names as labels
def read_images(imgloc = ''):
    """Read every decodable image in a folder and derive a label from each file name.

    Parameters
    ----------
    imgloc : str
        Folder to read from. An empty string means the current working directory.

    Returns
    -------
    images : numpy.ndarray (dtype=object)
        The images as RGB arrays, in directory-listing order.
    labels : numpy.ndarray
        One label per returned image: the text between the last '_' and the
        last ' ' of the file name (e.g. 'label_a 12.png' -> 'a').

    Entries that OpenCV cannot decode (sub-folders, non-image files) are
    reported and skipped, so images and labels always stay aligned.
    """
    images = []
    labels = []

    # list the folder once so that images and labels come from the same listing
    for name in os.listdir(imgloc or '.'):
        path = os.path.join(imgloc, name)

        # cv2.imread returns None instead of raising when a file cannot be decoded
        bgr = cv2.imread(path)
        if bgr is None:
            print('skipping unreadable file: ' + path)
            continue

        # cv2 assumes colors are BGR, so we convert to RGB
        images.append(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
        labels.append(str(name.rpartition(' ')[0].rpartition('_')[2]))

    # the `np.object` alias was removed in NumPy 1.24; builtin `object` is the same dtype
    return np.array(images, dtype = object), np.array(labels)

In [None]:
def resize_images(img, basewidth = 64, set_grayscale = False, binarize = False, binarize_min = 135, binarize_max = 255):
    """Resize one image to a fixed width, optionally converting to grayscale and binarizing.

    Parameters
    ----------
    img : numpy.ndarray
        Image as a numpy array; it is cast to uint8 before processing.
    basewidth : int
        Target width in pixels. The height is scaled by the same factor.
    set_grayscale : bool
        Convert to single-channel grayscale; the result then has a trailing
        channel axis of size 1.
    binarize : bool
        Apply a binary threshold after resizing.
    binarize_min, binarize_max : int
        Threshold and the value assigned to pixels above it (passed to cv2.threshold).

    Returns
    -------
    numpy.ndarray
        The processed image.
    """
    # PIL can only build an image from integer pixel data
    pil_img = Image.fromarray(img.astype('uint8'))

    # scale the height by the same factor as the width; never let it truncate to 0 rows
    width, height = pil_img.size
    hsize = max(1, int(float(height) * float(basewidth / float(width))))

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter under its current name
    pil_img = pil_img.resize((basewidth, hsize), Image.LANCZOS)

    # convert image to grayscale if parameter is True
    if set_grayscale:
        pil_img = pil_img.convert("L")

    # back to a numpy array for the remaining steps
    out = np.array(pil_img)

    # binarize image if parameter is True
    if binarize:
        _, out = cv2.threshold(out, binarize_min, binarize_max, cv2.THRESH_BINARY)

    # a grayscale image has no channel axis, so add one of size 1
    if set_grayscale:
        out = out.reshape((out.shape[0], out.shape[1], 1))

    return out

In [None]:
# build the augmentation generator: only random shifts and zoom are switched on
datagen = image.ImageDataGenerator(
    rescale = None,              # keep pixel values as they are (1/255 would normalize them)
    rotation_range = 0,          # no random rotation
    width_shift_range = 0.2,     # random horizontal shift range
    height_shift_range = 0.2,    # random vertical shift range
    shear_range = 0,             # no random shear
    zoom_range = 0.1,            # random zoom range
    horizontal_flip = False,     # never flip horizontally
    vertical_flip = False,       # never flip vertically
    fill_mode = "nearest",       # how pixels outside the input boundaries are filled
)

In [None]:
# collect the label folders: every sub-directory of imgloc counts as one label
# and is expected to carry the label's name (plain files are ignored)
dirs = list(filter(lambda d: os.path.isdir(os.path.join(imgloc, d)), os.listdir(imgloc)))

In [None]:
# print the current date and time
start = time.time()
print('start: ', str(time.ctime()), '\n')

# width (in pixels) every image is resized to; it never changes, so set it once
basewidth = 64

# loop through all folders (labels), import the images, resize them and generate new ones
for label in dirs:
    print(imgloc + label)

    # create the output folder for this label; exist_ok avoids a separate existence check
    os.makedirs(saveimgloc + label, exist_ok = True)

    # read images
    train_img, train_label = read_images(imgloc + label + '/')

    # an empty folder would lead to batch_size = 0 below, so skip it
    if len(train_img) == 0:
        print('no images found, skipping')
        continue

    # resize (and optionally grayscale / binarize) every image
    img_list = [resize_images(img, basewidth, set_img_grayscale, binarize, binarize_min, binarize_max)
                for img in train_img]

    # the generator needs one regular array, which requires identical image shapes;
    # heights differ when the source pictures do not share an aspect ratio
    shapes = {resized.shape for resized in img_list}
    if len(shapes) > 1:
        raise ValueError('images in ' + imgloc + label + ' have different shapes after resizing: '
                         + str(sorted(shapes)))

    # convert the list with images back to a numpy array
    train_img = np.array(img_list)

    # fit datagenerator parameters from data
    datagen.fit(train_img)

    # create an iterator over the data; each batch holds every image of the label
    # and the augmented copies are written to the label's output folder
    data_it = datagen.flow(train_img, train_label, save_to_dir = saveimgloc + label, batch_size = len(train_label)
                           , save_prefix = 'label_' + str(label) + ' ', save_format = 'png')

    # drawing a batch triggers the save; the returned arrays themselves are not needed
    for _ in range(loops):
        next(data_it)


# print the current date and time
print('\n', 'finish: ', str(time.ctime()), '\n')
print("this took --- %s seconds ---" % round(time.time() - start, 2))