In [1]:
from PIL import Image
from glob import glob
from tqdm import tqdm
import numpy as np
import os
import pickle
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

In [10]:
# load jpgs
def process_images(inpath, outpath, dim_tuple, extension='jpg', start=1):
    '''
    process_images(inpath, outpath, dim_tuple, extension, start=1):
    This function creates new images, reshapes, grayscale, and save the images in a desired location
    Input:
        inpath: Path to the image files
        outpath: Path to where the images should be saved
        dim_tuple: Desired image size
        extension: The image file type. default='jpg'
        start: Part saved image name. default=1
    Returns:
        Returns a folder containing double amount of images
    '''
    #open images
    for file in tqdm(glob(f'{inpath}*.{extension}')):
        with Image.open(file) as img:
            #rotate image
            rotated_images = rotate_images(img)
            
            # resize images
            resized = resizing(rotated_images, dim_tuple)
            
            #grayscale images
            gray_images = grayscale(resized)
            
            
            # save the images
            save_preprocessed_images(gray_images, outpath, extension, start)
            start += 2                      
    return

In [8]:
# rotate images
def rotate_images(image):
    '''
    rotate_images(image):
    This function rotates an image on it's center 7 times (45, 90, 135, 180, 225, 270, and mirror image)
    Input:
        image: One image file
    Returns:
        A list of images containing the original image and the rotated one
    '''
    rotated_images = []
    
    chirl_image = image.transpose(Image.FLIP_LEFT_RIGHT)
    
    rotated_images.extend([image, chirl_image])  
    return rotated_images

In [4]:
def resizing(images, dim_tuple):
    '''
    resizing(images, dim_tuple):
    This function resizes a list of images
    Input:
        images: List of images
        dim_tuple: Tuple containing the desired hight and the width
    Returns:
        List of resized images        
    '''
    resized = [image.resize(dim_tuple) for image in images]
    return resized

In [5]:
def grayscale(images):
    '''
    grayscale(images):
    This transforms RGB images to grayscale images
    Input:
        images:List of RBG images
    Returns:
        List of grayscale images
    '''
    gray_images = [image.convert(mode='L') for image in images]
    return gray_images

In [6]:
# save images in a different path
def save_preprocessed_images(processed_images, outpath, extension='jpg', start):
    '''
    save_preprocessed_images(processed_images, outpath, extension, start)
    This function saves any type of image in a specific directory
    Input:
        processed_images: List of images
        outpath: Where you want the files to be saved
    Returns:
        Does not return a variable. Creates image files 
    '''
    [image.save(f'{outpath}\\image{i}.{extension}') for i, image in enumerate(processed_images, start)]
    return

In [7]:
def save_train_test(inpath, outpath, extension='jpg', n, j):
    '''
    save_train_test(inpath, outpath, extension, n, label):
    This function converts an image file to a numpy array, normalizes the the pixel values and saves
    Input:
        outpath: The path where you want to save the file
        n: int. slicing the matrix
        j: int. slicing the matrix
            
    '''
    images = []
    for file in tqdm(glob(f'{inpath}*.{extension}')[n:j]):
        with Image.open(file) as img:
            np_image = np.array(img) / 255
            np_image = np_image.expand_dims(np_image, axis=0)
            images.append(np_image)
    
    images = np.asarray(images)   
    np.save(outpath, images)
    del images
    return 

# Process Damselflies Images

In [11]:
# create new, process, and save images damselfly images
inpath = r'E:\images2019\train_val2019\Damselflies\\'
outpath = r'E:\classification_damsel_images'
dim_tuple = (256, 256)

process_images(inpath, outpath, dim_tuple)

100%|██████████████████████████████████████████████████████████████████████████████| 8462/8462 [09:00<00:00, 15.67it/s]


# Process Dragonflies Images

In [13]:
# create new, process, and save images dragonfly images
inpath = r'E:\images2019\train_val2019\Dragonflies\\'
outpath = r'E:\classification_dragon_images'
dim_tuple = (256, 256)

process_images(inpath, outpath, dim_tuple, extension)

100%|██████████████████████████████████████████████████████████████████████████████| 9204/9204 [08:32<00:00, 17.96it/s]


# Train/Test Dragonfly Images

In [None]:
# create dragonfly train/test sets
inpath = r'E:\classification_dragon_images\\'
dragon_test_path = r'E:\classification_dragon_images\test\dragon_test.npy'
dragon_train_path = r'E:\classification_dragon_images\train\dragon_train.npy'

# define test set size (25%~)
files = os.listdir(inpath) 
n_files = len(files)
n_train = int(n_files * 0.75)

In [14]:
# create dragonfly training set
save_train_test(inpath, dragon_train_path, extension, 0, n_train)

100%|██████████████████████████████████████████████████████████████████████████| 13807/13807 [00:13<00:00, 1040.84it/s]


In [15]:
# create dragonfly testing set
save_train_test(inpath, dragon_test_path, extension, n_train, -1)

100%|█████████████████████████████████████████████████████████████████████████████| 4600/4600 [00:09<00:00, 484.16it/s]


# Train/Test Damselfly Images

In [None]:
# create damselfly train/test sets
inpath = r'E:\classification_damsel_images\\'
damsel_test_path = r'E:\classification_damsel_images\test\damsel_test.npy'
damsel_train_path = r'E:\classification_damsel_images\train\damsel_train.npy'

# define test set size (25%~)
files = os.listdir(inpath)
n_files = len(files)
n_train = int(n_files * 0.75)

In [16]:
# create dragonfly training set
save_train_test(inpath, damsel_train_path, extension, 0, n_train)

100%|███████████████████████████████████████████████████████████████████████████| 12694/12694 [00:40<00:00, 316.83it/s]


In [17]:
# create damselfly testing set
save_train_test(inpath, damsel_test_path, extension, n_train, -1)

100%|█████████████████████████████████████████████████████████████████████████████| 4229/4229 [00:15<00:00, 281.17it/s]
