In [1]:
from PIL import Image
from glob import glob
from tqdm import tqdm
import numpy as np
import os
import pickle

In [2]:
# load jpgs
def process_images(inpath, outpath, dim_tuple, extension, start=1):
    #open images
    for file in tqdm(glob(f'{inpath}*.{extension}')):
        with Image.open(file) as img:
            #rotate image
            rotated_images = rotate_images(img)
            
            # resize images
            resized = resizing(rotated_images, dim_tuple)
            
            #grayscale images
            gray_images = grayscale(resized)
            
            
            # save the images
            save_preprocessed_images(gray_images, outpath, extension, start)
            start += 3         
             
    return

In [3]:
# rotate images
def rotate_images(image):
    '''
    rotate_images(image):
    This function rotates an image on it's center 7 times (45, 90, 135, 180, 225, 270, and mirror image)
    Input:
        One image file
    Returns:
        A list of images containing the original image and the rotated one
    '''
    rotated_images = []

    rotate180 = image.rotate(180)
    
    chirl_image = image.transpose(Image.FLIP_LEFT_RIGHT)
    
    rotated_images.extend([image, rotate180, chirl_image])  
    return rotated_images

In [4]:
def resizing(images, dim_tuple):
    '''
    resizing(images, dim_tuple):
    This function resizes a list of images
    Input:
        List of images
        Tuple containing the desired hight and the width
    Returns:
        List of resized images        
    '''
    resized = [image.resize(dim_tuple) for image in images]
    return resized

In [5]:
def grayscale(images):
    '''
    grayscale(images):
    This transforms RGB images to grayscale images
    Input:
        List of RBG images
    Returns:
        List of grayscale images
    '''
    gray_images = [image.convert(mode='L') for image in images]
    return gray_images

In [6]:
# save images in a different path
def save_preprocessed_images(processed_images, outpath, extension, start):
    '''
    save_preprocessed_images(processed_images, outpath, extension, start)
    This function saves any type of image in a specific directory
    Input:
        processed_images: List of images
        outpath: Where you want the files to be saved
    Returns:
        Does not return a variable. Creates image files 
    '''
    [image.save(f'{outpath}\\image{i}.{extension}') for i, image in enumerate(processed_images, start)]
    return

In [7]:
def pickle_train_test(inpath, train_path, test_path, extension, n, label):
    '''
    pickle_train_test(inpath, train_path, test_path, extension, n, label):
    This function creates two datasets from the contents of a specific directory
    Input:
        inpath: The path your files are at
        train_path: The path you want to save your train set
        test_path: The path you want to save your test set
        n: Train set size
        label: Label (int)
            
    '''
    images = []
    #open images
    for file in tqdm(glob(f'{inpath}*.{extension}')):
        with Image.open(file) as img:
            np_image = np.asarray(img) / 255
            images.append([np_image, label])
            if len(images) == n:
                with open(train_path, 'wb') as file:
                    pickle.dump(images, file)
                del images
                images = []
            
    with open(test_path, 'wb') as file:
        pickle.dump(images, file)
    del images
    return 

# Process Damselflies Images

In [None]:
# create and save new images
inpath = r'E:\images2019\train_val2019\Damselflies\\'
extension = 'jpg'
outpath = r'E:\processed_damsel_images'
dim_tuple = (256, 256)

# process_images(inpath, outpath, dim_tuple, extension)

# Process Dragonflies Images

In [None]:
inpath = r'E:\images2019\train_val2019\Dragonflies\\'
extension = 'jpg'
outpath = r'E:\processed_dragon_images'
dim_tuple = (256, 256)

# process_images(inpath, outpath, dim_tuple, extension)

# Train/Test Dragonfly Images

In [10]:
inpath = r'E:\processed_dragon_images\\'
test_path = r'E:\processed_dragon_images\test\dragon_test.pkl'
train_path = r'E:\processed_dragon_images\train\dragon_train.pkl'
extension = 'jpg'

files = os.listdir(inpath) # dir is your directory path
n_files = len(files)
n_train = int(n_files * 0.8)
print(f'{n_files}, {n_train}')

# pickle_train_test(inpath, train_path, test_path, extension, n_train, 0)

27740, 22192


100%|████████████████████████████████████████████████████████████████████████████| 27738/27738 [04:42<00:00, 98.07it/s]


# Train/Test Damselfly Images

In [9]:
inpath = r'E:\processed_damsel_images\\'
test_path = r'E:\processed_damsel_images\test\damsel_test.pkl'
train_path = r'E:\processed_damsel_images\train\damsel_train.pkl'
extension = 'jpg'

files = os.listdir(inpath) # dir is your directory path
n_files = len(files)
n_train = int(n_files * 0.8)
print(f'{n_files}, {n_train}')

# pickle_train_test(inpath, train_path, test_path, extension, n_train, 1)

25388, 20310


100%|███████████████████████████████████████████████████████████████████████████| 25386/25386 [04:03<00:00, 104.22it/s]


In [11]:
from sklearn.model_selection import train_test_split
import pickle
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

In [13]:
with open(train_path, "rb") as train_file:
    damsel_train = pickle.load(train_file)

In [None]:
damsel = [np.array(cv2.imread(file)) for file in tqdm(glob(r'E:\processed_damsel_images\*.jpg'))]
print('Got damselflies')

In [None]:
dragon = [np.array(cv2.imread(file)) for file in tqdm(glob(r'E:\processed_dragon_images\*.jpg'))]
print('Got dragonflies')

In [None]:
drag_and_dam = np.concatenate ([damsel, np.ones(len(damsel))], axis=0)

In [None]:
# add labels - 1 for damselfly 0 for dragonfly
damsel = np.array(damsel)
damsel_labels =  np.ones(len(damsel))

dragon = np.array(dragon)
dragon_labels =  np.ones(len(dragon))

# concatanate the labels with the array
labeled_damsels = np.concatenate ([damsel, damsel_labels], axis=0)
labeled_dragons = np.concatenate ([dragon, dragon_labels], axis=0)

In [None]:
labeled_damsels[0]