# Challenge 3

## Preprocessing

In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image, ImageOps
import os
from imutils import paths
%matplotlib inline

In [2]:
def processImage(im, desired_size, debug=False):
    """Letterbox an image onto a white square canvas and equalize it.

    The image is scaled, keeping its aspect ratio, so that its longest side
    is ``desired_size`` pixels, pasted in the centre of a white
    ``desired_size`` x ``desired_size`` RGB canvas, and finally passed
    through ``ImageOps.equalize``.

    Parameters
    ----------
    im : PIL.Image.Image
        Source image; it is not modified.
    desired_size : int
        Side length, in pixels, of the square output image.
    debug : bool, optional
        Accepted for backward compatibility; currently has no effect.

    Returns
    -------
    PIL.Image.Image
        A new RGB image of size ``(desired_size, desired_size)``.
    """
    # get old size
    old_size = im.size
    longest_side = max(old_size)

    # Scale with integer arithmetic: a float ratio followed by int() can
    # truncate the longest side to desired_size - 1 (e.g. 49 px -> 127 for a
    # 128 target). Clamp to 1 px so very elongated images never end up with
    # a zero-sized dimension, which resize() cannot handle.
    new_size = tuple(
        max(1, int(side * desired_size // longest_side)) for side in old_size
    )

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    im = im.resize(new_size, Image.LANCZOS)

    # define background color
    bgcolor = 0xffffff
    # create new blank image and "paste" the resized image in the middle of it
    new_im = Image.new("RGB", (desired_size, desired_size), bgcolor)
    offset = ((desired_size - new_size[0]) // 2,
              (desired_size - new_size[1]) // 2)
    new_im.paste(im, offset)

    # do color equalization (the canvas is already an RGB PIL image, so no
    # round-trip through a numpy array is needed)
    return ImageOps.equalize(new_im)

In [3]:
def load(imagePaths, verbose=-1, desired_size=128):
    """Load and preprocess every image in ``imagePaths``.

    Each path is expected to look like ``/path/to/dataset/{class}/{image}.jpg``;
    the name of the directory that directly contains the image is used as
    its label.

    Parameters
    ----------
    imagePaths : sequence of str
        Paths of the image files to load.
    verbose : int, optional
        When positive, print a progress line every ``verbose`` images.
    desired_size : int, optional
        Side length passed to ``processImage`` (default 128).

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(data, labels)``: the stacked preprocessed images and the matching
        class labels, in the order of ``imagePaths``.
    """
    data = []
    labels = []
    total = len(imagePaths)

    # loop over input images
    for i, imagePath in enumerate(imagePaths):
        # The label is the name of the image's parent directory. Using
        # dirname/basename instead of splitting on os.path.sep keeps this
        # correct when a path mixes separator styles.
        label = os.path.basename(os.path.dirname(imagePath))

        # Close the underlying file as soon as the image has been processed;
        # processImage returns a new image that does not depend on it.
        with Image.open(imagePath) as image:
            processed = processImage(image, desired_size)

        data.append(np.asarray(processed))
        labels.append(label)

        # show an update every 'verbose' images (including the first one
        # when verbose == 1)
        if verbose > 0 and (i + 1) % verbose == 0:
            print('[INFO] processed {}/{}'.format(i + 1, total))

    return (np.array(data), np.array(labels))

In [4]:
# Gather every image file under the dataset folder, then load and
# preprocess them (progress is reported every 100 images).
imagePaths = list(paths.list_images('gear_images'))
data, labels = load(imagePaths, verbose=100)

print("Before reshaping: ", data.shape)

# Flatten each 128x128 RGB image into a single row of pixel values.
size = 128 * 128 * 3
data = data.reshape(len(data), size)

print("After reshaping: ", data.shape)

[INFO] processed 100/2122
[INFO] processed 200/2122
[INFO] processed 300/2122
[INFO] processed 400/2122
[INFO] processed 500/2122
[INFO] processed 600/2122
[INFO] processed 700/2122
[INFO] processed 800/2122
[INFO] processed 900/2122
[INFO] processed 1000/2122
[INFO] processed 1100/2122
[INFO] processed 1200/2122
[INFO] processed 1300/2122
[INFO] processed 1400/2122
[INFO] processed 1500/2122
[INFO] processed 1600/2122
[INFO] processed 1700/2122
[INFO] processed 1800/2122
[INFO] processed 1900/2122
[INFO] processed 2000/2122
[INFO] processed 2100/2122
Before reshaping:  (2122, 128, 128, 3)
After reshaping:  (2122, 49152)


In [5]:
# Sanity check: one flattened row (128 * 128 * 3 values) per loaded image.
data.shape

(2122, 49152)

In [6]:
# Sanity check: one label per image, in the same order as the rows of data.
labels.shape

(2122,)