## Load and predict

Loads a trained model and predicts the class of every image in the test set.

In [1]:
from keras.models import load_model
from skimage.transform import resize
import csv
import h5py
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from skimage import segmentation
from skimage.morphology import watershed
from skimage import measure
from skimage import morphology
import numpy as np
import pandas as pd
import os
%matplotlib inline

Using TensorFlow backend.


## Set global variables and model hyper-parameters

In [11]:
# Locations are relative to the notebook's working directory.
DATA_LOCATION = '../data/'
TEST_IMAGES_LOCATION = '../data/test_images/'
# Side length (in pixels) every image is resized to before prediction.
IMAGE_SIZE = 48
N_CLASSES = 121

# Collect the test image file names from the configured directory.
# os.listdir returns entries in arbitrary order, so sort them to get the
# same row order on every run and on every platform.
filenames = sorted(
    name for name in os.listdir(TEST_IMAGES_LOCATION) if name.endswith('.jpg')
)

# One row per test image; -1 is a placeholder until a prediction is filled in.
labels = pd.DataFrame(filenames, columns=['image'])
labels['class'] = -1

## Global helper methods

In [3]:
def get_padding(i):
    """
    Helper function for getting right padding sizes
    input:
        - i: non-negative integer obtained by subtracting the height and
          width of an image
    output:
        - Tuple (before, after) of integers with before + after == i; when i
          is odd the extra pixel goes to the "after" side
    """
    # Pure integer arithmetic: the previous int(i/2 -/+ .5) form went through
    # floats, which silently loses precision for very large values.
    total = int(i)
    before = total // 2
    return (before, total - before)

def pad_image(img):
    """
    Zero-pad the shorter axis of a 2D image so that it becomes square
    input:
        - img: numpy array (2D) representing image
    output:
        - the input itself when it is already square, otherwise a new
          padded array of shape (N,N) with N = max(height, width)
    """
    height, width = img.shape
    excess = height - width
    if excess == 0:
        return img
    if excess > 0:
        # taller than wide: grow the column axis
        pad_width = ((0, 0), get_padding(excess))
    else:
        # wider than tall: grow the row axis
        pad_width = (get_padding(-excess), (0, 0))
    return np.pad(img, pad_width, 'constant')

def resize_image(img, size=None):
    """
    Resize image to new square shape
    input:
        - img: numpy array (2D) representing image
        - size: final side length of the image in pixels (integer);
          defaults to the global IMAGE_SIZE when omitted
    output:
        - resized array, requested from skimage's resize with output
          shape (size, size)
    """
    if size is None:
        # Looked up at call time, so the configuration cell only has to have
        # run before the first call, not before this definition.
        size = IMAGE_SIZE
    return resize(img, (size, size), mode='reflect')

def getLargestRegion(array):
    """
    Gets largest cluster of pixels and removes other small, irrelevant clusters
    input:
        - array: numpy array (2D) representing an image
    output:
        - numpy array of floats: 1.0 inside the largest cluster and 0.0
          elsewhere (all zeros if no cluster is found)
    """
    # Threshold image: pixels below the mean become 0, all others 1
    imthr = np.where(array < np.mean(array), 0., 1.0)

    # Dilate image so that fragments separated by a thin gap share one label
    imdilated = morphology.dilation(imthr, np.ones((2, 2)))

    # Label connected sections of the dilated mask, then keep the labels only
    # on the original (undilated) foreground pixels
    region_labels = measure.label(imdilated)
    region_labels = (imthr * region_labels).astype(int)

    regions = measure.regionprops(region_labels)
    if not regions:
        # Nothing was labelled (e.g. empty input): return an empty mask
        # instead of failing on a missing region.
        return np.zeros(region_labels.shape)

    # Pick the region with the largest filled area; max() keeps the first
    # region on ties, matching a strict "<" comparison loop.
    largest = max(regions, key=lambda region: region.filled_area)

    return np.where(region_labels == largest.label, 1.0, 0.0)

## Load model

In [4]:
# Restore the trained network from disk and print its layer-by-layer summary.
model_path = '../data/output/models/modellabeled.h5'
model = load_model(model_path)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 48, 48, 64)        640       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 24, 24, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 24, 24, 32)        18464     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 12, 12, 32)        9248      
_________________________________________________________________
dropout_1 (Dropout)          (None, 12, 12, 32)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 4608)              0         
__________

## Process test images

Convert each test image into an array with the input shape the trained model expects, then predict its class.

In [14]:
# Predicted class index per file name, filled in one image at a time.
predictions = {}
for filename in filenames:
    # read and transform image to usable format
    img = mpimg.imread(TEST_IMAGES_LOCATION + filename)
    # rescale by 255 and invert the intensities
    # (assumes 8-bit pixel values -- TODO confirm for non-JPEG inputs)
    img = np.absolute(np.divide(img.astype(float), 255) - 1.0)
    img = resize_image(pad_image(img))
    # Batch of one single-channel image, as expected by a model that starts
    # with a convolutional layer. For a model without a ConvLayer flatten instead:
    #   img = img.flatten().reshape([-1, IMAGE_SIZE * IMAGE_SIZE])
    img = img.reshape(1, IMAGE_SIZE, IMAGE_SIZE, 1)

    # Index of the highest-scoring class. argmax over predict() gives the same
    # result as the legacy predict_classes() for a multi-class output and also
    # works on Keras versions where predict_classes() no longer exists.
    scores = model.predict(img, verbose=0)
    predictions[filename] = int(np.argmax(scores, axis=-1)[0])

# Fill in every prediction in one pass, matched by file name so the result does
# not depend on the current row order; rows without a prediction keep -1.
labels['class'] = labels['image'].map(predictions).fillna(-1).astype(int)

# sort_values returns a new frame, so the result has to be assigned to take effect.
labels = labels.sort_values(by='class')
labels.sample(n=5)

Unnamed: 0,image,class
2974,26227.jpg,45
304,107714.jpg,2
2585,17065.jpg,112
1893,14452.jpg,34
1794,141757.jpg,27


In [15]:
# Write the predictions to disk. The output directory is created first so the
# export also works when it does not exist yet.
predictions_path = '../data/output/predictions/modellabel.csv'
os.makedirs(os.path.dirname(predictions_path), exist_ok=True)
labels.to_csv(predictions_path, index=False)