## Bounding Box Model

In [1]:
# Suppress annoying stderr output when importing keras.
import sys
old_stderr = sys.stderr
sys.stderr = open('/dev/null', 'w')
import keras
sys.stderr = old_stderr

## Read the cropping dataset

In [2]:
with open('../data/cropping.txt', 'rt') as f: 
    data = f.read().split('\n')[:-1]
    data = [line.split(',') for line in data]
    data = [(p, [(int(coord[i]), int(coord[i+1])) for i in range(0, len(coord), 2)]) for p, *coord in data]
data[0]

SyntaxError: invalid syntax (<ipython-input-2-b70f637dafb2>, line 4)

In [2]:
from PIL import Image as pil_image
from PIL.ImageDraw import Draw
from os.path import isfile

def expand_path(p):
    if isfile('data/train/' + p):
        return 'data/train/' + p
    if isfile('data/test/' + p):
        return 'data/test' + p

def read_raw_image(p):
    print(p)
    p = expand_path(p)
    print(p)
    return pil_image.open(expand_path(p))

def draw_dot(draw, x, y):
    draw.ellipse(((x - 5,  y - 5)), fill='red', outline='red')

def draw_dots(draw, coordinates):
    for x, y in coordinates:
        draw_dot(draw, x, y)

def bounding_rectangle(boxes):
    x0, y0 = boxes[0]
    x1, y1 = x0, y0
    for x, y in boxes[1:]:
        x0 = min(x0, x)
        y0 = min(y0, x)
        x1 = max(x1, x)
        y1 = max(y1, y)
    return x0, y0, x1, y1

In [11]:
filename, coordinates = data[0]
box = bounding_rectangle(coordinates)
img = read_raw_image(filename)
draw = Draw(img)
draw_dots(draw, coordinates)
draw.rectangle(box, outline='red')
img

## Image preprocessing code

### Images are preprocessed by:
1. Converting to black&white;
2. Compressing horizontally by a factor of 2.15(the mean aspect ratio)
3. Apply a random image transformation(only for training)
4. Resizing to 128x128;
5. Normalizing to zero mean and unit variance.

In [3]:
# Define useful constants
img_shape = (128, 128, 1)
anisotropy = 2.15

In [4]:
import random
import numpy as np
from scipy.ndimage import affine_transform
from keras.preprocessing.image import img_to_array

# Read an image as black&white numpy array
def read_array(p):
    img = read_raw_image(p).convert('L')
    return img_to_array(img)

def build_transform(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    rotation = np.deg2rad(rotation)
    shear = np.deg2rad(shear)
    rotation_matrix = np.array([[np.cos(rotation), np.sin(rotation), 0],
                                [-np.sin(rotation), np.cos(rotation), 0],
                                [0, 0, 1]])
    shift_matrix = np.array([[1, 0, height_shift], [0, 1, width_shift], [0, 0, 1]])
    shear_matrix = np.array([[1, np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]])
    zoom_matrix = np.array([[1.0/height_zoom, 0, 0], [0, 1, -width_shift], [0, 0, 1]])
    return np.dot(np.dot(rotation_matrix, shear_matrix), np.dot(zoom_matrix, shift_matrix))


# Compute the coordinate transformation required to center the pictures, padding as required.
def center_transform(affine, input_shape):
    hi, wi = float(input_shape[0]), float(input_shape[1])
    ho, wo = float(img_shape[0]), float(img_shape[1])
    top, left, bottom, right = 0, 0, hi, wi
    if wi/hi/anisotropy < wo/ho:   # input image too narrow, extend width
        w = hi*wo/ho*anisotropy
        left = (wi - w)/2
        right = left + w
    else: # input image too wide, extend height
        h = wi*ho/wo/anisotropy
        top = (hi-h)/2
        bottom = top + h
    center_matrix = np.array([[1, 0, -h0/2], [0, 1, wo/2], [0, 0, 1]])
    scale_matrix = np.array([[(bottom - top)/ho, 0, 0], [0, (right - left)/wo, 0], [0, 0, 1]])
    decent_matrix = np.array([[1, 0, hi/2], [0, 1, wi/2], [0, 0, 1]])
    return np.dot(np.dot(decent_matrix, scale_matrix), np.dot(affine, center_matrix))

# Apply an affine tranformation to an image represented as a numpy array.
def transform_img(x, affine):
    matrix = affine[:2, :2]
    offset = affine[:2, 2]
    x = np.moveaxis(x, -1, 0)
    channels = [affine_transform(channel, matrix, offset, output_shape=img_shape[:-1], order=1,
                                mode='constant', cval=np.average(channel)) for channel in x]
    return np.moveaxis(np.stack(channels, axis=0), 0, -1)

# Read an image for validation, i.e. without data augmentation.
def read_for_validation(p):
    x = read_array(p)
    t = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    t = center_transform(t, x.shape)
    x = transform_img(x, t)
    x -= np.mean(x, keepdims=True)
    x /= np.std(x, keepdims=True) + K.epsilon()
    return x, t

# Read an image for training, i.e. including a random affine transformation
def read_for_training(p):
    x = read_array(p)
    t = build_transform(
        random.uniform(-5, 5),
        random.uniform(-5, 5),
        random.uniform(0.9, 1.0), 
        random.uniform(0.9, 1.0), 
        random.uniform(-0.05*img_shape[0], 0.05*img_shape[0]), 
        random.uniform(-0.05*img_shape[1], 0.05*img_shape[1]))
    t = center_transform(t, x.shape)
    x = transform_img(x, t)
    x -= np.mean(x, keepdims=True)
    x /= np.std(x, keepdims=True) + K.epsilon()
    return x, t

# Tranform coordinates according to the provided affine transformation
def coord_transorm(boxes, trans):
    result = []
    for x, y in boxes:
        y, x, _ = trans.dot([y, x, 1]).astype(np.int)
        result.append((x, y))
    return result

## Keras Model

In [10]:
from keras.engine.topology import Input
from keras.layers import BatchNormalization, Concatenate, Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from keras.models import Model


def conv_bn_dp(x, with_dropout, conv_drop):
    x = Conv2D(filters=64, kernel_size=(2, 2), activation='relu', padding='same', strides=2)(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    if with_dropout:
#       x = Dropout(conv_drop, noise_shape=(None, 1, 1, int(x.shape[-1])))(x)
        x = Dropout(conv_drop)(x)
    return x

def build_model(with_dropout=True):
    kwargs = {'activation': 'relu', 'padding': 'same'}
    conv_drop = 0.2
    dense_drop = 0.
    inp = Input(shape=img_shape)
    
    x = inp
    
    x = Conv2D(filters=64, kernel_size=(9, 9), activation='relu', padding='same')(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = BatchNormalization()(x)
    if with_dropout:
        x = Dropout(conv_drop)(x)
    
    for i in range(5):
        x = conv_bn_dp(x, with_dropout, conv_drop)
    
    h = MaxPooling2D(pool_size=(1, int(x.shape[2])))(x)
    h = Flatten()(h)
    if with_dropout:
        h = Dropout(dense_drop)(h)
    h = Dense(16, activation='relu')(h)
    
    v = MaxPooling2D(pool_size=(int(x.shape[1]), 1))(x)
    v = Flatten()(v)
    if with_dropout:
        v = Dropout(dense_drop)(v)
    v = Dense(16, activation='relu')(v)
    
    x = Concatenate()([h, v])
    if with_dropout:
        x = Dropout(0.5)(x)
    x = Dense(4, activation='linear')(x)
    return Model(inp, x)

model = build_model(with_dropout=True)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 128, 128, 1)  0                                            
__________________________________________________________________________________________________
conv2d_68 (Conv2D)              (None, 128, 128, 64) 5248        input_6[0][0]                    
__________________________________________________________________________________________________
conv2d_69 (Conv2D)              (None, 128, 128, 64) 36928       conv2d_68[0][0]                  
__________________________________________________________________________________________________
batch_normalization_26 (BatchNo (None, 128, 128, 64) 256         conv2d_69[0][0]                  
__________________________________________________________________________________________________
dropout_41