#  Kaggle Facial Keypoints Detection

In [None]:
import os

import numpy as np
from keras.layers import Dense, Activation
from keras.models import Sequential
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from matplotlib import pyplot
from pandas.io.parsers import read_csv
from sklearn.utils import shuffle
%matplotlib inline

# CSV files read by `load`: FTRAIN when test is False, FTEST when test is True.
FTRAIN = 'training.csv'
FTEST = 'test.csv'

### Define the load function

In [None]:
def load(test=False, cols=None):
    """Read FTEST (test=True) or FTRAIN (test=False) and return (X, y).

    cols: optional iterable of target column names; when given, only those
        columns plus 'Image' are kept before rows with missing values are
        dropped.

    Returns X as a float32 array with one row of pixel values per image,
    divided by 255. For training data y is a float32 array of the target
    columns mapped through (value - 48) / 48, so coordinates in [0, 96]
    land in [-1, 1], and X and y are shuffled together with a fixed seed.
    For test data y is None.
    """
    csv_path = os.path.expanduser(FTEST if test else FTRAIN)
    df = read_csv(csv_path)

    def _parse_pixels(im):
        # Each 'Image' cell is a space-separated string of pixel values.
        return np.fromstring(im, sep=' ')

    df['Image'] = df['Image'].apply(_parse_pixels)

    if cols:
        df = df[list(cols) + ['Image']]

    # Show how many non-missing values each column has, then keep only the
    # rows where every remaining column is present.
    print(df.count())
    df = df.dropna()

    # Presumably 8-bit grey levels, so dividing by 255 gives values in [0, 1].
    X = (np.vstack(df['Image'].values) / 255.).astype(np.float32)

    if test:
        # The test file carries no labels.
        return X, None

    # Every column except the last one (expected to be 'Image') is a target.
    y = df[df.columns[:-1]].values
    y = (y - 48) / 48
    X, y = shuffle(X, y, random_state=42)
    return X, y.astype(np.float32)

# Load the training set and print the shape and value range of the inputs
# and of the targets as a quick sanity check.
X, y = load()
print("X.shape == {}; X.min == {:.3f}; X.max == {:.3f}".format(
    X.shape, X.min(), X.max()))
print("y.shape == {}; y.min == {:.3f}; y.max == {:.3f}".format(
    y.shape, y.min(), y.max()))

### Model 1: One Hidden layer

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD

# One hidden layer: 9216 inputs per sample (96x96 pixels) -> 100 ReLU units
# -> 30 linear outputs, one per target value.
model = Sequential([
    Dense(100, input_dim=9216),
    Activation('relu'),
    Dense(30),
])

# Plain SGD with Nesterov momentum, minimising the mean squared error.
sgd = SGD(lr=0.01, momentum=0.9, nesterov=True)
model.compile(loss='mse', optimizer=sgd)

# Train for 100 epochs, holding out 20% of the samples for validation.
hist = model.fit(X, y, validation_split=0.2, nb_epoch=100)

### Learning Curve

In [None]:
from matplotlib import pyplot

# Plot the per-epoch training and validation loss recorded in `hist`.
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'valid')):
    pyplot.plot(hist.history[history_key], linewidth=3, label=curve_label)

pyplot.xlabel('epoch')
pyplot.ylabel('loss')
pyplot.legend()
pyplot.grid()
# Restrict the y axis to [1e-3, 1e-2] and display it on a log scale.
pyplot.ylim(1e-3, 1e-2)
pyplot.yscale('log')
pyplot.show()

### Confirm the keypoint

In [None]:
def plot_sample(x, y, axis):
    """Draw one image and its keypoints on the given axis.

    x: pixel values of a single image; anything that reshapes to 96x96.
    y: flat keypoint vector, x coordinates at even positions and
       y coordinates at odd positions, scaled as (pixel - 48) / 48.
    axis: object providing imshow and scatter (e.g. a matplotlib Axes).
    """
    axis.imshow(x.reshape(96, 96), cmap='gray')

    # Undo the target scaling to get back to pixel coordinates.
    xs = y[0::2] * 48 + 48
    ys = y[1::2] * 48 + 48
    axis.scatter(xs, ys, marker='x', s=10)

# Predict keypoints for the test images with `model`.
X_test, _ = load(test=True)
y_test = model.predict(X_test)

# Square figure with no outer margins and only a thin gap between panels.
fig = pyplot.figure(figsize=(6, 6))
fig.subplots_adjust(
    left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)

# Show the first 16 test images in a 4x4 grid with their predicted keypoints.
for i in range(16):
    axis = fig.add_subplot(4, 4, i+1, xticks=[], yticks=[])
    plot_sample(X_test[i], y_test[i], axis)

pyplot.show()

### Save model 1

In [None]:
from keras.models import model_from_json

# Persist model 1: the architecture as JSON, the weights as HDF5.
json_string = model.to_json()
# A context manager guarantees the file is flushed and closed; a bare
# open(...).write(...) leaves that to the garbage collector.
with open('model1_architecture.json', 'w') as arch_file:
    arch_file.write(json_string)
model.save_weights('model1_weights.h5')


### Load the model 1

In [None]:
# Rebuild model 1 from the saved architecture, then restore its weights.
# The architecture file is read inside a context manager so it is closed
# as soon as the JSON has been parsed.
with open('model1_architecture.json') as arch_file:
    model = model_from_json(arch_file.read())
model.load_weights('model1_weights.h5')

## Model 2: Convolutional Neural Network

In [None]:
def load2d(test=False, cols=None):
    """Same as `load`, but with X reshaped to (n_samples, 1, 96, 96).

    The arguments are passed straight through to `load`; y is returned
    unchanged.
    """
    flat_X, y = load(test, cols)
    # One channel per image, 96 rows by 96 columns.
    return flat_X.reshape(-1, 1, 96, 96), y

### Build the model

- Convolutional layers: 3 (each followed by ReLU and 2x2 max pooling)

In [None]:
from keras.layers import Convolution2D, MaxPooling2D, Flatten

# Reload the training set as (n_samples, 1, 96, 96) image tensors.
X, y = load2d()

# Three conv/ReLU/max-pool stages followed by two 500-unit dense layers and
# a 30-value linear output.
#
# The data is channels-first, so every convolution and pooling layer is told
# so explicitly with dim_ordering='th'. Setting it on the first layer only
# leaves the remaining layers on the backend default from keras.json, which
# would misinterpret the axes when that default is 'tf'.
model2 = Sequential()

model2.add(Convolution2D(32, 3, 3, input_shape=(1, 96, 96), dim_ordering='th'))
model2.add(Activation('relu'))
model2.add(MaxPooling2D(pool_size=(2, 2), dim_ordering='th'))

model2.add(Convolution2D(64, 2, 2, dim_ordering='th'))
model2.add(Activation('relu'))
model2.add(MaxPooling2D(pool_size=(2, 2), dim_ordering='th'))

model2.add(Convolution2D(128, 2, 2, dim_ordering='th'))
model2.add(Activation('relu'))
model2.add(MaxPooling2D(pool_size=(2, 2), dim_ordering='th'))

model2.add(Flatten())
model2.add(Dense(500))
model2.add(Activation('relu'))
model2.add(Dense(500))
model2.add(Activation('relu'))
model2.add(Dense(30))

# Same optimiser and loss as model 1; 20% of the samples are held out for
# validation.
sgd = SGD(lr=0.01, momentum=0.9, nesterov=True)
model2.compile(optimizer=sgd, loss='mse')
hist2 = model2.fit(X, y, nb_epoch=1000, validation_split=0.2)

### Save model 2

In [None]:
json_string = model.to_json()
open('model2_architecture.json', 'w').write(json_string)
model.save_weights('model2_weights.h5')

### Load the model 2

In [None]:
# Load the train data
model = model_from_json(open('model2_architecture.json').read())
model.load_weights('model2_weights.h5')

### Confirm the keypoint

In [None]:
# Take the test image at index 6 in both layouts: flat for `model`,
# (1, 1, 96, 96) for `model2`. The [6:7] slice keeps the batch dimension.
sample1 = load(test=True)[0][6:7]
sample2 = load2d(test=True)[0][6:7]
y_pred1 = model.predict(sample1)[0]
y_pred2 = model2.predict(sample2)[0]

# Left panel: prediction from `model`; right panel: prediction from `model2`.
fig = pyplot.figure(figsize=(6, 3))
for panel, (sample, prediction) in enumerate(
        ((sample1, y_pred1), (sample2, y_pred2)), 1):
    ax = fig.add_subplot(1, 2, panel, xticks=[], yticks=[])
    plot_sample(sample, prediction, ax)
pyplot.show()

### Data Augmentation

**Flipping the image**

In [None]:
X, y = load2d()
# Reverse the last axis (image columns) to mirror every image left-to-right.
X_flipped = X[:, :, :, ::-1]

# Draw sample 1 before and after mirroring. The same, unflipped targets y[1]
# are drawn on both panels.
fig = pyplot.figure(figsize=(6, 3))
for panel, images in enumerate((X, X_flipped), 1):
    ax = fig.add_subplot(1, 2, panel, xticks=[], yticks=[])
    plot_sample(images[1], y[1], ax)
pyplot.show()

**Flipping the keypoint**

In [None]:
# Pairs of target-column indices whose values trade places when an image is
# mirrored.
flip_indices = [
    (0, 2),
    (1, 3),
    (4, 8),
    (5, 9),
    (6, 10),
    (7, 11),
    (12, 16),
    (13, 17),
    (14, 18),
    (15, 19),
    (22, 24),
    (23, 25),
]

# Print the column names behind each pair, taken from the training CSV.
df = read_csv(os.path.expanduser(FTRAIN))
column_names = df.columns
for src, dst in flip_indices:
    print("{} -> {}".format(column_names[src], column_names[dst]))

**Flipped Image Data Generator Class**

In [None]:
class FlippedImageDataGenerator(ImageDataGenerator):
    """ImageDataGenerator that mirrors a random half of every batch.

    For the mirrored samples the targets are adjusted to match: values at
    even positions (the x coordinates) are negated, and the columns listed
    in `flip_indices` are swapped pairwise.
    """

    # (a, b) pairs of target columns that exchange values when a sample is
    # mirrored, e.g. left_eye_center_x <-> right_eye_center_x.
    flip_indices = [
        (0, 2), (1, 3),
        (4, 8), (5, 9), (6, 10), (7, 11),
        (12, 16), (13, 17), (14, 18), (15, 19),
        (22, 24), (23, 25),
    ]

    def next(self):
        """Return the parent's next (X_batch, y_batch) with half of it mirrored.

        y_batch may be None, in which case only the images are flipped.
        """
        X_batch, y_batch = super(FlippedImageDataGenerator, self).next()
        batch_size = X_batch.shape[0]
        # Floor division keeps the sample count an integer; `/` produces a
        # float on Python 3, which np.random.choice rejects as a size.
        indices = np.random.choice(batch_size, batch_size // 2, replace=False)
        # Reverse the last axis (image columns) of the chosen samples.
        X_batch[indices] = X_batch[indices, :, :, ::-1]

        if y_batch is not None:
            # Negate the x coordinates (even positions) of the chosen samples.
            y_batch[indices, ::2] = y_batch[indices, ::2] * -1

            # Swap the paired columns. Indexing with an index array copies,
            # so both right-hand values are read before either is written.
            for a, b in self.flip_indices:
                y_batch[indices, a], y_batch[indices, b] = (
                    y_batch[indices, b], y_batch[indices, a])

        return X_batch, y_batch

In [None]:
# train_test_split lives in sklearn.model_selection on current scikit-learn;
# sklearn.cross_validation only exists on old releases, so it is kept as a
# fallback.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# The generator-based fit below takes explicit validation data, so the
# 80/20 split is done here rather than with validation_split.
X, y = load2d()
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Same layer stack as model2. The input is channels-first (1, 96, 96), so
# every convolution and pooling layer gets dim_ordering='th' explicitly
# instead of relying on the backend default in keras.json.
model3 = Sequential()

model3.add(Convolution2D(32, 3, 3, input_shape=(1, 96, 96), dim_ordering='th'))
model3.add(Activation('relu'))
model3.add(MaxPooling2D(pool_size=(2, 2), dim_ordering='th'))

model3.add(Convolution2D(64, 2, 2, dim_ordering='th'))
model3.add(Activation('relu'))
model3.add(MaxPooling2D(pool_size=(2, 2), dim_ordering='th'))

model3.add(Convolution2D(128, 2, 2, dim_ordering='th'))
model3.add(Activation('relu'))
model3.add(MaxPooling2D(pool_size=(2, 2), dim_ordering='th'))

model3.add(Flatten())
model3.add(Dense(500))
model3.add(Activation('relu'))
model3.add(Dense(500))
model3.add(Activation('relu'))
model3.add(Dense(30))

# The learning rate must be a number; it was previously the string '0.01'.
sgd = SGD(lr=0.01, momentum=0.9, nesterov=True)
model3.compile(loss='mean_squared_error', optimizer=sgd)

# Train on batches in which a random half of the samples is mirrored;
# validation uses the untouched hold-out set.
flipgen = FlippedImageDataGenerator()
hist3 = model3.fit_generator(
    flipgen.flow(X_train, y_train),
    samples_per_epoch=X_train.shape[0],
    nb_epoch=3000,
    validation_data=(X_val, y_val))