This example compares several regressors on a set of face images. Each regressor tries to predict the lower half of a face from its upper half.

In [14]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_olivetti_faces
from sklearn.utils.validation import check_random_state

from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
from sklearn import cross_validation

# Fetch the Olivetti faces dataset and keep its labels and raw images around.
data = fetch_olivetti_faces()
originaldata, targets = data.images, data.target

# Flatten every image into a single row of pixels (one row per face), so the
# regressors can treat each face as a plain feature vector.
data = originaldata.reshape((originaldata.shape[0], -1))

In [26]:
# Hold out 40% of the flattened faces for testing (fixed seed => reproducible).
# NOTE(review): `sklearn.cross_validation` was removed in scikit-learn 0.20;
# on newer versions `train_test_split` lives in `sklearn.model_selection`.
train, test = cross_validation.train_test_split(
    data, test_size=0.4, random_state=0)

# Evaluate on a small random sample of the held-out faces. `randint` draws
# with replacement, so the same face may be picked more than once.
n_faces = 7
rng = check_random_state(4)
face_ids = rng.randint(test.shape[0], size=(n_faces, ))
test = test[face_ids, :]

n_pixels = data.shape[1]
# Slice bounds must be integers: np.ceil / np.floor return floats, which
# NumPy rejects as indices. Integer arithmetic yields the same split points.
upper_end = (n_pixels + 1) // 2  # == ceil(0.5 * n_pixels)
lower_start = n_pixels // 2      # == floor(0.5 * n_pixels)

X_train = train[:, :upper_end]    # Upper half of the faces
y_train = train[:, lower_start:]  # Lower half of the faces
X_test = test[:, :upper_end]
y_test = test[:, lower_start:]

# Models to compare, keyed by the label used for their output. The
# "True faces" entry carries no model: it stands for the ground truth.
ESTIMATORS = {
    "True faces": None,
    "Extra trees": ExtraTreesRegressor(
        n_estimators=10, max_features=32, random_state=0
    ),
    "K-nn": KNeighborsRegressor(),
    "Linear regression": LinearRegression(),
    "Ridge": RidgeCV(),
}

# Lower-half faces per label: the real ones for "True faces", and each
# fitted model's prediction on the test upper halves for the others.
y_test_predict = {"True faces": y_test}
for name, estimator in ESTIMATORS.items():
    if estimator is None:
        # Nothing to fit for the ground-truth placeholder.
        continue
    estimator.fit(X_train, y_train)
    y_test_predict[name] = estimator.predict(X_test)

# Set up the figure for the completed faces.
# Each face is reshaped back to this image size before being drawn.
image_shape = (64, 64)

# One column per ESTIMATORS entry: the true faces plus one per fitted model.
n_cols = len(ESTIMATORS)
plt.figure(figsize=(n_cols * 2.0, n_faces * 2.26))
plt.suptitle("Face completion with multi-output estimators", size=16)

def printImage(pixels, subplot):
    """Draw a flat pixel vector as a grayscale image on ``subplot``.

    The axes decorations are switched off, then ``pixels`` is reshaped to the
    module-level ``image_shape`` and shown without interpolation smoothing.
    """
    subplot.axis("off")
    picture = pixels.reshape(image_shape)
    subplot.imshow(picture, cmap=plt.cm.gray, interpolation="nearest")
    
# One subplot row per test face and one column per ESTIMATORS entry. Each
# cell shows the known upper half stacked on that column's lower half (the
# true one or a model's prediction). Only the first row carries titles.
for row in range(n_faces):
    for col, label in enumerate(ESTIMATORS):
        completed_face = np.hstack((X_test[row], y_test_predict[label][row]))

        # Subplot positions are 1-based and run row by row.
        title_kwargs = {"title": label} if row == 0 else {}
        sub = plt.subplot(n_faces, n_cols, row * n_cols + col + 1,
                          **title_kwargs)

        printImage(completed_face, sub)

plt.show()

