### Import Modules

In [None]:
from luma.preprocessing.scaler import StandardScaler
from luma.reduction.linear import PCA
from luma.model_selection.split import TrainTestSplit
from luma.model_selection.search import GridSearchCV
from luma.classifier.svm import KernelSVC
from luma.visual.result import ConfusionMatrix

from sklearn.datasets import fetch_lfw_people
import matplotlib.pyplot as plt
import numpy as np

### Load Dataset

In [None]:
# Fetch the LFW faces, keeping only people with at least 100 images and
# requesting images resized by a factor of 0.4.
lfw_people = fetch_lfw_people(resize=0.4, min_faces_per_person=100)

# Feature matrix and integer labels used by every later cell.
X, y = lfw_people.data, lfw_people.target

# Displayed as the cell output: image stack shape, feature shape, label shape.
(lfw_people.images.shape, X.shape, y.shape)

### Split Train and Test Sets

In [None]:
# Hold out 20% of the samples for testing; the seed keeps the split reproducible.
splitter = TrainTestSplit(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splitter.get

# Displayed as the cell output: shapes of the two feature partitions.
(X_train.shape, X_test.shape)

### Scale Through `StandardScaler`

In [None]:
# Standardize features using statistics estimated on the training set only.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)

# Reuse the training-set statistics for the test set. Calling `fit_transform`
# here would re-fit the scaler on the test data, putting train and test in
# different feature spaces and leaking test information into preprocessing.
X_test_std = sc.transform(X_test)

### Dimensionality Reduction Through `PCA`

In [None]:
# Project the standardized pixels onto 150 principal components.
pca = PCA(n_components=150)

# Fit on the same standardized training data that is transformed below;
# fitting on the raw `X_train` would learn components for a differently
# scaled distribution than the one being projected.
pca.fit(X_train_std)

X_train_pca = pca.transform(X_train_std)
X_test_pca = pca.transform(X_test_std)

### Tuning Hyperparameters of `KernelSVC` with `GridSearchCV`

In [None]:
# Candidate values for each hyperparameter. Every entry is a sequence of
# candidates; a bare string such as 'rbf' is itself iterable and could be
# expanded character by character ('r', 'b', 'f') when the grid is built.
# NOTE(review): np.logspace(0, 4, 4) gives 4 points between 1e0 and 1e4 that
# are not whole decades (unlike the gamma grid) - confirm num=4 is intended.
param_grid = {'C': np.logspace(0, 4, 4),
              'gamma': np.logspace(-4, -1, 4),
              'kernel': ['rbf']}

# 5-fold cross-validated search; `refit=True` is requested so the best
# configuration is available afterwards through `grid.best_model`.
grid = GridSearchCV(estimator=KernelSVC(),
                    param_grid=param_grid,
                    cv=5,
                    refit=True,
                    random_state=42,
                    verbose=True)

grid.fit(X_train_pca, y_train)

### Score Heatmap of Searching Process

### Extract Best Model

In [None]:
# Keep a handle on the best estimator found by the search for later cells.
svc = grid.best_model

# Displayed as the cell output: winning hyperparameters and their score.
(grid.best_params, grid.best_score)

### Evaluate Model

#### 1. Learning Curve of Optimized `KernelSVC`

#### 2. Confusion Matrix

In [None]:
# Predict on the held-out PCA features, then compare against the true labels.
y_pred = svc.predict(X_test_pca)
conf = ConfusionMatrix(y_true=y_test, y_pred=y_pred)
conf.plot(show=True)

#### 3. Actual Prediction Comparison

In [None]:
def plot_gallery(images, titles, h, w, n_row, n_col):
    """Show a grid of grayscale images, each with its own title.

    Parameters
    ----------
    images : indexable sequence of flattened images; each entry is reshaped
        to ``(h, w)`` before being drawn.
    titles : indexable sequence of title strings, aligned with ``images``.
    h, w : height and width used to reshape every image.
    n_row, n_col : number of rows and columns in the subplot grid.

    At most ``n_row * n_col`` panels are drawn. If fewer images or titles
    are supplied, only the available ones are drawn and the remaining grid
    cells are left empty.
    """
    # Clamp to the available data so a short input no longer raises
    # IndexError part-way through drawing the figure.
    n_plots = min(n_row * n_col, len(images), len(titles))

    plt.figure(figsize=(1.5 * n_col, 2 * n_row))
    plt.subplots_adjust(bottom=0, left=0.01, right=0.99, top=0.90, hspace=0.35)

    for i in range(n_plots):
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i], size=12)
        # Hide axis ticks: they carry no meaning for image panels.
        plt.xticks(())
        plt.yticks(())

    plt.tight_layout()
    plt.show()

In [None]:
def get_title(pred, true, names, i):
    """Build the two-line 'pred / true' caption for sample ``i``.

    ``pred`` and ``true`` hold label indices into ``names``. Only the part
    of each name after its last space is shown.
    """
    def last_word(label):
        # Split once from the right and keep the trailing piece.
        return names[label].rsplit(' ', 1)[-1]

    predicted = last_word(pred[i])
    actual = last_word(true[i])
    return 'pred: %s\ntrue: %s' % (predicted, actual)

In [None]:
# Predicted labels for the held-out samples.
test_pred = svc.predict(X_test_pca)

# One caption per test sample, pairing the predicted and true names.
n_test = y_test.shape[0]
titles = []
for idx in range(n_test):
    titles.append(get_title(test_pred, y_test, lfw_people.target_names, idx))

# Per-image height and width come from the image stack's trailing dimensions.
img_h, img_w = lfw_people.images.shape[1:]

# Show the first 4 x 6 test faces using the raw (unscaled) pixel rows.
plot_gallery(X_test, titles, img_h, img_w, 4, 6)