# In [4]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.base import clone
from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score, KFold
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
warnings.filterwarnings('ignore')

# Load the Labeled Faces in the Wild dataset, keeping only people that have
# at least 105 images and resizing every face picture by a factor of 0.4.
lfw_people = fetch_lfw_people(min_faces_per_person=105, resize=0.4)

# Flattened pixel rows, integer class labels, and the label -> name lookup.
data, target = lfw_people.data, lfw_people.target
target_names = lfw_people.target_names
n_classes = len(target_names)

print("Data shape:", data.shape)
print("Number of classes:", n_classes)

def show_10_distinct_people(images, unique_ids, image_shape, targets=None):
    """Plot one sample image for each of (at most) the first ten ids.

    Parameters
    ----------
    images : array-like
        Flattened images, one per row; each row is reshaped to ``image_shape``
        before being drawn.
    unique_ids : sequence of int
        Class ids to display. Only the first ten entries are used.
    image_shape : tuple
        Shape each flattened image is reshaped to for display.
    targets : array-like, optional
        Class id of every row of ``images``. When given, the first row whose
        label equals the id is displayed, so the picture really belongs to the
        person named in the title. When omitted, the function falls back to
        ``images[unique_id]``, which only matches the title if the rows
        happen to be laid out that way.

    Raises
    ------
    ValueError
        If ``targets`` is given and one of the requested ids never occurs in it.
    """
    fig, axarr = plt.subplots(nrows=2, ncols=5, figsize=(15, 6))
    axarr = axarr.flatten()
    shown_ids = unique_ids[:10]
    if targets is not None:
        targets = np.asarray(targets)

    for ax, unique_id in zip(axarr, shown_ids):
        if targets is None:
            image_index = unique_id
        else:
            # Look the person up by label instead of assuming row k is person k.
            matches = np.flatnonzero(targets == unique_id)
            if matches.size == 0:
                raise ValueError("id {} does not occur in targets".format(unique_id))
            image_index = matches[0]
        ax.imshow(images[image_index].reshape(image_shape), cmap="gray")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title("Person id: {}".format(unique_id))

    # Fewer than ten ids leaves spare panels in the 2x5 grid; hide them
    # rather than showing empty axes.
    for ax in axarr[len(shown_ids):]:
        ax.axis("off")

    plt.suptitle("Showing 10 distinct people")
    plt.show()

show_10_distinct_people(
    data, np.unique(target), lfw_people.images[0].shape, targets=target
)

# Hold out 30% of the samples for testing; stratify on the label so each
# person keeps the same share of images in both partitions.
X = data
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target,
    test_size=0.3,
    random_state=42,
    stratify=target,
)
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

# Bar chart of how many training samples each class id has.
y_frame = pd.DataFrame({'subject ids': y_train})
samples_per_class = y_frame.groupby(['subject ids']).size()
samples_per_class.plot.bar(figsize=(15, 8), title="Number of Samples for Each Class")
# Project the raw pixels onto the leading principal components ("eigenfaces").
# The PCA is fitted on the training split only, then applied to both splits.
n_components = 150
# random_state pins the randomized SVD solver that PCA may select for data of
# this size, so the components (and everything derived from them) are
# reproducible from run to run.
pca = PCA(n_components=n_components, whiten=True, random_state=42).fit(X_train)
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
# Draw the per-pixel mean that PCA subtracted, reshaped back into an image.
face_shape = lfw_people.images[0].shape
mean_face = pca.mean_.reshape(face_shape)
fig, ax = plt.subplots(1, 1, figsize=(8, 8))
ax.imshow(mean_face, cmap="gray")
ax.set(xticks=[], yticks=[], title='Average Face')
# Show every principal component as an image (an "eigenface") in a 5-wide grid.
face_height, face_width = lfw_people.images[0].shape
eigen_faces = pca.components_.reshape((n_components, face_height, face_width))
cols = 5
# Ceil division: truncating would leave too few panels (IndexError in the loop
# below) whenever n_components is not a multiple of cols.
rows = (n_components + cols - 1) // cols
# squeeze=False keeps axarr two-dimensional even for a single row or column,
# so flatten() always works.
fig, axarr = plt.subplots(nrows=rows, ncols=cols, figsize=(18, 36), squeeze=False)
axarr = axarr.flatten()
for i in range(n_components):
    axarr[i].imshow(eigen_faces[i], cmap="gray")
    axarr[i].set_xticks([])
    axarr[i].set_yticks([])
    axarr[i].set_title("eigen id: {}".format(i + 1))
# Hide the leftover panels of a partially filled last row.
for spare_ax in axarr[n_components:]:
    spare_ax.axis("off")
plt.suptitle("Eigenfaces")
# RBF-kernel SVM trained on the PCA features; class_weight='balanced' is
# passed so class weights are adjusted for the uneven class sizes.
clf = SVC(kernel='rbf', class_weight='balanced').fit(X_train_pca, y_train)

# Predicted class ids for the held-out test split.
y_pred = clf.predict(X_test_pca)
# Confusion matrix and per-class report for the held-out test split.
class_ids = np.arange(len(target_names))
conf_matrix = metrics.confusion_matrix(y_test, y_pred, labels=class_ids)

# Create a dedicated figure and pass its axes explicitly. plt.figure(1, ...)
# would re-activate an already open figure number 1 (ignoring figsize) and the
# heatmap would be drawn on top of whatever that figure contains.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap="Blues",
    xticklabels=target_names,
    yticklabels=target_names,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix for SVM on Test Set")
plt.show()
print(metrics.classification_report(y_test, y_pred, labels=class_ids, target_names=target_names))
# 5-fold cross-validation of the full PCA -> SVM model on the raw pixels.
kfold = KFold(n_splits=5, shuffle=True, random_state=0)
# Cross-validate a pipeline so the PCA is re-fitted inside every training fold.
# Transforming all of X with the PCA fitted on X_train would let each
# validation fold contain samples the PCA has already seen (data leakage) and
# inflate the score. clone() yields unfitted copies with the same settings.
cv_model = make_pipeline(clone(pca), clone(clf))
cv_scores = cross_val_score(cv_model, X, target, cv=kfold)
print("Mean cross-validation score: {:.2f}".format(cv_scores.mean()))
print("Final accuracy score: {:.2f}".format(metrics.accuracy_score(y_test, y_pred)))


# Recorded cell output: ModuleNotFoundError: No module named 'numpy'
# (numpy was not installed in the environment that executed this cell).