In [1]:
#import necessary libraries
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import fetch_olivetti_faces
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC


In [2]:
# Load the Olivetti faces dataset and pull out the feature matrix and the
# per-sample class labels.
faces = fetch_olivetti_faces()
X, y = faces.data, faces.target

# Build the three splits in two stratified passes: first hold out 20% of all
# samples as the test set, then take 25% of what remains as the validation
# set (0.25 * 0.8 = 20% of the full data), leaving roughly 60% for training.
# Stratifying on the labels keeps the class proportions equal across splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, stratify=y_train, random_state=42
)


In [3]:
# Baseline: fit an SVM directly on the original feature matrix.
# fit() returns the estimator itself, so construction and training chain.
svm_clf = SVC(random_state=42).fit(X_train, y_train)

# Score the baseline on the held-out validation split.
y_pred_val = svm_clf.predict(X_val)
accuracy_val = accuracy_score(y_true=y_val, y_pred=y_pred_val)
print("SVM Classifier - Validation Accuracy:", accuracy_val)




SVM Classifier - Validation Accuracy: 0.925


In [4]:
# Search for the number of clusters that provides the best performance.
# For each candidate k, the original features are replaced by the output of
# KMeans.transform (one column per cluster); a fresh SVM is trained on that
# reduced representation and scored on the validation split.
k_values = range(2, 21)

# Keep every candidate's score instead of only a running maximum, so the
# whole accuracy-vs-k curve can be inspected afterwards (for example to check
# whether the optimum sits at the edge of the searched range).
val_accuracy_by_k = {}

for k in k_values:
    # NOTE(review): n_init is left at the library default, which has changed
    # across scikit-learn releases -- pin it explicitly if this result must
    # reproduce across versions.
    kmeans = KMeans(n_clusters=k, random_state=42)
    # Fit the clustering on the training split only, then apply the same
    # fitted transform to the validation split.
    X_train_reduced = kmeans.fit_transform(X_train)
    X_val_reduced = kmeans.transform(X_val)

    svm_clf_reduced = SVC(random_state=42)
    svm_clf_reduced.fit(X_train_reduced, y_train)

    y_pred_val_reduced = svm_clf_reduced.predict(X_val_reduced)
    accuracy = accuracy_score(y_val, y_pred_val_reduced)
    val_accuracy_by_k[k] = accuracy

# max() returns the first key with the highest score, so ties go to the
# smallest k -- the same tie-break as a strict `>` running maximum. Unlike a
# running maximum seeded with 0.0 / None, this always yields a valid k, even
# if every candidate scores 0.0, so later cells never receive best_k = None.
best_k = max(val_accuracy_by_k, key=val_accuracy_by_k.get)
best_accuracy = val_accuracy_by_k[best_k]

print("Best Number of Clusters:", best_k)
print("Best Validation Accuracy (Dimensionality Reduction):", best_accuracy)



Best Number of Clusters: 19
Best Validation Accuracy (Dimensionality Reduction): 0.575


In [5]:
# Append the features from the reduced set to the original features.
# KMeans is refit here with the selected number of clusters; the clustering
# is learned from the training split only and then applied to validation.
kmeans_best = KMeans(n_clusters=best_k, random_state=42)
X_train_reduced_best = kmeans_best.fit_transform(X_train)
X_val_reduced_best = kmeans_best.transform(X_val)

# Stack column-wise: each sample keeps its original features and gains the
# best_k extra columns produced by the KMeans transform.
X_train_augmented = np.concatenate((X_train, X_train_reduced_best), axis=1)
X_val_augmented = np.concatenate((X_val, X_val_reduced_best), axis=1)

# Sanity checks: row counts are unchanged and exactly best_k columns were added.
assert X_train_augmented.shape == (X_train.shape[0], X_train.shape[1] + best_k)
assert X_val_augmented.shape == (X_val.shape[0], X_val.shape[1] + best_k)




In [6]:
# Train a classifier on the augmented features (the original features with
# the KMeans.transform columns appended), using the same SVC settings as the
# other classifiers in this notebook.
svm_clf_augmented = SVC(random_state=42)
# NOTE: this is the cell's final bare expression, so a notebook would echo
# the value returned by fit() as the cell output; append `;` to suppress it.
svm_clf_augmented.fit(X_train_augmented, y_train)


In [7]:
# Score the augmented-feature SVM on the validation split, using the
# validation matrix that received the same appended KMeans columns.
y_pred_val_augmented = svm_clf_augmented.predict(X_val_augmented)
accuracy_val_augmented = accuracy_score(
    y_true=y_val, y_pred=y_pred_val_augmented
)
print(
    "SVM Classifier (with Augmented Features) - Validation Accuracy:",
    accuracy_val_augmented,
)


SVM Classifier (with Augmented Features) - Validation Accuracy: 0.675
