In [31]:
import os
import numpy as np
from PIL import Image

def load_orl_faces(path, n_subjects=40, size=(92, 112)):
    """Load the AT&T / ORL face images under ``path`` as flat pixel vectors.

    The directory is expected to contain one sub-folder per subject, named
    ``s1`` ... ``s<n_subjects>``. Every regular file in a subject folder is
    opened as an image, converted to greyscale (mode ``"L"``), resized to
    ``size`` and flattened into one row.

    Parameters
    ----------
    path : str
        Root directory holding the ``s<k>`` subject folders.
    n_subjects : int, optional
        Number of subject folders to read, starting at ``s1`` (default 40).
    size : tuple of int, optional
        ``(width, height)`` handed to ``Image.resize`` (default ``(92, 112)``).

    Returns
    -------
    data : numpy.ndarray
        One flattened image per row.
    labels : numpy.ndarray
        1-based subject number for each row of ``data``.

    Raises
    ------
    FileNotFoundError
        If an expected subject folder does not exist.
    """
    data, labels = [], []
    for person in range(1, n_subjects + 1):
        folder = os.path.join(path, f"s{person}")
        # os.listdir returns names in arbitrary order; sorting makes the row
        # order (and anything seeded downstream) reproducible across machines.
        for img_name in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, img_name)
            if not os.path.isfile(img_path):
                # Skip nested directories instead of failing inside Image.open.
                continue
            # The context manager closes the underlying file handle once the
            # pixel data has been copied into the array.
            with Image.open(img_path) as img:
                pixels = np.array(img.convert("L").resize(size))
            data.append(pixels.flatten())
            labels.append(person)
    return np.array(data), np.array(labels)

# Dataset root, given relative to the current working directory.
path = "AT&T Database of Faces"
# X: one flattened image per row; y: the subject number for each row.
X, y = load_orl_faces(path)
print(X.shape, y.shape)


(400, 10304) (400,)


In [32]:
def normaliser(data):
    """Standardise every column of ``data`` to zero mean and unit variance.

    Uses the population standard deviation (``np.std`` with its default
    ``ddof=0``). A column whose values are all identical has a standard
    deviation of zero; it is divided by 1 instead, so it comes out as all
    zeros rather than NaN.

    Parameters
    ----------
    data : array_like
        Samples in rows, features in columns.

    Returns
    -------
    numpy.ndarray
        A new array of the same shape holding ``(data - mean) / std`` per
        column. The input is not modified.
    """
    muo = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # Guard constant columns: 0/0 would otherwise poison every later
    # computation (covariance, eigen-decomposition) with NaN.
    std = np.where(std == 0, 1.0, std)
    return (data - muo) / std
        
def covariance_matrix(data):
    """Return the population covariance matrix of the columns of ``data``.

    The column means are subtracted before forming the product, so the result
    is a true covariance even when the caller has not centred the data. For
    input that is already centred, the subtraction changes nothing beyond
    floating-point rounding.

    Parameters
    ----------
    data : array_like
        Two-dimensional array with samples in rows and features in columns.

    Returns
    -------
    numpy.ndarray
        Square matrix with one row and column per feature, normalised by the
        number of samples ``n`` (not ``n - 1``).
    """
    data = np.asarray(data)
    # Without centring, X.T @ X / n is the second-moment matrix, which only
    # equals the covariance when every column mean is zero.
    centred = data - np.mean(data, axis=0)
    cov_matrix = np.dot(centred.T, centred) / (data.shape[0])
    return cov_matrix


In [33]:
# Standardise every pixel column, then form the pixel-by-pixel covariance.
# NOTE(review): this rebinds X, so the raw pixel values are no longer
# reachable under that name once this cell has run.
# NOTE(review): the standardisation and the eigenvectors below are fitted on
# all rows of X, before the train/test split done in a later cell, so the
# held-out images influence the projection.
X = normaliser(X)
# NOTE(review): C has one row/column per pixel (10304 per the shape printed
# above), i.e. roughly 0.85 GB if stored as float64.
C = covariance_matrix(X)

from scipy.sparse.linalg import eigsh
# Number of leading eigenvectors (principal components) to keep.
k = 30
# which='LM' asks eigsh for the k eigenvalues of largest magnitude.
eigvals, eigvecs = eigsh(C, k=k, which='LM')

# Reorder so the eigenvalues, and their matching eigenvector columns, run
# from largest to smallest.
idx = np.argsort(eigvals)[::-1]
eigvals, eigvecs = eigvals[idx], eigvecs[:, idx]

In [34]:
# Project every standardised image onto the k retained eigenvectors;
# Z has one row per image and one column per component.
Z = X @ eigvecs 
print(Z.shape)


(400, 30)


In [35]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the projected samples for evaluation; random_state pins the shuffle.
# NOTE(review): no stratify= argument is passed, so the number of held-out
# images per subject is not controlled.
X_train, X_test, y_train, y_test = train_test_split(Z, y, test_size=0.2, random_state=42)

def predict_1nn(X_train, y_train, X_test):
    """Label each row of ``X_test`` with the label of its closest training row.

    Closeness is the Euclidean distance between a query row and every row of
    ``X_train``; when several training rows are equally close, the one with
    the lowest index wins (``np.argmin`` returns the first minimum).

    Returns a NumPy array with one predicted label per row of ``X_test``.
    """
    def closest_label(query):
        # Distance from this query to every training row at once.
        gaps = np.linalg.norm(X_train - query, axis=1)
        return y_train[np.argmin(gaps)]

    return np.array([closest_label(query) for query in X_test])

def mode(labels):
    """Return the most frequent value in ``labels``.

    Ties go to the smallest value: ``np.unique`` hands back the distinct
    values sorted, and ``argmax`` selects the first of the equal maxima.
    """
    distinct, freq = np.unique(labels, return_counts=True)
    winner = freq.argmax()
    return distinct[winner]

def predict_knn(X_train, y_train, X_test, k=1):
    """Classify each row of ``X_test`` by majority vote of its ``k`` nearest
    training rows (Euclidean distance).

    When several labels share the highest vote count, the winner is the tied
    label whose neighbour lies closest to the query, rather than whichever
    tied label happens to have the smallest value. With ``k=1`` this is the
    label of the single nearest training row.

    Parameters
    ----------
    X_train : numpy.ndarray
        Training samples, one per row.
    y_train : array_like
        Label for each row of ``X_train``.
    X_test : numpy.ndarray
        Query samples, one per row, with the same number of columns as
        ``X_train``.
    k : int, optional
        Number of neighbours that vote (default 1). Values larger than the
        number of training rows use every training row.

    Returns
    -------
    numpy.ndarray
        One predicted label per row of ``X_test``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        # A negative k would otherwise slice "all but the last |k|" neighbours
        # and silently vote over nearly the whole training set.
        raise ValueError(f"k must be a positive integer, got {k}")

    y_train = np.asarray(y_train)
    preds = []
    for x in X_test:
        dists = np.linalg.norm(X_train - x, axis=1)
        # A stable sort keeps equidistant training rows in index order, so the
        # chosen neighbours do not depend on the sort implementation.
        idx = np.argsort(dists, kind="stable")[:k]
        nearest_labels = y_train[idx]  # ordered nearest-first
        vals, counts = np.unique(nearest_labels, return_counts=True)
        top = vals[counts == counts.max()]
        # Walk the neighbours nearest-first and take the first one whose label
        # is among the top-voted labels.
        guess = next(label for label in nearest_labels if label in top)
        preds.append(guess)

    return np.array(preds)

# Classify the held-out samples with predict_knn's default k=1 and report the
# fraction of predictions that match the true labels.
y_pred = predict_knn(X_train, y_train, X_test)
acc = np.mean(y_pred == y_test)
print(f"Accuracy: {acc}")


Accuracy: 0.95
