In [4]:
import os
import numpy as np
from PIL import Image
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

def load_data(data_dir="./data/ORL人脸数据库", n_subjects=40, n_images=10):
    """Load the face images as flattened pixel vectors with subject labels.

    Reads ``{data_dir}/s{i}/{j}.bmp`` for every subject ``i`` in
    ``1..n_subjects`` and every image ``j`` in ``1..n_images``.

    Args:
        data_dir: Directory containing the ``s1``, ``s2``, ... sub-folders.
        n_subjects: Number of subject folders to read.
        n_images: Number of images to read from each subject folder.

    Returns:
        A ``(data, label)`` tuple of numpy arrays. ``data`` holds one
        flattened image per entry (a 2-D array when all images share the
        same size) and ``label`` holds the subject number ``i`` of each entry.

    Raises:
        FileNotFoundError: If an expected image file is missing.
    """
    data = []
    label = []
    for i in range(1, n_subjects + 1):
        for j in range(1, n_images + 1):
            img_path = f"{data_dir}/s{i}/{j}.bmp"
            # Image.open is lazy and keeps the file open; convert to an array
            # inside the context manager so the handle is released right away.
            with Image.open(img_path) as img:
                data.append(np.array(img).flatten())
            label.append(i)

    return np.array(data), np.array(label)


def train(X_train, y_train, n_neighbors):
    """Build a KNeighborsClassifier with ``n_neighbors`` and fit it.

    Args:
        X_train: Training feature matrix passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        n_neighbors: Neighbour count handed to the classifier constructor.

    Returns:
        The fitted KNeighborsClassifier instance.
    """
    classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
    # fit() returns the estimator itself, so the fitted model is returned directly.
    return classifier.fit(X_train, y_train)

def test(knn, X_test, y_test):
    score = knn.score(X_test, y_test)
    return score


# Experiment: sweep PCA dimensionality x KNN neighbour count on a 70/30 hold-out split.
# The printed table has one row per n_components value and one column per k.
SEED = 42  # shared by PCA and the split so a re-run reproduces the printed table

all_data, all_label = load_data()
n_components = [25, 50, 100, 150, 200, 250, 300, 350]
K = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
test_accuracy = []
for c in n_components:
    # random_state pins the randomized SVD that svd_solver='auto' may select;
    # without it the projection (and the accuracies) can vary between runs.
    # NOTE(review): PCA is fitted on every sample before the split, so the
    # held-out rows also influence the projection. Fitting on the training rows
    # only would avoid that, but limits n_components to the training-set size.
    pca = PCA(n_components=c, random_state=SEED)
    pca.fit(all_data)
    reduced_data = pca.transform(all_data)

    X_train, X_test, y_train, y_test = train_test_split(
        reduced_data, all_label, test_size=0.3, random_state=SEED
    )

    # Collect one row of accuracies (one entry per k) for this PCA setting.
    row = []
    for k in K:
        knn = train(X_train, y_train, k)
        score = test(knn, X_test, y_test)
        row.append(score)
    test_accuracy.append(row)

test_accuracy = np.around(np.array(test_accuracy), decimals=4)
print(test_accuracy)
    

[[0.9833 0.9083 0.8917 0.8917 0.85   0.8417 0.7917 0.7667 0.7417 0.6833]
 [0.9833 0.9083 0.9    0.8833 0.85   0.825  0.8083 0.7583 0.725  0.7083]
 [0.9833 0.9083 0.8917 0.8667 0.8167 0.8167 0.8167 0.75   0.7083 0.6833]
 [0.975  0.9    0.8833 0.85   0.8167 0.8167 0.8    0.7583 0.7167 0.6917]
 [0.9833 0.8917 0.875  0.85   0.8167 0.8167 0.8    0.725  0.7083 0.6833]
 [0.9833 0.9    0.8917 0.8583 0.825  0.825  0.7917 0.725  0.7083 0.675 ]
 [0.9833 0.9    0.8833 0.8583 0.825  0.825  0.7917 0.7333 0.7083 0.6833]
 [0.9833 0.9    0.8833 0.8583 0.8333 0.8167 0.7917 0.7333 0.7167 0.6917]]


In [5]:
# Experiment: same PCA x KNN sweep as before, on a 60/40 hold-out split.
# The printed table has one row per n_components value and one column per k.
SEED = 42  # shared by PCA and the split so a re-run reproduces the printed table

n_components = [25, 50, 100, 150, 200, 250, 300, 350]
K = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
test_accuracy = []
for c in n_components:
    # random_state pins the randomized SVD that svd_solver='auto' may select;
    # without it the projection (and the accuracies) can vary between runs.
    # NOTE(review): PCA is fitted on every sample before the split, so the
    # held-out rows also influence the projection. Fitting on the training rows
    # only would avoid that, but limits n_components to the training-set size.
    pca = PCA(n_components=c, random_state=SEED)
    pca.fit(all_data)
    reduced_data = pca.transform(all_data)

    X_train, X_test, y_train, y_test = train_test_split(
        reduced_data, all_label, test_size=0.4, random_state=SEED
    )

    # Collect one row of accuracies (one entry per k) for this PCA setting.
    row = []
    for k in K:
        knn = train(X_train, y_train, k)
        score = test(knn, X_test, y_test)
        row.append(score)
    test_accuracy.append(row)

test_accuracy = np.around(np.array(test_accuracy), decimals=4)
print(test_accuracy)

[[0.9375 0.8562 0.8562 0.8312 0.8125 0.7812 0.7375 0.7    0.6562 0.6375]
 [0.9562 0.8812 0.8688 0.85   0.8188 0.7688 0.7188 0.6875 0.675  0.6438]
 [0.9438 0.8688 0.8625 0.825  0.8    0.7438 0.6938 0.6375 0.65   0.6125]
 [0.9375 0.8562 0.8625 0.8125 0.8    0.75   0.6938 0.65   0.65   0.625 ]
 [0.9312 0.8562 0.8562 0.8062 0.7875 0.7562 0.6938 0.65   0.6438 0.6062]
 [0.9312 0.85   0.85   0.8062 0.8062 0.7562 0.675  0.6438 0.65   0.6062]
 [0.9312 0.85   0.85   0.8125 0.7875 0.7562 0.6938 0.6438 0.6438 0.6   ]
 [0.9375 0.8562 0.8438 0.8125 0.8    0.7562 0.6938 0.65   0.65   0.6188]]


In [6]:
# Experiment: same PCA x KNN sweep as before, on a 50/50 hold-out split.
# The printed table has one row per n_components value and one column per k.
SEED = 42  # shared by PCA and the split so a re-run reproduces the printed table

n_components = [25, 50, 100, 150, 200, 250, 300, 350]
K = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
test_accuracy = []
for c in n_components:
    # random_state pins the randomized SVD that svd_solver='auto' may select;
    # without it the projection (and the accuracies) can vary between runs.
    # NOTE(review): PCA is fitted on every sample before the split, so the
    # held-out rows also influence the projection. Fitting on the training rows
    # only would avoid that, but limits n_components to the training-set size.
    pca = PCA(n_components=c, random_state=SEED)
    pca.fit(all_data)
    reduced_data = pca.transform(all_data)

    X_train, X_test, y_train, y_test = train_test_split(
        reduced_data, all_label, test_size=0.5, random_state=SEED
    )

    # Collect one row of accuracies (one entry per k) for this PCA setting.
    row = []
    for k in K:
        knn = train(X_train, y_train, k)
        score = test(knn, X_test, y_test)
        row.append(score)
    test_accuracy.append(row)

test_accuracy = np.around(np.array(test_accuracy), decimals=4)
print(test_accuracy)

[[0.92  0.825 0.825 0.795 0.765 0.715 0.69  0.645 0.625 0.595]
 [0.92  0.84  0.845 0.805 0.795 0.68  0.66  0.64  0.65  0.605]
 [0.915 0.845 0.84  0.785 0.765 0.675 0.635 0.6   0.605 0.585]
 [0.92  0.825 0.825 0.775 0.76  0.68  0.635 0.575 0.57  0.55 ]
 [0.915 0.83  0.825 0.76  0.74  0.68  0.63  0.58  0.535 0.5  ]
 [0.91  0.825 0.805 0.77  0.745 0.67  0.635 0.555 0.535 0.505]
 [0.91  0.82  0.805 0.775 0.745 0.665 0.625 0.55  0.545 0.505]
 [0.915 0.825 0.81  0.77  0.75  0.675 0.625 0.555 0.545 0.51 ]]
