# 人脸数据读取、处理与变量提取

### 读取人脸照片数据

In [1]:
import os

# os.listdir() returns entries in arbitrary, platform-dependent order.
# Sorting makes the row order of the dataset (and therefore the seeded
# train/test split further down) reproducible on every machine.
names = sorted(os.listdir('olivettifaces'))

names[0:5]

['10_0.jpg', '10_1.jpg', '10_2.jpg', '10_3.jpg', '10_4.jpg']

In [2]:
from PIL import Image

# Build the path with os.path.join (os is imported in the first cell) rather
# than a hard-coded backslash, which only works on Windows.
img0 = Image.open(os.path.join('olivettifaces', names[0]))
img0.show()

### 人脸数据处理 - 特征变量提取

In [3]:
import numpy as np

# Turn every face image into one flat row of grayscale pixel values.
X = []
for i in names:
    # Portable path (no hard-coded backslash); the context manager closes the
    # underlying file as soon as the pixel data has been copied out.
    with Image.open(os.path.join('olivettifaces', i)) as src:
        # Grayscale ('L'), then downscale to 32x32 -> 1024 features per image.
        img = src.convert('L').resize((32, 32))
    arr = np.array(img)
    # flatten() alone already yields the 1-D row; the extra reshape was redundant.
    X.append(arr.flatten().tolist())

In [4]:
import pandas as pd

# Wrap the list of pixel rows in a DataFrame: one row per image,
# one column per pixel position.
X = pd.DataFrame(data=X)
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,186,76,73,87,89,88,75,81,100,102,...,179,184,177,161,202,182,207,198,202,206
1,196,90,97,98,98,87,101,89,65,73,...,181,167,190,188,203,209,205,198,190,190
2,193,89,97,99,75,74,83,64,77,86,...,178,178,156,185,195,201,206,201,189,190
3,192,84,93,89,97,89,66,60,60,57,...,173,151,199,189,203,200,196,186,182,184
4,194,72,49,45,56,37,44,62,71,71,...,192,194,192,176,174,224,200,218,176,168
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,114,115,115,119,115,120,117,118,113,112,...,190,193,169,141,142,144,143,141,143,215
396,115,118,117,117,116,118,117,119,117,116,...,187,189,183,216,189,193,148,144,142,212
397,113,116,113,117,114,121,121,120,121,114,...,184,188,185,221,203,192,144,143,137,212
398,110,109,109,110,110,112,112,113,113,111,...,172,171,209,212,175,136,142,141,137,213


### 人脸数据处理 - 目标变量提取

In [5]:
# The target label is the integer that precedes '_' in each file name
# (e.g. '10_0.jpg' -> 10).
y = [int(i.split('_')[0]) for i in names]
len(y)

400

# 数据划分与降维

### 划分训练集和测试集

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

### 数据降维模型训练

In [7]:
from sklearn.decomposition import PCA

# Reduce the pixel features to 100 principal components, fitting on the
# training split only.
# With the default svd_solver='auto', scikit-learn may select the randomized
# SVD solver for data of this shape; pinning random_state (same seed as the
# train/test split) keeps the projection reproducible between runs.
pca = PCA(n_components=100, random_state=1)
pca.fit(X_train)

PCA(n_components=100)

### 对训练集和测试集进行数据降维

In [8]:
# Project both splits with the PCA that was fitted on the training split.
X_train_pca, X_test_pca = (pca.transform(part) for part in (X_train, X_test))

### 验证PCA是否降维

In [9]:
# Print the shape of each reduced matrix on its own line to confirm the
# number of columns after the projection.
print(X_train_pca.shape, X_test_pca.shape, sep='\n')

(320, 100)
(80, 100)


# 模型的搭建与使用

### 模型搭建

In [10]:
from sklearn.neighbors import KNeighborsClassifier

# K-nearest-neighbours classifier with default settings, trained on the
# PCA-reduced training features.
knn = KNeighborsClassifier()
knn.fit(X=X_train_pca, y=y_train)

KNeighborsClassifier()

### 模型预测

In [11]:
# Predict a label for every PCA-reduced test sample and show the raw array.
y_pred = knn.predict(X=X_test_pca)
print(y_pred)

[ 9 21  3 40 26  4 28 37 12 36 26  7 27 21  3 24  7  2 17 24 21 32  8  2
 11 19  6 29  6 29 18 10 25 35 10 18 15  5  9 22 34 29  2 16  8 18  8 38
 39 35 16 30 30 11 37 36 35 20 33  6  1 16 31 32  5 30  1 39 35 39  2 19
  5  8 11  4 14 27 22 24]


In [12]:
import pandas as pd

# Side-by-side table of predicted vs. actual labels for the test split,
# built in a single constructor call (column order: predicted, actual).
a = pd.DataFrame({
    '预测值': list(y_pred),
    '实际值': list(y_test),
})

a.head()

Unnamed: 0,预测值,实际值
0,9,9
1,21,21
2,3,3
3,40,40
4,26,26


### 查看预测准确度

In [13]:
from sklearn.metrics import accuracy_score

# accuracy_score expects (y_true, y_pred). Accuracy itself is symmetric, so
# the value is unchanged, but the documented argument order is used here.
score = accuracy_score(y_test, y_pred)
print(score)

0.9125


In [14]:
# Same accuracy, obtained through the classifier's own score() method on the
# PCA-reduced test split.
score = knn.score(X=X_test_pca, y=y_test)
print(score)

0.9125


# 模型对比

In [15]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Baseline for comparison: the same classifier trained on the raw pixel
# features, without the PCA projection.
# NOTE: this cell rebinds `knn`, `y_pred` and `score`, so after it runs they
# refer to the raw-pixel model rather than the PCA-based one.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# accuracy_score expects (y_true, y_pred); the value is the same either way
# for accuracy, but the documented argument order is used here.
score = accuracy_score(y_test, y_pred)
print(score)

0.9125
