In [None]:
import math
import numpy as np
import cv2, os
import pandas as pd
from numpy import linalg as LA
from PIL import Image
import matplotlib.pyplot as plt

# PCA 코드 살펴보기

## 1. DATA 불러오기 

In [None]:
# Load the face images and build the data matrix.
#   S : after this cell, a 2-D uint8 array whose columns are flattened
#       150x150 grayscale images (one column per image).
#   Y : list of labels (the person's folder name), one per column of S.
# Only people with at least 50 images are used, and only the first 50
# entries returned by os.listdir are taken for each of them.
# NOTE: os.listdir returns entries in arbitrary order, so the selected images
# and the column order of S follow whatever order the filesystem reports.
S = []
Y = []

for name in os.listdir("../faces"):
    personDir = "../faces/%s"%name

    # Skip stray files sitting next to the per-person folders.
    if not os.path.isdir(personDir):
        continue

    nameList = os.listdir(personDir)

    if len(nameList) < 50:
        continue

    y = name.replace(".jpg","")

    for i in nameList[:50]:
        filePath = "../faces/%s/%s"%(name,i)

        # Read the raw bytes ourselves and decode them from memory, so that
        # paths containing Korean characters work; the file is closed as soon
        # as it has been read.
        with open(filePath, "rb") as stream:
            rawBytes = stream.read()
        numpyArray = np.frombuffer(rawBytes, dtype=np.uint8)
        img = cv2.imdecode(numpyArray, cv2.IMREAD_UNCHANGED)
        if img is None:
            raise ValueError("could not decode image: %s" % filePath)

        # IMREAD_UNCHANGED keeps single-channel files 2-D; those are already
        # grayscale and must not go through the BGR conversion.
        if img.ndim == 2:
            gray = img
        else:
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Flatten to one vector; this raises if the image is not 150x150.
        temp = np.reshape(gray, (150*150))
        Y.append(y)
        S.append(temp)

# Transpose so that every image is a column.
S = np.array(S).T

## 2. 히스토그램 평활화

In [None]:
# Histogram-equalize every image (one column of S) independently and store
# the result in the matching column of SS.
SS = np.zeros_like(S)
for col, pixels in enumerate(S.T):
    SS[:, col] = cv2.equalizeHist(pixels).ravel()

## 3. 평균얼굴 구하기

In [None]:
# Mean face: per-pixel average over all images, kept as a 2-D column vector
# so that it broadcasts across the columns of SS.
m = SS.mean(axis=1, keepdims=True)

## 4. 차이 벡터 구하기

In [None]:
# Difference vectors: subtract the mean face from every column of SS.
D = np.subtract(SS, m)

In [None]:
# Caution: flattening the mean with m.reshape(-1) breaks the difference step.
# To broadcast a column vector across the columns of SS it has to be a 2-D
# array; a 1-D array is aligned with the LAST axis of SS instead.
# The failing case uses its own names (m_flat / D_flat) so the valid `m` and
# `D` are left untouched, and the error is caught so that the notebook can
# still be run from top to bottom.
m_flat = np.mean(SS, axis=1).reshape(-1)
try:
    D_flat = SS - m_flat
except ValueError as err:
    print("broadcasting error:", err)
else:
    # Broadcasting only succeeds when the column count of SS happens to be
    # compatible with len(m_flat); the result is then not the per-image
    # difference computed in the previous cell.
    print("no error raised, but the 1-D mean was aligned with the last axis:", D_flat.shape)

## 5. 주성분 구하기
## 5-1. D.T × D 의 eigenvalue / eigenvector 구하기

In [None]:
# Eigen-decompose D.T @ D (one row/column per image); its eigenvectors are
# mapped to eigenvectors of D @ D.T in step 5-5.
# NOTE(review): L is symmetric, so LA.eigh would guarantee real eigenvalues;
# switching could flip eigenvector signs relative to previously saved
# results, so confirm before changing.
L = np.matmul(D.T, D)
w, v = np.linalg.eig(L)

## 5-2. 0인 eigenvalue 제거

In [None]:
# Drop the eigenpairs whose eigenvalue is numerically zero (|w| < 1e-4).
# A single boolean mask selects the surviving eigenvalues and the matching
# eigenvector columns together, instead of deleting them one at a time.
zero_eigen = np.abs(w) < 1e-4
w = w[~zero_eigen]
v = v[:, ~zero_eigen]

## 5-3. eigenvalue 내림차순 정렬 - eigenvalue에 해당하는 eigenvector 가져오기

In [None]:
# Order the eigenpairs from largest to smallest eigenvalue (real parts only);
# the eigenvector columns are permuted with the same index.
index = np.flip(np.argsort(np.real(w)))
ww = np.real(w)[index]
vv = np.take(np.real(v), index, axis=1)

## 5-4. 주축의 개수 정하기

In [None]:
# Number of principal axes (eigenfaces) to keep.
eigenface_num = 20

In [None]:
# Keep only the leading eigenface_num eigenvalues and their eigenvectors.
ww, vv = ww[:eigenface_num], vv[:, :eigenface_num]

## 5-5. D × D.T 의 eigenvector 구하기

In [None]:
# Map the eigenvectors of D.T @ D to eigenvectors of D @ D.T, then scale
# every column to unit length.
projected = D @ vv
u = projected / LA.norm(projected, axis=0)

## 5-6. weight 값 찾기

In [None]:
# Weights: coordinates of every difference vector in the eigenface basis
# (one column per image, one row per eigenface).
weight = np.matmul(u.T, D)

In [None]:
# Inspect the weight vector of the first image (column 0 of `weight`).
weight[:, 0]

# 새로운 data가 왔을 때 코사인 유사도 계산?

In [None]:
# Load the stored face DB: after the index column, the first data column is
# used as the mean face and all remaining columns as the eigenvectors.
FaceDB_info_df = pd.read_csv("../db/FaceDB_info.csv", index_col=0)
FaceDB_info_ar = np.array(FaceDB_info_df)
mean_face, eigenvector = np.split(FaceDB_info_ar, [1], axis=1)

In [None]:
# Load the stored weight vectors: one row per entry, with the CSV index
# column holding the identifier that is reported on a match.
SID_weight_df = pd.read_csv("../db/FaceDB_SID.csv", index_col=0)
SID_index = list(SID_weight_df.index)
SID_weight = np.array(SID_weight_df)

In [None]:
# Load the query image as grayscale and flatten it to a 1-D vector.
# cv2.imread returns None instead of raising when the file is missing or
# cannot be decoded, so fail here with a clear message.
new_img_path = "../faces/f/6.jpg"
new_img = cv2.imread(new_img_path, cv2.IMREAD_GRAYSCALE)
if new_img is None:
    raise OSError("missing or unreadable image: %s" % new_img_path)
new_img = new_img.reshape(-1)

In [None]:
# Minimum cosine similarity for the best candidate to count as a match;
# below this value the lookup reports "None".
threshold = 0.5

In [None]:
# Identify the query face: equalize it, subtract the stored mean face,
# project it onto the stored eigenvectors and compare the resulting weight
# vector with every stored weight vector by cosine similarity.

# Force a column vector so the subtraction does not depend on the shape
# OpenCV returns for a 1-D input.
equalized = cv2.equalizeHist(new_img).reshape(-1, 1)
img = equalized - mean_face

# img_weight stays a (1, n_eigenvectors) row vector; the reconstruction cell
# further down relies on that shape.
img_weight = np.dot(img.T, eigenvector)
dist1 = LA.norm(img_weight)

# Cosine similarity against all stored rows at once.
dist2 = LA.norm(SID_weight, axis=1)
denominators = dist1 * dist2
dot_products = SID_weight @ img_weight.reshape(-1)

# A zero-length vector has no direction; give it -inf so it can never be
# selected (a NaN would slip past the `< threshold` test below).
cosine_array = np.full(dot_products.shape, -np.inf)
np.divide(dot_products, denominators, out=cosine_array, where=denominators > 0)

# Plain list of floats, one similarity per stored entry.
cosine_list = cosine_array.tolist()

if not cosine_list:
    # Empty DB: nothing to compare against.
    print("None")
else:
    best = int(np.argmax(cosine_array))
    print(cosine_array[best])
    if cosine_array[best] < threshold:
        print("None")
    else:
        SID = SID_index[best]
        print(SID)

In [None]:
# Show the cosine similarity computed against every stored entry.
cosine_list

In [None]:
# Show the flattened grayscale pixel values of the query image.
new_img

# mean_face? eigen_face?

In [None]:
# Display the stored mean face as a 150x150 grayscale image.
plt.imshow(np.reshape(mean_face, (150, 150)), cmap="gray")

In [None]:
# Display every stored eigenvector (eigenface) in its own figure.
for eigenface in eigenvector.T:
    plt.figure()
    plt.imshow(np.reshape(eigenface, (150, 150)), cmap="gray")

In [None]:
# Reconstruct the query image from its weights: mean face plus the
# weighted sum of the eigenvectors.
face1 = mean_face + eigenvector @ img_weight.T
plt.figure()
plt.imshow(np.reshape(face1, (150, 150)), cmap="gray")

In [None]:
# Reconstruct an image from the weights computed earlier in this notebook.
# NOTE(review): index 399 is hard-coded and requires at least 400 images.
# NOTE(review): `weight` was computed with `u`, while this uses `eigenvector`
# and `mean_face` loaded from the DB files - presumably the DB was built
# from the same data; confirm.
sample_weight = weight[:, 399]
face1 = eigenvector @ sample_weight + np.ravel(mean_face)
plt.figure()
plt.imshow(np.reshape(face1, (150, 150)), cmap="gray")