In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Sample observations: 10 rows by 4 columns.
data = np.array(
    [
        [0.78, 0.38, 0.49, 0.50],
        [0.54, 0.56, 0.36, 0.70],
        [0.34, 0.67, 0.77, 0.37],
        [0.68, 0.40, 0.48, 0.60],
        [0.41, 0.89, 0.42, 0.44],
        [0.54, 0.56, 0.41, 0.65],
        [0.64, 0.44, 0.48, 0.61],
        [0.98, 0.28, 0.52, 0.37],
        [0.36, 0.78, 0.50, 0.50],
        [0.71, 0.47, 0.42, 0.56],
    ],
    dtype=np.float64,
)

# Column-wise mean of the observations.
E = np.mean(data, axis=0)
print('\nExpected :', np.round(E, 4), sep='\n')

# Mean centering. sklearn's PCA() does this automatically, but it is spelled
# out here to make the steps easier to follow.
X = data - E

# PCA
# Find the eigenvalues and eigenvectors of the covariance matrix of X.
# A covariance matrix is symmetric, so its eigenvalues are always real and
# its eigenvectors are mutually orthogonal.
pca = PCA(n_components=2)
pc = pca.fit_transform(X)

# explained_variance_ holds the eigenvalues, shape (2,);
# components_ holds the eigenvectors as rows, shape (2, 4).
eig_val, eig_vec = pca.explained_variance_, pca.components_

print('\neigen values :', eig_val, sep='\n')
print('\neigen vectors :', eig_vec, sep='\n')

# Beta: the factor loadings are the eigenvectors (one per row of B).
B = eig_vec
print('\nBeta :', B.T, sep='\n')

# The magnitude of the component projected onto each eigenvector axis is
# dot(X, eigenvector). This is F in the factor model; F matches pc.
F = X.dot(B.T)       # factors or principal components
print("\nFactors :", F, sep='\n')

# E(F) = 0: the factors have zero mean because X is mean-centered.
print("\nE(F) :", F.mean(axis=0), sep='\n')

# Cov(f1, f2) = 0: the off-diagonal entries show the factors are uncorrelated.
print("\nCov[f1, f2] :", np.cov(F, rowvar=False), sep='\n')

# Reconstruct the data in its original scale from the reduced factors:
# project the factors back through the loadings and add the column means.
X_back = E + np.dot(F, B)
print('\n복원된 X = E + dot(F, B) :')
print(X_back.round(4))

# Reconstruction error.
# X_back already has the mean E added back, so it must be compared against
# the raw `data`, not the mean-centered X (which would offset every residual
# by -E).
eps = data - X_back

# RMSE is the square root of the mean squared error; without the square root
# the printed value would be the MSE.
rmse = np.sqrt(np.mean(np.square(eps)))
print('\n오차 : RMSE =', rmse.round(4))
print(eps.round(4))


Expected :
[0.598 0.543 0.485 0.53 ]

eigen values :
[0.07339986 0.02113566]

eigen vectors :
[[ 0.72776547 -0.68009596 -0.07984833  0.03809412]
 [ 0.05983156 -0.05714165  0.68810186 -0.72088199]]

Beta :
[[ 0.72776547  0.05983156]
 [-0.68009596 -0.05714165]
 [-0.07984833  0.68810186]
 [ 0.03809412 -0.72088199]]

Factors :
[[ 0.24176689  0.0452704 ]
 [-0.03731499 -0.21300431]
 [-0.30298751  0.28875662]
 [ 0.15999632 -0.0408248 ]
 [-0.37105153 -0.01092373]
 [-0.04321211 -0.14255512]
 [ 0.1040628  -0.05271255]
 [ 0.44798189  0.17730859]
 [-0.33673147  0.0041655 ]
 [ 0.1374897  -0.05548061]]

E(F) :
[-5.27355937e-17 -6.03683770e-17]

Cov[f1, f2] :
[[ 7.33998626e-02 -3.81287958e-18]
 [-3.81287958e-18  2.11356647e-02]]

복원된 X = E + dot(F, B) :
[[0.7767 0.376  0.4968 0.5066]
 [0.5581 0.5805 0.3414 0.6821]
 [0.3948 0.7326 0.7079 0.3103]
 [0.712  0.4365 0.4441 0.5655]
 [0.3273 0.796  0.5071 0.5237]
 [0.558  0.5805 0.3904 0.6311]
 [0.6706 0.4752 0.4404 0.572 ]
 [0.9346 0.2282 0.5712 0.4192]
 [