In [4]:
import numpy as np
from sklearn.decomposition import PCA
# Toy 2-D data set: three points plus their mirror images through the origin.
X = np.array(
    [
        [-1, -1],
        [-2, -1],
        [-3, -2],
        [1, 1],
        [2, 1],
        [3, 2],
    ]
)

# Keep both components; `fit` hands back the estimator, so it can be chained.
pca = PCA(n_components=2).fit(X)

# Share of total variance per component, then the matching singular values.
for fitted_attr in ("explained_variance_ratio_", "singular_values_"):
    print(getattr(pca, fitted_attr))

[0.99244289 0.00755711]
[6.30061232 0.54980396]


In [2]:
# Display the sample matrix used by the PCA examples as the cell output.
X

array([[-1, -1],
       [-2, -1],
       [-3, -2],
       [ 1,  1],
       [ 2,  1],
       [ 3,  2]])

In [5]:
# Same two-component decomposition, but with the exact full-SVD solver
# requested explicitly.
pca = PCA(n_components=2, svd_solver='full').fit(X)

# Explained-variance ratios on the first line, singular values on the second.
print(pca.explained_variance_ratio_, pca.singular_values_, sep="\n")

[0.99244289 0.00755711]
[6.30061232 0.54980396]


In [6]:
# Truncated SVD through ARPACK. This solver requires n_components to be
# strictly smaller than min(n_samples, n_features), hence a single component
# for the 2-D data. ARPACK starts from a random vector, so random_state is
# pinned to make every re-run of the notebook reproducible.
pca = PCA(n_components=1, svd_solver='arpack', random_state=0)
pca.fit(X)

print(pca.explained_variance_ratio_)

print(pca.singular_values_)

[0.99244289]
[6.30061232]


In [7]:
import numpy as np
from sklearn.decomposition import IncrementalPCA
# Rebuild the toy data inside this cell.
X = np.array(
    [
        [-1, -1],
        [-2, -1],
        [-3, -2],
        [1, 1],
        [2, 1],
        [3, 2],
    ]
)

# Fit the two-component model in mini-batches of three rows.
ipca = IncrementalPCA(n_components=2, batch_size=3).fit(X)

# Project the data onto the fitted components (shown as the cell output).
ipca.transform(X)

array([[-1.38340578, -0.2935787 ],
       [-2.22189802,  0.25133484],
       [-3.6053038 , -0.04224385],
       [ 1.38340578,  0.2935787 ],
       [ 2.22189802, -0.25133484],
       [ 3.6053038 ,  0.04224385]])

https://iksinc.online/2018/08/21/principal-component-analysis-pca-explained-with-examples/

In [1]:
import numpy as np
from numpy import linalg as LA
import matplotlib.pyplot as plt

In [6]:
# Ten observations (rows) of three variables (columns).
X = np.array(
    [
        [7, 4, 3],
        [4, 1, 8],
        [6, 3, 5],
        [8, 6, 1],
        [8, 5, 7],
        [7, 2, 9],
        [5, 3, 3],
        [9, 5, 8],
        [7, 4, 5],
        [8, 2, 2],
    ]
)

# Column-wise mean; later cells subtract it to centre the data.
Xmean = X.mean(axis=0)
print(Xmean)

[6.9 3.5 5.1]


In [8]:
# Sample covariance between the three variables. rowvar=False tells NumPy
# that variables are in columns, which is equivalent to passing X transposed.
C = np.cov(X, rowvar=False)
print(C)

[[ 2.32222222  1.61111111 -0.43333333]
 [ 1.61111111  2.5        -1.27777778]
 [-0.43333333 -1.27777778  7.87777778]]


In [9]:
# C is a real symmetric covariance matrix, so use the symmetric solver.
# eigh returns real eigenvalues in ascending order with the matching
# unit-length eigenvectors as the columns of v; the general-purpose eig makes
# no promise about ordering, which the later column selection relies on.
# Note: eigenvectors are only defined up to sign, so individual columns may
# come out negated compared with eig.
w, v = LA.eigh(C)
print(w)
print(v)

[0.74992815 3.67612927 8.27394258]
[[-0.70172743  0.69903712 -0.1375708 ]
 [ 0.70745703  0.66088917 -0.25045969]
 [ 0.08416157  0.27307986  0.95830278]]


In [10]:
# Build the projection matrix from the eigenvectors with the two largest
# eigenvalues. The columns are picked by sorting w rather than by hard-coded
# positions, because the eigen-solver does not guarantee any ordering.
n_keep = 2
order = np.argsort(w)[::-1]  # eigenvalue indices, largest first
# Each row of A is one retained principal direction.
A = v[:, order[:n_keep]].T
print(A)

[[-0.1375708  -0.25045969  0.95830278]
 [ 0.69903712  0.66088917  0.27307986]]


In [14]:
# Centre the observations, then project them onto the rows of A.
# Y has one row per retained component and one column per sample.
centered = X - Xmean
Y = A @ centered.T
print(Y)

[[-2.15142276  3.80418259  0.15321328 -4.7065185   1.29375788  4.0993133
  -1.62582148  2.11448986 -0.2348172  -2.74637697]
 [-0.17311941 -2.88749898 -0.98688598  1.30153634  2.27912632  0.1435814
  -2.23208282  3.2512433   0.37304031 -1.06894049]]


In [18]:
# Map the scores back into the original feature space and undo the centring,
# giving one reconstructed sample per row.
back_projected = A.T @ Y
Xhat = back_projected.T + Xmean
print(Xhat)

[[7.07495606 3.92443193 2.99101016]
 [4.35818659 0.63888882 7.95704095]
 [6.18905239 2.80940399 4.97732603]
 [8.45730172 5.53896441 0.94515359]
 [8.31521059 4.68221571 6.96219527]
 [6.43642293 2.56817868 9.06759253]
 [5.56335682 2.43204338 2.93243389]
 [8.88184769 5.11911702 8.01417058]
 [7.19307301 3.80535054 4.97684382]
 [6.53059219 3.48140552 2.17623319]]


In [24]:
# Mean squared reconstruction error per sample. The sample count is taken
# from the data instead of being hard-coded, so the cell stays correct if X
# changes size.
n_samples = X.shape[0]
mse = np.sum((X - Xhat) ** 2) / n_samples
print(mse)
# Rescale from a 1/n to a 1/(n-1) normalisation (the one np.cov uses); the
# printed value then matches the eigenvalue of the discarded component.
print(mse * n_samples / (n_samples - 1))

0.6749353375153226
0.749928152794803


In [31]:
from sklearn import decomposition
# Fit scikit-learn's PCA keeping all three components.
pca = decomposition.PCA(n_components=3).fit(X)

# Coordinates of every sample in the principal-component basis.
y = pca.transform(X)
print(y)

[[ 2.15142276 -0.17311941 -0.10681648]
 [-3.80418259 -2.88749898 -0.5104355 ]
 [-0.15321328 -0.98688598 -0.26941001]
 [ 4.7065185   1.30153634 -0.65167999]
 [-1.29375788  2.27912632 -0.44919235]
 [-4.0993133   0.1435814   0.80312818]
 [ 1.62582148 -2.23208282 -0.80281431]
 [-2.11448986  3.2512433   0.16837351]
 [ 0.2348172   0.37304031 -0.27513962]
 [ 2.74637697 -1.06894049  2.09398657]]


In [32]:
# Map the three-component scores back to the original feature space.
xhat = pca.inverse_transform(y)

# Mean squared reconstruction error per sample, dividing by the actual number
# of rows rather than a hard-coded 10. With every component kept, the error
# is only floating-point round-off.
mse = np.sum((X - xhat) ** 2) / X.shape[0]
print(mse)

1.262177448353619e-30


In [33]:
# Variance captured by each fitted component, shown as the cell output.
# The printed values are the covariance eigenvalues w, listed largest first.
pca.explained_variance_

array([8.27394258, 3.67612927, 0.74992815])