In [1]:
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
import numpy as np

In [2]:
# Fetch the Iris dataset that ships with scikit-learn; PCA below only needs
# its feature matrix, which is bound to X.
iris = load_iris()
X = iris["data"]

In [4]:
# Reference result: let scikit-learn fit a 2-component PCA on X and project
# the data in one step.
pca = PCA(n_components=2)
X2D = pca.fit_transform(X)

# Pull the fitted principal axes (one per row) and the share of the total
# variance each of them explains.
pcs, explained_variance_ratios = pca.components_, pca.explained_variance_ratio_

print("sklearn:")
print("PCs:\n", pcs)
print("Explained Variance Ratios:", explained_variance_ratios)

sklearn:
PCs:
 [[ 0.36138659 -0.08452251  0.85667061  0.3582892 ]
 [ 0.65658877  0.73016143 -0.17337266 -0.07548102]]
Explained Variance Ratios: [0.92461872 0.05306648]


In [7]:
# PCA by hand via the singular value decomposition of the mean-centered data.
X_centered = X - np.mean(X, axis=0)

# Only the singular values and the right singular vectors are used below, so
# request the reduced SVD. full_matrices=False stops NumPy from building a
# square (n_samples x n_samples) U; s and Vt are the same as long as there are
# at least as many samples as features.
U, s, Vt = np.linalg.svd(X_centered, full_matrices=False)

# Columns of Vt.T are the principal axes, ordered by decreasing singular value.
c1 = Vt.T[:, 0]
c2 = Vt.T[:, 1]
W2 = Vt.T[:, :2]

# Project the centered data onto the first two axes.
# NOTE: this rebinds X2D, which the sklearn cell also assigns.
X2D = X_centered.dot(W2)

# Squared singular values divided by (n_samples - 1) give the variance along
# each axis; the ratio is taken against the variance summed over all axes.
explained_variance = (s ** 2) / (X.shape[0] - 1)
explained_variance_ratio = explained_variance[:2] / explained_variance.sum()

print("svd:")
print("PCs:\n", W2)
print("Explained Variance Ratios:", explained_variance_ratio)

svd:
PCs:
 [[ 0.36138659 -0.65658877]
 [-0.08452251 -0.73016143]
 [ 0.85667061  0.17337266]
 [ 0.3582892   0.07548102]]
Explained Variance Ratios: [0.92461872 0.05306648]


In [8]:
# PCA by hand via the eigendecomposition of X_centered.T @ X_centered.
# NOTE: despite its name, X_cov is not divided by (n_samples - 1), so it is the
# scatter matrix rather than the covariance matrix. The eigenvectors and the
# variance ratios printed below are unaffected by that constant factor.
X_cov = X_centered.T @ X_centered

# X_cov is symmetric, so use eigh: it is the routine meant for symmetric
# matrices and always returns real eigenvalues (in ascending order).
# An eigenvector's sign is arbitrary, so a column may differ by a factor of -1
# from the axes found by the other methods.
eigenvalues, eigenvectors = np.linalg.eigh(X_cov)

# Indices of the eigenvalues from largest to smallest. Named `order` rather
# than `sorted` so the built-in sorted() is not shadowed in later cells.
order = np.argsort(eigenvalues)[::-1]
c1 = eigenvectors[:, order[0]]
c2 = eigenvectors[:, order[1]]
W2 = eigenvectors[:, order[:2]]

# Project the centered data onto the two leading eigenvectors.
X2D_ = X_centered.dot(W2)

# Each eigenvalue's share of the eigenvalue total is the fraction of variance
# explained by the matching axis.
explained_variance_ratio = eigenvalues[order[:2]] / eigenvalues.sum()

print("eig:")
print("PCs:\n", W2)
print("Explained Variance Ratios:", explained_variance_ratio)

eig:
PCs:
 [[ 0.36138659 -0.65658877]
 [-0.08452251 -0.73016143]
 [ 0.85667061  0.17337266]
 [ 0.3582892   0.07548102]]
Explained Variance Ratios: [0.92461872 0.05306648]
