In [1]:
import numpy as np

class PCA(object):
    def __init__(self, n_components=3):
        self.n_components = n_components
        self.mean = None
        self.v_components = None
        self.explained_variance_ratio_ = None
        
    def fit(self, w):
        self.mean = w.mean(axis=0)
        mean_x = w - self.mean
        
        # The returned eigenvectors are sorted in the
        # decreasing order of their corresponding eigenvalues.
        u, s, v = np.linalg.svd(mean_x, full_matrices=False)
        self.v_components = v[:self.n_components]
        
        ratios = []
        s_sum = np.dot(s, s)
        for i in range(self.n_components):
            ratio = np.dot(s[i], s[i]) / s_sum
            ratios.append(ratio)
        self.explained_variance_ratio_ = np.array(ratios)
        
    def transform(self, x):
        mean_x = x - self.mean
        return np.dot(mean_x, self.v_components.T)

    def fit_transform(self, x):
        self.fit(x)
        return self.transform(x)

    @staticmethod
    def eig_transform(x):
        mean_x = x - x.mean(axis=0)
        a = np.dot(mean_x.T, mean_x)
        w, v = np.linalg.eig(a)
        z = np.dot(mean_x, v)
        return z

In [3]:
from sklearn.decomposition import PCA as sk_PCA

np.random.seed(12)
np.set_printoptions(precision=6, suppress=True, linewidth=120)

data = np.random.random((6, 5))
sk_PCA_reduction = sk_PCA(n_components=0.95)
sk_PCA_all = sk_PCA(n_components=5)
vanilla_PCA = PCA(n_components=5)

x = np.array(data)

sk_reduction_out = sk_PCA_reduction.fit_transform(x)
sk_all_out = sk_PCA_all.fit_transform(x)
vanilla_svg_out = vanilla_PCA.fit_transform(x)
vanilla_eig_out = vanilla_PCA.eig_transform(x)

print("sklearn_reduction")
print(sk_reduction_out)
print("sklearn_all")
print(sk_all_out)
print("vanilla_svg")
print(vanilla_svg_out)
print("vanilla_eig")
print(vanilla_eig_out)
print("sklearn_reduction_ratio")
print(sk_PCA_reduction.explained_variance_ratio_)
print("sklearn_all_ratio")
print(sk_PCA_all.explained_variance_ratio_)
print("vanilla_all_ratio")
print(vanilla_PCA.explained_variance_ratio_)

sklearn_reduction
[[-0.259208  0.030738  0.385295  0.244341]
 [-0.361367  0.593193 -0.302241  0.004213]
 [-0.399717 -0.500964  0.039648 -0.177128]
 [-0.160464 -0.025134 -0.017979 -0.096181]
 [ 0.707208  0.275526  0.227706 -0.141657]
 [ 0.473548 -0.373358 -0.332429  0.166412]]
sklearn_all
[[-0.259208  0.030738  0.385295  0.244341 -0.00231 ]
 [-0.361367  0.593193 -0.302241  0.004213 -0.017801]
 [-0.399717 -0.500964  0.039648 -0.177128 -0.029721]
 [-0.160464 -0.025134 -0.017979 -0.096181  0.062779]
 [ 0.707208  0.275526  0.227706 -0.141657 -0.011581]
 [ 0.473548 -0.373358 -0.332429  0.166412 -0.001366]]
vanilla_svg
[[-0.259208 -0.030738  0.385295 -0.244341  0.00231 ]
 [-0.361367 -0.593193 -0.302241 -0.004213  0.017801]
 [-0.399717  0.500964  0.039648  0.177128  0.029721]
 [-0.160464  0.025134 -0.017979  0.096181 -0.062779]
 [ 0.707208 -0.275526  0.227706  0.141657  0.011581]
 [ 0.473548  0.373358 -0.332429 -0.166412  0.001366]]
vanilla_eig
[[-0.259208 -0.030738 -0.385295 -0.00231   0.2443