PCA without using a built-in Python library



In [5]:
import random
import math

In [6]:
def generate_data(n_samples=10, n_features=3, seed=None):
    """Generate a random dataset as a list of rows.

    Args:
        n_samples: Number of rows to generate.
        n_features: Number of values in each row.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            generator is used, so the same data is produced on every call and
            the global ``random`` state is left untouched. When ``None``
            (the default), values are drawn from the module-level ``random``
            generator.

    Returns:
        A list of ``n_samples`` lists, each holding ``n_features`` floats
        produced by ``random()`` (uniform in [0.0, 1.0)).
    """
    draw = random.random if seed is None else random.Random(seed).random
    return [[draw() for _ in range(n_features)] for _ in range(n_samples)]

In [7]:
def center_data(X):
    """Subtract the per-feature mean from every row of X.

    Args:
        X: List of rows (lists of numbers). The number of features is taken
            from the first row.

    Returns:
        A tuple ``(centered, means)``. ``centered`` is a new list of rows with
        each feature's mean removed, and ``means`` holds the mean of each
        feature. X itself is not modified. An empty X yields ``([], [])``.
    """
    n_samples = len(X)
    if n_samples == 0:
        # No rows: there is no first row to size the features from and the
        # mean would divide by zero, so return empty results instead.
        return [], []

    n_features = len(X[0])
    means = [sum(row[i] for row in X) / n_samples for i in range(n_features)]
    # zip stops after n_features values, matching the feature count above.
    centered = [[value - mean for value, mean in zip(row, means)] for row in X]
    return centered, means

In [8]:
def covariance_matrix(X):
    """Compute the sample covariance matrix of already-centered data.

    The function does not subtract any means itself: entry ``(i, j)`` is the
    sum over rows of ``X[k][i] * X[k][j]`` divided by ``n_samples - 1``. The
    result is therefore a covariance matrix only when every feature of X
    already has zero mean.

    Args:
        X: List of rows (lists of numbers). The number of features is taken
            from the first row.

    Returns:
        A new ``n_features x n_features`` symmetric matrix as a list of rows.

    Raises:
        ValueError: If X has fewer than two rows, because the
            ``n_samples - 1`` divisor would be zero or negative.
    """
    n_samples = len(X)
    if n_samples < 2:
        raise ValueError(
            "covariance_matrix requires at least two samples, got %d" % n_samples
        )

    n_features = len(X[0])
    cov = [[0.0] * n_features for _ in range(n_features)]

    for i in range(n_features):
        # The matrix is symmetric, so compute the upper triangle only and
        # mirror each value into the lower triangle.
        for j in range(i, n_features):
            value = sum(row[i] * row[j] for row in X) / (n_samples - 1)
            cov[i][j] = value
            cov[j][i] = value
    return cov


In [11]:
def power_iteration(matrix, num_iter=100):
    """Approximate an eigenpair of a square matrix by power iteration.

    Starting from a random vector, the vector is repeatedly multiplied by
    ``matrix`` and rescaled to unit length (power iteration normally converges
    towards the eigenvector whose eigenvalue has the largest magnitude). The
    eigenvalue is then estimated with the Rayleigh quotient ``b . (matrix b)``
    of the final unit vector.

    Args:
        matrix: Square matrix as a list of rows; only the first
            ``len(matrix)`` columns of each row are read.
        num_iter: Number of multiply-and-normalise steps to run.

    Returns:
        A tuple ``(eigenvalue, eigenvector)`` where ``eigenvector`` is a list
        of unit Euclidean length. If ``matrix`` maps the current vector to the
        zero vector (for example an all-zero matrix), the iteration stops
        early and the current vector is returned with an eigenvalue of 0.
    """
    n = len(matrix)

    def mat_vec(vec):
        # Plain matrix-vector product: matrix * vec.
        return [sum(matrix[i][j] * vec[j] for j in range(n)) for i in range(n)]

    b = [random.random() for _ in range(n)]
    unit_length = False  # becomes True once b has been normalised in the loop

    for _ in range(num_iter):
        b_new = mat_vec(b)
        norm = math.sqrt(sum(val**2 for val in b_new))
        if norm == 0.0:
            # matrix * b is the zero vector, so b is already an eigenvector
            # with eigenvalue 0; dividing by the norm would fail.
            break
        b = [val / norm for val in b_new]
        unit_length = True

    if not unit_length:
        # No step normalised b (num_iter <= 0 or an immediate zero product):
        # rescale the start vector so the Rayleigh quotient below is valid.
        start_norm = math.sqrt(sum(val**2 for val in b))
        if start_norm > 0.0:
            b = [val / start_norm for val in b]

    # eigenvalue = Rayleigh quotient (b has unit length, so no denominator)
    Ab = mat_vec(b)
    eigenvalue = sum(b[i] * Ab[i] for i in range(n))

    return eigenvalue, b

In [12]:
def deflate(matrix, eigenvalue, eigenvector):
    """Subtract one eigen-component from ``matrix`` in place.

    Each entry ``matrix[i][j]`` is reduced by
    ``eigenvalue * eigenvector[i] * eigenvector[j]``.

    Args:
        matrix: Square matrix as a list of rows; it is modified in place.
        eigenvalue: Scalar weight of the component to remove.
        eigenvector: Vector whose outer product with itself is removed.

    Returns:
        The same ``matrix`` object that was passed in, after modification.
    """
    size = len(matrix)
    for r, row in enumerate(matrix):
        # Row-specific factor, reused for every column of this row.
        scaled = eigenvalue * eigenvector[r]
        for c in range(size):
            row[c] -= scaled * eigenvector[c]
    return matrix


In [13]:
def pca(X, n_components=2):
    """Project X onto principal components found by power iteration.

    The data is centered, its covariance matrix is built, and then
    ``n_components`` eigenpairs are extracted one at a time: each
    ``power_iteration`` call is followed by ``deflate`` so the next call works
    on the matrix with the previous component removed.

    Args:
        X: List of rows (lists of numbers); it is not modified.
        n_components: Number of components to extract.

    Returns:
        A tuple ``(transformed, eigvals, eigvecs)``: the centered rows
        projected onto each eigenvector, the estimated eigenvalues, and the
        eigenvectors, all in extraction order. Because power iteration starts
        from a random vector, results can differ slightly between runs.

    Raises:
        ValueError: If ``n_components`` exceeds the number of features. The
            covariance matrix has only that many eigenpairs, so further
            iterations would run on a fully deflated matrix.
    """
    X_centered, _means = center_data(X)
    cov = covariance_matrix(X_centered)

    n_features = len(cov)
    if n_components > n_features:
        raise ValueError(
            "n_components (%d) cannot exceed the number of features (%d)"
            % (n_components, n_features)
        )

    eigvals, eigvecs = [], []

    for _ in range(n_components):
        val, vec = power_iteration(cov)
        eigvals.append(val)
        eigvecs.append(vec)
        # deflate() updates cov in place and returns the same object; cov is
        # local to this function, so the caller's data is unaffected.
        cov = deflate(cov, val, vec)

    # Project data onto eigenvectors
    transformed = [
        [sum(x * w for x, w in zip(row, vec)) for vec in eigvecs]
        for row in X_centered
    ]

    return transformed, eigvals, eigvecs


In [14]:
# Seed the global generator so the random data and the random start vectors
# used inside pca() are the same on every run of this cell.
random.seed(42)

X = generate_data(12, 3)
transformed, eigvals, eigvecs = pca(X, n_components=2)

print("Original data (first 3 rows):", X[:3])
print("Eigenvalues:", eigvals)
print("Eigenvectors:", eigvecs)
print("Transformed data (first 3 rows):", transformed[:3])


Original data (first 3 rows): [[0.42662948016195523, 0.13261795520581776, 0.3807088287427013], [0.10992502325317888, 0.042788872032860525, 0.7364580261541074], [0.7879494789681868, 0.15225929056675547, 0.16624936540555146]]
Eigenvalues: [0.10476138367381072, 0.0778773671053624]
Eigenvectors: [[0.73200681288583, 0.538650560042112, 0.4171589625730802], [-0.5915986077232409, 0.19886514906178412, 0.7813217901918244]]
Transformed data (first 3 rows): [[-0.15347492499593338, -0.25225869082391805], [-0.2852872649552992, 0.19519395070857015], [0.03212990481456718, -0.6296709737940821]]
