In [1]:
import math

**Computes the mean vector for a dataset**

In [2]:
def mean_vector(data):
    n = len(data)
    m = len(data[0])
    means = [sum(data[i][j] for i in range(n)) / n for j in range(m)]
    return means

**Centers the dataset by subtracting the mean**

In [3]:
def subtract_mean(data, mean):
    return [[data[i][j] - mean[j] for j in range(len(mean))] for i in range(len(data))]

**Computes the transpose of a matrix**

In [4]:
def transpose(matrix):
    return [[matrix[j][i] for j in range(len(matrix))] for i in range(len(matrix[0]))]

**Multiplies two matrices A and B**

In [5]:
def matrix_multiply(A, B):
    result = [[sum(A[i][k] * B[k][j] for k in range(len(B))) for j in range(len(B[0]))] for i in range(len(A))]
    return result

**Computes the covariance matrix**

In [6]:
def covariance_matrix(data):
    n = len(data)
    data_T = transpose(data)
    cov_matrix = [[sum(data_T[i][k] * data_T[j][k] for k in range(n)) / (n - 1) for j in range(len(data_T))] for i in range(len(data_T))]
    return cov_matrix

**Finds the dominant eigenvector using power iteration**

In [7]:
def power_iteration(A, num_simulations=100):
    n = len(A)
    b_k = [1] * n  # Initial guess
    for _ in range(num_simulations):
        b_k1 = [sum(A[i][j] * b_k[j] for j in range(n)) for i in range(n)]
        norm = math.sqrt(sum(x ** 2 for x in b_k1))
        b_k = [x / norm for x in b_k1]

    return b_k

**Performs PCA on a dataset and reduces to num_components dimensions**

In [8]:
def pca(data, num_components=2):
    mean = mean_vector(data)
    centered_data = subtract_mean(data, mean)
    cov_matrix = covariance_matrix(centered_data)

    eigenvectors = []
    for _ in range(num_components):
        ev = power_iteration(cov_matrix)
        eigenvectors.append(ev)

    transformed_data = matrix_multiply(centered_data, transpose(eigenvectors))
    return transformed_data

In [9]:
data = [
    [2.5, 2.4],
    [0.5, 0.7],
    [2.2, 2.9],
    [1.9, 2.2],
    [3.1, 3.0],
    [2.3, 2.7],
    [2, 1.6],
    [1, 1.1],
    [1.5, 1.6],
    [1.1, 0.9]
]

reduced_data = pca(data, num_components=2)
print("Reduced Data:", reduced_data)

Reduced Data: [[0.8279701862010882, 0.8279701862010882], [-1.777580325280429, -1.777580325280429], [0.9921974944148888, 0.9921974944148888], [0.27421041597539964, 0.27421041597539964], [1.6758014186445402, 1.6758014186445402], [0.9129491031588084, 0.9129491031588084], [-0.09910943749844403, -0.09910943749844403], [-1.14457216379866, -1.14457216379866], [-0.4380461367624499, -0.4380461367624499], [-1.2238205550547403, -1.2238205550547403]]
