Importing the necessary libraries

In [26]:
import numpy as np
import pandas as pd
from numpy.linalg import norm

Functions

In [27]:
def standardize(x):
    """Rescale every column of ``x`` to zero mean and unit standard deviation.

    Statistics are taken along axis 0 (one mean / standard deviation per
    column) using NumPy's default population standard deviation.

    Parameters
    ----------
    x : numpy.ndarray
        Data with samples along axis 0.

    Returns
    -------
    numpy.ndarray
        ``(x - column mean) / column standard deviation``.

    Raises
    ------
    ValueError
        If ``x`` has no elements, or if any column has zero standard
        deviation (the division would be undefined).
    """
    if x.size == 0:
        raise ValueError("Input array x is empty.")

    column_mean = np.mean(x, axis=0)
    column_std = np.std(x, axis=0)

    # A constant column cannot be scaled to unit variance.
    if np.any(column_std == 0):
        raise ValueError("Standard deviation is zero for one or more features, cannot standardize.")

    centered = x - column_mean
    return centered / column_std


Covariance

In [28]:
def covariance_matrix(x):
    """Sample covariance matrix of already-centred data.

    Computes ``x.T @ x / (m - 1)`` where ``m`` is the number of rows. The
    function does not subtract the column means itself, so the result is a
    covariance matrix only if the columns of ``x`` are already centred.

    Parameters
    ----------
    x : numpy.ndarray
        Data with samples along axis 0.

    Returns
    -------
    numpy.ndarray
        ``x.T @ x`` scaled by ``1 / (m - 1)``.

    Raises
    ------
    ValueError
        If ``x`` is empty, or has fewer than two rows (the ``m - 1``
        denominator would be zero).
    """
    if x.size == 0:
        raise ValueError("Input array x is empty.")
    m = x.shape[0]
    # With a single sample the unbiased estimator divides by zero; fail with
    # a clear message instead of a bare ZeroDivisionError.
    if m < 2:
        raise ValueError("At least two samples are required to compute the covariance matrix.")
    return (1 / (m - 1)) * x.T @ x


### QR Decomposition using Givens Rotation function

In [29]:
def givens_rotation(a, b):
    """Return the pair ``(c, s)`` of a Givens rotation for the entries ``a`` and ``b``.

    With ``r = hypot(a, b)`` the result is ``c = a / r`` and ``s = -b / r``.
    When ``b`` is already zero no rotation is needed and ``(1, 0)`` is
    returned.
    """
    # Nothing to eliminate: identity rotation.
    if b == 0:
        return 1, 0

    hypotenuse = np.hypot(a, b)
    cosine = a / hypotenuse
    sine = -b / hypotenuse
    return cosine, sine

In [30]:
def qr_decomposition_givens(A, tol=1e-8):
    """
    QR decomposition using Givens Rotations.

    Each sub-diagonal entry ``R[j, i]`` whose magnitude is at least ``tol``
    is zeroed by a plane rotation acting on rows ``i`` and ``j``. Only those
    two rows of ``R`` (and the matching two columns of ``Q``) are updated,
    instead of forming a full ``m x m`` rotation matrix and multiplying it
    through for every entry.

    Parameters
    ----------
    A : numpy.ndarray
        Real 2-D array of shape ``(m, n)``. It is not modified.
    tol : float, optional
        Sub-diagonal entries with absolute value below ``tol`` are left
        untouched.

    Returns
    -------
    Q : numpy.ndarray
        ``(m, m)`` matrix accumulated from the transposed rotations.
    R : numpy.ndarray
        ``(m, n)`` matrix with the processed sub-diagonal entries eliminated,
        such that ``Q @ R`` reproduces ``A``.
    """
    m, n = A.shape
    Q = np.eye(m)
    # Work on a float copy so the in-place row updates never truncate
    # (an integer input would otherwise lose the rotated values).
    R = np.array(A, dtype=float)

    for i in range(n):
        for j in range(i + 1, m):
            if abs(R[j, i]) < tol:
                continue
            c, s = givens_rotation(R[i, i], R[j, i])
            # 2x2 core of the rotation; the rest of the m x m matrix is identity.
            rotation = np.array([[c, -s], [s, c]])

            # Equivalent to R = G @ R and Q = Q @ G.T, restricted to the
            # only rows / columns the rotation actually changes.
            R[[i, j], :] = rotation @ R[[i, j], :]
            Q[:, [i, j]] = Q[:, [i, j]] @ rotation.T

    return Q, R

In [31]:
def qr_decomposition_givens_noQ(A, tol=1e-8):
    """
    Triangular factor of a Givens-rotation QR decomposition, without forming Q.

    Each sub-diagonal entry ``R[j, i]`` whose magnitude is at least ``tol``
    is zeroed by a plane rotation of rows ``i`` and ``j``. Only those two
    rows are updated per rotation, rather than multiplying by a full
    ``m x m`` rotation matrix.

    Parameters
    ----------
    A : numpy.ndarray
        Real 2-D array of shape ``(m, n)``. It is not modified.
    tol : float, optional
        Sub-diagonal entries with absolute value below ``tol`` are skipped.

    Returns
    -------
    numpy.ndarray
        The ``(m, n)`` matrix ``R`` after all rotations have been applied.
    """
    m, n = A.shape
    # Float copy: the in-place row updates below must not truncate to ints.
    R = np.array(A, dtype=float)

    for i in range(n):
        for j in range(i + 1, m):
            if abs(R[j, i]) < tol:
                continue
            c, s = givens_rotation(R[i, i], R[j, i])
            # 2x2 core of the rotation; every other row is left unchanged.
            rotation = np.array([[c, -s], [s, c]])
            R[[i, j], :] = rotation @ R[[i, j], :]

    return R

In [32]:
def eigen_decomp(A, max_iter=100, tol=1e-8):
    """
    Eigenvalues and eigenvectors via the unshifted QR iteration.

    Repeatedly factorises the current iterate with
    ``qr_decomposition_givens`` and replaces it by ``R @ Q``, accumulating
    the product of all ``Q`` factors. Iteration stops after ``max_iter``
    steps, or earlier once every off-diagonal entry of the iterate is within
    ``tol`` of zero.

    Returns
    -------
    eigenvalues : numpy.ndarray
        Diagonal of the final iterate.
    eigenvectors : numpy.ndarray
        Accumulated ``Q`` product with each column scaled to unit norm.
    """
    size = A.shape[0]
    iterate = A.copy()
    accumulated_q = np.eye(size)

    step = 0
    while step < max_iter:
        q_factor, r_factor = qr_decomposition_givens(iterate)
        iterate = r_factor @ q_factor
        accumulated_q = accumulated_q @ q_factor
        step += 1

        # Converged once only the diagonal is left.
        off_diagonal = iterate - np.diag(np.diagonal(iterate))
        if np.allclose(off_diagonal, 0, atol=tol):
            break

    # Scale every column to unit length.
    for col in range(size):
        accumulated_q[:, col] /= norm(accumulated_q[:, col])

    return np.diagonal(iterate), accumulated_q


In [33]:
def pca(x, threshold):
    """Principal component analysis keeping enough components to reach ``threshold``.

    The data is standardised, its covariance matrix is eigendecomposed, and
    the smallest number of leading components whose cumulative share of the
    total variance is at least ``threshold`` is retained.

    Parameters
    ----------
    x : numpy.ndarray
        Data with samples along axis 0.
    threshold : float
        Target cumulative explained-variance ratio.

    Returns
    -------
    Z : numpy.ndarray
        Standardised data projected onto the retained components.
    V_k : numpy.ndarray
        Retained eigenvectors, one per column.
    n_components : int
        Number of retained components.

    Notes
    -----
    Prints the cumulative explained-variance ratios as a side effect.
    """
    x_std = standardize(x)
    cov_matrix = covariance_matrix(x_std)
    eigenvalues, eigenvectors = eigen_decomp(cov_matrix)

    # The cumulative-variance selection below is only meaningful when the
    # components are ordered from largest to smallest eigenvalue; the QR
    # iteration does not guarantee that order (e.g. for an already-diagonal
    # covariance matrix). A stable sort leaves an already-sorted result as is.
    order = np.argsort(-eigenvalues, kind="stable")
    eigenvalues = eigenvalues[order]
    eigenvectors = eigenvectors[:, order]

    total_variance = np.sum(eigenvalues)
    variance_ratio = eigenvalues / total_variance
    cumulative_variance_ratio = np.cumsum(variance_ratio)
    print(f"Explained variance ratio: {cumulative_variance_ratio}")

    reached = cumulative_variance_ratio >= threshold
    if reached.any():
        n_components = int(np.argmax(reached)) + 1
    else:
        # argmax of an all-False mask is 0, which would wrongly keep a single
        # component; if the threshold is never reached keep everything.
        n_components = len(eigenvalues)

    V_k = eigenvectors[:, :n_components]
    # Project the data standardised in this call, not a notebook-level global.
    Z = np.dot(x_std, V_k)

    return Z, V_k, n_components

In [34]:
def normal_equation(x, y):
    """Least-squares coefficients from the normal equations, with an intercept.

    A column of ones is prepended to ``x`` and the system
    ``(X^T X) w = X^T y`` is solved for ``w``.

    Parameters
    ----------
    x : numpy.ndarray
        Feature data with samples along axis 0.
    y : numpy.ndarray
        Targets with one entry (or row) per sample.

    Returns
    -------
    numpy.ndarray
        Coefficients; the first entry belongs to the intercept column.

    Raises
    ------
    ValueError
        If ``x`` or ``y`` has no elements.
    numpy.linalg.LinAlgError
        If ``X^T X`` is singular.
    """
    if x.size == 0 or y.size == 0:
        raise ValueError("Input arrays x and y are empty.")
    design = np.c_[np.ones(x.shape[0]), x]
    # Solve the linear system directly instead of forming the explicit
    # inverse: fewer operations and better numerical behaviour.
    return np.linalg.solve(design.T @ design, design.T @ y)

In [None]:
def qr_with_q_equation(x, y):
    """Solve ``R w = Q^T y`` using a QR factorisation of ``x``.

    Prints the shapes of the intermediate arrays as a side effect.

    Parameters
    ----------
    x : numpy.ndarray
        Matrix to factorise.
    y : numpy.ndarray
        Right-hand side.

    Returns
    -------
    numpy.ndarray
        Solution of ``R w = Q^T y``.

    Raises
    ------
    ValueError
        If ``x`` or ``y`` has no elements.
    numpy.linalg.LinAlgError
        If ``R`` is not square or is singular.
    """
    if x.size == 0 or y.size == 0:
        raise ValueError("Input arrays x or y are empty.")
    # NOTE(review): `qr_decomposition_with_q` is not defined in the visible
    # part of this notebook (the QR routine defined above is named
    # `qr_decomposition_givens`) - confirm it exists, and that it returns a
    # square R, before running this cell.
    Q, R = qr_decomposition_with_q(x)
    QTy = Q.T @ y
    print("Shape of X:", x.shape)
    print("Shape of Q:", Q.shape)
    print("Shape of R:", R.shape)
    print("Shape of y:", y.shape)
    print("Shape of QTy:", QTy.shape)
    # Solve the system directly rather than forming the explicit inverse of R.
    return np.linalg.solve(R, QTy)

In [None]:
def mse(y_true, y_pred):
    """Mean of the squared element-wise differences between ``y_true`` and ``y_pred``.

    Raises
    ------
    ValueError
        If either input has no elements.
    """
    has_data = y_true.size != 0 and y_pred.size != 0
    if not has_data:
        raise ValueError("Input arrays y_true or y_pred are empty.")

    errors = y_true - y_pred
    return np.mean(errors ** 2)

In [None]:
def residual_error_norm(y, predictions):
    """Return ``np.linalg.norm`` of the residual ``y - predictions``."""
    return np.linalg.norm(y - predictions)

In [None]:
# Backwards substitution
def back_substitution(U, b):
    """Solve ``U x = b`` for an upper-triangular ``U`` by back substitution.

    Unknowns are computed from the last row upwards; each step subtracts the
    contribution of the already-known entries and divides by the diagonal
    element of that row.

    Returns
    -------
    numpy.ndarray
        One-dimensional solution vector with ``U.shape[0]`` entries.
    """
    size = U.shape[0]
    solution = np.zeros(size)
    for row in reversed(range(size)):
        # Contribution of the unknowns that were solved in earlier steps.
        known_part = U[row, row + 1:] @ solution[row + 1:]
        solution[row] = (b[row] - known_part) / U[row, row]
    return solution

# Opening the dataset

In [35]:
# Load the dataset from a CSV file located in the working directory and
# display it as the cell output.
df = pd.read_csv("pokindex_data.csv")
df

Unnamed: 0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,WinningPercentage
0,70,90,45,15,45,50,36.290323
1,40,27,60,37,50,66,36.220472
2,75,75,60,83,60,60,39.344262
3,85,115,80,105,80,50,30.630631
4,83,106,65,86,65,85,66.406250
...,...,...,...,...,...,...,...
195,50,65,64,44,48,43,21.969697
196,60,85,69,65,79,80,57.600000
197,45,50,43,40,38,62,40.441176
198,55,45,50,45,65,80,55.462185


In [36]:
# Keep every column except the last one (WinningPercentage in the frame
# displayed above) as the feature table.
X_df = df.iloc[:, :-1]
X_df

Unnamed: 0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,70,90,45,15,45,50
1,40,27,60,37,50,66
2,75,75,60,83,60,60
3,85,115,80,105,80,50
4,83,106,65,86,65,85
...,...,...,...,...,...,...
195,50,65,64,44,48,43
196,60,85,69,65,79,80
197,45,50,43,40,38,62
198,55,45,50,45,65,80


In [37]:
# Standardize the feature columns (zero mean, unit standard deviation per column).
X_std = standardize(X_df.values)
X_std

array([[ 0.15290907,  0.4052833 , -0.92270684, -1.78806664, -1.04526282,
        -0.64904062],
       [-1.18060024, -1.59650799, -0.433985  , -1.11071036, -0.85255212,
        -0.09489708],
       [ 0.37516062, -0.07133367, -0.433985  ,  0.30558005, -0.46713073,
        -0.30270091],
       ...,
       [-0.95834869, -0.8656953 , -0.98786975, -1.01834359, -1.31505779,
        -0.23343296],
       [-0.51384558, -1.02456762, -0.75979956, -0.86439898, -0.27442003,
         0.38997851],
       [ 0.15290907,  1.6762619 ,  0.86927325, -0.55650976,  0.30371206,
        -0.12953105]])

In [38]:
# Covariance matrix of the standardized features.
cov_matrix = covariance_matrix(X_std)
cov_matrix

array([[ 1.00502513,  0.55595839,  0.40771065,  0.42310765,  0.41420563,
         0.25004053],
       [ 0.55595839,  1.00502513,  0.49285226,  0.45023169,  0.28881028,
         0.39254772],
       [ 0.40771065,  0.49285226,  1.00502513,  0.26511939,  0.59231642,
        -0.05218178],
       [ 0.42310765,  0.45023169,  0.26511939,  1.00502513,  0.52750921,
         0.50622821],
       [ 0.41420563,  0.28881028,  0.59231642,  0.52750921,  1.00502513,
         0.19389947],
       [ 0.25004053,  0.39254772, -0.05218178,  0.50622821,  0.19389947,
         1.00502513]])

In [39]:
# QR decomposition of the covariance matrix with Givens rotations;
# both factors are printed for inspection.
Q, R = qr_decomposition_givens(cov_matrix)
print("Q: ", Q)
print("R: ", R)


Q:  [[ 0.72940987 -0.54500747 -0.24840545 -0.30687205 -0.12216702  0.01129624]
 [ 0.40349392  0.75994714 -0.16466594 -0.32524921  0.14040878 -0.32719482]
 [ 0.29590123  0.18570912  0.78317796 -0.10122822 -0.36807509  0.34476142]
 [ 0.3070758   0.09953746 -0.10013326  0.78853923 -0.41568206 -0.30196751]
 [ 0.30061505 -0.13379389  0.38231174  0.32291456  0.79276163 -0.11323736]
 [ 0.18147012  0.25129826 -0.37632858  0.25244288  0.16917076  0.81850378]]
R:  [[ 1.37786061e+00  1.25319031e+00  1.04364078e+00  1.12779520e+00
   1.09322398e+00  7.21453556e-01]
 [ 2.20613268e-17  6.57112083e-01  2.73006452e-01  3.17464948e-01
   7.05012515e-02  4.29358093e-01]
 [-1.15492757e-17  1.44232748e-17  8.24219748e-01 -6.10760271e-02
   5.71882787e-01 -5.22398205e-01]
 [ 4.58149373e-17 -1.45547223e-17 -1.75052304e-18  7.87520880e-01
   5.08444916e-01  5.16381194e-01]
 [-4.59659157e-19 -1.77982212e-17  2.41815066e-17  1.11746041e-17
   3.82203678e-01  1.57084194e-01]
 [-1.99857047e-17 -2.02497546e-17 -1

In [40]:
# Eigen-decomposition of the covariance matrix via the QR iteration;
# eigenvectors are stored as the columns of the returned matrix.
eigenvalues, eigenvectors = eigen_decomp(cov_matrix)
print("Eigenvalues: ", eigenvalues)
print("Eigenvectors: ", eigenvectors)

Eigenvalues:  [2.94914287 1.20231921 0.77529324 0.51470719 0.37558598 0.21310226]
Eigenvectors:  [[ 0.43354602 -0.06079579  0.39576924 -0.77440083  0.1765609   0.14435383]
 [ 0.44686717  0.06341416  0.57662134  0.39600227 -0.21287498 -0.51152972]
 [ 0.38639019 -0.57758995  0.0344706   0.41114813  0.10449576  0.57960513]
 [ 0.44243289  0.30046    -0.38034996 -0.09211419 -0.71540399  0.22141125]
 [ 0.42577579 -0.27598308 -0.6008831  -0.07390754  0.3132315  -0.52717315]
 [ 0.29328879  0.70159069 -0.06292644  0.24595817  0.55015842  0.2337151 ]]
