In [28]:
def convert_matrix_to_dataframe(matrix, columns = []):
    """ Converts a matrix into a dataframe with the given column labels. """
    return { label: list(col) for label, col in zip(columns, zip(*matrix)) }

def convert_dataframe_to_matrix(df):
    """ Converts a dataframe into a row matrix.

    # Example Format:
    sample_dataframe = convert_dataframe_to_matrix({
        "f1": [1, 5, 1, 5, 8],
        "f2": [2, 5, 4, 3, 1],
        "f3": [3, 6, 2, 2, 2],
        "f4": [4, 7, 3, 1, 2]
    })
    
    print(sample_dataframe)
    # Output:
    [
        [1, 2, 3, 4],
        [5, 5, 6, 7],
        [1, 4, 2, 3],
        [5, 3, 2, 1],
        [8, 1, 2, 2]
    ]
    """
    return [list(row) for row in zip(*df.values())]

def parse_arff_to_dataframe(file_path):
    with open(file_path, "r") as file:
        content = file.read()
    
    content = content.split("@data")
    attributes, data = content[0].split("@attribute"), content[1].strip().split("\n")
   
    # Parse labels from the attribute tags, and keep track of numeric
    # data types, which will be used for correcting data types of values later on
    labels = [] 
    is_col_numeric = []
    for col, attr in enumerate(attributes[1:]):
        attr_parts = attr.replace("\n", "").strip().split(" ")
        labels.append(attr_parts[0])
        
        if attr_parts[1] == "numeric":
            is_col_numeric.append(col)
    
    # Parse data rows
    data_rows = []
    for r in data:
        row = []
        for col, d in enumerate(r.split(",")):
            fd = None 
            try:
                # Integers for numeric columns else try to parse them as floats
                fd = int(d) if col in is_col_numeric else float(d) 
            except ValueError:
                # If an error occurs fallback to string value, if it is m then None
                fd = d if d != "m" else None
            row.append(fd)
        
        data_rows.append(row)
    
    # Just the relation tag for the arff
    data_label = attributes[0].replace("@relation", "").replace("\n", "").strip()
    
    # Reorganize parsed data to be stored column wise into a dict, with their key being their label
    return data_label, { l: [d[i] for d in data_rows] for i, l in enumerate(labels) }

def vector_magnitude(vector):
    return sum(v ** 2 for v in vector) ** 0.5

def identity(size):
    return [[1.0 if i == j else 0.0 for j in range(size)] for i in range(size)]  

def diagonal(matrix):
    return [matrix[i][i] for i in range(len(matrix))]

def transpose(matrix):
    return [list(col) for col in zip(*matrix)]

def subtract(matrix_a, matrix_b):
    assert len(matrix_a) == len(matrix_b) and len(matrix_a[0]) == len(matrix_b[0]), "Matrices not the same shape."
    
    return [[a - b for a, b in zip(row_a, row_b)] for row_a, row_b in zip(matrix_a, matrix_b)]

def scale(matrix, val):
    return [[item * val for item in row] for row in matrix]

def dot(matrix_a, matrix_b):
    assert len(matrix_a[0]) == len(matrix_b), "Dimensions of matrices are incompatible for dot product."
    
    result = []
    m, j, n = len(matrix_a), len(matrix_a[0]), len(matrix_b[0])
    for row in range(m):
        row_result = [sum(matrix_a[row][term] * matrix_b[term][col] for term in range(j)) for col in range(n)]
        result.append(row_result)

    return result

def determinant(matrix):
    n = len(matrix)

    # Base Case: Calculate the 2x2 matrix manually
    if n == 2:
        return matrix[0][0] * matrix[1][1] - matrix[0][1] * matrix[1][0]

    det = 0
    for cofactor_col in range(n):
        # Calculate the submatrix by excluding the current row and column
        submatrix = [[matrix[row][col] for col in range(n) if col != cofactor_col] for row in range(1, n)]
        
        # Calculate the determinant recursively
        det += ((-1) ** cofactor_col) * matrix[0][cofactor_col] * determinant(submatrix)

    return det

# Standardize using standard normal distribution
def snd_standardize_list(data):
    assert type(data) == list and len(data) != 0, "List must not be empty"
    
    mean = sum(d for d in data) / len(data)
    sample_std = (sum((mean - d) ** 2 for d in data) / (len(data) - 1)) ** 0.5
    
    return [(d - mean) / sample_std for d in data]

# Calculates the covariance of two lists (population)
def calculate_covariance(list_a, list_b):
    assert type(list_a) == list and type(list_b) == list and len(list_a) == len(list_b), "Lists must be of the same length"

    mean_a = sum(a for a in list_a) / len(list_a)
    mean_b = sum(b for b in list_b) / len(list_b)

    return sum((a - mean_a) * (b - mean_b) for a, b in zip(list_a, list_b)) / len(list_a)

# Calculate eigenvalues and eigenvector values through Jacobi eigenvalue algorithm
# https://en.wikipedia.org/wiki/Jacobi_eigenvalue_algorithm
def jacobi_method_eigen(matrix, max_iterations = 5, tolerance = 1.0e-9, diff_tolerance = 1.0e-36):
    def max_off_diag_elem(matrix):
        row, col = 0, 1
        max_elem = matrix[row][col]
        n = len(matrix)

        for r in range(n - 1):
            for c in range(r + 1, n):
                if abs(matrix[r][c]) >= max_elem:
                    max_elem = abs(matrix[r][c])
                    row, col = r, c
        
        return max_elem, row, col

    def mutating_rotation(matrix, a, b, k, l, i, j):
        m_kl = matrix[k][l]
        m_ij = matrix[i][j]

        matrix[k][l] = m_kl - a * (m_ij + b * m_kl)
        matrix[i][j] = m_ij + a * (m_kl - b * m_ij)

    n = len(matrix)
    eigenvectors = identity(n)
    matrix = [row[:] for row in matrix]
    for _ in range(max_iterations * (n ** 2)):
        max_elem, max_elem_row, max_elem_col = max_off_diag_elem(matrix)
    
        if max_elem < tolerance:
            eigenvalues = diagonal(matrix)
            sorted_pairs = sorted(zip(eigenvalues, transpose(eigenvectors)), key=lambda item : item[0], reverse=True) 
            
            return [e_val for e_val, _ in sorted_pairs], transpose([e_vec for _, e_vec in sorted_pairs])

        diff = matrix[max_elem_col][max_elem_col] - matrix[max_elem_row][max_elem_row]
        
        if max_elem < abs(diff) * diff_tolerance:
            t = max_elem / diff
        else:
            phi = diff / (2.0 * max_elem)
            t = 1.0 / (abs(phi) + (phi ** 2 + 1.0) ** 0.5)
            if phi < 0.0:
                t = -t
        
        c = 1.0 / (t ** 2 + 1.0) ** 0.5
        s = t * c
        tau = s / (1.0 + c)

        matrix[max_elem_row][max_elem_col] = 0.0
        matrix[max_elem_row][max_elem_row] -= t * max_elem
        matrix[max_elem_col][max_elem_col] += t * max_elem
        
        for i in range(max_elem_row): 
            mutating_rotation(matrix, s, tau, i, max_elem_row, i, max_elem_col)
        for i in range(max_elem_row + 1, max_elem_col): 
            mutating_rotation(matrix, s, tau, max_elem_row, i, i, max_elem_col)
        for i in range(max_elem_col + 1, n): 
            mutating_rotation(matrix, s, tau, max_elem_row, i, max_elem_col, i)
        
        for i in range(n):
            mutating_rotation(eigenvectors, s, tau, i, max_elem_row, i, max_elem_col)
    
    raise RuntimeError("Jacobi wasn't able to converge the values")

print("Dependencies loaded.")

Dependencies loaded.


In [29]:
def parse_and_preprocess(file_path):
    _, df = parse_arff_to_dataframe(file_path)
    mat = convert_dataframe_to_matrix(df)
    none_removed = [d for d in mat if None not in d] 
    feature_wise_mat = transpose(none_removed)[2:84]
    standardized_mat = transpose([snd_standardize_list(data) for data in feature_wise_mat])
    
    return df, standardized_mat

_, data_2017 = parse_and_preprocess("./Data/2017.arff")
_, data_2018 = parse_and_preprocess("./Data/2018.arff")
_, data_2019 = parse_and_preprocess("./Data/2019.arff")
_, data_2020 = parse_and_preprocess("./Data/2020.arff")

dataset = data_2017 + data_2018 + data_2019 + data_2020

# This is just for printing the dataset
import pandas as pd
df = pd.DataFrame(dataset, columns=["X" + str(i + 1) for i in range(len(dataset[0]))])
df.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X73,X74,X75,X76,X77,X78,X79,X80,X81,X82
0,-0.057138,-1.224555,-0.358822,-0.938151,0.139986,-0.363696,-0.755941,-0.94022,-1.098258,-0.37577,...,-1.719696,-1.872468,-1.872468,-1.872468,-0.087667,-1.872468,-1.872468,-1.872468,-1.872468,-1.872468
1,-0.057138,-1.224555,-0.358822,-0.938151,0.139986,-0.363696,-0.755941,-0.94022,-1.098258,-0.37577,...,-1.719696,-1.872468,-1.872468,-1.872468,-0.087667,-1.872468,-1.872468,-1.872468,-1.872468,-1.872468
2,0.1757,0.350628,-0.180613,-0.116434,0.188742,0.130324,-0.051519,0.597902,0.431794,0.176036,...,0.577931,0.530778,0.530778,0.530778,-0.065442,0.530778,0.530778,0.530778,0.530778,0.530778
3,-0.057138,-1.224555,-0.358822,-0.938151,0.139986,-0.363696,-0.755941,-0.94022,-1.098258,-0.37577,...,-1.719696,-1.872468,-1.872468,-1.872468,-0.087667,-1.872468,-1.872468,-1.872468,-1.872468,-1.872468
4,-0.057138,-1.224555,-0.358822,-0.938151,0.139986,-0.363696,-0.755941,-0.94022,-1.098258,-0.37577,...,-1.719696,-1.872468,-1.872468,-1.872468,-0.087667,-1.872468,-1.872468,-1.872468,-1.872468,-1.872468


# Sklearn PCA vs Custom PCA

In [30]:
def pca(data, n_components):
    assert len(data[0]) >= n_components and n_components > 0, f"Components must be between n and {len(data[0])}"

    data_t = transpose(data)
    cov_matrix = [[calculate_covariance(feat_b, feat_a) for feat_b in data_t] for feat_a in data_t]
    
    # import numpy as np
    # eig = np.linalg.eig(np.array(cov_matrix))
    # eigenvalues = eig.eigenvalues.tolist()
    # eigenvectors = eig.eigenvectors.tolist()
    # top_eigenvalues = eigenvalues[:n_components]
    # top_eigenvectors = transpose(eigenvectors)[:n_components]
    
    eigenvalues, eigenvectors = jacobi_method_eigen(cov_matrix, max_iterations=1000, tolerance=1.0e-128, diff_tolerance=1.0e-128)
    top_eigenvalues = eigenvalues[:n_components] 
    top_eigenvectors = transpose(eigenvectors)[:n_components] 
    
    explained_variance_ratio = [v / sum(eigenvalues) for v in top_eigenvalues]
    cumulative_explained_variance = [sum(explained_variance_ratio[:i + 1]) for i in range(len(explained_variance_ratio))]

    return dot(data, transpose(top_eigenvectors)), explained_variance_ratio, cumulative_explained_variance

n_components = 2
my_pca_results, explained_variance_ratio, cumulative_explained_variance = pca(dataset, n_components)

# This is just for printing the results of my pca calculations
import pandas as pd
df = pd.DataFrame(my_pca_results, columns=["PC" + str(i + 1) for i in range(n_components)])
df.head()

Unnamed: 0,PC1,PC2
0,-4.260178,-3.266624
1,-4.260178,-3.266624
2,0.935873,1.056347
3,-4.260178,-3.266624
4,-4.260178,-3.266624


In [31]:

my_pca_results_stats = transpose([explained_variance_ratio, cumulative_explained_variance])
stats_df = pd.DataFrame(my_pca_results_stats, columns=["Explained Variance Ratio", "Cumulative Explained Variance"])
stats_df.head()

Unnamed: 0,Explained Variance Ratio,Cumulative Explained Variance
0,0.15815,0.15815
1,0.092943,0.251093


In [32]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

A = np.matrix(dataset)
df = pd.DataFrame(A)

n_components = 2
sklearn_pca = PCA(n_components=n_components)
sklearn_pca_results = sklearn_pca.fit_transform(df)

principal_df = pd.DataFrame(sklearn_pca_results, columns=["PC" + str(i + 1) for i in range(n_components)])
principal_df.head()

Unnamed: 0,PC1,PC2
0,-4.658684,-4.286464
1,-4.658684,-4.286464
2,1.060236,1.198579
3,-4.658684,-4.286464
4,-4.658684,-4.286464


In [33]:

sklearn_pca_results_stats = transpose([sklearn_pca.explained_variance_ratio_.tolist(), np.cumsum(sklearn_pca.explained_variance_ratio_)])
stats_df = pd.DataFrame(sklearn_pca_results_stats, columns=["Explained Variance Ratio", "Cumulative Explained Variance"])
stats_df.head()

Unnamed: 0,Explained Variance Ratio,Cumulative Explained Variance
0,0.159073,0.159073
1,0.101397,0.26047


In [34]:

my_pca_results_pd = pd.DataFrame(my_pca_results_stats, columns=["EV Ratio", "Cumulative EV"])
sklearn_pca_results_pd = pd.DataFrame(sklearn_pca_results_stats, columns=["EV Ratio", "Cumulative EV"])

merged_results = pd.merge(my_pca_results_pd, sklearn_pca_results_pd, left_index=True, right_index=True, suffixes=('_my', '_sk'))

merged_results['EV Ratio Diff'] = abs(merged_results['EV Ratio_my'] - merged_results['EV Ratio_sk'])
merged_results['Cumulative EV Diff'] = abs(merged_results['Cumulative EV_my'] - merged_results['Cumulative EV_sk'])

merged_results.head()

Unnamed: 0,EV Ratio_my,Cumulative EV_my,EV Ratio_sk,Cumulative EV_sk,EV Ratio Diff,Cumulative EV Diff
0,0.15815,0.15815,0.159073,0.159073,0.000923,0.000923
1,0.092943,0.251093,0.101397,0.26047,0.008454,0.009377


In [35]:
my_pca_results_t = transpose(my_pca_results)
sklearn_pca_results_t = transpose(sklearn_pca_results.tolist())

correlation_coefficients = [[np.corrcoef(my_pca_results_t[i], sklearn_pca_results_t[i])[0, 1] for i in range(len(my_pca_results_t))]]
correlation_coefficient_df = pd.DataFrame(correlation_coefficients, columns=[f"PC{i + 1} Correlation Coefficient" for i in range(len(my_pca_results_t))])
correlation_coefficient_df.head()

Unnamed: 0,PC1 Correlation Coefficient,PC2 Correlation Coefficient
0,0.987848,0.92336


# Sklearn SVD vs Custom SVD

In [36]:
def svd(data, n_components):
    assert len(data[0]) >= n_components and n_components > 0, f"Components must be between n and {len(data[0])}"

    data_t = transpose(data)
    ata = dot(data_t, data)

    # Calculate Vt
    eigenvalues, eigenvectors = jacobi_method_eigen(ata, max_iterations=1000, tolerance=1.0e-128, diff_tolerance=1.0e-128)
    Vt = transpose(eigenvectors)[:n_components] 

    # Calculate Sigma
    singular_values = [val ** 0.5 for val in eigenvalues]
    Sigma = [[0.0] * n_components for _ in range(n_components)]
    for i in range(n_components):
        Sigma[i][i] = singular_values[i]

    # Calculate U using PCA, this is faster compared to calculating using dot(data, data_t) <- aat
    cov_matrix = [[calculate_covariance(feat_b, feat_a) for feat_b in data_t] for feat_a in data_t]    
    eigenvalues, eigenvectors = jacobi_method_eigen(cov_matrix, max_iterations=1000, tolerance=1.0e-128, diff_tolerance=1.0e-128)
    top_eigenvalues = eigenvalues[:n_components] 
    top_eigenvectors = transpose(eigenvectors)[:n_components] 
    
    explained_variance_ratio = [v / sum(eigenvalues) for v in top_eigenvalues]
    cumulative_explained_variance = [sum(explained_variance_ratio[:i + 1]) for i in range(len(explained_variance_ratio))]

    U = dot(data, transpose(top_eigenvectors))
    
    return U, Sigma, Vt, explained_variance_ratio, cumulative_explained_variance

n_components = 2
my_U, my_Sigma, my_Vt, explained_variance_ratio, cumulative_explained_variance = svd(dataset, n_components)

# # This is just for printing out the results
import pandas as pd

u_df = pd.DataFrame(my_U)
u_df.style.set_caption("Custom SVD - U Result")
u_df.head()

In [None]:

sigma_df = pd.DataFrame(my_Sigma)
sigma_df.style.set_caption("Custom SVD - Sigma Result")
sigma_df.head()

Unnamed: 0,0,1
0,93.352451,0.0
1,0.0,71.56498


In [None]:

vt_df = pd.DataFrame(my_Vt)
vt_df.style.set_caption("Custom SVD - Vt Result")
vt_df.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,72,73,74,75,76,77,78,79,80,81
0,0.170804,0.04129,0.129903,0.162097,0.136731,0.190064,0.159489,0.061789,0.220343,0.211865,...,0.07976,0.096534,0.077184,0.063039,0.014752,0.039712,0.063707,0.050033,0.041418,0.031968
1,-0.075889,0.143128,-0.067841,0.018214,0.230533,-0.209857,0.03081,0.167054,0.032102,-0.240026,...,0.102988,0.101054,0.105586,0.059428,0.023114,0.130468,0.106441,0.119602,0.059156,0.061256


In [None]:

my_svd_results_stats = transpose([explained_variance_ratio, cumulative_explained_variance])
stats_df = pd.DataFrame(my_svd_results_stats, columns=["Explained Variance Ratio", "Cumulative Explained Variance"])
stats_df.style.set_caption("Custom SVD - Stats Result")
stats_df.head()

Unnamed: 0,Explained Variance Ratio,Cumulative Explained Variance
0,0.15815,0.15815
1,0.092943,0.251093


In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD

A = np.matrix(dataset)
df = pd.DataFrame(A)

n_components = 2
sklearn_svd = TruncatedSVD(n_components=n_components, n_iter=10, random_state=5)

sk_U = sklearn_svd.fit_transform(df)
sk_Sigma = np.diag(sklearn_svd.singular_values_)
sk_Vt = sklearn_svd.components_

u_df = pd.DataFrame(sk_U)
u_df.style.set_caption("Sklearn SVD - U Result")
u_df.head()

Unnamed: 0,0,1
0,-4.658684,-4.286459
1,-4.658684,-4.286459
2,1.060236,1.198579
3,-4.658684,-4.286459
4,-4.658684,-4.286459


In [None]:

sigma_df = pd.DataFrame(sk_Sigma)
sigma_df.style.set_caption("Sklearn SVD - Sigma Result")
sigma_df.head()

Unnamed: 0,0,1
0,93.624562,0.0
1,0.0,74.748853


In [None]:

vt_df = pd.DataFrame(sk_Vt)
vt_df.style.set_caption("Sklearn SVD - Vt Result")
vt_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,72,73,74,75,76,77,78,79,80,81
0,0.146929,-0.010275,0.173865,0.195203,0.071171,0.163453,0.186113,0.103689,0.208173,0.194371,...,0.090587,0.120228,0.098108,0.067321,-0.011909,0.055828,0.082026,0.078365,0.046369,0.032234
1,-0.178107,0.194147,-0.015424,0.097098,-0.156742,-0.133665,0.056472,0.185317,0.022471,-0.155589,...,0.129964,0.137389,0.131745,0.066235,0.029922,0.105299,0.105068,0.146135,0.070393,0.067953


In [None]:

sklearn_svd_results_stats = transpose([sklearn_svd.explained_variance_ratio_, np.cumsum(sklearn_svd.explained_variance_ratio_)])
stats_df = pd.DataFrame(sklearn_svd_results_stats, columns=["Explained Variance Ratio", "Cumulative Explained Variance"])
stats_df.head()

Unnamed: 0,Explained Variance Ratio,Cumulative Explained Variance
0,0.159073,0.159073
1,0.101397,0.26047


In [None]:
my_svd_results_pd = pd.DataFrame(my_svd_results_stats, columns=["EV Ratio", "Cumulative EV"])
sklearn_svd_results_pd = pd.DataFrame(sklearn_svd_results_stats, columns=["EV Ratio", "Cumulative EV"])

merged_results = pd.merge(my_svd_results_pd, sklearn_svd_results_pd, left_index=True, right_index=True, suffixes=('_my', '_sk'))

merged_results['EV Ratio Diff'] = abs(merged_results['EV Ratio_my'] - merged_results['EV Ratio_sk'])
merged_results['Cumulative EV Diff'] = abs(merged_results['Cumulative EV_my'] - merged_results['Cumulative EV_sk'])

merged_results.head()

Unnamed: 0,EV Ratio_my,Cumulative EV_my,EV Ratio_sk,Cumulative EV_sk,EV Ratio Diff,Cumulative EV Diff
0,0.15815,0.15815,0.159073,0.159073,0.000923,0.000923
1,0.092943,0.251093,0.101397,0.26047,0.008454,0.009377


In [None]:

RMSE = lambda U, V: (sum(sum((u - v) ** 2 for u, v in zip(row_u, row_v)) for row_u, row_v in zip(U, V)) / (len(U) * len(U[0]))) ** 0.5

df = pd.DataFrame([[RMSE(my_U, sk_U.tolist())], [RMSE(my_Sigma, sk_Sigma.tolist())], [RMSE(my_Vt, sk_Vt.tolist())]], columns=["RMSE"])
df.index = ["U", "Sigma", "Vt"]
df.head()

Unnamed: 0,RMSE
U,0.908975
Sigma,1.59774
Vt,0.061394


# Results Discussion

Looking at the results in various metrics, such as `Explained Variance Ratio`, `Cumulative Explained Variance`, and the `Correlation Coefficient`. I can see that the values that I obtained from my algorithms varies from those obtained from `sklearn`, but only by a small amount.

### Summary of Results for PCA
|     | EV Ratio_my  | Cumulative EV_my  | EV Ratio_sk  | Cumulative EV_sk  | EV Ratio Diff  | Cumulative EV Diff  |
| --- | -----------  | ----------------- | -----------  | ----------------- | -------------  | ------------------- |
| PC1 |   0.158150   |       0.158150    |   0.159073   |       0.159073    |    0.000923    |       0.000923      |
| PC2 |   0.092943   |       0.251093    |   0.101397   |       0.260470    |    0.008454    |       0.009377      |

Looking at the Explained Variance (EV) Ratio amd Cumulative EV above, it can be seen that each Principal Component (PC) has an almost similar variance between my calculations (`_my`) and those of `sklearn` (`_sk`). Calculating for the differences between these values, it can be seen that the values that I have obtained is off by at most 2-3 decimal places. 

|     |  Correlation Coefficient  |
| --- | ------------------------- | 
| PC1 |           0.987848        |
| PC2 |           0.923359        |

Furthermore, looking at the correlations of each PCs between my calculations, and `sklearn` shows that the values I obtained are quite identical for PC1, with a correlation coefficient of 0.987848, and it drops off for PC2 with a correlation coefficient of 0.923359.

### Summary of Results for SVD
|              | EV Ratio_my  | Cumulative EV_my  | EV Ratio_sk  | Cumulative EV_sk  | EV Ratio Diff  | Cumulative EV Diff  |
| ------------ | -----------  | ----------------- | -----------  | ----------------- | -------------  | ------------------- |
| Eigenvalue 1 |   0.158150   |       0.158150    |   0.159073   |       0.159073    |    0.000923    |       0.000923      |
| Eigenvalue 2 |   0.092943   |       0.251093    |   0.101397   |       0.260470    |    0.008454    |       0.009377      |

The table above shows the same results as those of the PCA. It can be seen that most EV Ratio and Cumulative EV are off by at most 2-3 decimal places.

| Metric | RMSE |
|---|--- |
| U | 0.908975 |
| Sigma | 1.597740 |
| Vt | 0.061394 |

Looking at the above table, the `Vt` has the least deviations from the values retrieved from `sklearn` being `±0.061` off from the values. The biggest offenders to look at are `U` and `Sigma`. `U` can be somewhat ignored, since the `U` component is essentially the results of PCA, and looking back at our discussion with PCA it can be seen that each PC is quite similar with the values retrieved from `sklearn`, even though it deviates `±0.91` from it. For `Sigma` it's deviation can be explained with just being the result of having a small sample size. Comparing the values directly, it can be seen that they are quite identical, with `sklearn` calculating singular values of 93.624562, and 74.748853, while my SVD calculated 93.3542451, and 71.56498.

These differences found between my calculations and those of `sklearn` can be attributed mostly between multiple things, but primarily is mostly driven by the eigenvalue and eigenvector algorithm used. `Sklearn` might be using a more accurate and precise algorithm under the hood when calculating eigenvalues and eigenvectors which are a primary component for both PCA and SVD calculations. In my case, I had implemented Jacobi's eigenvalue algorithm to approximate these eigenvalues and eigenvectors, which gives me a pretty good approximate of their values. Comparing it to the  `numpy` libraries implementation, with their `np.linalg.eig` function, I found that my implementation generates values that is accurate up to 2 decimal places. However, that accuracy is enough to cause deviations between my calculations and those of `sklearn`. Aside from the eigenvalue, and eigenvectors calculations, I also believe that a small percentage of the deviation was also brought onto the cumulation of small errors in the calculations, such as rounding errors, probably some faulty programming, and not running enough iterations to converge to a much a accurate result.