In [1]:
import numpy as np
import pandas
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


def oja(values: np.ndarray, eta: float, w: np.ndarray, epoch: int, tolerance: float) -> np.ndarray:
    iteration: int = 0
    for _ in range(epoch):
        for i in range(np.shape(values)[0]):
            iteration += 1
            sum: np.ndarray = np.dot(values[i], w)
            prev_w: np.ndarray = np.copy(w)
            w += eta * sum * (values[i] - sum * w)
            if np.linalg.norm(w - prev_w) < tolerance:
                print(iteration)
                return w / np.linalg.norm(w)
    return w / np.linalg.norm(w)

In [2]:
europe = pandas.read_csv('europe.csv')
values = StandardScaler().fit_transform(europe.values[:, 1:])

l_rate = 0.001
initial_weights = np.random.uniform(-1, 1, np.size(values, 1))
epoch_count = 1500
e_tolerance = 1e-8


pca = PCA()
pca.fit(values)

oja_weights = oja(values, l_rate, initial_weights, epoch_count, e_tolerance)

diff= np.abs(np.array(pca.components_[0]) - oja_weights)

pc = pandas.DataFrame(data=[pca.components_[0], oja_weights, diff], columns=europe.columns.values[1:])
pc_title = pandas.DataFrame(data=[['PC1'], ['Oja'], ['Diff']], columns=['PC'])
pandas.concat([pc_title, pc], axis = 1)

Unnamed: 0,PC,Area,GDP,Inflation,Life.expect,Military,Pop.growth,Unemployment
0,PC1,0.124874,-0.500506,0.406518,-0.482873,0.188112,-0.475704,0.271656
1,Oja,-0.131902,0.499125,-0.412998,0.483684,-0.181898,0.473477,-0.267751
2,Diff,0.256776,0.999631,0.819517,0.966557,0.37001,0.94918,0.539407
