###### https://matrixcalc.org/en/vectors.html#eigenvectors(%7B%7B1295%2e2,280%2e5,562%2e4%7D,%7B280%2e5,112,83%2e5%7D,%7B562%2e4,83%2e5,290%2e8%7D%7D)

###### https://calculator.academy/normalize-vector-calculator/#f1p1|f2p0

In [76]:
import time
from datetime import timedelta
import numpy as np
import pandas as pd
import sklearn.decomposition
import scipy.linalg as la

Read dataset

In [77]:
# dataset = pd.read_csv('df_original_100000.csv')
dataset = pd.DataFrame([[71, 29, 33, 1], [75, 19, 43, 1], [7, 9, 3, 1], [13, 21, 7, 0], [3, 2, 17, 1]])
dataset.columns = ['A', 'B', 'C', 'Label']
print(dataset)

    A   B   C  Label
0  71  29  33      1
1  75  19  43      1
2   7   9   3      1
3  13  21   7      0
4   3   2  17      1


Erase label column

In [78]:
y = np.array(dataset['Label'])
del dataset['Label']

X = np.array(dataset)

print('Dataset', X)

Dataset [[71 29 33]
 [75 19 43]
 [ 7  9  3]
 [13 21  7]
 [ 3  2 17]]


In [79]:
pca = sklearn.decomposition.PCA()
X_transformed = pca.fit_transform(X)
print(X_transformed)

[[ 40.80227232  -6.4478562   -0.44690464]
 [ 46.4508894    7.06558354   0.76967717]
 [-32.38220261  -2.65694898   4.61882848]
 [-23.11542429  -9.99949491  -2.87876039]
 [-31.75553483  12.03871656  -2.06284062]]


Calculate the covariance matrix

In [80]:
X_centered = X - np.mean(X, axis=0)
cov_matrix = np.cov(X_centered, rowvar=False)
print('Covariance Matrix:\n', cov_matrix)
print('Cetered Data:\n', X_centered)

Covariance Matrix:
 [[1295.2  280.5  562.4]
 [ 280.5  112.    83.5]
 [ 562.4   83.5  290.8]]
Cetered Data:
 [[ 37.2  13.   12.4]
 [ 41.2   3.   22.4]
 [-26.8  -7.  -17.6]
 [-20.8   5.  -13.6]
 [-30.8 -14.   -3.6]]


Calculate the eigenvalues and eigenvectors of the covariance matrix

In [81]:
evals, evecs = la.eig(cov_matrix)

# sort them
idx = np.argsort(evals)[::-1]

#each columns of this matrix is an eingvector
evecs = evecs[:,idx]
evals = evals[idx]

variance_retained=np.cumsum(evals)/np.sum(evals)

print('Variance Retained:\n', variance_retained)
print('Eigenvalues:\n', evals)
print('Eignvectors:\n', evecs)

Variance Retained:
 [0.94432486+0.j 0.99489572+0.j 1.        +0.j]
Eigenvalues:
 [1603.4636077 +0.j   85.86932329+0.j    8.66706901+0.j]
Eignvectors:
 [[ 0.89779641 -0.11748943  0.42445004]
 [ 0.1910642  -0.76443552 -0.61573761]
 [ 0.39680735  0.63390423 -0.66385944]]


Dimensionality Reduction

In [82]:
X_transformed=np.dot(evecs.T, X_centered.T).T
print('Transformed Data:\n', X_transformed)

Transformed Data:
 [[ 40.80227232  -6.4478562   -0.44690464]
 [ 46.4508894    7.06558354   0.76967717]
 [-32.38220261  -2.65694898   4.61882848]
 [-23.11542429  -9.99949491  -2.87876039]
 [-31.75553483  12.03871656  -2.06284062]]


Generate random eignvectors with the same mean and standard deviation values

In [143]:
new_evecs = []

for j in range(len(evecs[0])):
    v = np.array(evecs[ : , j:j+1 ])
    u = np.random.normal(loc=v.mean(axis=0), scale=v.std(axis=0), size=len(v))
    new_evecs.append(u)
    print(u)
new_evecs = np.array(new_evecs).T

print('Generated Eignvectors:\n', new_evecs)

[0.58035101 0.42233305 0.55184285]
[ 0.31608178  0.20657425 -0.17651367]
[-1.25837269 -0.75393667 -1.1197669 ]
Generated Eignvectors:
 [[ 0.58035101  0.31608178 -1.25837269]
 [ 0.42233305  0.20657425 -0.75393667]
 [ 0.55184285 -0.17651367 -1.1197669 ]]


Go Back to the Original Dimension

In [112]:
original_dimension = np.dot(X_transformed, new_evecs.T) 
original_dimension += np.mean(X, axis=0)
print(original_dimension)


[[48.12175239 21.87124564 13.81476906]
 [59.72291886 35.62896197 21.67012472]
 [16.37565709  7.26981769 17.40857472]
 [16.90741796 -0.72385265 17.5942345 ]
 [27.87225369 15.95382736 32.51229699]]
