In [None]:
import numpy as np
import numpy.linalg as la
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale

In [None]:
def data_centering(A:np.ndarray)->np.ndarray:
  """Return a copy of `A` with each column's mean subtracted.

  Args:
    A: 2-D data matrix. Means are taken along axis 0, so every column is
      centered independently.

  Returns:
    A new array with the same shape as `A` whose columns have zero mean.
    `A` itself is not modified. Integer input yields a floating-point result.
  """
  A = np.asarray(A)
  # Broadcasting subtracts the per-column means from every row at once.
  # Building a new array lets integer input promote to float; subtracting
  # in place from an integer copy would raise a casting error instead.
  return A - np.mean(A, axis=0)

def get_Covariance(A:np.ndarray):
  """Sample covariance matrix of the columns of `A`.

  The data is centered column-wise with `data_centering`, and the result is
  (centered^T @ centered) / (M - 1), where M is the number of rows of `A`.
  """
  n_rows = A.shape[0]
  centered = data_centering(A)
  # Unbiased estimator: divide by (number of rows - 1).
  factor = 1/(n_rows-1)
  return factor*centered.T@centered

def get_Variance_SVD(Sigma:np.ndarray, bound:float):
  """Find the fewest leading components whose variance share reaches `bound`.

  Args:
    Sigma: singular values, given either as a 1-D vector or as a 2-D matrix
      whose main diagonal holds them.
    bound: required fraction of the total variance (e.g. 0.85).

  Returns:
    The squared singular values of the selected leading components, or
    None when no prefix reaches `bound` (e.g. bound > 1, or the total
    variance is zero).

  Side effects:
    Prints the number of selected components and the fraction they reach.
  """
  Sigma = np.asarray(Sigma)
  singular_values = np.diag(Sigma) if Sigma.ndim == 2 else Sigma
  variances = singular_values**2  # not named `vars`: that shadows the builtin

  # explained[k] is the variance share of the first k components, k = 0..n.
  # Including k = n matters: a bound that needs every component must still
  # return a result instead of falling through. Dividing by the last
  # cumulative value makes the final share exactly 1.
  cumulative = np.concatenate(([0], np.cumsum(variances)))
  explained = np.divide(cumulative, cumulative[-1])

  reached = np.flatnonzero(explained >= bound)
  if reached.size == 0:
    return None

  k = int(reached[0])
  print("=== Min number of principal cpmponents: "+ str(k))
  print("=== Accuracy: "+ str(explained[k]))
  return variances[:k]


# To project onto only the first 2 principal components,
# pass rank=2.
def PCA_projection(A, V, rank=0, data_point=-1):
  """Project the rows of `A` onto the columns of `V`.

  Args:
    A: data matrix, one sample per row.
    V: matrix whose columns are the projection directions.
    rank: number of leading columns of `V` to use; 0 keeps all of them.
    data_point: row index of a single projected sample to return. Any value
      outside [0, number of rows), including the default -1, returns the
      whole projection.

  Returns:
    The projected data, or one row of it when `data_point` is a valid index.
  """
  basis = V[:,:rank] if rank!=0 else np.array(V)
  projected = A@basis
  # Guard clause: an out-of-range index means "give me everything".
  if data_point<0 or data_point>=projected.shape[0]:
    return projected
  return projected[data_point]



In [None]:
# PCA Projection
# Worked example: project a 6x3 data matrix X onto its leading principal
# directions. U, S and Vt are presumably the SVD factors of X (verify against
# the problem statement); only X and Vt are used in this cell.

X = np.array([[1.5553080511, -0.4284483500, -0.1093220174], [-5.0624356051, 0.2019304498, 0.7171028714], [2.1401685583, -2.4134296141, -0.3243502392], [-1.8931264890, 0.3713478731, -0.4466719918], [2.7663157486, 4.1721859548, -0.9064581591], [0.4937697360, -1.9035863137, 1.0696995360]])
U = np.array([[-0.2125560655, -0.1424636012, -0.0013756412], [0.7354516675, 0.2335001883, 0.1346940128], [-0.2420323759, -0.5277843028, -0.4422838252], [0.2502573309, 0.1618985775, -0.5323890493], [-0.5409489476, 0.6771667947, 0.1478551036], [0.0098283906, -0.4023176566, 0.6934993994]])
S = np.array([[6.7540690019, 0.0000000000, 0.0000000000], [0.0000000000, 5.2222172008, 0.0000000000], [0.0000000000, 0.0000000000, 1.1105358862]])
Vt = np.array([[-0.9678770471, -0.2012133394, 0.1507554766], [-0.2231028275, 0.9638036935, -0.1459711228], [0.1159273480, 0.1749160724, 0.9777347379]])
# First 2 principal components: rank=2. Vt is transposed so that the
# directions become columns, which is what PCA_projection slices.
# To get a single data point, e.g. row 3, also pass data_point=3.
PCA_projection(X, Vt.T, rank=2)

array([[-1.43561833, -0.74397587],
       [ 4.96729131,  1.2193887 ],
       [-1.63470337, -2.75620426],
       [ 1.69025528,  0.84546954],
       [-3.65360652,  3.53631208],
       [ 0.06638163, -2.10099019]])

In [None]:
# PCA Number of Components
# S is a diagonal matrix carrying the singular values on its main diagonal.
# U and V are given alongside it but are not used in this cell.
U = np.array([[-0.2, 0.4, 0.0, 0.2, 0.4, 0.2, 0.5, -0.1, 0.1], [0.4, -0.5, -0.1, -0.1, -0.1, 0.2, 0.5, 0.1, 0.0], [0.2, -0.1, -0.2, 0.4, -0.1, -0.3, -0.4, 0.1, 0.1], [-0.2, 0.0, -0.1, -0.2, 0.1, 0.0, 0.0, 0.2, -0.5], [0.0, 0.4, -0.2, 0.0, -0.5, -0.1, 0.1, -0.1, 0.0], [-0.2, -0.2, 0.1, 0.1, -0.3, 0.1, 0.0, 0.3, -0.3], [0.2, 0.2, 0.0, -0.6, -0.1, 0.3, -0.3, -0.3, 0.0], [0.0, -0.2, -0.4, -0.3, 0.4, -0.3, 0.0, -0.3, -0.1], [0.0, -0.1, 0.3, 0.0, 0.1, -0.4, -0.1, -0.3, -0.2], [0.7, 0.3, 0.3, -0.1, 0.1, -0.4, 0.2, 0.2, -0.1], [-0.2, 0.1, -0.4, -0.4, 0.0, -0.4, 0.1, 0.5, 0.3], [-0.3, -0.1, 0.2, -0.1, -0.2, -0.3, 0.4, -0.2, -0.1], [0.2, 0.1, -0.5, 0.4, 0.0, 0.0, 0.1, -0.2, -0.3], [0.0, 0.0, -0.1, 0.0, -0.4, -0.2, 0.2, -0.4, 0.1], [0.1, -0.1, 0.1, 0.1, 0.0, 0.0, 0.1, -0.2, 0.6]])
S = np.array([[39,  0,  0,  0,  0,  0,  0,  0,  0], [ 0, 38,  0,  0,  0,  0,  0,  0,  0], [ 0,  0, 29,  0,  0,  0,  0,  0,  0], [ 0,  0,  0, 24,  0,  0,  0,  0,  0], [ 0,  0,  0,  0, 23,  0,  0,  0,  0], [ 0,  0,  0,  0,  0, 22,  0,  0,  0], [ 0,  0,  0,  0,  0,  0, 15,  0,  0], [ 0,  0,  0,  0,  0,  0,  0,  7,  0], [ 0,  0,  0,  0,  0,  0,  0,  0,  5]])
V = np.array([[-0.3, 0.5, -0.1, -0.6, 0.3, 0.2, -0.3, 0.4, 0.1], [-0.1, 0.0, 0.4, -0.1, -0.1, 0.3, -0.6, -0.6, 0.1], [0.3, -0.1, -0.3, 0.1, -0.6, 0.2, -0.4, 0.4, 0.2], [-0.4, -0.3, -0.4, -0.4, -0.3, 0.2, 0.2, -0.3, -0.3], [-0.8, -0.1, 0.0, 0.3, -0.2, -0.4, -0.2, 0.2, 0.3], [0.3, 0.1, -0.3, -0.3, 0.0, -0.7, -0.2, -0.3, 0.2], [0.0, 0.6, 0.3, -0.1, -0.6, 0.0, 0.4, -0.1, 0.2], [-0.1, 0.6, -0.5, 0.5, 0.1, 0.1, -0.1, -0.2, -0.3], [0.0, 0.0, -0.4, 0.1, 0.2, 0.4, 0.3, -0.2, 0.7]])

# Fewest leading components whose squared singular values add up to at
# least 85% of the total.
get_Variance_SVD(S, bound=0.85)

=== Min number of principal cpmponents: 5
=== Accuracy: 0.8624868282402529


array([1521, 1444,  841,  576,  529])

In [None]:
# Center the data column-wise, then take its SVD; the displayed result is
# the tuple (U, singular values, V^T).
X = data_centering(np.array([
    [2.442, 1.660, -0.231],
    [-0.553, -0.165, -0.552],
    [-1.506, -0.912, -1.073],
    [-0.682, 0.633, 1.419],
]))
la.svd(X)


(array([[-0.80079079, -0.28033413, -0.17362859,  0.5       ],
        [ 0.20215121, -0.19330302,  0.81961505,  0.5       ],
        [ 0.56264879, -0.38442237, -0.5344584 ,  0.5       ],
        [ 0.03599078,  0.85805953, -0.11152806,  0.5       ]]),
 array([3.50747228, 1.94893859, 0.00813094]),
 array([[-0.83798682, -0.52830673, -0.13663858],
        [-0.29961739,  0.23617226,  0.92436578],
        [ 0.45607841, -0.81554563,  0.35619912]]))

In [None]:
# PCA Number of Components
# S is a diagonal matrix carrying the singular values on its main diagonal.
# U and V are given alongside it but are not used in this cell.
U = np.array([[-0.1, 0.0, 0.2, 0.0, 0.0, -0.1, -0.2, 0.4, -0.4, 0.2, -0.2, -0.2, 0.0,  0.0], [-0.2, -0.1, 0.1, -0.3, -0.1, 0.3, 0.2, 0.0, -0.1, -0.1, 0.0, -0.7, 0.3,  0.0], [-0.3, -0.1, -0.3, 0.1, 0.4, -0.2, -0.3, 0.5, 0.0, 0.1, 0.0, 0.0, 0.3,  0.2], [-0.4, 0.0, 0.0, 0.0, -0.2, 0.3, 0.0, -0.2, -0.5, 0.1, 0.1, 0.4, -0.1,  0.1], [0.1, -0.2, -0.1, 0.4, -0.1, 0.1, 0.1, 0.1, -0.1, 0.4, 0.1, -0.3, -0.6,  0.1], [0.0, -0.1, -0.1, 0.2, -0.5, -0.4, 0.3, 0.1, 0.0, 0.0, -0.1, 0.1, 0.2,  0.3], [0.4, 0.1, 0.2, -0.1, -0.1, 0.1, 0.0, 0.3, 0.1, -0.1, 0.7, 0.1, 0.2, 0.2], [0.2, -0.3, -0.4, 0.1, 0.2, -0.1, -0.1, -0.4, -0.1, 0.1, 0.1, 0.0, 0.1,  0.2], [0.3, -0.4, -0.1, -0.2, 0.3, -0.2, 0.0, -0.1, -0.1, -0.1, 0.1, -0.1,  -0.1, -0.3], [-0.2, 0.2, 0.4, 0.6, 0.2, 0.0, 0.0, 0.0, 0.1, -0.3, 0.2, 0.0, -0.1,  -0.2], [0.4, -0.2, 0.3, 0.3, -0.1, 0.3, 0.0, 0.0, -0.1, 0.1, -0.4, 0.1, 0.2,  0.2], [0.3, 0.3, -0.1, 0.2, 0.0, -0.1, -0.1, 0.0, -0.4, -0.1, -0.1, -0.1, 0.3,  -0.3], [0.1, -0.3, 0.2, -0.1, -0.3, 0.1, -0.7, 0.0, 0.2, 0.2, 0.0, 0.1, 0.0,  -0.2], [0.0, -0.1, -0.5, 0.3, -0.2, 0.5, -0.1, 0.1, 0.0, -0.3, 0.1, -0.1, 0.1,  -0.1], [-0.1, -0.1, 0.0, 0.0, -0.2, -0.2, -0.3, -0.1, 0.0, -0.7, -0.1, -0.2,  -0.2, 0.3], [-0.3, -0.1, 0.1, 0.3, 0.0, -0.2, -0.2, -0.5, 0.1, 0.3, 0.3, -0.2, 0.3,  0.1], [0.0, 0.2, -0.3, 0.1, -0.3, -0.1, 0.1, 0.0, 0.4, 0.1, -0.2, -0.1, 0.1,  -0.4], [-0.1, -0.4, 0.1, 0.1, 0.3, 0.2, 0.2, 0.1, 0.3, -0.1, -0.3, 0.2, 0.1,  0.0], [0.3, 0.5, -0.1, -0.1, 0.1, 0.2, -0.2, -0.2, 0.2, 0.1, -0.2, -0.1, -0.1,  0.5]])
S = np.array([[39,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0], [ 0, 37,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0], [ 0,  0, 35,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0], [ 0,  0,  0, 34,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0], [ 0,  0,  0,  0, 33,  0,  0,  0,  0,  0,  0,  0,  0,  0], [ 0,  0,  0,  0,  0, 31,  0,  0,  0,  0,  0,  0,  0,  0], [ 0,  0,  0,  0,  0,  0, 29,  0,  0,  0,  0,  0,  0,  0], [ 0,  0,  0,  0,  0,  0,  0, 27,  0,  0,  0,  0,  0,  0], [ 0,  0,  0,  0,  0,  0,  0,  0, 25,  0,  0,  0,  0,  0], [ 0,  0,  0,  0,  0,  0,  0,  0,  0, 19,  0,  0,  0,  0], [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 16,  0,  0,  0], [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 15,  0,  0], [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  7,  0], [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  3]])
V = np.array([[-0.5, 0.3, -0.3, 0.1, 0.1, 0.1, 0.1, 0.0, 0.5, -0.2, 0.4, -0.1, -0.1,  0.1], [0.1, -0.3, -0.2, 0.2, -0.2, 0.5, -0.3, -0.4, 0.2, -0.1, -0.3, 0.1, 0.0,  0.4], [-0.4, -0.4, -0.3, -0.3, -0.5, -0.1, 0.0, 0.2, -0.1, -0.3, -0.2, 0.1,  -0.2, -0.3], [-0.2, 0.2, -0.1, -0.3, -0.2, 0.0, -0.7, -0.2, -0.1, 0.3, 0.2, -0.2, 0.2,  -0.1], [-0.3, 0.0, -0.2, 0.0, -0.1, 0.3, 0.5, -0.1, -0.1, 0.6, -0.2, -0.3, 0.2,  0.0], [-0.2, 0.3, 0.3, -0.2, 0.1, -0.1, 0.2, -0.7, 0.0, -0.1, -0.3, 0.2, -0.2,  -0.2], [-0.5, -0.1, 0.5, 0.5, 0.1, -0.1, -0.2, 0.2, 0.1, 0.2, -0.2, 0.1, -0.1,  0.0], [0.2, 0.0, -0.2, 0.5, -0.1, -0.2, 0.0, -0.2, 0.3, 0.0, -0.1, 0.1, 0.5,  -0.5], [-0.3, -0.2, 0.2, -0.3, 0.2, 0.3, 0.0, 0.1, 0.0, -0.2, 0.1, 0.3, 0.6,  -0.1], [0.0, -0.3, 0.2, 0.0, -0.4, -0.3, 0.2, -0.3, 0.0, 0.1, 0.6, 0.2, 0.0,  0.3], [0.1, 0.6, 0.0, -0.1, -0.4, 0.2, 0.0, 0.3, 0.1, 0.2, -0.1, 0.6, 0.0, 0.0], [-0.1, 0.2, -0.1, 0.0, -0.2, -0.4, 0.0, 0.0, 0.0, -0.2, -0.3, -0.2, 0.4,  0.6], [-0.2, 0.2, -0.3, 0.4, 0.0, 0.1, 0.0, -0.1, -0.7, -0.2, 0.2, 0.2, 0.0,  0.0], [0.0, -0.1, -0.5, -0.1, 0.4, -0.4, -0.1, -0.1, 0.0, 0.4, -0.1, 0.5, -0.1,  0.1]])

# Fewest leading components whose squared singular values add up to at
# least 74% of the total.
get_Variance_SVD(S, .74)

=== Min number of principal cpmponents: 7
=== Accuracy: 0.7836021505376344


array([1521, 1369, 1225, 1156, 1089,  961,  841])

In [None]:
# Center the data column-wise, then take its SVD; the displayed result is
# the tuple (U, singular values, V^T).
X = data_centering(np.array([
    [1.364, 1.318, -0.428],
    [0.174, -0.028, 0.263],
    [-0.896, 1.344, -0.838],
    [-0.016, 0.076, -0.143],
]))
la.svd(X)

(array([[-0.69141343,  0.52116683,  0.01823721,  0.5       ],
        [-0.09402749, -0.58224482, -0.63415282,  0.5       ],
        [ 0.71260136,  0.47071557, -0.1436181 ,  0.5       ],
        [ 0.07283957, -0.40963758,  0.7595337 ,  0.5       ]]),
 array([1.63501434, 1.49902875, 0.19033631]),
 array([[-0.97803727,  0.03340759, -0.20573537],
        [ 0.12965329,  0.87037024, -0.47502176],
        [ 0.16319661, -0.49126325, -0.85558593]]))

In [None]:
# Center the data column-wise, then take its SVD; the displayed result is
# the tuple (U, singular values, V^T).
X = data_centering(np.array([
    [-0.595, 1.784, 1.130],
    [-1.819, 0.725, -1.020],
    [-0.782, 1.105, 1.849],
    [-0.050, 0.725, -0.118],
]))

la.svd(X)

(array([[-0.36014605,  0.08612348,  0.78286497, -0.5       ],
        [ 0.73910963,  0.4504256 , -0.02887446, -0.5       ],
        [-0.54459098,  0.29994947, -0.60286895, -0.5       ],
        [ 0.16562741, -0.83649856, -0.15112155, -0.5       ]]),
 array([2.37452941, 1.12138823, 0.6306227 ]),
 array([[-0.30008672, -0.24777088, -0.92117184],
        [-0.94820257,  0.1829746 ,  0.25967708],
        [ 0.10421063,  0.95138314, -0.28984523]]))

In [None]:
# PCA Number of Components
# S is a diagonal matrix carrying the singular values on its main diagonal.
# U and V are given alongside it but are not used in this cell.
U = np.array([[-0.1, 0.0, 0.0, 0.1, 0.1, -0.5, 0.1, 0.0, -0.5, 0.4, 0.1], [-0.3, -0.3, -0.1, -0.4, 0.2, -0.1, -0.3, 0.0, 0.2, 0.2, -0.1], [0.0, 0.2, -0.6, 0.1, -0.1, 0.1, -0.5, -0.2, 0.0, -0.4, 0.1], [-0.2, 0.2, 0.0, -0.1, -0.6, 0.1, 0.0, 0.4, -0.3, -0.1, 0.1], [0.1, -0.5, -0.2, -0.2, 0.0, -0.4, -0.2, 0.4, -0.2, -0.2, 0.1], [0.1, -0.3, 0.2, -0.2, -0.3, -0.1, 0.1, -0.2, 0.2, -0.3, 0.0], [0.0, 0.3, 0.1, -0.2, -0.2, 0.1, -0.3, 0.1, -0.1, 0.3, -0.5], [0.5, -0.5, -0.1, 0.2, -0.1, 0.4, 0.0, -0.1, -0.3, 0.2, -0.3], [0.1, 0.1, 0.4, -0.1, 0.4, 0.4, 0.0, 0.3, -0.3, -0.4, 0.0], [-0.4, -0.1, 0.0, -0.2, 0.1, 0.1, 0.1, -0.5, -0.5, -0.3, -0.3], [-0.2, 0.0, -0.1, 0.6, 0.0, -0.2, 0.2, 0.1, 0.0, -0.3, -0.3], [-0.1, 0.1, -0.6, -0.3, 0.1, 0.1, 0.5, 0.3, 0.1, 0.0, -0.3], [0.4, 0.2, 0.1, -0.1, 0.4, -0.4, -0.2, 0.1, 0.1, -0.2, -0.4], [-0.4, -0.2, 0.3, 0.3, -0.1, 0.1, -0.2, 0.2, 0.2, 0.0, -0.4], [-0.1, -0.1, -0.1, 0.1, 0.4, 0.3, -0.2, 0.2, 0.0, 0.1, 0.3]])
S = np.array([[39,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0], [ 0, 37,  0,  0,  0,  0,  0,  0,  0,  0,  0], [ 0,  0, 35,  0,  0,  0,  0,  0,  0,  0,  0], [ 0,  0,  0, 32,  0,  0,  0,  0,  0,  0,  0], [ 0,  0,  0,  0, 30,  0,  0,  0,  0,  0,  0], [ 0,  0,  0,  0,  0, 15,  0,  0,  0,  0,  0], [ 0,  0,  0,  0,  0,  0, 13,  0,  0,  0,  0], [ 0,  0,  0,  0,  0,  0,  0, 12,  0,  0,  0], [ 0,  0,  0,  0,  0,  0,  0,  0,  7,  0,  0], [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  0], [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1]])
V = np.array([[-0.1, 0.1, 0.1, 0.1, 0.8, 0.3, -0.3, -0.1, 0.1, -0.4, -0.1], [0.4, -0.2, 0.1, -0.4, -0.3, 0.5, -0.5, -0.1, 0.2, 0.1, -0.3], [-0.6, -0.2, 0.0, 0.3, -0.3, 0.0, -0.4, 0.1, 0.3, -0.1, 0.1], [-0.3, 0.3, 0.5, -0.3, 0.0, 0.1, -0.1, 0.6, -0.2, 0.2, 0.0], [-0.1, -0.3, 0.3, -0.2, 0.0, 0.3, 0.2, -0.4, -0.1, 0.2, 0.7], [0.1, -0.5, 0.0, 0.0, 0.4, -0.5, -0.3, 0.1, 0.1, 0.5, 0.1], [0.5, -0.4, 0.2, 0.5, -0.1, 0.2, 0.1, 0.5, -0.1, -0.3, 0.2], [0.2, 0.3, 0.6, 0.1, -0.1, -0.2, 0.1, -0.1, 0.7, 0.1, 0.0], [0.1, 0.4, -0.4, 0.3, 0.1, 0.4, -0.1, 0.1, 0.1, 0.5, 0.2], [0.1, 0.2, 0.4, 0.4, -0.1, -0.1, -0.4, -0.3, -0.6, 0.1, -0.1], [0.2, 0.3, -0.2, -0.2, 0.0, -0.3, -0.4, 0.1, 0.0, -0.4, 0.5]])

# Fewest leading components whose squared singular values add up to at
# least 61% of the total.
get_Variance_SVD(S, .61)

=== Min number of principal cpmponents: 3
=== Accuracy: 0.6205700497662494


array([1521, 1369, 1225])