In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

In [4]:
np.random.seed(42)
X, _ = make_classification(
    n_samples=500,
    n_features=50,
    n_informative=30,
    n_redundant=10,
    random_state=42
)
print(X[:3])
print(X.shape)

[[ 9.71024813e+00 -1.33379755e+00  2.72416424e+00 -5.46687822e-01
   3.97404033e-01  1.37451971e+00  1.51838648e+01  5.89023080e-01
   1.25004029e+00 -7.85867623e-01  2.18369324e+00 -5.47233940e+00
  -9.74645867e-01 -1.59843585e+00  4.41002631e+00  8.00268737e-01
  -1.04786631e+00 -3.76392848e-01  1.86357335e+00 -3.43568958e+00
  -1.34744251e+00 -1.35885321e+00  1.10326470e+00  4.57391290e-02
   1.99154853e+00  3.15801187e+00 -8.78096937e+00  1.64582294e+00
  -3.59888034e+00  3.49941835e-01  1.25393180e+00  3.54375120e+00
   6.69441385e+00  2.91168554e+00  1.43594648e+01  5.12583153e-01
   4.24372418e+00 -1.80649937e+01  1.86871355e+00  5.89947402e-01
  -1.85165696e+00 -1.05448202e+01 -4.30024219e-01  2.47089777e+00
  -2.00258121e+00 -6.68384212e+00  5.80421940e-01 -1.07166395e+01
  -1.34615433e-01  4.10194810e+00]
 [-5.35070012e-01  3.26638114e+00  1.79949587e+00 -8.75209201e-01
   2.34801851e+00 -1.83820231e+00  9.66776042e+00  3.58909981e-01
  -2.09784474e-01  9.04385701e+00  4.6410

In [5]:
##Normalize
X = X - np.mean(X, axis = 0)

In [10]:
##Generate Covariance matrix
cov_matrix = np.cov(X, rowvar=False)
print(cov_matrix.shape)

(50, 50)


In [13]:
##Eigen Decomposition
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
print(eigenvalues.shape)
print(eigenvectors.shape)
print(eigenvectors[0].shape)

(50,)
(50, 50)
(50,)


In [14]:
##Taking the eigen vectors with highest k eigen values
indexes = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[indexes]
eigenvectors = eigenvectors[:, indexes]
k = 3
eigenvectors = eigenvectors[:, :k]

In [16]:
##PCA
X = np.dot(X, eigenvectors)
print(X[:3])
print(X.shape) ##Reduced to dimension of k

[[ 14.10344289+0.j  21.96660462+0.j  -2.10484598+0.j]
 [  9.58991958+0.j  15.05233052+0.j  -9.85981932+0.j]
 [-15.04701561+0.j   0.72760985+0.j   0.2337782 +0.j]]
(500, 3)
