### Principal Component Analysis

In [19]:
import numpy as np

# Field layout of the structured array: two named, float-valued attributes.
dtypes = [
    ('Attribute1', 'float'),
    ('Attribute2', 'float'),
]

# Four sample observations, one (Attribute1, Attribute2) tuple per record.
values = [
    (4, 11),
    (8, 4),
    (13, 5),
    (7, 14),
]

### Convert the values to a structured NumPy array

In [23]:
# Build a structured (record) array: one record per observation, using the
# named float fields declared in `dtypes`.
array = np.array(values, dtype=dtypes)

# Pull each field out by name as its own 1-D array.
x1 = array['Attribute1']
x2 = array['Attribute2']

# Show only the Attribute1 values.
print(x1)

[ 4.  8. 13.  7.]


In [25]:
# Show both attribute vectors, one per line.
print(x1, x2, sep='\n')

[ 4.  8. 13.  7.]
[11.  4.  5. 14.]


### Find mean of each attribute x1 and x2

In [31]:
# Arithmetic mean of Attribute1 (subtracted later to centre the data).
mean_x1 = x1.mean()
mean_x1

8.0

In [33]:
# Arithmetic mean of Attribute2 (subtracted later to centre the data).
mean_x2 = x2.mean()
mean_x2

8.5

### Find covariance matrix 

In [76]:
# Stack the two attributes as rows (one variable per row) and compute their
# 2x2 covariance matrix: variances on the diagonal, cov(x1, x2) off-diagonal.
# np.cov's default normalisation (N - 1) is used.
covar_x1_x2 = np.cov(np.vstack((x1, x2)))
covar_x1_x2

array([[ 14., -11.],
       [-11.,  23.]])

### Find eigenvalues and eigenvectors

In [125]:
# Eigen-decomposition of the covariance matrix.
# eigenVal[i] is paired with the COLUMN eigenVect[:, i], not with row i.
# NOTE(review): the covariance matrix is symmetric, so np.linalg.eigh would also
# apply; it may return eigenvectors with flipped signs, so confirm before switching.
eigenVal, eigenVect = np.linalg.eig(covar_x1_x2)
print(eigenVal)
print(f"\n {eigenVect}")

[ 6.61513568 30.38486432]

 [[-0.83025082  0.55738997]
 [-0.55738997 -0.83025082]]


In [149]:
# Step 4: order the eigenpairs from largest to smallest eigenvalue.
# The sort is on the signed eigenvalues (not their absolute values); the same
# permutation is applied to the eigenvector COLUMNS so each pair stays aligned.
idx = np.argsort(eigenVal)[::-1]
eigenvalues, eigenvectors = eigenVal[idx], eigenVect[:, idx]
print("eigenvalues : ", eigenvalues)
print("eigenvectors : \n", eigenvectors)

eigenvalues :  [30.38486432  6.61513568]
eigenvectors : 
 [[ 0.55738997 -0.83025082]
 [-0.83025082 -0.55738997]]


### Components of the first (dominant) eigenvector

In [139]:
    # unit_eigenvectors = np.array([vec / np.linalg.norm(vec) for vec in eigenvectors.T]).T
# Print each component of the dominant eigenvector (the one paired with the
# largest eigenvalue after sorting).
# np.linalg.eig stores eigenvectors as COLUMNS, so that vector is
# eigenvectors[:, 0]. Indexing eigenvectors[0] would instead take the first ROW,
# i.e. the first component of every eigenvector; that only gives the same
# numbers when the eigenvector matrix happens to be symmetric.
for component in eigenvectors[:, 0]:
    print("i \n", component)

i 
 0.5573899686393251
i 
 -0.8302508192469622


### First principal component score of the point (4, 11)

In [207]:
# Score of the first sample on the first principal component: centre the sample
# by subtracting the attribute means, then take the dot product with the
# dominant eigenvector. Eigenvectors are stored as COLUMNS, so its components
# are eigenvectors[0, 0] and eigenvectors[1, 0] (column 0), not row 0.
eigenvectors[0, 0]*(x1[0]-mean_x1) + eigenvectors[1, 0]*(x2[0]-mean_x2)

-4.3051869226747055

### First principal component score of every data point

In [201]:
# Project every sample onto the first principal component.
# The dominant eigenvector is COLUMN 0 of `eigenvectors`, so its components are
# eigenvectors[0, 0] and eigenvectors[1, 0]; each sample is mean-centred before
# the dot product is taken.
for a1, a2 in zip(x1, x2):
    pc1_score = eigenvectors[0, 0]*(a1-mean_x1) + eigenvectors[1, 0]*(a2-mean_x2)
    print("Attribute1: ",a1,"\t Attribute2: ", a2, "\t Principal Component: ", pc1_score)

Attribute1:  4.0 	 Attribute2:  11.0 	 Principal Component:  -4.3051869226747055
Attribute1:  8.0 	 Attribute2:  4.0 	 Principal Component:  3.73612868661133
Attribute1:  13.0 	 Attribute2:  5.0 	 Principal Component:  5.692827710560993
Attribute1:  7.0 	 Attribute2:  14.0 	 Principal Component:  -5.1237694744976165


### Using the scikit-learn library

In [113]:
from sklearn.decomposition import PCA
# The four observations as a plain 2-D array: one row per sample, one column
# per attribute.
data = np.array([
    [4, 11],
    [8, 4],
    [13, 5],
    [7, 14],
])
data

array([[ 4, 11],
       [ 8,  4],
       [13,  5],
       [ 7, 14]])

In [115]:
# Fit a PCA model that keeps both components (the data has 2 attributes).
pca = PCA(n_components=2)
pca.fit(X=data)

In [119]:
# Principal axes found by the fitted PCA model.
# NOTE(review): per the scikit-learn docs each ROW of `components_` is one axis
# (shape n_components x n_features), whereas np.linalg.eig returns eigenvectors
# as COLUMNS. Keep that in mind when comparing with the manual result.
principal_components = pca.components_

In [121]:
# Project the samples onto the fitted principal axes.
transformed_data = pca.transform(data)

# Print the axes and the projected data, each under its own heading.
for heading, matrix in (
    ("Principal Components:\n", principal_components),
    ("\nTransformed Data:\n", transformed_data),
):
    print(heading, matrix)


Principal Components:
 [[ 0.55738997 -0.83025082]
 [-0.83025082 -0.55738997]]

Transformed Data:
 [[-4.30518692  1.92752836]
 [ 3.73612869  2.50825486]
 [ 5.69282771 -2.20038921]
 [-5.12376947 -2.23539401]]
