# PCA (Principal Component Analysis)

## Importing the necessary Libraries

In [2]:
import numpy as np
import pandas as pd

## Step 1: Define the data (5 students, 3 subjects)

In [3]:
# Exam scores grouped by subject; position i in every list is student i.
data = dict(
    math=[90, 90, 60, 60, 30],
    eng=[60, 90, 60, 60, 30],
    art=[90, 30, 60, 90, 30],
)

In [4]:
# Tabular view of the scores: one row per student, one column per subject.
df = pd.DataFrame(data)

# Plain NumPy matrix (5 students x 3 subjects) for the linear algebra below.
# `to_numpy()` is the accessor pandas recommends over the older `.values`.
X = df.to_numpy()

In [5]:
# Caption and raw score matrix, each on its own line.
print("Original data (5 students × 3 subjects):", X, sep="\n")

Original data (5 students × 3 subjects):
[[90 60 90]
 [90 90 30]
 [60 60 60]
 [60 60 90]
 [30 30 30]]


## Step 2: Center the data (subtract feature means)

In [6]:
# Column-wise (per-subject) mean, then shift every column to zero mean.
mu = np.mean(X, axis=0)
X_centered = np.subtract(X, mu)

In [7]:
# Caption (preceded by a blank line) followed by the mean-centered matrix.
print("\nStep 1: Centered data (mean subtracted):", X_centered, sep="\n")


Step 1: Centered data (mean subtracted):
[[ 24.   0.  30.]
 [ 24.  30. -30.]
 [ -6.   0.   0.]
 [ -6.   0.  30.]
 [-36. -30. -30.]]


## Optional: Standardize (zero mean, unit variance)

In [8]:
# Sample standard deviation of each feature (ddof=1 -> divide by n - 1).
std = X_centered.std(axis=0, ddof=1)  # sample std

# A constant feature has std == 0; dividing by it would turn that column into
# NaN (0/0), which then propagates into the covariance matrix and eigensolver.
# Dividing such a column by 1 instead leaves it at its centered value of 0.
X_scaled = X_centered / np.where(std == 0, 1.0, std)

In [9]:
# Caption (preceded by a blank line) followed by the standardized matrix.
print("\nStep 2: Standardized data (centered and scaled):", X_scaled, sep="\n")


Step 2: Standardized data (centered and scaled):
[[ 0.95618289  0.          1.        ]
 [ 0.95618289  1.41421356 -1.        ]
 [-0.23904572  0.          0.        ]
 [-0.23904572  0.          1.        ]
 [-1.43427433 -1.41421356 -1.        ]]


## Step 3: Compute the covariance matrix (of the standardized data, i.e. the correlation matrix)

In [12]:
# Number of observations (rows); it sets the n - 1 normalisation below.
n = len(X_scaled)

# Scatter matrix divided by n - 1 gives the 3x3 sample covariance of the
# columns of X_scaled. Because those columns were standardized, this is
# also the correlation matrix of the original scores (unit diagonal).
Sigma = np.matmul(X_scaled.T, X_scaled) / (n - 1)

In [13]:
# Caption (preceded by a blank line) followed by the 3x3 matrix.
print("\nStep 3: Covariance matrix Σ:", Sigma, sep="\n")


Step 3: Covariance matrix Σ:
[[1.         0.84515425 0.29880715]
 [0.84515425 1.         0.        ]
 [0.29880715 0.         1.        ]]


## Step 4: Compute eigenvalues and eigenvectors


In [None]:
# Eigendecomposition for a symmetric matrix; eigenvector i is column i of W.
eigenvals, W = np.linalg.eigh(Sigma)

# Put the largest eigenvalue (highest-variance direction) first and apply
# the same permutation to the eigenvector columns so the pairs stay aligned.
idx = np.flip(np.argsort(eigenvals))
eigenvals, W = eigenvals[idx], W[:, idx]

In [15]:
# Eigenvalues in descending order; each is the variance along its component.
print("\nStep 4: Eigenvalues (variances):", eigenvals, sep="\n")


Step 4: Eigenvalues (variances):
[1.89642146 1.         0.10357854]


In [16]:
# Eigenvector matrix; column j pairs with eigenvals[j].
print("\nStep 4: Eigenvectors (principal components, columns):", W, sep="\n")


Step 4: Eigenvectors (principal components, columns):
[[-0.70710678  0.         -0.70710678]
 [-0.66666667 -0.33333333  0.66666667]
 [-0.23570226  0.94280904  0.23570226]]


## Step 5: Choose number of components (keep 2)

In [17]:
# Number of leading principal directions to retain.
k = 2

# First k columns of W form the 3x2 projection matrix.
W_k = W[:, 0:k]

print("\nStep 5: Projection matrix W_k (first 2 PCs):", W_k, sep="\n")


Step 5: Projection matrix W_k (first 2 PCs):
[[-0.70710678  0.        ]
 [-0.66666667 -0.33333333]
 [-0.23570226  0.94280904]]


## Step 6: Project data onto 2D PCA space

In [18]:
# Coordinates of each standardized sample along the retained components (5x2).
X_reduced = np.matmul(X_scaled, W_k)

In [19]:
# Caption (preceded by a blank line) followed by the projected data.
print("\nStep 6: PCA‑reduced data (5 students × 2 PC components):", X_reduced, sep="\n")


Step 6: PCA‑reduced data (5 students × 2 PC components):
[[-0.91182566  0.94280904]
 [-1.38323018 -1.41421356]
 [ 0.16903085  0.        ]
 [-0.06667141  0.94280904]
 [ 2.19269641 -0.47140452]]


### Optional: Show explained variance ratios

In [20]:
# Total variance is the sum of all eigenvalues; each retained component's
# share is its eigenvalue divided by that total.
total_var = np.sum(eigenvals)
explained_ratio = np.divide(eigenvals[:k], total_var)
print("\nExplained variance ratio (PC1, PC2):", explained_ratio, sep="\n")


Explained variance ratio (PC1, PC2):
[0.63214049 0.33333333]
