In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [3]:
# Fetch the Iris dataset and unpack features, labels and class names
data = load_iris()
X, y = data.data, data.target
target_names = data.target_names

# Step 1: rescale every feature to zero mean / unit variance (important for PCA)
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Feature-by-feature covariance; samples are rows, so transpose for np.cov
cov_matrix = np.cov(X_scaled.T)

In [6]:
# Sanity check: expect (n_samples, n_features), i.e. samples as rows
X_scaled.shape  

(150, 4)

In [5]:
# Inspect the feature covariance matrix. The diagonal is n/(n-1) = 150/149
# rather than exactly 1 because np.cov divides by n-1 by default, while
# StandardScaler normalizes with the population (ddof=0) standard deviation.
cov_matrix

array([[ 1.00671141, -0.11835884,  0.87760447,  0.82343066],
       [-0.11835884,  1.00671141, -0.43131554, -0.36858315],
       [ 0.87760447, -0.43131554,  1.00671141,  0.96932762],
       [ 0.82343066, -0.36858315,  0.96932762,  1.00671141]])

In [7]:
# The covariance matrix is symmetric positive semi-definite, so its SVD
# coincides with its eigendecomposition. (Squaring singular values is only
# needed when the SVD is taken of the data matrix itself, not of its covariance.)
U, S, Vt = np.linalg.svd(cov_matrix)  # SVD decomposition
eigenvalues = S  # Singular values of the covariance matrix ARE its eigenvalues (sorted descending)
eigenvectors = Vt.T  # Rows of Vt are eigenvectors; transpose so each COLUMN is one eigenvector

# Sanity check: every column v with eigenvalue lam must satisfy C v = lam v.
assert np.allclose(cov_matrix @ eigenvectors, eigenvectors * eigenvalues), \
    "SVD factors do not form an eigendecomposition of cov_matrix"

In [10]:
# Inspect the eigenvector matrix: each column is one principal direction,
# ordered by decreasing eigenvalue (np.linalg.svd returns singular values sorted descending).
eigenvectors

array([[-0.52106591, -0.37741762,  0.71956635,  0.26128628],
       [ 0.26934744, -0.92329566, -0.24438178, -0.12350962],
       [-0.5804131 , -0.02449161, -0.14212637, -0.80144925],
       [-0.56485654, -0.06694199, -0.63427274,  0.52359713]])

In [11]:
# Keep the two leading principal directions (the first two columns)
top_2_eigenvectors = eigenvectors[:, :2]
# Express every standardized sample in that 2-D basis
X_pca = X_scaled @ top_2_eigenvectors

In [13]:
# Confirm the projection kept every sample and reduced the data to two components
X_pca.shape

(150, 2)