In [1]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
import numpy as np

# Manual PCA on the iris dataset: standardize the features, eigendecompose
# their covariance matrix, and project the samples onto the leading components.

# Step 1: Load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Step 2: Standardize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 3: Compute the covariance matrix
# rowvar=False tells np.cov that each column is a variable (feature) and each
# row an observation, so the result is (n_features, n_features).
cov_matrix = np.cov(X_scaled, rowvar=False)

# Step 4: Compute eigenvectors and eigenvalues
# A covariance matrix is symmetric, so use the symmetric/Hermitian solver.
# Unlike the general-purpose np.linalg.eig, eigh always returns real
# eigenvalues and orthonormal eigenvectors, which keeps the sort below
# well-defined and avoids spurious complex results from round-off.
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

# Step 5: Select principal components
# Sort eigenvalues and corresponding eigenvectors in descending order
# (eigenvectors are stored as columns, hence the column indexing).
idx = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]

# Select the top eigenvectors (two components by default)
n_components = 2
top_eigenvectors = eigenvectors[:, :n_components]

# Step 6: Project data onto principal components
# NOTE: an eigenvector is only defined up to sign, so the sign of each
# projected column is arbitrary and may differ between solvers.
X_pca = np.dot(X_scaled, top_eigenvectors)

# Print the reduced dimensionality data
print("Reduced dimensionality data (first 5 samples):")
print(X_pca[:5])


Reduced dimensionality data (first 5 samples):
[[-2.26470281 -0.4800266 ]
 [-2.08096115  0.67413356]
 [-2.36422905  0.34190802]
 [-2.29938422  0.59739451]
 [-2.38984217 -0.64683538]]
