In [None]:
import numpy as np

def pca(X, num_components):
    """
    Project X onto its top principal components.

    X is the original data matrix with shape (n_samples, n_features).
    num_components is the number of principal components you want to keep.

    Returns X_reduced, the standardized data projected onto the
    'num_components' directions of largest variance, with shape
    (n_samples, num_components). If num_components exceeds n_features,
    all n_features components are returned.

    Each feature is standardized to zero mean and unit variance before the
    decomposition. Constant features (zero standard deviation) are only
    centered, so they contribute zeros instead of NaNs. The sign of each
    component is arbitrary, as with any eigendecomposition.
    """
    X = np.asarray(X, dtype=float)

    # Step 1: Standardize the dataset. A zero standard deviation would give
    # 0/0 = NaN and poison every output value, so divide those columns by 1.
    feature_std = np.std(X, axis=0)
    safe_std = np.where(feature_std == 0, 1.0, feature_std)
    X_standardized = (X - np.mean(X, axis=0)) / safe_std  # n_samples, n_features

    # Step 2: Compute the covariance matrix (n_features, n_features).
    # np.cov returns a 0-d array for a single feature; force it to be 2-D.
    covariance_matrix = np.atleast_2d(np.cov(X_standardized, rowvar=False))

    # Step 3: Compute the eigenvalues and eigenvectors. The covariance matrix
    # is symmetric, so eigh is the right solver: it guarantees real results,
    # whereas eig may return complex values because of round-off.
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

    # Step 4: Sort eigenvectors by decreasing eigenvalue
    sorted_indices = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, sorted_indices]

    # Step 5: Choose the top 'num_components' eigenvectors
    eigenvector_subset = sorted_eigenvectors[:, :num_components]

    # Step 6: Transform the data
    X_reduced = X_standardized @ eigenvector_subset

    return X_reduced