In [1]:
import numpy as np

def pca(X, num_components):
    """
    Perform Principal Component Analysis (PCA) on the dataset X.

    The data is mean-centered per feature, the sample covariance matrix is
    eigendecomposed, and the centered data is projected onto the eigenvectors
    with the largest eigenvalues.

    Parameters:
        X (array-like): Input data of shape (n_samples, n_features), with at
            least two samples. Rows are observations, columns are features.
        num_components (int): The number of principal components to retain.
            Must satisfy 1 <= num_components <= n_features.

    Returns:
        X_reduced (numpy.ndarray): Centered data projected onto the top
            principal components, shape (n_samples, num_components).
        sorted_eigenvalues (numpy.ndarray): All n_features eigenvalues of the
            covariance matrix, sorted in descending order.
        sorted_eigenvectors (numpy.ndarray): All eigenvectors as columns,
            ordered to match sorted_eigenvalues, shape (n_features, n_features).

    Raises:
        ValueError: If X is not 2-dimensional, has fewer than two samples, or
            num_components is outside the range [1, n_features].

    Note:
        The sign of each eigenvector (and therefore of each projected column)
        is whatever np.linalg.eigh returns; it is not normalized here.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            "X must be a 2-D array of shape (n_samples, n_features); "
            "got {} dimension(s)".format(X.ndim)
        )

    n_samples, n_features = X.shape
    if n_samples < 2:
        # With a single sample the sample covariance (ddof=1) is undefined.
        raise ValueError(
            "X must contain at least 2 samples; got {}".format(n_samples)
        )
    if not 1 <= num_components <= n_features:
        # Without this check, 0 / negative values are silently interpreted by
        # slice semantics and oversized values are silently truncated.
        raise ValueError(
            "num_components must be between 1 and {} (n_features); "
            "got {}".format(n_features, num_components)
        )

    # Step 1: Mean center the data (zero mean for each feature).
    X_meaned = X - np.mean(X, axis=0)

    # Step 2: Compute the covariance matrix of the mean-centered data.
    # rowvar=False indicates that each column represents a feature.
    # np.cov collapses the single-feature case to a 0-d array, which eigh
    # rejects, so force the result back to a (1, 1) matrix.
    cov_matrix = np.atleast_2d(np.cov(X_meaned, rowvar=False))

    # Step 3: Eigendecomposition. eigh is used because the covariance matrix
    # is symmetric.
    eigen_values, eigen_vectors = np.linalg.eigh(cov_matrix)

    # Step 4: Sort eigenvalues (and their eigenvector columns) in descending order.
    sorted_index = np.argsort(eigen_values)[::-1]
    sorted_eigenvalues = eigen_values[sorted_index]
    sorted_eigenvectors = eigen_vectors[:, sorted_index]

    # Step 5: Keep the top 'num_components' eigenvectors (principal components).
    eigenvector_subset = sorted_eigenvectors[:, :num_components]

    # Step 6: Project the mean-centered data onto the selected components.
    X_reduced = np.dot(X_meaned, eigenvector_subset)

    return X_reduced, sorted_eigenvalues, sorted_eigenvectors

def main():
    """Run PCA on a small 2-feature example dataset and print the results."""
    # Ten observations (rows) of two features (columns).
    samples = np.array([
        [2.5, 2.4],
        [0.5, 0.7],
        [2.2, 2.9],
        [1.9, 2.2],
        [3.1, 3.0],
        [2.3, 2.7],
        [2.0, 1.6],
        [1.0, 1.1],
        [1.5, 1.6],
        [1.1, 0.9],
    ])

    # Reduce the data to a single dimension.
    num_components = 1
    projected, eig_vals, eig_vecs = pca(samples, num_components)

    # Pluralize "component" only when more than one is kept.
    if num_components > 1:
        plural_suffix = "s"
    else:
        plural_suffix = ""
    reduced_header = "\nReduced Data (Projected onto {} principal component{}):\n".format(
        num_components, plural_suffix
    )

    # Report the input, the projection, and the full eigendecomposition.
    print("Original Data:\n", samples)
    print(reduced_header, projected)
    print("\nEigenvalues (sorted):\n", eig_vals)
    print("\nEigenvectors (sorted by eigenvalues):\n", eig_vecs)

# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


Original Data:
 [[2.5 2.4]
 [0.5 0.7]
 [2.2 2.9]
 [1.9 2.2]
 [3.1 3. ]
 [2.3 2.7]
 [2.  1.6]
 [1.  1.1]
 [1.5 1.6]
 [1.1 0.9]]

Reduced Data (Projected onto 1 principal component):
 [[ 0.82797019]
 [-1.77758033]
 [ 0.99219749]
 [ 0.27421042]
 [ 1.67580142]
 [ 0.9129491 ]
 [-0.09910944]
 [-1.14457216]
 [-0.43804614]
 [-1.22382056]]

Eigenvalues (sorted):
 [1.28402771 0.0490834 ]

Eigenvectors (sorted by eigenvalues):
 [[ 0.6778734  -0.73517866]
 [ 0.73517866  0.6778734 ]]
