In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

def pca(X, num_components):
    """Project ``X`` onto its leading principal components.

    Args:
        X: 2-D array-like of shape (n_samples, n_features) holding numeric
            data. Rows are observations, columns are features.
        num_components: Number of leading principal components to keep.

    Returns:
        A tuple ``(transformed_data, eigenvalues, principal_components)``:

        * ``transformed_data`` -- the mean-centered data projected onto the
          selected components, shape (n_samples, num_components).
        * ``eigenvalues`` -- all eigenvalues of the sample covariance matrix,
          sorted in descending order.
        * ``principal_components`` -- the selected eigenvectors as columns,
          shape (n_features, num_components). The sign of each column is
          arbitrary.
    """
    X = np.asarray(X, dtype=float)

    # PCA scores are defined relative to the feature means; projecting the
    # raw data would offset every score by the projected mean vector.
    X_centered = X - X.mean(axis=0)

    # np.cov collapses to a 0-d array for a single feature; keep it 2-D so
    # the eigen-decomposition below still works.
    cov_matrix = np.atleast_2d(np.cov(X_centered, rowvar=False))

    # A covariance matrix is symmetric, so use eigh: it guarantees real
    # eigenvalues and orthonormal eigenvectors, whereas eig may return
    # complex-typed results because of round-off.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

    # Order the eigenpairs from largest to smallest explained variance.
    sorted_indices = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[sorted_indices]
    eigenvectors = eigenvectors[:, sorted_indices]

    # Select the top 'num_components' eigenvectors
    principal_components = eigenvectors[:, :num_components]

    # Project the centered data onto the principal components
    transformed_data = np.dot(X_centered, principal_components)

    return transformed_data, eigenvalues, principal_components

# Load the data set; the script expects a 'play' target column.
data = pd.read_csv("play_tennis.csv")

# Replace every object-typed (categorical) column with integer codes.
# The single encoder is refit on each column in turn, so after the loop it
# only remembers the classes of the last column it processed.
label_encoder = LabelEncoder()
categorical_columns = data.select_dtypes(include=['object']).columns
for col_name in categorical_columns:
    data[col_name] = label_encoder.fit_transform(data[col_name])

# Split into the feature matrix and the target vector.
y = data['play'].to_numpy()
X = data.drop(columns='play').to_numpy()

# Run PCA on the encoded features, keeping the two leading components.
num_components = 2
pca_result = pca(X, num_components)
transformed_data, eigenvalues, principal_components = pca_result

# Report the shapes before/after the projection and the spectrum.
for label, value in (
    ("Original data shape:", X.shape),
    ("Transformed data shape:", transformed_data.shape),
    ("Eigenvalues:", eigenvalues),
):
    print(label, value)
print("Principal components:")
print(principal_components)


Original data shape: (14, 5)
Transformed data shape: (14, 2)
Eigenvalues: [17.72778608  0.75207765  0.56455197  0.25996934  0.16264793]
Principal components:
[[-0.99333695 -0.05833223]
 [-0.00625889 -0.81081696]
 [ 0.1127664  -0.51332595]
 [-0.02117184  0.268104  ]
 [-0.00883338 -0.06156241]]
