In [8]:
import numpy as np
import pandas as pd
import sklearn 
from sklearn.preprocessing import StandardScaler

In [9]:
X = np.array([4,8,13,7])

In [10]:
Y = np.array([11,4,5,14])

In [11]:
def covariance(x, y):
    
    if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
        return None  

    if x.shape != y.shape: 
        return None  

    if np.isnan(x).any() or np.isnan(y).any():  
        return np.nan

    n = len(x)

    if n == 0:  
        return np.nan  

    x_mean = np.mean(x)
    y_mean = np.mean(y)

    covariance = np.sum((x - x_mean) * (y - y_mean)) / (n - 1)  

    return covariance


In [12]:
cov_xy = covariance(X,Y)
cov_xx = covariance(X,X)
cov_yy = covariance(Y,Y)
print(cov_xy,cov_xx,cov_yy)

-11.0 14.0 23.0


In [13]:
cov_matrix = np.array([[cov_xx,cov_xy],[cov_xy,cov_yy]])
cov_matrix

array([[ 14., -11.],
       [-11.,  23.]])

In [14]:
eigen_values ,eigen_vectors = np.linalg.eig(cov_matrix)
print(f" eigen vector{eigen_vectors}") 
print('\n')
print("ei{eigen_values}")

[[-0.83025082  0.55738997]
 [-0.55738997 -0.83025082]]


[ 6.61513568 30.38486432]


In [15]:
def pca(eigen_values, eigen_vectors, X, Y, k=2): 
    
    if not isinstance(eigen_values, np.ndarray) or not isinstance(eigen_vectors, np.ndarray) or not isinstance(X, np.ndarray) or not isinstance(Y, np.ndarray):
        print("Error: Inputs must be NumPy arrays.")
        return None, None

    if eigen_values.ndim != 1 or eigen_vectors.ndim != 2:
        print("Error: eigen_values must be 1D, eigen_vectors must be 2D.")
        return None, None

    if eigen_vectors.shape[0] != eigen_vectors.shape[1]:
        print("Error: eigen_vectors must be a square matrix.")
        return None, None

    if k <= 0 or k > eigen_values.shape[0]:
        print("Error: k must be a positive integer less than or equal to the number of eigenvalues.")
        return None, None

    if X.shape != Y.shape:
        print("Error: X and Y must have the same shape.")
        return None, None

    sorted_indices = np.argsort(eigen_values)[::-1]
    sorted_eigen_values = eigen_values[sorted_indices]
    sorted_eigen_vectors = eigen_vectors[:, sorted_indices]

    data = np.array([X, Y]).T
    scaler = StandardScaler()  
    scaled_data = scaler.fit_transform(data)

    top_k_eigenvectors = sorted_eigen_vectors[:, :k] 
    principal_components = np.dot(scaled_data, top_k_eigenvectors)

    explained_variance_ratio = sorted_eigen_values[:k] / np.sum(eigen_values)

    return principal_components, explained_variance_ratio



In [16]:
principal_components, explained_variance_ratio = pca(eigen_values, eigen_vectors, X, Y, k=2)
if principal_components is not None:
    print("Principal Components:\n", principal_components)
    print("\nExplained Variance Ratio:\n", explained_variance_ratio)

Principal Components:
 [[-1.18780938  0.68937453]
 [ 0.89955408  0.6039168 ]
 [ 1.55972457 -0.81139176]
 [-1.27146926 -0.48189957]]

Explained Variance Ratio:
 [0.82121255 0.17878745]
