In [17]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

np.random.seed(23)

mu_vec1 = np.array([0,0,0])
cov_mat1 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20)

df = pd.DataFrame(class1_sample, columns=['feature1','feature2','feature3'])
df['target'] = 1

mu_vec2 = np.array([1,1,1])
cov_mat2 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 20)

df1 = pd.DataFrame(class2_sample, columns=['feature1','feature2','feature3'])
df1['target'] = 0

# FIX: Use concat instead of append
df = pd.concat([df, df1], ignore_index=True)

df = df.sample(40)


In [18]:
df.head()

Unnamed: 0,feature1,feature2,feature3,target
2,-0.367548,-1.13746,-1.322148,1
34,0.177061,-0.598109,1.226512,0
14,0.420623,0.41162,-0.071324,1
11,1.968435,-0.547788,-0.679418,1
12,-2.50623,0.14696,0.606195,1


In [21]:
df.shape

(40, 4)

In [23]:
X = df[['feature1','feature2','feature3']].values
y = df['target'].values

# Standardize (VERY IMPORTANT for PCA)
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

In [43]:
X_std.T

array([[-7.00808909e-01, -2.24480873e-01, -1.14564810e-02,
         1.34229583e+00, -2.57134993e+00,  8.67117739e-01,
         1.56619581e+00, -9.74766674e-02, -1.01191670e+00,
         2.09004923e+00, -1.59495163e+00, -6.69382831e-01,
         2.04020404e-01, -1.26266138e+00, -4.27309418e-02,
         1.08727791e+00, -1.68465650e-01, -4.19083565e-01,
        -1.24747084e+00,  8.78271280e-01, -1.25049533e-01,
         2.60757039e-01, -1.27833370e+00, -7.47422684e-01,
        -1.04782923e+00,  1.37957585e+00,  4.06862164e-01,
         5.04227153e-01,  4.50354319e-01,  8.58528850e-01,
         7.14580368e-01, -2.00362737e-01,  2.75623104e-01,
         5.34963139e-01, -1.98695948e+00, -2.13040769e-01,
        -3.99055844e-01,  7.29021846e-01,  6.99852972e-01,
         1.17071536e+00],
       [-1.52558622e+00, -1.01075678e+00, -4.69345322e-02,
        -9.62723488e-01, -2.99561164e-01,  9.35790500e-01,
        -2.19914261e-01, -3.82240727e-01,  9.54984015e-01,
        -7.57570343e-01,  1.96

In [25]:
# 1) Covariance matrix
cov_mat = np.cov(X_std.T)

# 2) Eigenvalues & Eigenvectors
eig_vals, eig_vecs = np.linalg.eig(cov_mat)

print("Eigenvalues (Eigen-decomposition):")
print(eig_vals)
print("\nEigenvectors:")
print(eig_vecs)

# 3) Project data
X_pca_eig = X_std @ eig_vecs


Eigenvalues (Eigen-decomposition):
[1.3536065  0.94557084 0.77774573]

Eigenvectors:
[[-0.53875915 -0.69363291  0.47813384]
 [-0.65608325 -0.01057596 -0.75461442]
 [-0.52848211  0.72025103  0.44938304]]


In [37]:
cov_mat

array([[1.02564103, 0.20478114, 0.080118  ],
       [0.20478114, 1.02564103, 0.19838882],
       [0.080118  , 0.19838882, 1.02564103]])

In [39]:
np.linalg.eig(cov_mat)

EigResult(eigenvalues=array([1.3536065 , 0.94557084, 0.77774573]), eigenvectors=array([[-0.53875915, -0.69363291,  0.47813384],
       [-0.65608325, -0.01057596, -0.75461442],
       [-0.52848211,  0.72025103,  0.44938304]]))

In [27]:
#---------------

In [33]:
# SVD
U, S, VT = np.linalg.svd(X_std, full_matrices=False)

# Eigenvectors (principal components)
eigenvectors_svd = VT.T   # columns = eigenvectors

# Eigenvalues
n_samples = X_std.shape[0]
eigenvalues_svd = (S ** 2) / (n_samples - 1)

print("Eigenvalues from SVD:")
print(eigenvalues_svd)

print("\nEigenvectors from SVD:")
print(eigenvectors_svd)


Eigenvalues from SVD:
[1.3536065  0.94557084 0.77774573]

Eigenvectors from SVD:
[[ 0.53875915 -0.69363291 -0.47813384]
 [ 0.65608325 -0.01057596  0.75461442]
 [ 0.52848211  0.72025103 -0.44938304]]


In [45]:
np.linalg.svd(X_std, full_matrices=False)

SVDResult(U=array([[-0.31695102, -0.12475461, -0.00546672],
       [-0.07218339,  0.08566607, -0.15908608],
       [-0.05233832, -0.0756576 ,  0.0475694 ],
       [-0.07353183, -0.29209142, -0.15181949],
       [-0.22164836,  0.28781743,  0.18659806],
       [ 0.11778046, -0.15125159,  0.08773494],
       [ 0.12980968, -0.12383192, -0.20371938],
       [-0.15115137, -0.16660259,  0.0788221 ],
       [-0.03694946,  0.0354081 ,  0.27271205],
       [ 0.21251381, -0.03204649, -0.42653059],
       [-0.05730366,  0.25230054,  0.11692455],
       [-0.23315735,  0.07487551, -0.21216549],
       [-0.11477435, -0.1732672 ,  0.02906419],
       [-0.06578052,  0.19234119,  0.07404611],
       [ 0.0219067 , -0.07692407,  0.17183893],
       [ 0.3539981 , -0.20839785,  0.44668307],
       [-0.18571164, -0.06222693, -0.11255263],
       [-0.02977789, -0.03251148,  0.16688698],
       [-0.11257382,  0.19823206, -0.01070496],
       [ 0.14562714, -0.05991208, -0.02121149],
       [ 0.04587698,  0.0464

In [31]:
from sklearn.decomposition import PCA

pca = PCA(n_components=3)
X_pca_sklearn = pca.fit_transform(X_std)

print("\nEigenvalues (sklearn PCA):")
print(pca.explained_variance_) 

print("\nPCA Components (sklearn):")
print(pca.components_)



Eigenvalues (sklearn PCA):
[1.3536065  0.94557084 0.77774573]

PCA Components (sklearn):
[[ 0.53875915  0.65608325  0.52848211]
 [-0.69363291 -0.01057596  0.72025103]
 [-0.47813384  0.75461442 -0.44938304]]


In [47]:
#---------------------------

In [51]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

np.random.seed(23)

mu_vec1 = np.array([0,0,0])
cov_mat1 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class1_sample = np.random.multivariate_normal(mu_vec1, cov_mat1, 20)

df = pd.DataFrame(class1_sample, columns=['feature1','feature2','feature3'])
df['target'] = 1

mu_vec2 = np.array([1,1,1])
cov_mat2 = np.array([[1,0,0],[0,1,0],[0,0,1]])
class2_sample = np.random.multivariate_normal(mu_vec2, cov_mat2, 20)

df1 = pd.DataFrame(class2_sample, columns=['feature1','feature2','feature3'])
df1['target'] = 0

# Combine both classes
df = pd.concat([df, df1], ignore_index=True)
df = df.sample(40, random_state=23)   # shuffle

# -----------------------------
# 1) Take only features (X)
# -----------------------------
X = df[['feature1', 'feature2', 'feature3']].values   # shape: (40, 3)

# (Optional but standard for PCA) – standardize
scaler = StandardScaler()
X_std = scaler.fit_transform(X)   # centered + scaled

# We'll do SVD of X_std (40 x 3) **without** np.linalg.svd

# -----------------------------
# 2) Compute X^T X  (3 x 3)
# -----------------------------
XtX = X_std.T @ X_std   # Gram matrix

# -----------------------------
# 3) Eigen-decomposition of X^T X
#    XtX = V * diag(eigvals) * V^T
# -----------------------------
# XtX is symmetric, so use eigh (for symmetric matrices)
eigvals, eigvecs = np.linalg.eigh(XtX)

# Sort eigenvalues (and eigenvectors) in descending order
idx = np.argsort(eigvals)[::-1]
eigvals = eigvals[idx]
eigvecs = eigvecs[:, idx]

# -----------------------------
# 4) Build Sigma (singular values)
#    singular_values^2 = eigenvalues
# -----------------------------
# Numerical safety: clip tiny negative values to 0
eigvals_clipped = np.clip(eigvals, 0, None)
singular_values = np.sqrt(eigvals_clipped)   # shape: (3,)

Sigma = np.diag(singular_values)            # shape: (3, 3)

# Right singular vectors (V)
V = eigvecs                                 # shape: (3, 3)

# -----------------------------
# 5) Compute U = X_std * V * Sigma^{-1}
# -----------------------------
# Handle possible zero singular values
tol = 1e-12
inv_sing = np.zeros_like(singular_values)
nonzero_mask = singular_values > tol
inv_sing[nonzero_mask] = 1.0 / singular_values[nonzero_mask]

# U = X_std @ V @ diag(1/sigma)
U = X_std @ V * inv_sing   # broadcasting: each column scaled by 1/sigma_i

# -----------------------------
# 6) Check reconstruction X_std ≈ U Sigma V^T
# -----------------------------
X_recon = U @ Sigma @ V.T

print("Eigenvalues (of X^T X):")
print(eigvals)

print("\nSingular values (Sigma diagonal):")
print(singular_values)

print("\nRight singular vectors V (columns):")
print(V)

print("\nLeft singular vectors U (first 5 rows):")
print(U)

print("\nReconstruction error ||X_std - UΣV^T||_F:")
print(np.linalg.norm(X_std - X_recon, 'fro'))


Eigenvalues (of X^T X):
[52.79065361 36.87726278 30.33208361]

Singular values (Sigma diagonal):
[7.26571769 6.07266521 5.50745709]

Right singular vectors V (columns):
[[-0.53875915  0.69363291 -0.47813384]
 [-0.65608325  0.01057596  0.75461442]
 [-0.52848211 -0.72025103 -0.44938304]]

Left singular vectors U (first 5 rows):
[[ 0.23315735 -0.07487551 -0.21216549]
 [-0.22774954 -0.11386652 -0.03506495]
 [ 0.18571164  0.06222693 -0.11255263]
 [-0.3539981   0.20839785  0.44668307]
 [-0.21251381  0.03204649 -0.42653059]
 [ 0.03694946 -0.0354081   0.27271205]
 [-0.06730871  0.09167659 -0.07932651]
 [-0.01730197  0.13212051 -0.20752245]
 [ 0.05233832  0.0756576   0.0475694 ]
 [-0.14562714  0.05991208 -0.02121149]
 [-0.10319976 -0.2825613  -0.22248479]
 [-0.09557283 -0.02684426 -0.17223116]
 [ 0.05730366 -0.25230054  0.11692455]
 [ 0.15115137  0.16660259  0.0788221 ]
 [-0.05557284  0.04617302 -0.04668467]
 [-0.23639993 -0.01743138  0.2188634 ]
 [ 0.05573905  0.23103883  0.11568694]
 [ 0.0297