<a href="https://colab.research.google.com/github/Allin-13/MachineLearning/blob/main/PCA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np

# -----------------------------------
# Step 1: Collect data from user
# 5 students, 4 subjects (or attributes)
# -----------------------------------
data = []

print("Enter marks for 5 students in 4 subjects (space-separated):")
for i in range(5):
    row = list(map(float, input(f"Student {i+1}: ").split()))
    if len(row) != 4:
        print("Error: Enter exactly 4 numbers")
        exit()
    data.append(row)

data = np.array(data)

print("\nSTEP 1: ORIGINAL DATASET")
print(data)

# -----------------------------------
# Step 2: Compute mean
# -----------------------------------
mean = np.mean(data, axis=0)
print("\nSTEP 2: MEAN OF EACH ATTRIBUTE")
print(mean)

# -----------------------------------
# Step 3: Mean-center the data
# -----------------------------------
centered_data = data - mean
print("\nSTEP 3: MEAN-CENTERED DATA")
print(centered_data)

# -----------------------------------
# Step 4: Covariance matrix
# -----------------------------------
cov_matrix = np.cov(centered_data.T)  # columns = variables
print("\nSTEP 4: COVARIANCE MATRIX")
print(cov_matrix)

# -----------------------------------
# Step 5: Eigenvalues and Eigenvectors
# -----------------------------------
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
print("\nSTEP 5: EIGENVALUES")
print(eigenvalues)
print("\nSTEP 5: EIGENVECTORS")
print(eigenvectors)

# -----------------------------------
# Step 6: Explained Variance (%)
# -----------------------------------
total_variance = np.sum(eigenvalues)
explained_variance = (eigenvalues / total_variance) * 100
print("\nSTEP 6: EXPLAINED VARIANCE (%)")
print(explained_variance)

# -----------------------------------
# Step 7: Minimum components to retain >=90% variance
# -----------------------------------
sorted_index = np.argsort(eigenvalues)[::-1]  # descending
sorted_variance = explained_variance[sorted_index]
cumulative_variance = np.cumsum(sorted_variance)
num_components = np.where(cumulative_variance >= 90)[0][0] + 1

print("\nMINIMUM NUMBER OF COMPONENTS REQUIRED")
print(num_components)

# -----------------------------------
# Step 8: Project data onto top principal components
# -----------------------------------
selected_vectors = eigenvectors[:, sorted_index[:num_components]]
reduced_data = centered_data.dot(selected_vectors)

print("\nSTEP 8: REDUCED DATASET")
print(reduced_data)

print("\nTOTAL VARIANCE RETAINED (%)")
print(cumulative_variance[num_components - 1])

Enter marks for 5 students in 4 subjects (space-separated):
Student 1: 67 78 89 79
Student 2: 67 87 96 78
Student 3: 56 47 68 97
Student 4: 67 56 89 76
Student 5: 78 68 96 90

STEP 1: ORIGINAL DATASET
[[67. 78. 89. 79.]
 [67. 87. 96. 78.]
 [56. 47. 68. 97.]
 [67. 56. 89. 76.]
 [78. 68. 96. 90.]]

STEP 2: MEAN OF EACH ATTRIBUTE
[67.  67.2 87.6 84. ]

STEP 3: MEAN-CENTERED DATA
[[  0.   10.8   1.4  -5. ]
 [  0.   19.8   8.4  -6. ]
 [-11.  -20.2 -19.6  13. ]
 [  0.  -11.2   1.4  -8. ]
 [ 11.    0.8   8.4   6. ]]

STEP 4: COVARIANCE MATRIX
[[ 60.5   57.75  77.   -19.25]
 [ 57.75 260.7  142.1  -85.25]
 [ 77.   142.1  132.3  -68.25]
 [-19.25 -85.25 -68.25  82.5 ]]

STEP 5: EIGENVALUES
[414.8035327    1.40774305  69.9187896   49.86993465]

STEP 5: EIGENVECTORS
[[-0.25307091 -0.66625297 -0.66138465  0.23373584]
 [-0.7508508  -0.13495397  0.54575606  0.34664221]
 [-0.52266994  0.68074631 -0.50899752 -0.06574284]
 [ 0.31463364  0.27290818 -0.07511448  0.90602683]]

STEP 6: EXPLAINED VARIANCE (%)