In [24]:
import pandas as pd
import numpy as np
import scipy as sp
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [25]:
df = pd.read_csv('student_scores.csv')

# **Tasks**

## Part A: Vector & Matrix Fundamentals
1. Represent each student's subject scores as a vector.

2. Compute:
- Norm-1 and Norm-2 of vectors.
- Dot product and angle between two students' score vectors.
- Cross product (for 3D selected subjects).
3. Find the projection of one vector onto another.

In [26]:
# Representing the each students score as a vector.
student_scores = df.to_numpy()

In [27]:
# Finding the magnitude of the vectors or length.
# Norm 1 (Manhattan distance)
np.linalg.norm(student_scores, ord=1, axis=1)

# find the norm-2 (euclidean distance)
np.linalg.norm(student_scores, ord=2, axis=1)

array([175.60751692, 190.52821313, 171.16950663, 153.5187285 ,
       180.72354578, 194.43250757, 191.9635382 , 195.26136331,
       204.87313147, 138.5712813 , 190.63053271, 172.44129436,
       195.98724448, 176.65786142, 150.37951988, 181.56266136,
       161.92590898, 184.71599822, 181.08009278, 204.18863827,
       172.19756096, 144.82403116, 148.73802473, 185.03242959,
       175.50498568, 178.12355263, 179.42965195, 162.4961538 ,
       140.4635184 , 165.24224641, 185.22148903, 205.40447902,
       171.37677789, 166.23477374, 172.36879068, 194.45050784,
       170.32028652, 195.18708974, 188.07711185, 151.12246689,
       186.58510123, 187.669923  , 208.6791796 , 167.79451719,
       207.43432696, 161.61683081, 159.16657941, 179.04468716,
       186.94919096, 165.73774464, 155.89419489, 175.62175264,
       190.        , 155.0709515 , 182.75393293, 172.69047455,
       166.30694513, 205.22426757, 176.05680901, 183.02185662,
       166.12645786, 185.1404872 , 179.68861956, 179.83

In [28]:
# Dot Product between 2 students.
stu_1 = student_scores[0]
stu_2 = student_scores[1]
dot_product = np.dot(stu_1,stu_2)
print(f"The dot product between 2 students are: {dot_product}")
print("------------------------------------------------------")

# Angle between 2 vectors.
stu_1_dist = np.linalg.norm(stu_1,ord=2)
stu_2_dist = np.linalg.norm(stu_2,ord=2)
multiplied_distance = stu_1_dist * stu_2_dist

# find the angle
ang_radians = np.arccos(dot_product / multiplied_distance)

# convert radians to degree
ang_degree = np.degrees(ang_radians)
print(f"Angle between two students score is {ang_degree:.2f}")

The dot product between 2 students are: 31472
------------------------------------------------------
Angle between two students score is 19.84


In [29]:
# Cross Product.
# pick any two students
stu_1 = student_scores[0,:3]
stu_2 = student_scores[1,:3]

# perform the cross product
cross_product = np.cross(stu_1, stu_2)

print(f"The Cross Product of two student's three subject is {cross_product}")

The Cross Product of two student's three subject is [ 3018   716 -4420]


In [30]:
# Finding the Projection of one vector to another.
student_3 = np.array(student_scores[2])
student_4 = np.array(student_scores[3])

# Finding the dot product.
dot_pro = np.dot(student_3,student_4)

# Finding the euclidean distance of any 1 student.
distance = np.linalg.norm(student_3,ord=2)
sqr_distance = distance**2

# Finding the projection of 1 student on another student.
projection = (dot_pro/sqr_distance) * student_3
print(f"Projection of the vector student 4 on 3 is: {projection} ")

Projection of the vector student 4 on 3 is: [43.70285675 43.70285675 55.06559951 80.41325643 65.55428513 69.05051367] 


## **Part B: Matrix Operations**
4. Form a matrix of students x subjects. 

Perform:
- Matrix addition and multiplication.
- Transpose and Inverse (if possible).
- Determinant.

In [31]:
# Creating Matrix.
matrix_1 = np.vstack([student_scores[0]])
matrix_2 = np.vstack([student_scores[1]])

# matrix Addition
added_matrix = matrix_1 + matrix_2

# matrix multiplication
mul_matrix = matrix_1*matrix_2

print(f"Matrix addition is {added_matrix}")
print(f"Matrix multiplication is {mul_matrix}")

Matrix addition is [[178 151 146 151 140 109]]
Matrix multiplication is [[7800 5460 5304 5238 4756 2914]]


In [32]:
# Transpose and Inverse of matrix
transpose_matrix_1 = matrix_1.T
transpose_matrix_2 = matrix_2.T

print(f"Shape of Matrix 1 is {matrix_1.shape}")
print(f"Shape of Matrix 1 transpose is {transpose_matrix_1.shape}") 

"""Matrix Inverse and determinants of matrix can only be perfromed on Square Matrix"""

Shape of Matrix 1 is (1, 6)
Shape of Matrix 1 transpose is (6, 1)


'Matrix Inverse and determinants of matrix can only be perfromed on Square Matrix'

## **Part C: Linear Transformations & Geometry**
5. Explain line, plane, and hyperplane with respect to your dataset dimensions.
6. Show how dimensionality increases from 2D → 3D → higher dimensions with
hyperplanes.

In [33]:
# Explanation with respect to dataset dimensions
print("A line is a 1D subspace (like scores changing along one subject).")
print("A plane is a 2D subspace (like variation across two subjects).")
print("A hyperplane is (d-1) dimensional.")

print("")
# Show how dimensionality increases
print("Dimensionality examples:")
print("2D: points form a plane, a line can split it.")
print("3D: points in space, a plane (2D) can split it.")
print("4D and higher: hyperplanes (3D in 4D space, etc.) generalize this idea.")

A line is a 1D subspace (like scores changing along one subject).
A plane is a 2D subspace (like variation across two subjects).
A hyperplane is (d-1) dimensional.

Dimensionality examples:
2D: points form a plane, a line can split it.
3D: points in space, a plane (2D) can split it.
4D and higher: hyperplanes (3D in 4D space, etc.) generalize this idea.


## **Part D: Eigenvalues & Decomposition**

7. Compute the eigenvalues and eigenvectors of the covariance matrix.
8. Perform LU Decomposition of the dataset matrix.
9. Perform Singular Value Decomposition (SVD) and explain its role in dimensionality
reduction.

In [34]:
# Computing the eigenvalues and eigenvectors
cov = np.cov(student_scores, rowvar=False)

# find the eigen_values and eigen_vectors
eigen_values, eigen_vectors = np.linalg.eig(cov)

print("Covariance matrix are given below")
for i in cov:
    print(i)
    
print("\nEigen Values are given below")
for i in eigen_values:
    print(i)

print("\nEigen Vectors are given below")
for i in eigen_vectors:
    print(i)

Covariance matrix are given below
[ 3.48409548e+02 -3.40703518e-01 -1.73409548e+01 -6.66557789e+00
 -6.16256281e+00  5.29497487e+00]
[ -0.34070352 293.72221106 -29.59879397 -10.89708543  -2.87718593
 -46.7441206 ]
[-17.34095477 -29.59879397 287.74047739  11.50972362   0.57439698
  33.82407035]
[ -6.66557789 -10.89708543  11.50972362 357.43816583   1.09841709
  20.03532663]
[ -6.16256281  -2.87718593   0.57439698   1.09841709 311.53645729
   2.04065327]
[  5.29497487 -46.7441206   33.82407035  20.03532663   2.04065327
 290.9478392 ]

Eigen Values are given below
244.0115795630619
259.4711735655514
388.4200630377345
350.69490775598473
310.6694448984453
336.52752967168476

Eigen Vectors are given below
[-0.07784315 -0.1741462   0.21984069  0.94620619 -0.12631825 -0.0634126 ]
[ 0.58440726 -0.5208137   0.42241546 -0.17536548 -0.00880064 -0.42184823]
[-0.21818479 -0.82202943 -0.38939287 -0.04403609  0.08993836  0.33912846]
[-0.06317724 -0.00202894 -0.63894462  0.08855092 -0.02488286 -0.76111

In [35]:
from scipy import linalg
# Performing Lu Decomposition on the dataset.
square_mat = student_scores

square_mat = square_mat = square_mat.T.dot(square_mat)

P, L, U = linalg.lu(square_mat)
print(f"The Permutation Matric Is :\n{P}")

print(f"\nThe Lower Triangular Matric Is :\n{L}")

print(f"\nThe Upper Triangular Matric Is :\n{np.round(U,2)}")

The Permutation Matric Is :
[[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]]

The Lower Triangular Matric Is :
[[1.         0.         0.         0.         0.         0.        ]
 [0.92761687 1.         0.         0.         0.         0.        ]
 [0.94360314 0.51081462 1.         0.         0.         0.        ]
 [0.95940202 0.53288308 0.41696864 1.         0.         0.        ]
 [0.94370766 0.5362781  0.37989214 0.21509409 1.         0.        ]
 [0.92137566 0.43391362 0.47204126 0.24503526 0.18410105 1.        ]]

The Upper Triangular Matric Is :
[[1061974.    985105.   1002082.   1018860.   1002193.    978477.  ]
 [      0.    122411.98   62529.83   65231.27   65646.86   53116.23]
 [      0.         0.     98344.13   41006.42   37360.16   46422.49]
 [      0.         0.         0.     90271.63   19416.89   22119.73]
 [      0.         0.         0.         0.     76959.52   14168.33]
 [      0.       

In [36]:
# Perform Singular Value Decomposition (SVD) and explain its role in dimensionality reduction.
U, S, V_t = np.linalg.svd(student_scores, full_matrices=False)

print(f"U matrix (left singular vectors) shape: {U.shape}")
print(f"S (singular values): {S}")
print(f"V_t matrix (right singular vectors) shape: {V_t.shape}")

"SVD breaks data into parts. By keeping only the most important parts (components), we reduce the number of features while keeping most of the information. This makes the data simpler and easier to work with."

U matrix (left singular vectors) shape: (200, 6)
S (singular values): [2464.55722096  274.11179373  260.94654415  256.33412243  240.64902884
  222.3132763 ]
V_t matrix (right singular vectors) shape: (6, 6)


'SVD breaks data into parts. By keeping only the most important parts (components), we reduce the number of features while keeping most of the information. This makes the data simpler and easier to work with.'

## **Part E: Dimensionality Reduction**
10. Apply Principal Component Analysis (PCA) to reduce the dataset from multiple
subjects to 2 dimensions.
11. Apply Linear Discriminant Analysis (LDA) to classify students into "Above Average"
and "Below Average" categories.

In [37]:
# Principal Component Analysis.
scaler = StandardScaler()
standard_data = scaler.fit_transform(student_scores)
pca = PCA(n_components = 2)
standard_pca = pca.fit_transform(standard_data)
print("PCA - 2D projection of the data:")
print(standard_pca[:, :])

PCA - 2D projection of the data:
[[-1.83761461e+00 -2.53916840e-01]
 [ 4.53300275e-01  1.35975274e+00]
 [ 1.11984377e+00 -7.01860617e-01]
 [ 6.19072521e-01  5.80637162e-01]
 [-5.62775831e-01 -3.85487608e-02]
 [ 1.76185962e+00 -4.59479850e-01]
 [-4.11346408e-01 -8.74201657e-01]
 [ 5.59293475e-01  1.26885759e+00]
 [ 2.32109859e-01  8.97097859e-01]
 [-1.13602463e+00 -7.33562006e-01]
 [-1.90712497e+00 -1.38571835e+00]
 [-1.23067994e+00  3.14412142e-01]
 [ 1.91321657e+00  1.95181030e+00]
 [ 4.96894954e-01  2.15916670e-01]
 [-3.12332322e-01 -1.30555270e+00]
 [-4.17097335e-01  1.02335792e+00]
 [-1.14564926e+00 -1.32928406e+00]
 [ 1.16072392e+00 -1.40443407e+00]
 [ 1.51529426e+00 -3.95036194e-01]
 [ 1.45863090e+00 -6.63237469e-01]
 [-1.74266890e+00 -6.81692385e-03]
 [ 3.70528521e-01  1.24918032e+00]
 [-8.69978780e-01 -7.64047617e-01]
 [ 5.70785079e-01  6.99862785e-01]
 [-1.54762464e+00  2.94167156e-01]
 [ 1.66385575e+00 -9.04312553e-01]
 [ 7.90866859e-01 -5.22544201e-01]
 [ 2.07882760e-02 -1.3

In [38]:
average_score = np.mean(student_scores, axis=1)
threshold = np.median(average_score)  # Set threshold as the median score

# Classify as Above Average (1) or Below Average (0)
y = np.where(average_score > threshold, 1, 0)

# Apply LDA
lda = LinearDiscriminantAnalysis(n_components=1)
X_lda = lda.fit_transform(standard_pca, y)

# Display the LDA transformed data 
print("LDA - Student classification into Above Average/Below Average (first 5 students):")
print(X_lda[:, :])

# Display the class labels
print("\nClass labels (Above Average: 1, Below Average: 0):")
print(y[:])

LDA - Student classification into Above Average/Below Average (first 5 students):
[[-1.71434677]
 [ 0.47761075]
 [ 1.00858924]
 [ 0.59843018]
 [-0.52337041]
 [ 1.61402085]
 [-0.41823305]
 [ 0.57204171]
 [ 0.25303037]
 [-1.08414272]
 [-1.82653347]
 [-1.12768868]
 [ 1.85606115]
 [ 0.469775  ]
 [-0.34463427]
 [-0.34351739]
 [-1.11819576]
 [ 1.01685126]
 [ 1.3881509 ]
 [ 1.32430494]
 [-1.61589973]
 [ 0.3962094 ]
 [-0.8387801 ]
 [ 0.5586928 ]
 [-1.4223792 ]
 [ 1.5043974 ]
 [ 0.71116215]
 [ 0.0134454 ]
 [-0.91573083]
 [ 0.0273833 ]
 [ 1.44519469]
 [ 0.5379983 ]
 [ 0.57059183]
 [-0.3656671 ]
 [-1.38560314]
 [ 0.35153718]
 [-0.89059759]
 [-0.93431401]
 [-1.1124857 ]
 [ 0.51650179]
 [ 0.57075837]
 [ 1.55159442]
 [ 1.37167729]
 [ 0.87555576]
 [ 0.50739533]
 [ 1.31123149]
 [-0.40639019]
 [ 0.56253724]
 [-1.36444742]
 [-1.09892381]
 [ 0.13439105]
 [-2.77333611]
 [-0.2736028 ]
 [ 0.75064469]
 [ 0.48940284]
 [ 1.5040823 ]
 [-0.69688915]
 [ 1.15433588]
 [-2.4475543 ]
 [-0.47090867]
 [-1.24002075]
 [ 