In [6]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from scipy.linalg import lu

In [5]:
! pip install  scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.8.0-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting joblib>=1.3.0 (from scikit-learn)
  Downloading joblib-1.5.3-py3-none-any.whl.metadata (5.5 kB)
Collecting threadpoolctl>=3.2.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.8.0-cp313-cp313-win_amd64.whl (8.0 MB)
   ---------------------------------------- 0.0/8.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.0 MB ? eta -:--:--
   - -------------------------------------- 0.3/8.0 MB ? eta -:--:--
   --- ------------------------------------ 0.8/8.0 MB 1.4 MB/s eta 0:00:05
   ----- ---------------------------------- 1.0/8.0 MB 1.5 MB/s eta 0:00:05
   ------- -------------------------------- 1.6/8.0 MB 1.7 MB/s eta 0:00:04
   ---------- ----------------------------- 2.1/8.0 MB 1.9 MB/s eta 0:00:04
   ------------- -------------------------- 2.6/8.0 MB 2.0 MB/s eta 0:00:03
   -----------------

In [None]:
df = pd.read_csv(r"C:\Users\parth\Downloads\student_performance_500.csv")

subjects = df.columns[1:]
X = df[subjects].values

print("Dataset Shape:", X.shape)

Dataset Shape: (500, 5)


In [15]:
v1 = X[0]
v2 = X[1]

norm1_L1 = np.linalg.norm(v1, 1)
norm1_L2 = np.linalg.norm(v1, 2)

print("\nL1 Norm:", norm1_L1)
print("L2 Norm:", norm1_L2)



L1 Norm: 424.0
L2 Norm: 190.45734430575263


In [16]:

dot_product = np.dot(v1, v2)
angle = np.degrees(
    np.arccos(dot_product / (np.linalg.norm(v1) * np.linalg.norm(v2)))
)

print("\nDot Product:", dot_product)
print("Angle (degrees):", angle)



Dot Product: 30395
Angle (degrees): 14.454035622037377


In [17]:

cross_product = np.cross(v1[:3], v2[:3])
print("\nCross Product (3 subjects):", cross_product)



Cross Product (3 subjects): [ 2378 -1252 -1254]


In [18]:

projection = (dot_product / np.dot(v2, v2)) * v2
print("\nProjection (first 5 values):", projection[:5])


Projection (first 5 values): [ 87.28728692  60.42966017 105.19237141  57.07245683  91.76355804]


In [None]:
X_T = X.T
cov_matrix = np.cov(X, rowvar=False)
print("\nCovariance Matrix Shape:", cov_matrix.shape)


Covariance Matrix Shape: (5, 5)


## Line, Plane, and Hyperplane in the Context of the Student Performance Dataset

In this dataset, each student is represented by a vector of subject scores.  
If a student has scores in **n subjects**, then that student corresponds to a point in **n-dimensional space**.

---

## 1. Line (1D)

A **line** exists in **one-dimensional space**.

### In our dataset:
- If we consider **only one subject** (e.g., Mathematics),
- Each student has a single value (Math score).

\[
\mathbf{x} = [x]
\]

All students lie along a **single line (number line)**.

**Interpretation**:  
Performance is compared using only **one criterion**.

---

## 2. Plane (2D)

A **plane** exists in **two-dimensional space**.

### In our dataset:
- If we select **two subjects** (e.g., Mathematics and Physics),
- Each student is represented as:

\[
\mathbf{x} = [x_1, x_2]
\]

Each student becomes a **point on a 2D plane**.

**Interpretation**:  
We can visually compare students using a **scatter plot**, observing correlations between two subjects.

---

## 3. 3D Space (3D)

A **three-dimensional space** is defined by **three independent axes**.

### In our dataset:
- If we select **three subjects** (e.g., Math, Physics, Chemistry),
- Each student is represented as:

\[
\mathbf{x} = [x_1, x_2, x_3]
\]

Students now lie in a **3D coordinate space**.

**Interpretation**:  
Patterns in student performance become harder to visualize but capture **more information**.

---

## 4. Hyperplane (Higher Dimensions)

A **hyperplane** is a generalization of a line and plane to **n dimensions**.

### In our dataset:
- With **n subjects**, each student is a point in **n-dimensional space**:

\[
\mathbf{x} = [x_1, x_2, \dots, x_n]
\]

- All students collectively form a cloud of points in an **n-dimensional hyperplane**.

**Interpretation**:  
Human visualization is no longer possible, but mathematical tools such as **PCA, LDA, and SVD** help analyze patterns.

---

## 5. Dimensionality Growth: 2D → 3D → nD

| Number of Subjects | Dimension | Geometric Form |
|-------------------|-----------|----------------|
| 1 | 1D | Line |
| 2 | 2D | Plane |
| 3 | 3D | 3D Space |
| n > 3 | nD | Hyperplane |

As the number of subjects increases:
- **Information captured increases**
- **Visualization becomes difficult**
- **Dimensionality reduction becomes essential**

---

## 6. Why Hyperplanes Matter in This Dataset

- Each student’s performance is a point in a high-dimensional hyperplane
- **Decision boundaries in LDA** are hyperplanes separating “Above Average” and “Below Average” students
- **PCA projects data from a high-dimensional hyperplane to a lower-dimensional plane (2D)**

---

## Conclusion

- A **line** models performance in one subject
- A **plane** models performance in two subjects
- A **hyperplane** models real-world student performance across many subjects
- Increasing dimensionality provides richer information but requires linear algebra techniques to analyze effectively



In [11]:
print("\nEach student is a point in", X.shape[1], "dimensional space (hyperplane).")


Each student is a point in 5 dimensional space (hyperplane).


In [19]:
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
print("\nTop 5 Eigenvalues:")
print(eigenvalues[:5])


Top 5 Eigenvalues:
[249.55012843 228.13838326 212.90533937 194.81564332 198.83585632]


In [20]:
P, L, U = lu(cov_matrix)
print("\nLU Decomposition completed.")
print("L Shape:", L.shape)
print("U Shape:", U.shape)


LU Decomposition completed.
L Shape: (5, 5)
U Shape: (5, 5)


In [21]:
U_svd, S_svd, Vt_svd = np.linalg.svd(X, full_matrices=False)
print("\nTop 3 Singular Values:")
print(S_svd[:3])



Top 3 Singular Values:
[3781.91420073  342.2177717   326.33421191]


In [22]:
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
print("\nPCA Explained Variance Ratio:")
print(pca.explained_variance_ratio_)


PCA Explained Variance Ratio:
[0.2301602  0.21041214]


In [23]:
average_scores = X.mean(axis=1)
labels = np.where(average_scores >= average_scores.mean(), 1, 0)
lda = LDA(n_components=1)
X_lda = lda.fit_transform(X, labels)
print("\nLDA completed. Reduced shape:", X_lda.shape)


LDA completed. Reduced shape: (500, 1)
