In [None]:
# Importing necessary libraries for numerical operations, data handling, and visualization
import numpy as np  # For numerical operations
import pandas as pd  # For handling data in DataFrame format
import matplotlib.pyplot as plt  # For visualization (if needed)
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA  # For library-based LDA


In [None]:
    # Sample data: 2x2 dataset with two classes
    # Let's create a toy dataset with two features (feature 1 and feature 2) and two classes (0 and 1)
    X = np.array([[1, 2],  # First sample (feature 1 = 1, feature 2 = 2)
                [2, 3],  # Second sample (feature 1 = 2, feature 2 = 3)
                [3, 4],  # Third sample (feature 1 = 3, feature 2 = 4)
                [6, 5],  # Fourth sample (feature 1 = 6, feature 2 = 5)
                [7, 8],  # Fifth sample (feature 1 = 7, feature 2 = 8)
                [8, 9]])  # Sixth sample (feature 1 = 8, feature 2 = 9)

    # Class labels for each sample
    y = np.array([0, 0, 0, 1, 1, 1])  # Class 0 for the first three samples, class 1 for the last three


In [3]:
# Create the LDA model using scikit-learn
lda = LDA()

# Fit the model with data X and target labels y
lda.fit(X, y)

# Print the LDA coefficients and intercept
print("LDA Coefficients:", lda.coef_)  # Coefficients learned by LDA
print("LDA Intercept:", lda.intercept_)  # Intercept learned by LDA

# Predict on the same dataset
y_pred = lda.predict(X)
print("Predictions using LDA (library):", y_pred)


LDA Coefficients: [[16.4 -7.6]]
LDA Intercept: [-34.53333333]
Predictions using LDA (library): [0 0 0 1 1 1]


In [4]:
# Separate the samples based on their class labels
X_class_0 = X[y == 0]
X_class_1 = X[y == 1]

# Calculate the mean of each class
mean_class_0 = np.mean(X_class_0, axis=0)
mean_class_1 = np.mean(X_class_1, axis=0)

# Calculate the overall mean of the entire dataset
mean_overall = np.mean(X, axis=0)

print("Mean of Class 0:", mean_class_0)
print("Mean of Class 1:", mean_class_1)
print("Overall Mean:", mean_overall)


Mean of Class 0: [2. 3.]
Mean of Class 1: [7.         7.33333333]
Overall Mean: [4.5        5.16666667]


In [5]:
# Calculate the within-class scatter matrix (S_W)
S_W = np.zeros((X.shape[1], X.shape[1]))

# Within-class scatter matrix for class 0
for x in X_class_0:
    x = x.reshape(-1, 1)  # Reshape to column vector
    mean_class_0 = mean_class_0.reshape(-1, 1)  # Reshape to column vector
    S_W += (x - mean_class_0) @ (x - mean_class_0).T

# Within-class scatter matrix for class 1
for x in X_class_1:
    x = x.reshape(-1, 1)  # Reshape to column vector
    mean_class_1 = mean_class_1.reshape(-1, 1)  # Reshape to column vector
    S_W += (x - mean_class_1) @ (x - mean_class_1).T

print("Within-Class Scatter Matrix (S_W):\n", S_W)

# Calculate the between-class scatter matrix (S_B)
mean_class_0 = mean_class_0.reshape(-1, 1)
mean_class_1 = mean_class_1.reshape(-1, 1)
mean_overall = mean_overall.reshape(-1, 1)

S_B = (mean_class_0 - mean_overall) @ (mean_class_0 - mean_overall).T + \
      (mean_class_1 - mean_overall) @ (mean_class_1 - mean_overall).T

print("Between-Class Scatter Matrix (S_B):\n", S_B)


Within-Class Scatter Matrix (S_W):
 [[ 4.          6.        ]
 [ 6.         10.66666667]]
Between-Class Scatter Matrix (S_B):
 [[12.5        10.83333333]
 [10.83333333  9.38888889]]


In [6]:
# Compute the inverse of the within-class scatter matrix
S_W_inv = np.linalg.inv(S_W)

# Compute the eigenvalues and eigenvectors of the matrix S_W_inv * S_B
eigenvalues, eigenvectors = np.linalg.eig(S_W_inv @ S_B)

print("Eigenvalues:\n", eigenvalues)
print("Eigenvectors:\n", eigenvectors)

# Select the eigenvector corresponding to the largest eigenvalue (LDA direction)
lda_direction = eigenvectors[:, np.argmax(eigenvalues)]

print("LDA Direction (Eigenvector corresponding to largest eigenvalue):\n", lda_direction)


Eigenvalues:
 [ 6.13333333e+00 -3.55271368e-15]
Eigenvectors:
 [[ 0.90731061 -0.65493054]
 [-0.42046101  0.75568908]]
LDA Direction (Eigenvector corresponding to largest eigenvalue):
 [ 0.90731061 -0.42046101]


In [7]:
# Project the data points onto the LDA direction
X_lda = X @ lda_direction

# Print the transformed dataset
print("Transformed Dataset (LDA projection):\n", X_lda)
                                                                                                                                                                        

Transformed Dataset (LDA projection):
 [0.06638858 0.55323817 1.04008777 3.34155858 2.98748614 3.47433574]


In [8]:
# Compare the results of manual LDA vs. library-based LDA
# For simplicity, we classify the projected points based on their sign.
# Sign of the transformed dataset will correspond to the predicted class.

manual_predictions = np.sign(X_lda)  # Using sign function to get class labels

print("Predictions using Manual LDA:", manual_predictions)
print("Predictions using LDA (library):", y_pred)


Predictions using Manual LDA: [1. 1. 1. 1. 1. 1.]
Predictions using LDA (library): [0 0 0 1 1 1]
