In [1]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_iris
data = load_iris()
X=data.data
y=data.target

from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=1)

In [2]:
def class_mean_vectors(X,y):
  class_mean=[]
  classes=np.unique(y)
  for c in classes:
    class_mean.append(np.mean(X[y==c],axis=0))
  return np.array(class_mean)

mean_vectors = class_mean_vectors(x_train,y_train)
print(mean_vectors)

[[4.96153846 3.36666667 1.46666667 0.23333333]
 [5.94594595 2.73243243 4.22972973 1.30540541]
 [6.525      2.95227273 5.53409091 2.02045455]]


In [3]:
def within_class_scatter_matrix(X,y):
  n_features=X.shape[1]
  class_scatter_matrix=np.zeros((n_features,n_features))
  classes=np.unique(y)
  for c in classes:
    class_scatter_matrix += np.cov(X[y == c].T)
  return class_scatter_matrix

Sw = within_class_scatter_matrix(x_train,y_train)
print(Sw)

[[0.78828342 0.24938374 0.52690563 0.12042372]
 [0.24938374 0.33751488 0.15220968 0.09895524]
 [0.52690563 0.15220968 0.55554041 0.13915734]
 [0.12042372 0.09895524 0.13915734 0.13248827]]


In [4]:
def between_class_scatter_matrix(X,y,mean_vectors):
  overall_mean = np.mean(X, axis=0)
  n_features = X.shape[1]
  Sb = np.zeros((n_features,n_features))
  for i, mean_vec in enumerate(mean_vectors):
    n = X[y == i+1,:].shape[0]
    mean_vec = mean_vec.reshape(-1,1)
    overall_mean = overall_mean.reshape(-1,1)
    Sb += n * (mean_vec - overall_mean).dot((mean_vec - overall_mean).T)
  return Sb

Sb = between_class_scatter_matrix(x_train, y_train, mean_vectors)
print(Sb)

[[ 28.95400302 -12.63106375  78.0084394   32.39016015]
 [-12.63106375   8.08550818 -35.42484717 -13.76336643]
 [ 78.0084394  -35.42484717 210.92642514  87.06770246]
 [ 32.39016015 -13.76336643  87.06770246  36.28632564]]


In [5]:
eigenvalues,eigenvectors = np.linalg.eig(np.linalg.inv(Sw).dot(Sb))

In [6]:
eigen_pairs = [(np.abs(eigenvalues[i]),eigenvectors[:,i]) for i in range(len(eigenvalues))]
eigen_pairs = sorted(eigen_pairs, key=lambda k: k[0], reverse=True)

print(eigen_pairs)

[(770.563768550306, array([-0.3152988 , -0.35428878,  0.67267328,  0.56795843])), (7.815731727335231, array([-0.14169308,  0.67961822, -0.1239499 ,  0.70899828])), (5.962107939380014e-14, array([-0.28080368, -0.28629583, -0.30133019,  0.86509197])), (3.502412594530579e-14, array([ 0.87283888, -0.00281439, -0.12900706, -0.47063951]))]


In [7]:
W = np.hstack((eigen_pairs[0][1][:,np.newaxis].real, eigen_pairs[1][1][:,np.newaxis].real))

In [8]:
x_train_lda = x_train.dot(W)
x_test_lda = x_test.dot(W)

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

lr = LogisticRegression()
lr.fit(x_train_lda,y_train)
y_pred=lr.predict(x_test_lda)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:",accuracy)

Accuracy: 1.0


In [10]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score

# Load Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit LDA model
lda = LinearDiscriminantAnalysis()
lda.fit(X_train, y_train)

# Make predictions on test data
y_pred = lda.predict(X_test)

# Evaluate model performance (accuracy)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 1.0
