# Import key libraries

In [2]:
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt

# Get the data

In [3]:
# Load the Iris dataset; later cells index the returned object by key ('data', 'target', ...).
iris_data = load_iris()

In [4]:
# Display the loaded dataset object.
# NOTE(review): this prints the entire 'data' array; a preview such as iris_data['data'][:5] keeps the output short.
iris_data

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [8]:
# List the fields available on the dataset object.
iris_data.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [12]:
# Feature matrix (four measurements per row) and the matching target vector.
X, y = iris_data['data'], iris_data['target']

In [14]:
# Show the class names stored alongside the targets.
iris_data['target_names']

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [22]:

class LDA:
    """Linear Discriminant Analysis classifier with one pooled covariance matrix.

    Each class ``c`` is scored with the linear discriminant

        delta_c(x) = x @ inv(S) @ mu_c - 0.5 * mu_c @ inv(S) @ mu_c + log(pi_c)

    where ``mu_c`` is the class mean, ``pi_c`` the empirical class prior and
    ``S`` the pooled within-class covariance. A sample is assigned to the
    class with the largest score.

    Attributes set by ``fit``:
        class_priors: dict mapping class label -> prior probability.
        covariance: (d, d) pooled within-class covariance matrix.
        means: dict mapping class label -> (d,) mean vector.
    """

    def __init__(self, X=None, y=None):
        """Optionally store training data so the estimators can be called directly.

        Args:
            X: (n, d) array-like of samples, or None.
            y: (n,) array-like of class labels, or None.
        """
        # Coerce to arrays so boolean masking (self.X[self.y == c]) also works for lists.
        self.X = None if X is None else np.asarray(X)
        self.y = None if y is None else np.asarray(y)

    def mean(self):
        """Return a dict mapping each class label to its per-feature mean vector."""
        return {c: np.mean(self.X[self.y == c], axis=0) for c in np.unique(self.y)}

    def prior(self):
        """Return a dict mapping each class label to its relative frequency in ``y``."""
        classes, counts = np.unique(self.y, return_counts=True)
        total = len(self.y)
        return {c: count / total for c, count in zip(classes, counts)}

    def cov_mat(self):
        """Return the pooled within-class covariance matrix.

        The estimate is the sum of the per-class scatter matrices divided by
        ``n - K`` (samples minus classes), i.e. the unbiased pooled estimator.

        Returns:
            (d, d) ndarray.

        Raises:
            ValueError: if there are not more samples than classes, in which
                case the estimator's denominator would be zero or negative.
        """
        classes = np.unique(self.y)
        n, d = self.X.shape
        dof = n - len(classes)
        if dof <= 0:
            raise ValueError(
                "pooled covariance needs more samples than classes "
                f"(got {n} samples, {len(classes)} classes)"
            )
        scatter = np.zeros((d, d))
        for c in classes:
            X_c = self.X[self.y == c]
            # Accumulate the raw scatter matrix. Scaling a biased covariance
            # (divisor N_c) by N_c - 1 would shrink each class contribution.
            centered = X_c - X_c.mean(axis=0)
            scatter += centered.T @ centered
        return scatter / dof

    def fit(self, X, y):
        """Estimate class priors, pooled covariance and class means from data.

        Args:
            X: (n, d) array-like of training samples.
            y: (n,) array-like of class labels.

        Raises:
            ValueError: if ``X`` is not 2-D, if ``X`` and ``y`` differ in
                length, or if there are not more samples than classes.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
        if len(X) != len(y):
            raise ValueError(f"X and y have different lengths: {len(X)} != {len(y)}")
        self.X, self.y = X, y
        self.class_priors = self.prior()
        self.covariance = self.cov_mat()
        self.means = self.mean()

    def discriminant_function(self, x, c):
        """Return the linear discriminant score of sample ``x`` for class ``c``.

        Args:
            x: (d,) feature vector.
            c: class label; must be a key of ``self.means``.

        Returns:
            Scalar score; larger means ``x`` is more likely to belong to ``c``.
        """
        inv_cov = np.linalg.inv(self.covariance)
        mean_vec = self.means[c]
        linear = x @ inv_cov @ mean_vec
        offset = -0.5 * mean_vec @ inv_cov @ mean_vec
        return linear + offset + np.log(self.class_priors[c])

    def predict(self, X):
        """Predict the class label of every row of ``X``.

        Scores are computed for all samples and classes at once, inverting the
        covariance a single time. Ties go to the first class in ``self.means``
        order.

        Args:
            X: (m, d) array-like of samples.

        Returns:
            (m,) ndarray of predicted class labels.
        """
        labels = list(self.means)
        classes = np.array(labels)
        X = np.asarray(X, dtype=float)
        if X.size == 0:
            return np.array([], dtype=classes.dtype)

        inv_cov = np.linalg.inv(self.covariance)
        mean_mat = np.stack([self.means[c] for c in labels])      # (K, d)
        projected = inv_cov @ mean_mat.T                          # (d, K): inv(S) @ mu_c per column
        quad = np.einsum("kj,jk->k", mean_mat, projected)         # mu_c @ inv(S) @ mu_c for each class
        log_priors = np.log([self.class_priors[c] for c in labels])

        scores = X @ projected - 0.5 * quad + log_priors          # (m, K)
        return classes[np.argmax(scores, axis=1)]


In [23]:
# NOTE(review): notebook convention is to keep all imports in the first code cell.
from sklearn.model_selection import train_test_split

# Split X/y into train and test partitions (test_size=0.3, stratify=y);
# random_state is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the classifier on the training partition only.
lda = LDA()
lda.fit(X_train, y_train)

# Predict labels for the held-out samples.
y_pred = lda.predict(X_test)

# Accuracy = fraction of test predictions that equal the true label.
accuracy = np.mean(y_pred == y_test)
print("Predictions:", y_pred[:10])
print("Accuracy:", accuracy)


Predictions: [2 1 1 1 2 2 1 1 0 2]
Accuracy: 0.9777777777777777
