# Homework

In [None]:
# Please implement LDA class below

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split

In [19]:
class LDA:
    def __init__(self, n_components):
        self.n_components = n_components
        self.linear_discriminats = None
    
    def fit(self, X, y):
        number_of_features = X.shape[1]
        labels = np.unique(y)
        
        mean_overall = np.mean(X, axis=0)
        
        s_w = np.zeros((number_of_features, number_of_features))
        s_b = np.zeros((number_of_features, number_of_features))
        
        for i in labels:
            X_c = X[i == y]
            mean_c = np.mean(X_c, axis=0)
            s_w += (X_c - mean_c).T.dot(X_c - mean_c)
            
            n_c = X_c.shape[0]
            mean_difference = (mean_c - mean_overall).reshape(number_of_features, 1)
            
            s_b += n_c * (mean_difference).dot(mean_difference.T)
            
        A = np.linalg.inv(s_w).dot(s_b)
        
        eigenvalues, eigenvectors = np.linalg.eig(A)
        eigenvectors = eigenvectors.T
        indices = np.argsort(abs(eigenvalues))[::1]
        eigenvalues = eigenvalues[indices]
        eigenvectors = eigenvectors[indices]
        self.linear_discriminats = eigenvectors[0:self.n_components]

        
    def transform(self, X):
        return np.dot(X,self.linear_discriminats.T)
        
    def score(self, X, y):
        X_transformed = self.transform(X)
        predictions = np.argmax(X_transformed, axis=1)
        accuracy = np.mean(predictions == y)
        return accuracy
            
        
        
        
        
        

# Testing

In [24]:
class0_len = 1000
X0 = np.random.multivariate_normal([1,2,-1], np.array([[1, 0.9, 0.9], [0.9, 1, 0.9], [0.9, 0.9, 1]]), size=class0_len)
y0 = [0]*class0_len

class1_len = 3000
X1 = np.random.multivariate_normal([1,1,0], np.array([[1, 0.9, 0.9], [0.9, 1, 0.9], [0.9, 0.9, 1]]), size=class1_len)
y1 = [1]*class1_len

X = np.concatenate((X0, X1))
y = y0+y1

X_train, X_test, y_train, y_test = train_test_split(X, np.array(y))

In [25]:
lda = LDA(1)
lda.fit(X_train, y_train)
accuracy = lda.score(X_test, y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 24.90%
