In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [12]:
class DiscriminentAnalysis():
    """Regularized discriminant analysis classifier with Gaussian class models.

    Each class gets its own covariance estimate, which is shrunk in two
    steps during ``fit``:

    1. ``beta`` blends the class covariance with the prior-weighted pooled
       covariance (``beta=1`` keeps the class covariance, ``beta=0`` uses
       only the pooled one).
    2. ``alpha`` blends the result of step 1 with a scaled identity matrix
       (``alpha=0`` disables this step).

    Parameters
    ----------
    alpha : float, default 0.0
        Weight of the scaled-identity term in the final covariance.
    beta : float, default 0.0
        Weight of the per-class covariance versus the pooled covariance.

    Attributes
    ----------
    learned : bool
        True once ``fit`` has completed.
    class_names : array-like
        Distinct labels found in ``y`` by ``fit``.
    class_priors, class_means : dict
        Per-label relative frequency and feature mean vector.
    regularized_covariances, rda_covariances : dict
        Per-label covariance after step 1 and after step 2 respectively.
    """

    def __init__(self, alpha=0.0, beta=0.0):
        self.learned = False
        self.alpha = alpha
        self.beta = beta
        self.class_names = []
        self.class_priors = {}
        self.class_means = {}
        self.regularized_covariances = {}
        self.rda_covariances = {}

    def fit(self, X, y):
        """Estimate priors, means and regularized covariances per class.

        Parameters
        ----------
        X : array-like, indexed as ``X[rows, :]``
            Training samples, one row per sample.
        y : array-like
            Class label of each row of ``X``.

        Returns
        -------
        DiscriminentAnalysis
            ``self``, so calls can be chained.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        # Build fresh containers so that refitting on different data never
        # leaves entries from a previous fit behind.
        priors = {}
        means = {}
        class_covariances = {}
        pooled_covariance = 0

        self.class_names = np.unique(y)
        for label in self.class_names:
            class_indices = np.where(y == label)[0]
            class_samples = X[class_indices, :]
            priors[label] = float(len(class_indices)) / len(y)
            means[label] = np.mean(class_samples, axis=0)
            # np.cov returns a 0-d array for a single feature; force 2-D so
            # the trace / determinant / pinv calls below work uniformly.
            class_covariances[label] = np.atleast_2d(
                np.cov(class_samples, rowvar=0)
            )
            pooled_covariance = (
                pooled_covariance + class_covariances[label] * priors[label]
            )

        # Step 1: shrink each class covariance towards the pooled covariance.
        regularized = {
            label: (self.beta * class_covariances[label])
            + ((1 - self.beta) * pooled_covariance)
            for label in self.class_names
        }

        # Step 2: shrink towards a scaled identity matrix.
        # NOTE(review): the identity is scaled by trace / class_prior. The
        # usual RDA formulation scales by trace / n_features instead; this is
        # kept as-is because it looks deliberate -- confirm against the
        # intended formula.
        rda = {}
        for label in self.class_names:
            cov = regularized[label]
            identity_scale = (1 / priors[label]) * np.trace(cov)
            rda[label] = ((1 - self.alpha) * cov) + (
                self.alpha * identity_scale * np.eye(cov.shape[0])
            )

        self.class_priors = priors
        self.class_means = means
        self.regularized_covariances = regularized
        self.rda_covariances = rda
        self.learned = True
        return self

    @staticmethod
    def _log_determinant(cov):
        """Return log|cov|, using the pseudo-determinant if cov is singular."""
        sign, log_det = np.linalg.slogdet(cov)
        if sign > 0 and np.isfinite(log_det):
            return log_det
        # Singular (or numerically non-positive-definite) matrix: log|cov|
        # would be -inf and make this class win unconditionally. Use the log
        # of the product of the significant eigenvalues instead, which pairs
        # with the pseudo-inverse used for the quadratic term in predict().
        eigenvalues = np.linalg.eigvalsh(cov)
        tolerance = (
            max(eigenvalues.max(), 0.0) * len(eigenvalues) * np.finfo(float).eps
        )
        significant = eigenvalues[eigenvalues > tolerance]
        return float(np.sum(np.log(significant)))

    def predict(self, x):
        """Return the most likely class label for one sample.

        Parameters
        ----------
        x : array-like
            A single feature vector; it is flattened to 1-D before use.

        Returns
        -------
        label
            The element of ``class_names`` with the highest discriminant
            score.

        Raises
        ------
        NameError
            If ``fit`` has not been called yet.
        """
        if not self.learned:
            raise NameError('Fit model first')

        x = np.asarray(x, dtype=float).ravel()

        # Gaussian discriminant score per class:
        #   -0.5 * log|Sigma| - 0.5 * (x - mu)^T Sigma^-1 (x - mu) + log(prior)
        class_scores = {}
        for label in self.class_names:
            cov = self.rda_covariances[label]
            centered = x - self.class_means[label]
            # The log of the determinant is required here; using the raw
            # determinant over-penalizes classes with large variance.
            log_det_term = -0.5 * self._log_determinant(cov)
            distance_term = -0.5 * np.dot(
                np.dot(centered, np.linalg.pinv(cov)), centered
            )
            prior_term = np.log(self.class_priors[label])
            class_scores[label] = log_det_term + distance_term + prior_term
        return max(class_scores, key=class_scores.get)

In [3]:
# Load the dataset from the Excel workbook located one directory above the
# current working directory.
data = pd.read_excel('../data.xlsx')

In [4]:
# Split the table into the feature matrix (every column except
# 'Class Label') and the target vector ('Class Label'), both as NumPy arrays.
x = data.drop(columns='Class Label').to_numpy()
y = data['Class Label'].to_numpy()

In [5]:
# Hold out 30% of the rows as a test set; stratify=y requests that both
# partitions keep the class proportions found in y.
# NOTE(review): no random_state is passed, so the split presumably changes
# from run to run -- confirm whether reproducibility is needed.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, stratify=y)