In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, f1_score

In [8]:
def get_clf_eval(y_test, pred):
    """Print accuracy, recall, precision and F1 for a set of predictions.

    Each metric is computed by the corresponding scikit-learn function called
    with only ``(y_test, pred)``, i.e. with that function's default settings.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels.
    pred : array-like
        Predicted labels, aligned element-wise with ``y_test``.

    Returns
    -------
    None
        The four scores are written to stdout on a single line, each rounded
        to four decimal places.
    """
    # Keep the tuple order in sync with the positional fields of the template.
    scores = (
        accuracy_score(y_test, pred),
        recall_score(y_test, pred),
        precision_score(y_test, pred),
        f1_score(y_test, pred),
    )
    print('Accuracy:{0:.4f}, Recall:{1:.4f}, Precision:{2:.4f}, F1-Score:{3:.4f}'.format(*scores))

In [6]:
def make_confusion(y_test, y_pred):
    """Return the confusion matrix of the predictions as a labelled DataFrame.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned element-wise with ``y_test``.

    Returns
    -------
    pandas.DataFrame
        Rows are labelled ``Actual_Negative`` / ``Actual_Positive`` and columns
        ``Predicted_Negative`` / ``Predicted_Positive``. Exactly two row and two
        column labels are supplied, so the matrix is expected to be 2x2.
    """
    return pd.DataFrame(
        confusion_matrix(y_test, y_pred),
        index=['Actual_Negative', 'Actual_Positive'],
        columns=['Predicted_Negative', 'Predicted_Positive'],
    )

In [30]:
class DiscriminentAnalysis():
    """Regularized discriminant analysis (RDA) classifier.

    Every class is modelled as a Gaussian whose covariance is regularized in
    two steps:

    1. ``beta`` blends the class covariance with the prior-weighted pooled
       covariance: ``beta * S_k + (1 - beta) * S_pooled``
       (``beta = 0`` -> shared covariance, ``beta = 1`` -> per-class covariance).
    2. ``alpha`` shrinks that blend towards a scaled identity:
       ``(1 - alpha) * S_k(beta) + alpha * trace(S_k(beta)) / p * I``
       where ``p`` is the number of features.

    Parameters
    ----------
    alpha : float, default 0.0
        Shrinkage towards the scaled identity matrix.
    beta : float, default 0.0
        Weight of the class-specific covariance versus the pooled covariance.

    Attributes
    ----------
    learned : bool
        True once ``fit`` has completed.
    class_names : numpy.ndarray
        Sorted unique labels seen during ``fit``.
    class_priors, class_means : dict
        Per-class prior probability and mean vector, keyed by label.
    regularized_covariances, rda_covariances : dict
        Per-class covariance after step 1 and after step 2, keyed by label.
    """

    def __init__(self, alpha=0.0, beta=0.0):
        self.alpha = alpha
        self.beta = beta
        # reset() is the single place that creates the learned attributes.
        self.reset()

    def reset(self):
        """Discard everything learned by ``fit``; ``alpha``/``beta`` are kept."""
        self.learned = False
        self.class_names = []
        self.class_priors = {}
        self.class_means = {}
        self.regularized_covariances = {}
        self.rda_covariances = {}

    def fit(self, X, y):
        """Estimate priors, means and regularized covariances for each class.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples, one per row.
        y : array-like of length n_samples
            Class label of each row of ``X``.

        Returns
        -------
        DiscriminentAnalysis
            ``self``, to allow call chaining.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, its row count differs from
            ``len(y)``, or a class has fewer than two samples (its covariance
            would be undefined).
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError('X must be a 2-D array of shape (n_samples, n_features)')
        if X.shape[0] == 0:
            raise ValueError('Cannot fit on an empty data set')
        if X.shape[0] != y.shape[0]:
            raise ValueError('X and y must contain the same number of samples')

        # Start from a clean slate so a refit never keeps classes from a previous fit.
        self.reset()
        self.class_names = np.unique(y)
        n_features = X.shape[1]

        class_covariances = {}
        pooled_covariance = np.zeros((n_features, n_features))
        for label in self.class_names:
            class_samples = X[y == label]
            if len(class_samples) < 2:
                raise ValueError(
                    'Class {!r} needs at least two samples to estimate a covariance'.format(label))
            prior = float(len(class_samples)) / len(y)
            self.class_priors[label] = prior
            self.class_means[label] = np.mean(class_samples, axis=0)
            # np.cov returns a 0-d array for a single feature; force a (p, p) matrix.
            class_covariances[label] = np.atleast_2d(np.cov(class_samples, rowvar=False))
            pooled_covariance += prior * class_covariances[label]

        identity = np.eye(n_features)
        for label in self.class_names:
            # Step 1: blend the class covariance with the pooled covariance.
            blended = (self.beta * class_covariances[label]) + ((1 - self.beta) * pooled_covariance)
            self.regularized_covariances[label] = blended
            # Step 2: shrink towards the identity scaled by the average eigenvalue,
            # trace / p, where p is the feature count (not the class prior).
            self.rda_covariances[label] = ((1 - self.alpha) * blended) \
                + (self.alpha * (np.trace(blended) / n_features) * identity)

        self.learned = True
        return self

    @staticmethod
    def _log_pseudo_determinant(covariance):
        """Return the log of the product of the non-negligible eigenvalues.

        Equals ``log(det(covariance))`` for a non-singular matrix and stays
        finite for a singular one, matching the pseudo-inverse used for the
        Mahalanobis term.
        """
        eigenvalues = np.linalg.eigvalsh(covariance)
        tolerance = np.max(np.abs(eigenvalues)) * len(eigenvalues) * np.finfo(float).eps
        significant = eigenvalues[eigenvalues > tolerance]
        return float(np.sum(np.log(significant)))

    def predict(self, x):
        """Return the most probable class label for one sample.

        The score of class ``k`` is the Gaussian log-discriminant
        ``-0.5 * log|S_k| - 0.5 * (x - m_k)^T S_k^+ (x - m_k) + log(prior_k)``.

        Parameters
        ----------
        x : array-like of length n_features
            A single feature vector.

        Returns
        -------
        label
            The element of ``class_names`` with the highest score.

        Raises
        ------
        NameError
            If the model has not been fitted.
        """
        if not self.learned:
            raise NameError('Fit model first')

        x = np.asarray(x, dtype=float).ravel()
        class_scores = {}
        for label in self.class_names:
            covariance = self.rda_covariances[label]
            centered = x - self.class_means[label]
            # The determinant enters the log-density through its logarithm.
            log_det_term = -0.5 * self._log_pseudo_determinant(covariance)
            distance_term = -0.5 * np.dot(np.dot(centered, np.linalg.pinv(covariance)), centered)
            prior_term = np.log(self.class_priors[label])
            class_scores[label] = log_det_term + distance_term + prior_term
        return max(class_scores, key=class_scores.get)

In [None]:
class GridSearchRDA():
    """Exhaustive search over ``alpha`` and ``beta`` for an RDA-style model.

    Parameters
    ----------
    model : object
        Estimator exposing ``reset()``, writable ``alpha`` / ``beta``
        attributes, ``fit(X, y)`` and ``predict(sample)`` for a single sample.
    param_grid : dict
        Must contain the keys ``'alpha'`` and ``'beta'``, each mapping to an
        iterable of candidate values.

    Attributes
    ----------
    alpha, beta
        Best parameter values found by ``fit``.
    best_score : float
        Mean cross-validated score of the best combination.
    best_covariance : dict
        Copy of ``model.rda_covariances`` after refitting the best combination
        on the full data (empty if the model has no such attribute).
    cv_results : list of tuple
        ``(alpha, beta, mean_score)`` for every combination, in search order.
    """

    def __init__(self, model, param_grid):
        self.model = model
        self.param_grid = param_grid
        self.alpha = 0
        self.beta = 0
        self.best_covariance = {}
        self.best_score = 0
        self.cv_results = []

    def fit(self, X, y, cv=1, scoring=None):
        """Score every (alpha, beta) pair with k-fold CV and keep the best.

        Folds are contiguous, unshuffled slices of ``X`` / ``y``; shuffle the
        data beforehand if its rows are ordered by class. With ``cv=1`` there
        is no hold-out: the model is trained and scored on the same data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of length n_samples
        cv : int, default 1
            Number of folds.
        scoring : callable, optional
            ``scoring(y_true, y_pred) -> float`` where larger is better.
            Defaults to ``f1_score``.

        Returns
        -------
        GridSearchRDA
            ``self``; the model is left fitted on all data with the best
            parameters.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` differ in length, ``cv`` is not in
            ``[1, n_samples]``, or a parameter list is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError('X and y must contain the same number of samples')
        if cv < 1 or cv > len(y):
            raise ValueError('cv must be between 1 and the number of samples')

        alpha_list = list(self.param_grid['alpha'])
        beta_list = list(self.param_grid['beta'])
        if not alpha_list or not beta_list:
            raise ValueError("param_grid['alpha'] and param_grid['beta'] must not be empty")

        scorer = f1_score if scoring is None else scoring

        # array_split tolerates sample counts that are not a multiple of cv.
        cv_x = np.array_split(X, cv)
        cv_y = np.array_split(y, cv)

        self.cv_results = []
        best = None
        for alpha in alpha_list:
            for beta in beta_list:
                fold_scores = []
                for i in range(cv):
                    if cv == 1:
                        train_x_cv, train_y_cv = X, y
                    else:
                        # Stack the remaining folds into one array; the model
                        # indexes X as a 2-D array, not as a list of folds.
                        train_x_cv = np.concatenate(cv_x[:i] + cv_x[i + 1:])
                        train_y_cv = np.concatenate(cv_y[:i] + cv_y[i + 1:])

                    self._fit_model(alpha, beta, train_x_cv, train_y_cv)
                    pred = [self.model.predict(sample) for sample in cv_x[i]]
                    fold_scores.append(scorer(cv_y[i], pred))

                mean_score = float(np.mean(fold_scores))
                self.cv_results.append((alpha, beta, mean_score))
                # Strict comparison keeps the first combination on ties.
                if best is None or mean_score > best[2]:
                    best = (alpha, beta, mean_score)

        self.alpha, self.beta, self.best_score = best

        # Refit on all data so the stored covariances belong to the best model.
        self._fit_model(self.alpha, self.beta, X, y)
        self.best_covariance = dict(getattr(self.model, 'rda_covariances', {}))
        return self

    def _fit_model(self, alpha, beta, X, y):
        """Reset the wrapped model, apply the parameters and fit it."""
        self.model.reset()
        self.model.alpha = alpha
        self.model.beta = beta
        self.model.fit(X, y)
                    
                        
                        

In [17]:
# Load the raw data set from the Excel workbook one directory above the
# notebook's working directory (relative path, so it depends on where the
# kernel was started).
data = pd.read_excel('../data.xlsx')

In [48]:
# Split the frame into the target vector (the 'Class Label' column) and the
# feature matrix (every other column), both as NumPy arrays.
y = data['Class Label'].to_numpy()
x = data.drop(columns='Class Label').to_numpy()

In [60]:
# Scratch check of how np.split partitions the feature matrix into 3 row
# blocks. np.split requires an equal division, so this raises ValueError
# unless the number of rows in x is a multiple of 3. `ex` is not referenced
# by any other cell visible here.
ex = np.split(x,3)

In [19]:
# Hold out 20% of the samples as the final test set, stratified on the class
# label. random_state is fixed so Restart & Run All reproduces the same split.
x_trvl, x_test, y_trvl, y_test = train_test_split(x, y, test_size = 0.2, stratify=y, random_state=42)

In [20]:
# Split the remaining data 75/25 into training and validation sets, stratified
# on the class label (0.25 of the 80% left after the test hold-out is 20% of
# the full data). random_state is fixed so the split is reproducible.
x_train, x_valid, y_train, y_valid = train_test_split(x_trvl, y_trvl, test_size=0.25, stratify=y_trvl, random_state=42)

In [38]:
# Fit the classifier on the training split with the default alpha=0.0 and
# beta=0.0. With beta=0 every class uses the prior-weighted pooled covariance,
# and with alpha=0 no identity shrinkage is applied.
# fit() returns the model itself, which is why the cell echoes its repr.
rda = DiscriminentAnalysis()
rda.fit(x_train, y_train)

<__main__.DiscriminentAnalysis at 0x27394d0eb20>

In [2]:
# Scratch check: slicing a list from an index equal to its length returns an
# empty list instead of raising IndexError.
a = [1,2,3,4,5]
print(a[5:])

[]
