In [None]:
import numpy as np
import pandas as pd
import os
from Gaussian_Naive_Bayes import GaussianNaiveBayes
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Dataset locations, relative to the notebook's working directory.
# The paths are assembled with os.path.join instead of hard-coded backslash
# literals: "\D" and "\T" are invalid escape sequences (Python warns about
# them), and a backslash-separated path only resolves on Windows.
train_path = os.path.join(".", "DevanagariHandwrittenCharacterDataset", "Train")

test_path = os.path.join(".", "DevanagariHandwrittenCharacterDataset", "Test")

In [None]:
def data(path):
    """Load every image found under ``path`` into one flat, labelled DataFrame.

    ``path`` is listed once to find the class folders; every entry inside each
    folder is read with ``plt.imread`` and flattened into a single row.

    Parameters
    ----------
    path : str
        Directory whose immediate sub-folders each hold the images of one
        class. The sub-folder name is used as the label.

    Returns
    -------
    pandas.DataFrame
        One row per image and one column per pixel
        (``image.shape[0] * image.shape[1]`` columns), followed by a final
        ``'labels'`` column containing the name of the source sub-folder.

    Raises
    ------
    ValueError
        If no image is found at all (``np.concatenate`` rejects an empty list).
    """
    rows = []
    labels = []
    for folder_name in os.listdir(path):
        image_folder = os.path.join(path, folder_name)
        for image_name in os.listdir(image_folder):
            image = plt.imread(os.path.join(image_folder, image_name))
            # The folder name itself is the label. Using it directly, rather
            # than splitting the joined path on '\\', gives the same label on
            # every operating system instead of only on Windows.
            labels.append(folder_name)
            # Flatten the 2-D pixel grid into a single (1, height * width) row.
            rows.append(np.reshape(image, (1, image.shape[0] * image.shape[1])))
    df = pd.DataFrame(np.concatenate(rows, axis=0))
    df['labels'] = labels
    return df

In [None]:
# Read both dataset splits from disk: one row per image, plus a 'labels' column.
train_data = data(path=train_path)
test_data = data(path=test_path)

In [None]:
class DevanagariHandwrittenCharacter(GaussianNaiveBayes):
    """Prepare the Devanagari image data for a ``GaussianNaiveBayes`` model.

    On construction the features are standardised, projected onto the leading
    ``n_comp`` principal directions of the training set, and the supplied
    testing frame is split into a cross-validation part and a final test part.

    Attributes
    ----------
    training : pandas.DataFrame
        Projected training features followed by a ``'labels'`` column.
    cv : pandas.DataFrame
        Roughly two thirds of the projected testing frame (features followed
        by ``'labels'``), intended for hyperparameter selection.
    testing : pandas.DataFrame
        The remaining third of the projected testing frame.
    Q : numpy.ndarray
        Projection matrix with one column per retained component.
    n_comp : int
        Number of principal components kept.
    """

    def __init__(self, training, testing, n_comp, discriminant_analysis='qda', rda_p=np.nan,
                 random_state=0):
        """Standardise, project and split the data, then initialise the base model.

        Parameters
        ----------
        training, testing : pandas.DataFrame
            Feature columns plus a ``'labels'`` column.
        n_comp : int
            Number of principal components to keep.
        discriminant_analysis : str
            Forwarded unchanged to ``GaussianNaiveBayes.__init__``.
        rda_p : object
            Forwarded unchanged to ``GaussianNaiveBayes.__init__``.
        random_state : int or None
            Seed for the cross-validation / test split. A fixed seed gives
            every instance the same split, so scores obtained with different
            hyperparameters are comparable. Pass ``None`` for an unseeded,
            non-reproducible split.
        """
        # Labels are kept as plain arrays so that re-attaching them below is
        # positional and cannot be scrambled by pandas index alignment.
        training_labels = training['labels'].to_numpy()
        testing_labels = testing['labels'].to_numpy()
        train_features = training.drop(columns='labels')
        test_features = testing.drop(columns='labels')
        self.n_comp = n_comp

        # Standardise BOTH sets with the training statistics: the projection is
        # learned on training data, so the test data must live on the same scale.
        mean = train_features.mean(axis=0)
        # A zero standard deviation becomes NaN, so constant pixels turn into
        # NaN (never +/-inf) and are then zeroed by fillna.
        scale = train_features.std(axis=0).replace(0, np.nan)
        self.training = ((train_features - mean) / scale).fillna(0)
        self.testing = ((test_features - mean) / scale).fillna(0)

        self.__eigen_vector_calculation()

        # Project onto the retained principal directions and re-attach labels.
        self.training = pd.DataFrame(np.matmul(self.training.to_numpy(), self.Q))
        self.training['labels'] = training_labels

        projected_testing = pd.DataFrame(np.matmul(self.testing.to_numpy(), self.Q))
        projected_testing['labels'] = testing_labels

        # Splitting the whole frame keeps features and labels together; the
        # 'labels' column stays last in both parts.
        self.cv, self.testing = train_test_split(
            projected_testing, test_size=1/3, random_state=random_state
        )

        super().__init__(discriminant_analysis, rda_p)

    def __eigen_vector_calculation(self):
        """Store in ``self.Q`` the leading ``n_comp`` principal directions of ``self.training``."""
        features = np.array(self.training)
        # Second-moment matrix of the standardised training features.
        sigma_hat = (1/self.training.shape[0]) * np.matmul(features.T, features)
        # np.linalg.svd returns singular values in descending order, so the
        # first n_comp columns of U are the directions of largest variance.
        self.Q = np.linalg.svd(sigma_hat)[0][:, :self.n_comp]

In [None]:
def evaluate(predicted, actual):
    """Return the fraction of predictions that equal the true labels.

    Both inputs are converted to flat NumPy arrays and compared position by
    position, so lists, arrays and pandas Series (whatever their index) are
    all handled the same way.

    Parameters
    ----------
    predicted : array-like
        Predicted labels.
    actual : array-like
        True labels, in the same order as ``predicted``.

    Returns
    -------
    float
        Number of matching positions divided by the number of labels.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of labels.
    ZeroDivisionError
        If ``actual`` is empty.
    """
    predicted_arr = np.asarray(predicted).ravel()
    actual_arr = np.asarray(actual).ravel()
    # Without this check a length mismatch would either broadcast or collapse
    # to a single False and be reported as a (wrong) accuracy.
    if predicted_arr.shape != actual_arr.shape:
        raise ValueError(
            "predicted and actual must have the same number of labels "
            "(got %d and %d)" % (predicted_arr.size, actual_arr.size)
        )
    return int(np.count_nonzero(predicted_arr == actual_arr)) / int(actual_arr.size)

In [None]:
# Accuracy of every configuration tried, keyed by its hyperparameter tuple.
Results = {}
# Running best configuration. 'Best Obj' holds a placeholder string until a
# model scores above the initial accuracy of 0.
best = {
    'Accuracy': 0,
    'Best Obj': 'obj',
    'Parameter': (),
}

# Evaluate a range of PCA component counts under the Quadratic Discriminant Analysis (QDA) assumption

In [None]:
# Sweep the number of PCA components using the class's default
# discriminant_analysis ('qda'); each model is fitted on the training split
# and scored on the cross-validation split.
for n_comp in np.arange(2, 603, 10):
    obj = DevanagariHandwrittenCharacter(train_data, test_data, n_comp)
    obj.fit(obj.training.iloc[:, :-1], obj.training['labels'])
    obj.predict(obj.cv.iloc[:, :-1])

    qda_key = ('qda', n_comp)
    qda_accuracy = evaluate(obj.predicted_labels, obj.cv['labels'])
    Results[qda_key] = qda_accuracy

    if not qda_accuracy > best['Accuracy']:
        # No improvement: drop the reference to this model.
        del obj
        continue

    best.update({'Accuracy': qda_accuracy, 'Best Obj': obj, 'Parameter': qda_key})

In [None]:
# Display the running best result (accuracy, model object, parameters) after the QDA sweep.
best

# Evaluate a range of PCA component counts under the Linear Discriminant Analysis (LDA) assumption

In [None]:
# Sweep the number of PCA components with discriminant_analysis='lda'; each
# model is fitted on the training split and scored on the cross-validation split.
for n_comp in np.arange(2, 603, 10):
    obj = DevanagariHandwrittenCharacter(
        train_data, test_data, n_comp, discriminant_analysis='lda'
    )
    obj.fit(obj.training.iloc[:, :-1], obj.training['labels'])
    obj.predict(obj.cv.iloc[:, :-1])

    lda_key = ('lda', n_comp)
    lda_accuracy = evaluate(obj.predicted_labels, obj.cv['labels'])
    Results[lda_key] = lda_accuracy

    if not lda_accuracy > best['Accuracy']:
        # No improvement: drop the reference to this model.
        del obj
        continue

    best.update({'Accuracy': lda_accuracy, 'Best Obj': obj, 'Parameter': lda_key})

In [None]:
# Display the running best result (accuracy, model object, parameters) after the LDA sweep.
best

# Evaluate a range of PCA component counts and (alpha, gamma) values under the Regularized Discriminant Analysis (RDA) assumption

In [None]:
# Grid search with discriminant_analysis='rda': every (alpha, gamma) pair from
# np.arange(0.1, 1, 0.1) is combined with every PCA size, and each model is
# fitted on the training split and scored on the cross-validation split.
for alpha in np.arange(0.1,1,0.1):
    for gamma in np.arange(0.1,1,0.1):
        # np.arange excludes its stop value, so the stop must be 603 (not 602)
        # for n_comp = 602 to be tried, exactly as in the QDA and LDA sweeps.
        for n_comp in np.arange(2,603,10):
            obj = DevanagariHandwrittenCharacter(train_data, test_data, n_comp, discriminant_analysis='rda', rda_p=(alpha, gamma))
            obj.fit(obj.training.iloc[:,:-1],obj.training['labels'])
            obj.predict(obj.cv.iloc[:,:-1])
            Results[('rda', n_comp, alpha, gamma)] = evaluate(obj.predicted_labels, obj.cv['labels'])
            if Results[('rda', n_comp, alpha, gamma)] > best['Accuracy']:
                # New best configuration: keep the fitted model and its parameters.
                best['Accuracy'] = Results[('rda', n_comp, alpha, gamma)]
                best['Best Obj'] = obj
                best['Parameter'] = ('rda', n_comp, alpha, gamma)
            else:
                # No improvement: drop the reference to this model.
                del obj

In [None]:
# Display the overall best result (accuracy, model object, parameters) after the RDA grid search.
best

# Out of 5063 hyperparameter combinations, we select our best model

In [None]:
# Retrieve the model object stored for the highest cross-validation accuracy.
best_obj = best['Best Obj']

In [None]:
# Run the selected model on its testing split; the last column ('labels') is
# excluded so that only the feature columns are passed in.
best_obj.predict(best_obj.testing.iloc[:,:-1])

In [None]:
# Fraction of testing-split labels that the selected model predicted correctly.
evaluate(best_obj.predicted_labels, best_obj.testing['labels'])