In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the training and test splits from CSV files in the working directory.
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

In [3]:
# Partition the training rows by the column labelled "4", then compute the
# per-group mean, standard deviation and covariance of the remaining columns.
groups = train_df.groupby(["4"])
means = groups.mean()
stds = groups.std()
covs = groups.cov()

In [4]:
def bayes_classifier(x, mu, cov):
    """Evaluate the multivariate Gaussian density N(x; mu, cov).

    Parameters
    ----------
    x : numpy.ndarray
        1-D feature vector of length ``d``.
    mu : numpy.ndarray
        1-D mean vector of length ``d``.
    cov : numpy.ndarray
        ``d x d`` covariance matrix; must be non-singular.

    Returns
    -------
    float
        ``exp(-0.5 * (x-mu)^T cov^-1 (x-mu)) / ((2*pi)^(d/2) * sqrt(det(cov)))``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``cov`` is singular.
    """
    d = x.shape[0]
    diff = x - mu
    # Solve cov @ y = diff instead of forming the explicit inverse.
    mahalanobis_sq = diff @ np.linalg.solve(cov, diff)
    # The exponent d/2 must be parenthesised: ``**`` binds tighter than ``/``,
    # so ``(2*pi)**d/2`` would compute ((2*pi)**d)/2 instead of (2*pi)**(d/2).
    norm_const = (2 * np.pi) ** (d / 2) * np.sqrt(np.linalg.det(cov))
    return np.exp(-0.5 * mahalanobis_sq) / norm_const

In [5]:
# Hard-coded prior probability for each class label (not estimated from the data).
prior = {
    "Iris-setosa": 0.8,
    "Iris-versicolor": 0.1,
    "Iris-virginica": 0.1,
}

In [6]:
def run_classifer(prob_func, dataset, means, sigmas, priors=None):
    """Classify every row of ``dataset`` and print the resulting accuracy.

    For each row the score ``prob_func(x, mean, sigma) * prior`` is computed
    for every class, and the class with the largest score is the prediction.

    Parameters
    ----------
    prob_func : callable
        Called as ``prob_func(x, mean, sigma)`` with numpy arrays; returns the
        class-conditional likelihood of ``x``.
    dataset : pandas.DataFrame
        The last column holds the true class label; all preceding columns are
        features convertible to float.
    means : pandas.DataFrame
        Indexed by class label; ``means.loc[label]`` is passed to ``prob_func``
        as the mean. Its index defines the set of candidate classes.
    sigmas : pandas.DataFrame
        ``sigmas.loc[label]`` is passed to ``prob_func`` as the third argument.
    priors : mapping, optional
        Maps class label to prior probability. When omitted, the notebook-level
        ``prior`` dict is used.

    Returns
    -------
    None
        The accuracy is printed, not returned.
    """
    if priors is None:
        # Fall back to the notebook-level ``prior`` dict.
        priors = prior

    # Per-class parameters do not depend on the row, so look them up once.
    # The class list comes from ``means`` itself rather than from a global
    # groupby object, so the function only depends on its arguments.
    class_params = {
        label: (
            means.loc[label].to_numpy(),
            sigmas.loc[label].to_numpy(),
            priors[label],
        )
        for label in means.index
    }

    # Explicit positional selection: features are every column but the last,
    # the true label is the last column.
    features = dataset.iloc[:, :-1].to_numpy(dtype=float)
    labels = dataset.iloc[:, -1].to_numpy()

    correct_ones = 0
    for x, true_label in zip(features, labels):
        probabilities = {
            label: prob_func(x, mu, sigma) * class_prior
            for label, (mu, sigma, class_prior) in class_params.items()
        }
        if true_label == max(probabilities, key=probabilities.get):
            correct_ones += 1

    func_name = prob_func.__name__.replace('_', ' ').title()
    print(f"\tThe accuracy of {func_name} is {correct_ones/dataset.shape[0]*100:.2f} %")

In [7]:
# Report accuracy on both splits using the statistics fitted on the training set.
for heading, split_df in (
    ("For training set - ", train_df),
    ("\nFor test set - ", test_df),
):
    print(heading)
    run_classifer(bayes_classifier, split_df, means, covs)

For training set - 
	The accuracy of Bayes Classifier is 97.32 %

For test set - 
	The accuracy of Bayes Classifier is 100.00 %
