In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
raw_data = pd.read_csv("../IRIS.csv")
# Unique species names in order of first appearance; a name's position in
# `types` is the integer code it is encoded as.
types = pd.unique(raw_data["species"])

idx = list(range(len(types)))
# Encode on a copy so `raw_data` keeps the original string labels and the cell
# can be re-run safely. The mapped column is assigned back explicitly:
# calling `.replace(..., inplace=True)` on `data["species"]` operates on a
# column selection, which does not update `data` under pandas Copy-on-Write.
data = raw_data.copy()
data["species"] = data["species"].map(dict(zip(types, idx)))

In [3]:
# Every column but the last is used as a feature; the last column is taken as
# the class label (presumably the encoded `species` column - verify against the CSV).
X, Y = data.iloc[:, :-1].values, data.iloc[:, -1].values
# NOTE: an integer `test_size` is an absolute number of held-out rows (20),
# not a fraction of the data set.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=20,
    random_state=31,
)

In [4]:
class GaussianBayes():
    '''
        Gaussian Naive Bayes classifier.

        Every feature is modelled, per class, as an independent normal
        distribution whose mean and variance are estimated from the training
        data. A sample is assigned to the class with the largest log-posterior:
        log(prior) + sum over features of the Gaussian log-likelihood.

        Parameters
        ----------
        var_smoothing : float, default 1e-9
            Fraction of the largest per-feature variance of the training data
            that is added to every class variance. This keeps the variance
            strictly positive when a feature is constant within a class
            (otherwise the likelihood would be 0/0 = NaN).

        Attributes (populated by ``fit``)
        ---------------------------------
        classes   : sorted unique labels seen during ``fit``.
        n_classes : number of distinct labels.
        mean      : array (n_classes, n_features) of per-class feature means.
        varience  : array (n_classes, n_features) of per-class feature
                    variances, smoothing included (spelling kept so existing
                    code reading this attribute continues to work).
        priors    : array (n_classes,) of class relative frequencies.
    '''
    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing
        self.n_classes = None
        self.classes = None
        self.mean = None
        self.varience = None
        self.priors = None

    def fit(self, X, Y):
        '''
            Estimate class priors and per-class feature means/variances.

            X : array-like of shape (n_samples, n_features), numeric.
            Y : array-like of shape (n_samples,), class labels.

            Raises ValueError if X is not a non-empty 2-D array.
        '''
        X = np.asarray(X, dtype=np.float64)
        Y = np.asarray(Y)
        if X.ndim != 2 or X.size == 0:
            raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_features)")

        n_samples, n_features = X.shape
        self.classes = np.unique(Y)
        self.n_classes = len(self.classes)
        # for the Gaussian Naive Bayesian
        self.mean = np.zeros((self.n_classes, n_features), dtype=np.float64)
        self.varience = np.zeros((self.n_classes, n_features), dtype=np.float64)
        self.priors = np.zeros(self.n_classes, dtype=np.float64)

        # Variance floor, scaled to the data so it is negligible for features
        # that actually vary. If the whole training set is constant the scaled
        # value is 0, so fall back to the raw smoothing constant.
        epsilon = self.var_smoothing * X.var(axis=0).max()
        if epsilon == 0:
            epsilon = self.var_smoothing

        for idx, clas in enumerate(self.classes):
            Xc = X[Y == clas]
            self.mean[idx, :] = Xc.mean(axis=0)
            self.varience[idx, :] = Xc.var(axis=0) + epsilon
            self.priors[idx] = Xc.shape[0] / float(n_samples)

    def predict(self, X):
        '''
            Predict a class label for every row of X.

            X : array-like of shape (n_samples, n_features).
            Returns a list with one label per row (empty list for empty X).

            Raises ValueError if X is non-empty but not 2-D.
        '''
        X = np.asarray(X, dtype=np.float64)
        if X.size == 0:
            return []
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        best = np.argmax(self._joint_log_likelihood(X), axis=1)
        return list(self.classes[best])

    def _pred_(self, x):
        '''Predict the label of a single sample ``x`` of shape (n_features,).'''
        x = np.asarray(x, dtype=np.float64)
        scores = self._joint_log_likelihood(x[np.newaxis, :])[0]
        return self.classes[np.argmax(scores)]

    def _joint_log_likelihood(self, X):
        '''
            Unnormalised log-posterior of every class for every row of X.

            Computed directly in log space: taking the log of the pdf instead
            underflows to log(0) = -inf for samples far from every class mean,
            which makes all classes tie and argmax silently pick class 0.

            Returns an array of shape (n_samples, n_classes).
        '''
        # Broadcast to (n_samples, n_classes, n_features).
        diff = X[:, np.newaxis, :] - self.mean[np.newaxis, :, :]
        log_pdf = -0.5 * (np.log(2 * np.pi * self.varience) + diff ** 2 / self.varience)
        return np.log(self.priors) + log_pdf.sum(axis=2)

    def Gaussian(self, idx, x):
        '''
            Gaussian probability density of sample ``x`` under class index
            ``idx``, evaluated independently for every feature.

            Kept for callers that want the raw density; prediction uses the
            log-space computation because this value can underflow to 0.
        '''
        mean = self.mean[idx]
        var = self.varience[idx]
        numerator = np.exp(-((x - mean) ** 2) / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        prob = numerator / denominator
        return prob

In [5]:
# Build the from-scratch Gaussian Naive Bayes model, then estimate its
# parameters from the training split only.
Classifier = GaussianBayes()
Classifier.fit(X=X_train, Y=Y_train)

In [6]:
# Predict on the held-out split first, then on the training split, so both
# accuracies can be compared afterwards.
lables_test, lables_train = (
    Classifier.predict(split) for split in (X_test, X_train)
)

In [7]:
# accuracy_score takes (y_true, y_pred): ground-truth labels first, predictions
# second. Accuracy is symmetric, but keeping the documented order avoids wrong
# results if this is later swapped for an asymmetric metric.
test_acc = accuracy_score(Y_test, lables_test)
train_acc = accuracy_score(Y_train, lables_train)

In [8]:
# Report both accuracies as percentages with two decimals, one per line.
print(
    f"The Training accuracy is {train_acc*100:0.2f}",
    f"The Testing accuracy is {test_acc*100:0.2f}",
    sep="\n",
)

The Training accuracy is 95.38
The Testing accuracy is 100.00
