In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [10]:
class NB_classifier():
    """Gaussian Naive Bayes classifier.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated from the training
    rows of that class. Prediction picks the class with the highest
    log-posterior (log prior + sum of per-feature log densities).

    Parameters
    ----------
    var_smoothing : float, default 1e-9
        Fraction of the largest overall feature variance that is added to
        every per-class variance. This keeps variances strictly positive so
        that a feature which is constant within a class does not produce
        0/0 (NaN) scores. Set to 0 to disable.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_rows, n_features)
        y : array-like of shape (n_rows,)
            Class label of each row; any dtype accepted by ``np.unique``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If X is not 2-D, is empty, or y does not hold one label per row.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_rows, n_features)")
        if X.shape[0] == 0 or X.shape[1] == 0:
            raise ValueError("cannot fit on an empty dataset")
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError("y must be 1-D with exactly one label per row of X")

        self.n_rows, self.n_features = X.shape
        self.classes = np.unique(y)
        self.n_classes = len(self.classes)
        #initialize the stats matrices to zeros
        self.means = np.zeros((self.n_classes, self.n_features))
        self.variances = np.zeros((self.n_classes, self.n_features))
        self.priors = np.zeros(self.n_classes)

        #variance floor, scaled to the data so it is negligible for
        #well-behaved features but prevents division by zero
        epsilon = self.var_smoothing * np.var(X, axis=0).max()
        if epsilon <= 0:
            #every feature is constant over the whole dataset
            epsilon = self.var_smoothing

        for i, c in enumerate(self.classes):
            #get the i-th class observations
            class_observations = X[y == c]
            self.means[i, :] = np.mean(class_observations, axis=0)
            self.variances[i, :] = np.var(class_observations, axis=0) + epsilon
            self.priors[i] = len(class_observations) / self.n_rows

        return self

    #get the gaussian density values for an observation
    def gaussian_density(self, x, mean, var):
        """Return the normal pdf of ``x`` for the given ``mean`` and ``var``.

        Kept for callers that want the raw density. It underflows to 0 for
        points far from the mean, so scoring uses the log-density instead.
        """
        num = np.exp(-0.5 * ((x - mean) ** 2 / var))
        den = np.sqrt(var * 2 * np.pi)
        return num / den

    def _log_gaussian_density(self, x, mean, var):
        """Return the log of the normal pdf, computed without exp/log round trip."""
        return -0.5 * (np.log(2.0 * np.pi * var) + (x - mean) ** 2 / var)

    def _joint_log_likelihood(self, X):
        """Return an (n_samples, n_classes) array of unnormalised log-posteriors."""
        scores = np.empty((X.shape[0], self.n_classes))
        for i in range(self.n_classes):
            log_prior = np.log(self.priors[i])
            log_pdf = self._log_gaussian_density(X, self.means[i], self.variances[i])
            #naive independence assumption: feature log-densities simply add up
            scores[:, i] = log_prior + log_pdf.sum(axis=1)
        return scores

    def get_posterior(self, x):
        """Return the index into ``self.classes`` of the most probable class.

        Parameters
        ----------
        x : array-like of shape (n_features,)
            A single observation.
        """
        row = np.asarray(x, dtype=float).reshape(1, -1)
        return np.argmax(self._joint_log_likelihood(row)[0])

    def predict(self, X):
        """Predict the class label of every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Labels taken from ``self.classes`` (the values seen in ``fit``),
            not positional indices.

        Raises
        ------
        ValueError
            If X is non-empty and not of shape (n_samples, n_features).
        """
        X = np.asarray(X, dtype=float)
        if X.size == 0:
            return self.classes[:0]
        if X.ndim != 2 or X.shape[1] != self.n_features:
            raise ValueError(
                "X must have shape (n_samples, %d)" % self.n_features
            )
        best = np.argmax(self._joint_log_likelihood(X), axis=1)
        #map argmax positions back to the original label values
        return self.classes[best]
    
    
    

In [6]:
# Load the iris features and labels with a single call instead of building
# the dataset object twice.
X, y = load_iris(return_X_y=True)
# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [11]:
# Fit the Naive Bayes model on the training split, then score the held-out rows.
model = NB_classifier()
model.fit(x_train, y_train)
predictions = model.predict(x_test)

In [12]:
def accuracy(y_pred, y_true):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_pred, y_true : array-like
        Predicted and true labels; must have the same shape. Plain Python
        lists are accepted (they are converted to arrays so that the
        comparison is element-wise rather than a single list equality).

    Returns
    -------
    numpy.float64
        Value in [0, 1].

    Raises
    ------
    ValueError
        If the inputs differ in shape or are empty.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)
    if y_pred.shape != y_true.shape:
        raise ValueError(
            "y_pred and y_true must have the same shape, got %s and %s"
            % (y_pred.shape, y_true.shape)
        )
    if y_true.size == 0:
        raise ValueError("accuracy is undefined for empty inputs")
    acc = np.mean(y_pred == y_true)
    return acc

# Report the share of test-set predictions that match the true labels.
print("accuracy : ", accuracy(predictions, y_test))

accuracy :  1.0
