In [1]:
import numpy as np
import pandas as pd


In [None]:
class NB:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated from the training
    data. Prediction picks the class with the largest log-posterior,
    ``log P(class) + sum_j log N(x_j | mean_cj, var_cj)``.

    Parameters
    ----------
    epsilon : float, default 1e-9
        Constant added to every estimated variance so that a feature which
        is constant within a class does not cause a division by zero.

    Attributes (set by ``fit``)
    ---------------------------
    classes : ndarray of shape (n_classes,)
        Sorted unique labels seen during training.
    mean, var : ndarray of shape (n_classes, n_features)
        Per-class feature means and (smoothed) variances.
    priors : ndarray of shape (n_classes,)
        Fraction of training samples belonging to each class.
    """

    def __init__(self, epsilon=1e-9):
        self.epsilon = epsilon

    def fit(self, X, y):
        """Estimate per-class means, variances and priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features (NumPy array, nested list or DataFrame).
        y : array-like of shape (n_samples,)
            Class label of each training sample.

        Returns
        -------
        self : NB
            The fitted estimator, to allow call chaining.

        Raises
        ------
        ValueError
            If ``X`` is not 2-dimensional, is empty, or its number of rows
            does not match the length of ``y``.
        """
        # Coerce up front so DataFrames / lists behave exactly like arrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (n_samples, n_features)")
        samples, features = X.shape
        if y.shape[0] != samples:
            raise ValueError("X and y must contain the same number of samples")
        if samples == 0:
            raise ValueError("cannot fit on an empty dataset")

        self.classes = np.unique(y)
        n_classes = len(self.classes)

        self.mean = np.zeros((n_classes, features))
        self.var = np.zeros((n_classes, features))
        self.priors = np.zeros(n_classes)

        for index, class_ in enumerate(self.classes):
            X_c = X[y == class_]
            self.mean[index, :] = X_c.mean(axis=0)
            # The additive floor keeps zero-variance features usable.
            self.var[index, :] = X_c.var(axis=0) + self.epsilon
            self.priors[index] = X_c.shape[0] / float(samples)
        return self

    def _pdf(self, class_index, x):
        """Gaussian density of ``x`` under the model of one class.

        Returns the element-wise density, one value per feature. The value
        can underflow to 0 for points far from the class mean, so
        prediction uses ``_log_pdf`` rather than the log of this result.
        """
        mean = self.mean[class_index]
        var = self.var[class_index]
        numerator = np.exp(-(x - mean) ** 2 / (2 * var))
        deno = np.sqrt(2 * np.pi * var)
        return numerator / deno

    def _log_pdf(self, class_index, x):
        """Element-wise log of the Gaussian density for one class.

        Computed analytically as
        ``-0.5 * (log(2*pi*var) + (x - mean)**2 / var)`` so that it stays
        finite where ``_pdf`` would underflow to 0.
        """
        mean = self.mean[class_index]
        var = self.var[class_index]
        return -0.5 * (np.log(2.0 * np.pi * var) + (x - mean) ** 2 / var)

    def _joint_log_likelihood(self, X):
        """Log-posterior (up to a shared constant) of every class for each row.

        ``X`` must be a 2-D float array of shape (n_samples, n_features).
        Returns an array of shape (n_samples, n_classes).
        """
        scores = np.empty((X.shape[0], len(self.classes)))
        for index in range(len(self.classes)):
            log_prior = np.log(self.priors[index])
            # Naive Bayes independence assumption: sum per-feature log-densities.
            scores[:, index] = log_prior + self._log_pdf(index, X).sum(axis=1)
        return scores

    def _predict(self, X):
        """Return the most probable class label for a single sample.

        ``X`` is one sample given as a 1-D array-like of feature values.
        """
        sample = np.atleast_2d(np.asarray(X, dtype=float))
        scores = self._joint_log_likelihood(sample)[0]
        return self.classes[np.argmax(scores)]

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify. For a model trained on a single feature a
            1-D sequence of values is also accepted.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels, drawn from ``self.classes``.

        Raises
        ------
        ValueError
            If the number of features does not match the training data.
        """
        X = np.asarray(X, dtype=float)
        n_features = self.mean.shape[1]
        if X.size == 0:
            # Nothing to classify; return an empty label array.
            return self.classes[:0].copy()
        if X.ndim == 1 and n_features == 1:
            # Treat a flat sequence as one single-feature sample per element.
            X = X.reshape(-1, 1)
        if X.ndim != 2 or X.shape[1] != n_features:
            raise ValueError(
                "X must have shape (n_samples, %d)" % n_features
            )
        scores = self._joint_log_likelihood(X)
        return self.classes[np.argmax(scores, axis=1)]
    
            

In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load Iris as (features, labels) and hold out 30% of the rows for evaluation.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,  # fixed seed keeps the split reproducible
)

# Fit the from-scratch Naive Bayes classifier and label the held-out rows.
nb = NB()
nb.fit(X_train, y_train)
predictions = nb.predict(X_test)

# Accuracy = fraction of held-out samples whose prediction matches the label.
accuracy = (predictions == y_test).mean()
print("Accuracy:", accuracy)


Accuracy: 0.9777777777777777


In [7]:
from sklearn.naive_bayes import GaussianNB
# Reference baseline: scikit-learn's GaussianNB trained and scored on the
# same split, for comparison with the from-scratch implementation.
model = GaussianNB().fit(X_train, y_train)
print("Sklearn accuracy:", model.score(X_test, y_test))


Sklearn accuracy: 0.9777777777777777
