In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets

- NaiveBayes Class

In [2]:
class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Every feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated in ``fit``.
    ``predict`` returns, for each sample, the class with the largest
    log-posterior (log prior + sum of per-feature log densities).

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every class variance. This keeps the variance
        strictly positive when a feature is constant within a class, which
        would otherwise cause a division by zero in the density.
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class feature means, variances and class priors.

        Parameters
        ----------
        X : array-like of shape (num_samples, num_features)
            Training samples.
        y : array-like of shape (num_samples,)
            Training labels.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        num_samples, num_features = X.shape

        # Unique class labels (sorted) and how many there are.
        self._classes = np.unique(y)
        num_classes = len(self._classes)

        # Initialize the mean, variance and prior probability for each class.
        self.mean = np.zeros((num_classes, num_features), dtype=np.float64)
        self.variance = np.zeros((num_classes, num_features), dtype=np.float64)
        self.priors = np.zeros(num_classes, dtype=np.float64)

        for idx, cls in enumerate(self._classes):
            # Only the training samples that belong to the current class.
            X_cls = X[y == cls]

            self.mean[idx, :] = X_cls.mean(axis=0)
            self.variance[idx, :] = X_cls.var(axis=0)
            self.priors[idx] = X_cls.shape[0] / float(num_samples)

        # Variance smoothing: scale the additive term by the largest feature
        # variance so it is negligible for well-behaved data; fall back to the
        # raw smoothing value when the training data has no spread at all.
        largest_var = X.var(axis=0).max() if X.size else 0.0
        if largest_var > 0:
            epsilon = self.var_smoothing * largest_var
        else:
            epsilon = self.var_smoothing
        self.variance += epsilon

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (num_samples, num_features)

        Returns
        -------
        numpy.ndarray of shape (num_samples,)
            Predicted labels, taken from the classes seen in ``fit``.
        """
        X = np.asarray(X, dtype=np.float64)
        if X.size == 0:
            return np.array([], dtype=self._classes.dtype)

        # Rows of the score matrix are classes, columns are samples.
        return self._classes[np.argmax(self._log_posteriors(X), axis=0)]

    def _predict(self, x):
        """Return the predicted class label for a single sample ``x``."""
        x = np.asarray(x, dtype=np.float64)
        return self._classes[np.argmax(self._log_posteriors(x))]

    def _log_posteriors(self, x):
        """Unnormalized log-posterior of every class for ``x``.

        ``x`` may be one sample (num_features,) or a batch
        (num_samples, num_features); the first axis of the result indexes
        the classes in the order of ``self._classes``.
        """
        return np.array([
            np.log(self.priors[idx]) + np.sum(self._log_density(idx, x), axis=-1)
            for idx in range(len(self._classes))
        ])

    def _log_density(self, clss_idx, x):
        """Per-feature log of the Gaussian density of ``x`` under one class.

        Working in log-space avoids the underflow of exp(-large) to 0.0,
        which would turn into log(0) = -inf for samples far from the mean.
        """
        mean = self.mean[clss_idx]
        variance = self.variance[clss_idx]
        return -0.5 * np.log(2 * np.pi * variance) - ((x - mean) ** 2) / (2 * variance)

    def prob_density(self, clss_idx, x):
        """Per-feature Gaussian probability density of ``x`` under one class.

        Kept for callers that want the density itself; prediction uses the
        log-density directly.
        """
        return np.exp(self._log_density(clss_idx, x))

- Function to compute the accuracy of our Algorithm

In [3]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that match the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, same shape as ``y_true``.

    Returns
    -------
    float
        Share of positions where ``y_true`` and ``y_pred`` are equal.

    Raises
    ------
    ValueError
        If the inputs have different shapes or are empty.
    """
    # Convert first: comparing two plain lists with == yields a single bool,
    # not an element-wise comparison.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )
    if y_true.size == 0:
        raise ValueError("accuracy is undefined for empty input")

    return np.mean(y_true == y_pred)

- Dataset Loading 

In [4]:
# Generate a synthetic classification problem: 1000 samples, 10 features,
# 2 classes. random_state is fixed so the same data is produced on every run.
X, y = datasets.make_classification(
    n_samples=1000, n_features = 10, n_classes = 2, random_state=123 
)

# Hold out 20% of the samples for testing; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123
)

- Training & Testing our NaiveBayes Algorithm

In [5]:
# Fit the classifier on the training split, then predict labels for the test split.
nb = NaiveBayes()
nb.fit(X_train, y_train)
predictions = nb.predict(X_test)

In [6]:
# Score the test-set predictions against the held-out labels and print the
# result as a percentage with five decimals, framed by "=" rules.
acc= accuracy(y_test, predictions)
print("="*28, f"Accuracy:- {acc*100:.5f}%", "="*28)

