In [9]:
#Importing all libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import math
from tqdm import tqdm
from scipy.stats import norm

In [10]:
# Sample data set of 2 dimensions col1 and col2
col1= np.random.randint(2, size = 1000)
col2= np.random.randint(2, size = 1000)
y= np.random.randint(2, size = 1000)

In [11]:
#Generating an array by stacking discrete generated variables 
X = np.vstack((col1,col2)).T 

In [19]:
X

array([[1, 0],
       [1, 0],
       [0, 0],
       ...,
       [1, 1],
       [0, 0],
       [1, 0]])

In [34]:
class NaiveBayes: 
    
    def __init__(self, X, y, alpha=0):
        
        self.X = X
        self.y = y
        self.alpha = alpha
        
    def trainTestSplit(self):
        
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y, test_size = 0.35, random_state = 20)
           
    def probability(self, X, prior, dist1, dist2):
        return prior * dist1 * dist2

    def runModel(self):
        
        self.trainTestSplit()

        self.X0_train = self.X_train[self.y_train == 0]
        self.X1_train = self.X_train[self.y_train == 1]
        
        # X0, class= 0
        self.X00_train = self.X0_train[:, 0]
        # X1, class 0
        self.X10_train = self.X0_train[:, 1]
        # X0, class= 1
        self.X01_train = self.X1_train[:, 0]
        # X1, class= 1
        self.X11_train = self.X1_train[:, 1]
        
        # Calculating the likelihoods
        
        # for class 0
        self.X00y0_train = self.X00_train[self.X00_train == 0]
        self.X01y0_train = self.X00_train[self.X00_train == 1]
        self.X10y0_train = self.X10_train[self.X10_train == 0]
        self.X11y0_train = self.X10_train[self.X10_train == 1]
        
        # for class 1
        self.X00y1_train = self.X01_train[self.X01_train == 0]
        self.X01y1_train = self.X01_train[self.X01_train == 1]
        self.X10y1_train = self.X11_train[self.X11_train == 0]
        self.X11y1_train = self.X11_train[self.X11_train == 1]
              
        # calculating priors on y
        self.prior_y0 = len(self.X0_train)/len(self.X_train)
        self.prior_y1 = len(self.X1_train)/len(self.X_train)
        
        #m is the number of features in dataset
        m = 2
        alpha = self.alpha
        print(alpha)
               
        # likelihood functions for x0 and x1 - class 0
        self.likelihood_X00y0 = (len(self.X00y0_train)+alpha)/(len(self.X0_train)+(m*alpha)) 
        self.likelihood_X01y0 = (len(self.X01y0_train)+alpha)/(len(self.X0_train)+(m*alpha))
        self.likelihood_X10y0 = (len(self.X10y0_train)+alpha)/(len(self.X0_train)+(m*alpha))
        self.likelihood_X11y0 = (len(self.X11y0_train)+alpha)/(len(self.X0_train)+(m*alpha))
        
         # likelihood functions for x0 and x1 - class 1       
        self.likelihood_X00y1 = (len(self.X00y1_train)+alpha)/(len(self.X1_train)+(m*alpha))
        self.likelihood_X01y1 = (len(self.X01y1_train)+alpha)/(len(self.X1_train)+(m*alpha))
        self.likelihood_X10y1 = (len(self.X10y1_train)+alpha)/(len(self.X1_train)+(m*alpha))
        self.likelihood_X11y1 = (len(self.X11y1_train)+alpha)/(len(self.X1_train)+(m*alpha))
        
    def predict(self):
        
        for sample, target in zip(self.X_test, self.y_test):
            
            # if sample belongs to class 0

            if sample[0] == 1:
                self.dist_X0y0 = self.likelihood_X01y0 #X0=1|y=0
            else:
                self.dist_X0y0 = self.likelihood_X00y0 #X0=0|y=0
                
            if sample[1] == 1:
                self.dist_X1y0 = self.likelihood_X11y0 #X1=1|y=0
            else:
                self.dist_X1y0 = self.likelihood_X10y0 #X1=0|y=0
                
            # if sample belongs to class 1
            
            if sample[0] == 1:
                self.dist_X0y1 = self.likelihood_X01y1 #X0=1|y=1
            else:
                self.dist_X0y1 = self.likelihood_X00y1 #X0=0|y=1
                 
            if sample[1] == 1:
                self.dist_X1y1 = self.likelihood_X11y1 #X1=1|y=1
            else:
                self.dist_X1y1 = self.likelihood_X10y1 #X1=0|y=1
                           
            py0 = self.probability(sample, self.prior_y0, self.dist_X0y0, self.dist_X1y0)
            py1 = self.probability(sample, self.prior_y1, self.dist_X0y1, self.dist_X1y1)
            
            print('P(y=0| %s) = %.3f' % (sample, py0*100))
            print('P(y=1| %s) = %.3f' % (sample, py1*100))
            print('Our model predicted as {}, but the actual value was: {} \n'.format(np.argmax([py0*100, py1*100]), target)) 

In [35]:
#Without Laplace Smoothing
nb = NaiveBayes(X, y, alpha=0)
nb.runModel()

0


In [36]:
nb.predict()

P(y=0| [1 0]) = 13.609
P(y=1| [1 0]) = 14.216
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 1 

P(y=0| [1 0]) = 13.609
P(y=1| [1 0]) = 14.216
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 0 

P(y=0| [1 0]) = 13.609
P(y=1| [1 0]) = 14.216
Our model predicted as 1, but the actual value was: 0 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 0 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the ac

P(y=1| [1 1]) = 12.092
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 0 

P(y=0| [1 0]) = 13.609
P(y=1| [1 0]) = 14.216
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [1 0]) = 13.609
P(y=1| [1 0]) = 14.216
Our model predicted as 1, but the actual value was: 0 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 0 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 0 

P(y


P(y=0| [1 1]) = 11.468
P(y=1| [1 1]) = 12.092
Our model predicted as 1, but the actual value was: 0 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 0 

P(y=0| [1 0]) = 13.609
P(y=1| [1 0]) = 14.216
Our model predicted as 1, but the actual value was: 0 

P(y=0| [1 1]) = 11.468
P(y=1| [1 1]) = 12.092
Our model predicted as 1, but the actual value was: 0 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 0 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 0 

P(y=0| [1 0]) = 13.609
P(y=1| [1 0]) = 14.216
Our model predicted as 1, but the actual value was: 0 

P(y=0| [1 0]) = 13.609
P(y=1| [1 0]) = 14.216
Our model predicted as 1, but the a

P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [1 1]) = 11.468
P(y=1| [1 1]) = 12.092
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 0 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 1 

P(y=0| [1 1]) = 11.468
P(y=1| [1 1]) = 12.092
Our model predicted as 1, but the actual value was: 1 

P(y=0| [1 0]) = 13.609
P(y=1| [1 0]) = 14.216
Our model predicted as 1, but the actual value was: 0 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y


P(y=0| [1 1]) = 11.468
P(y=1| [1 1]) = 12.092
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 1 

P(y=0| [1 0]) = 13.609
P(y=1| [1 0]) = 14.216
Our model predicted as 1, but the actual value was: 0 

P(y=0| [1 1]) = 11.468
P(y=1| [1 1]) = 12.092
Our model predicted as 1, but the actual value was: 0 

P(y=0| [1 1]) = 11.468
P(y=1| [1 1]) = 12.092
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 1]) = 11.609
P(y=1| [0 1]) = 10.677
Our model predicted as 0, but the actual value was: 0 

P(y=0| [1 1]) = 11.468
P(y=1| [1 1]) = 12.092
Our model predicted as 1, but the actual value was: 0 

P(y=0| [1 1]) = 11.468
P(y=1| [1 1]) = 12.092
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 0]) = 13.776
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 0 

P(y=0| [1 0]) = 13.609
P(y=1| [1 0]) = 14.216
Our model predicted as 1, but the a

In [37]:
#With Laplace Smoothing
nb = NaiveBayes(X, y, alpha=1)
nb.runModel()

1


In [38]:
nb.predict()

P(y=0| [1 0]) = 13.603
P(y=1| [1 0]) = 14.204
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 1 

P(y=0| [1 0]) = 13.603
P(y=1| [1 0]) = 14.204
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 0 

P(y=0| [1 0]) = 13.603
P(y=1| [1 0]) = 14.204
Our model predicted as 1, but the actual value was: 0 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 0 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the ac

P(y=1| [1 1]) = 12.094
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 0 

P(y=0| [1 0]) = 13.603
P(y=1| [1 0]) = 14.204
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [1 0]) = 13.603
P(y=1| [1 0]) = 14.204
Our model predicted as 1, but the actual value was: 0 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 0 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 0 

P(y


P(y=0| [1 1]) = 11.475
P(y=1| [1 1]) = 12.094
Our model predicted as 1, but the actual value was: 0 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 0 

P(y=0| [1 0]) = 13.603
P(y=1| [1 0]) = 14.204
Our model predicted as 1, but the actual value was: 0 

P(y=0| [1 1]) = 11.475
P(y=1| [1 1]) = 12.094
Our model predicted as 1, but the actual value was: 0 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 0 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 0 

P(y=0| [1 0]) = 13.603
P(y=1| [1 0]) = 14.204
Our model predicted as 1, but the actual value was: 0 

P(y=0| [1 0]) = 13.603
P(y=1| [1 0]) = 14.204
Our model predicted as 1, but the a

P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [1 1]) = 11.475
P(y=1| [1 1]) = 12.094
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 0 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 1 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 1 

P(y=0| [1 1]) = 11.475
P(y=1| [1 1]) = 12.094
Our model predicted as 1, but the actual value was: 1 

P(y=0| [1 0]) = 13.603
P(y=1| [1 0]) = 14.204
Our model predicted as 1, but the actual value was: 0 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 1 

P(y


P(y=0| [1 1]) = 11.475
P(y=1| [1 1]) = 12.094
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 1 

P(y=0| [1 0]) = 13.603
P(y=1| [1 0]) = 14.204
Our model predicted as 1, but the actual value was: 0 

P(y=0| [1 1]) = 11.475
P(y=1| [1 1]) = 12.094
Our model predicted as 1, but the actual value was: 0 

P(y=0| [1 1]) = 11.475
P(y=1| [1 1]) = 12.094
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 1]) = 11.615
P(y=1| [0 1]) = 10.688
Our model predicted as 0, but the actual value was: 0 

P(y=0| [1 1]) = 11.475
P(y=1| [1 1]) = 12.094
Our model predicted as 1, but the actual value was: 0 

P(y=0| [1 1]) = 11.475
P(y=1| [1 1]) = 12.094
Our model predicted as 1, but the actual value was: 1 

P(y=0| [0 0]) = 13.769
P(y=1| [0 0]) = 12.553
Our model predicted as 0, but the actual value was: 0 

P(y=0| [1 0]) = 13.603
P(y=1| [1 0]) = 14.204
Our model predicted as 1, but the a