In [3]:
import math
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from scipy.stats import norm

In [4]:
# Creating a x1, x2 and y with random var {0, 1} of len 1000
x1= np.random.randint(2, size = 1000)
x2= np.random.randint(2, size = 1000)
y= np.random.randint(2, size = 1000)

In [5]:
#creating input matrix 
X= np.vstack((x1,x2)).T 

In [6]:
class NaiveBayes:
    def __init__(self, X, y, laplace=0):
        self.X = X
        self.y = y
        self.laplace = laplace
        
    def trainTestSplit(self):
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, 
                                                                                self.y, 
                                                                                test_size = 0.3, 
                                                                                random_state = 40)
           
    def probability(self, X, prior, dist1, dist2):
        return prior * dist1 * dist2

    def runModel(self):
        self.trainTestSplit()
        self.X0_train = self.X_train[self.y_train == 0]
        self.X1_train = self.X_train[self.y_train == 1]
        self.X00_train = self.X0_train[:, 0]
        self.X10_train = self.X0_train[:, 1]
        self.X01_train = self.X1_train[:, 0]
        self.X11_train = self.X1_train[:, 1]
        
        # class 0
        self.X00y0_train = self.X00_train[self.X00_train == 0]
        self.X01y0_train = self.X00_train[self.X00_train == 1]
        self.X10y0_train = self.X10_train[self.X10_train == 0]
        self.X11y0_train = self.X10_train[self.X10_train == 1]
        
        # class 1
        self.X00y1_train = self.X01_train[self.X01_train == 0]
        self.X01y1_train = self.X01_train[self.X01_train == 1]
        self.X10y1_train = self.X11_train[self.X11_train == 0]
        self.X11y1_train = self.X11_train[self.X11_train == 1]
              
        # priors on y
        self.prior_y0 = len(self.X0_train)/len(self.X_train)
        self.prior_y1 = len(self.X1_train)/len(self.X_train)
        
        k = 2                                                     #k is the number of features in dataset
        laplace = self.laplace
        print(laplace)

        self.likelihood_X00y0 = (len(self.X00y0_train)+laplace)/(len(self.X0_train)+(k*laplace)) 
        self.likelihood_X01y0 = (len(self.X01y0_train)+laplace)/(len(self.X0_train)+(k*laplace))
        self.likelihood_X10y0 = (len(self.X10y0_train)+laplace)/(len(self.X0_train)+(k*laplace))
        self.likelihood_X11y0 = (len(self.X11y0_train)+laplace)/(len(self.X0_train)+(k*laplace))
           
        self.likelihood_X00y1 = (len(self.X00y1_train)+laplace)/(len(self.X1_train)+(k*laplace))
        self.likelihood_X01y1 = (len(self.X01y1_train)+laplace)/(len(self.X1_train)+(k*laplace))
        self.likelihood_X10y1 = (len(self.X10y1_train)+laplace)/(len(self.X1_train)+(k*laplace))
        self.likelihood_X11y1 = (len(self.X11y1_train)+laplace)/(len(self.X1_train)+(k*laplace))
        
    def predict(self):
        for sample, target in zip(self.X_test, self.y_test):
            if sample[0] == 1:
                self.dist_X0y0 = self.likelihood_X01y0 #X0=1|y=0
            else:
                self.dist_X0y0 = self.likelihood_X00y0 #X0=0|y=0
                
            if sample[1] == 1:
                self.dist_X1y0 = self.likelihood_X11y0 #X1=1|y=0
            else:
                self.dist_X1y0 = self.likelihood_X10y0 #X1=0|y=0
            
            if sample[0] == 1:
                self.dist_X0y1 = self.likelihood_X01y1 #X0=1|y=1
            else:
                self.dist_X0y1 = self.likelihood_X00y1 #X0=0|y=1
                 
            if sample[1] == 1:
                self.dist_X1y1 = self.likelihood_X11y1 #X1=1|y=1
            else:
                self.dist_X1y1 = self.likelihood_X10y1 #X1=0|y=1
                           
            py0 = self.probability(sample, self.prior_y0, self.dist_X0y0, self.dist_X1y0)
            py1 = self.probability(sample, self.prior_y1, self.dist_X0y1, self.dist_X1y1)
            
            print('P(y=0| %s) = %.3f' % (sample, py0*100))
            print('P(y=1| %s) = %.3f' % (sample, py1*100))
            print('Model predicted class {} and the truth was: {} \n'.format(np.argmax([py0*100, py1*100]), target)) 

### Code Without Laplace Smoothing

In [7]:
nb = NaiveBayes(X, y)

In [8]:
nb.runModel()

0


In [9]:
nb.predict()

P(y=0| [1 1]) = 14.611
P(y=1| [1 1]) = 11.966
Model predicted class 0 and the truth was: 0 

P(y=0| [0 0]) = 10.611
P(y=1| [0 0]) = 12.966
Model predicted class 1 and the truth was: 1 

P(y=0| [0 0]) = 10.611
P(y=1| [0 0]) = 12.966
Model predicted class 1 and the truth was: 0 

P(y=0| [1 1]) = 14.611
P(y=1| [1 1]) = 11.966
Model predicted class 0 and the truth was: 0 

P(y=0| [0 0]) = 10.611
P(y=1| [0 0]) = 12.966
Model predicted class 1 and the truth was: 0 

P(y=0| [0 0]) = 10.611
P(y=1| [0 0]) = 12.966
Model predicted class 1 and the truth was: 0 

P(y=0| [0 1]) = 11.961
P(y=1| [0 1]) = 12.892
Model predicted class 1 and the truth was: 0 

P(y=0| [1 0]) = 12.961
P(y=1| [1 0]) = 12.034
Model predicted class 0 and the truth was: 0 

P(y=0| [0 0]) = 10.611
P(y=1| [0 0]) = 12.966
Model predicted class 1 and the truth was: 1 

P(y=0| [0 0]) = 10.611
P(y=1| [0 0]) = 12.966
Model predicted class 1 and the truth was: 0 

P(y=0| [1 0]) = 12.961
P(y=1| [1 0]) = 12.034
Model predicted class 0 

### Code with Laplace Smoothing

In [10]:
nb = NaiveBayes(X, y, laplace=1)

In [11]:
nb.runModel()

1


In [12]:
nb.predict()

P(y=0| [1 1]) = 14.598
P(y=1| [1 1]) = 11.968
Model predicted class 0 and the truth was: 0 

P(y=0| [0 0]) = 10.621
P(y=1| [0 0]) = 12.963
Model predicted class 1 and the truth was: 1 

P(y=0| [0 0]) = 10.621
P(y=1| [0 0]) = 12.963
Model predicted class 1 and the truth was: 0 

P(y=0| [1 1]) = 14.598
P(y=1| [1 1]) = 11.968
Model predicted class 0 and the truth was: 0 

P(y=0| [0 0]) = 10.621
P(y=1| [0 0]) = 12.963
Model predicted class 1 and the truth was: 0 

P(y=0| [0 0]) = 10.621
P(y=1| [0 0]) = 12.963
Model predicted class 1 and the truth was: 0 

P(y=0| [0 1]) = 11.965
P(y=1| [0 1]) = 12.889
Model predicted class 1 and the truth was: 0 

P(y=0| [1 0]) = 12.959
P(y=1| [1 0]) = 12.037
Model predicted class 0 and the truth was: 0 

P(y=0| [0 0]) = 10.621
P(y=1| [0 0]) = 12.963
Model predicted class 1 and the truth was: 1 

P(y=0| [0 0]) = 10.621
P(y=1| [0 0]) = 12.963
Model predicted class 1 and the truth was: 0 

P(y=0| [1 0]) = 12.959
P(y=1| [1 0]) = 12.037
Model predicted class 0 