In [30]:
# Required libs
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from dataclasses import dataclass

In [29]:
# Creating a sample data set of 2 cols x1 and x2
x1= np.random.randint(2, size = 1000)
x2= np.random.randint(2, size = 1000)
y= np.random.randint(2, size = 1000)

In [6]:
#creating an array for discrete generated variables 
X= np.vstack((x1,x2)).T 
X

array([[0, 1],
       [1, 0],
       [1, 1],
       ...,
       [1, 1],
       [0, 0],
       [0, 1]])

In [11]:
type(X)

numpy.ndarray

In [26]:
@dataclass
class NaiveBayes: 

    X: np.ndarray
    y: np.ndarray
    laplace: int = 2

    def __post_init__(self):
        self.X_train, self.X_test, self.y_train, self.y_test = self.trainTestSplit()
        self.fit()
        
    def trainTestSplit(self):
        X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size = 0.3, random_state = 15)
        return X_train, X_test, y_train, y_test
           
    def probability(self, X, prior, dist1, dist2):
           # since our data is 2 dims, we must have the distributions of feature 1 and feature 2
        return prior * dist1 * dist2

    def fit(self):

        self.X0_train = self.X_train[self.y_train == 0]
        self.X1_train = self.X_train[self.y_train == 1]
        
        # X0, class= 0
        self.X00_train = self.X0_train[:, 0]
        # X1, class 0
        self.X10_train = self.X0_train[:, 1]
        # X0, class= 1
        self.X01_train = self.X1_train[:, 0]
        # X1, class= 1
        self.X11_train = self.X1_train[:, 1]
        
        # Calculating the likelihoods
        
        # class 0
        self.X00y0_train = self.X00_train[self.X00_train == 0]
        self.X01y0_train = self.X00_train[self.X00_train == 1]
        self.X10y0_train = self.X10_train[self.X10_train == 0]
        self.X11y0_train = self.X10_train[self.X10_train == 1]
        
        # class 1
        self.X00y1_train = self.X01_train[self.X01_train == 0]
        self.X01y1_train = self.X01_train[self.X01_train == 1]
        self.X10y1_train = self.X11_train[self.X11_train == 0]
        self.X11y1_train = self.X11_train[self.X11_train == 1]
              
        # calculating priors on y
        self.prior_y0 = len(self.X0_train)/len(self.X_train)
        self.prior_y1 = len(self.X1_train)/len(self.X_train)
        
        #k is the number of features in dataset
        k = 2
        laplace = self.laplace
        print("Choosen Laplace Smoothing value:",laplace)
               
        # calculating likelihood functions for x0 and x1 - class 0
        self.likelihood_X00y0 = (len(self.X00y0_train)+laplace)/(len(self.X0_train)+(k*laplace)) 
        self.likelihood_X01y0 = (len(self.X01y0_train)+laplace)/(len(self.X0_train)+(k*laplace))
        self.likelihood_X10y0 = (len(self.X10y0_train)+laplace)/(len(self.X0_train)+(k*laplace))
        self.likelihood_X11y0 = (len(self.X11y0_train)+laplace)/(len(self.X0_train)+(k*laplace))
        
         # calculating likelihood functions for x0 and x1 - class 1       
        self.likelihood_X00y1 = (len(self.X00y1_train)+laplace)/(len(self.X1_train)+(k*laplace))
        self.likelihood_X01y1 = (len(self.X01y1_train)+laplace)/(len(self.X1_train)+(k*laplace))
        self.likelihood_X10y1 = (len(self.X10y1_train)+laplace)/(len(self.X1_train)+(k*laplace))
        self.likelihood_X11y1 = (len(self.X11y1_train)+laplace)/(len(self.X1_train)+(k*laplace))
        
    def predict(self):
        
        for sample, target in zip(self.X_test, self.y_test):
            
            # if sample belongs to class 0

            if sample[0] == 1:
                self.dist_X0y0 = self.likelihood_X01y0 #X0=1|y=0
            else:
                self.dist_X0y0 = self.likelihood_X00y0 #X0=0|y=0
                
            if sample[1] == 1:
                self.dist_X1y0 = self.likelihood_X11y0 #X1=1|y=0
            else:
                self.dist_X1y0 = self.likelihood_X10y0 #X1=0|y=0
                
            # if sample belongs to class 1
            
            if sample[0] == 1:
                self.dist_X0y1 = self.likelihood_X01y1 #X0=1|y=1
            else:
                self.dist_X0y1 = self.likelihood_X00y1 #X0=0|y=1
                 
            if sample[1] == 1:
                self.dist_X1y1 = self.likelihood_X11y1 #X1=1|y=1
            else:
                self.dist_X1y1 = self.likelihood_X10y1 #X1=0|y=1
                           
            py0 = self.probability(sample, self.prior_y0, self.dist_X0y0, self.dist_X1y0)
            py1 = self.probability(sample, self.prior_y1, self.dist_X0y1, self.dist_X1y1)
            
            print('P(y=0| %s) = %.3f' % (sample, py0*100))
            print('P(y=1| %s) = %.3f' % (sample, py1*100))
            print('Model predicted class: {} Actual class: {} \n'.format(np.argmax([py0*100, py1*100]), target)) 

In [27]:
nb = NaiveBayes(X, y)

Choosen Laplace Smoothing value: 2


In [28]:
nb.predict()

P(y=0| [0 1]) = 13.818
P(y=1| [0 1]) = 12.458
Model predicted class: 0 Actual class: 0 

P(y=0| [1 0]) = 12.123
P(y=1| [1 0]) = 11.611
Model predicted class: 0 Actual class: 0 

P(y=0| [1 0]) = 12.123
P(y=1| [1 0]) = 11.611
Model predicted class: 0 Actual class: 0 

P(y=0| [1 1]) = 13.594
P(y=1| [1 1]) = 11.543
Model predicted class: 0 Actual class: 1 

P(y=0| [0 1]) = 13.818
P(y=1| [0 1]) = 12.458
Model predicted class: 0 Actual class: 1 

P(y=0| [0 1]) = 13.818
P(y=1| [0 1]) = 12.458
Model predicted class: 0 Actual class: 1 

P(y=0| [1 0]) = 12.123
P(y=1| [1 0]) = 11.611
Model predicted class: 0 Actual class: 1 

P(y=0| [1 1]) = 13.594
P(y=1| [1 1]) = 11.543
Model predicted class: 0 Actual class: 0 

P(y=0| [0 1]) = 13.818
P(y=1| [0 1]) = 12.458
Model predicted class: 0 Actual class: 1 

P(y=0| [0 0]) = 12.322
P(y=1| [0 0]) = 12.531
Model predicted class: 1 Actual class: 1 

P(y=0| [0 0]) = 12.322
P(y=1| [0 0]) = 12.531
Model predicted class: 1 Actual class: 0 

P(y=0| [1 1]) = 13.59