In [None]:
import numpy as np
from sklearn.datasets import make_blobs
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.stats import norm

In [None]:
# Creating data with discrete (binary) values

SEED = 0          # fixed seed so a fresh "Restart & Run All" regenerates the same data
N_SAMPLES = 1000  # number of rows to generate
rng = np.random.default_rng(SEED)  # local generator; does not touch the global RNG state

X0 = rng.integers(2, size=N_SAMPLES)  # Creating X0 column (values in {0, 1})
X1 = rng.integers(2, size=N_SAMPLES)  # Creating X1 column (values in {0, 1})
y = rng.integers(2, size=N_SAMPLES)   # Target / y column (values in {0, 1})

# Arranging input as a numpy array of shape (N_SAMPLES, 2).
# np.stack (instead of np.concatenate) places the two columns along a new axis.
X = np.stack([X0, X1], axis=1)

In [None]:
# Preview the first rows of both features and the target as a table

pd.DataFrame({"X0": X0, "X1": X1, "y": y}).head()

Unnamed: 0,X0,X1,y
0,1,1,0
1,0,0,0
2,1,0,0
3,1,0,0
4,0,0,1


In [None]:
class NaiveBayes:
    """Naive Bayes classifier for two binary (0/1) features and a binary target.

    Typical use::

        nb = NaiveBayes()
        nb.run_model()      # estimate priors and smoothed likelihoods
        nb.misclassified()  # print the error count on the test split
        nb.predict()        # print per-sample scores and predictions

    Parameters
    ----------
    features : array-like, optional
        Feature matrix; only columns 0 and 1 are used. When omitted, the
        notebook-level variable ``X`` is used (original behaviour).
    labels : array-like, optional
        Target vector with classes 0 and 1. When omitted, the notebook-level
        variable ``y`` is used (original behaviour).
    """

    def __init__(self, features=None, labels=None):
        # Fall back to the notebook globals so that ``NaiveBayes()`` keeps working.
        self.X = X if features is None else np.asarray(features)
        self.y = y if labels is None else np.asarray(labels)
        # 70/30 train/test split with a fixed seed for reproducibility.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, random_state=0, test_size=0.3
        )

    def fit_dist(self, data):
        """Return a frozen normal distribution fitted to ``data``.

        The mean and (population) standard deviation of ``data`` are used as
        the distribution parameters. This helper is not called by the other
        methods of this class, which work on discrete counts.
        """
        mu = np.mean(data)
        sigma = np.std(data)

        dist = norm(mu, sigma)

        return dist

    def probability(self, X, prior, dist1, dist2):
        """Return the unnormalised class score ``prior * dist1 * dist2``.

        ``X`` (the sample) is accepted for interface compatibility but is not
        used; ``dist1`` and ``dist2`` are the per-feature likelihoods already
        looked up for that sample.
        """
        return prior * dist1 * dist2

    @staticmethod
    def _smoothed_likelihood(column, value, n_categories, lap=1):
        """Laplace-smoothed estimate of P(feature == value) within one class.

        ``column`` holds the feature values of the training rows of that class.
        """
        matches = np.count_nonzero(column == value)
        return (matches + lap) / (len(column) + n_categories * lap)

    def run_model(self):
        """Estimate class priors and Laplace-smoothed likelihoods from the training split.

        Results are stored on the instance: ``prior_y0`` / ``prior_y1`` and
        ``X<f><v>_<c>_train`` = P(X<f> == <v> | y == <c>) for feature ``f``,
        value ``v`` and class ``c`` (each 0 or 1).
        """
        # Separating data according to class to make it easier
        self.X_0_train = self.X_train[self.y_train == 0]
        self.X_1_train = self.X_train[self.y_train == 1]

        self.X0_0_train = self.X_0_train[:, 0]  # column X0, rows of class 0
        self.X1_0_train = self.X_0_train[:, 1]  # column X1, rows of class 0
        self.X0_1_train = self.X_1_train[:, 0]  # column X0, rows of class 1
        self.X1_1_train = self.X_1_train[:, 1]  # column X1, rows of class 1

        # Priors: fraction of training rows in each class
        self.prior_y0 = len(self.X_0_train) / len(self.X_train)
        self.prior_y1 = len(self.X_1_train) / len(self.X_train)

        # Likelihoods with Laplace (add-one) smoothing
        lap = 1
        # Number of categories a feature can take (0 or 1 here). Taken from the
        # instance's own data rather than from a notebook-level global.
        uval = len(np.unique(self.X))

        # For class 0
        self.X00_0_train = self._smoothed_likelihood(self.X0_0_train, 0, uval, lap)
        self.X01_0_train = self._smoothed_likelihood(self.X0_0_train, 1, uval, lap)
        self.X10_0_train = self._smoothed_likelihood(self.X1_0_train, 0, uval, lap)
        self.X11_0_train = self._smoothed_likelihood(self.X1_0_train, 1, uval, lap)

        # For class 1
        self.X00_1_train = self._smoothed_likelihood(self.X0_1_train, 0, uval, lap)
        self.X01_1_train = self._smoothed_likelihood(self.X0_1_train, 1, uval, lap)
        self.X10_1_train = self._smoothed_likelihood(self.X1_1_train, 0, uval, lap)
        self.X11_1_train = self._smoothed_likelihood(self.X1_1_train, 1, uval, lap)

    def _class_scores(self, sample):
        """Return ``(py0, py1)``, the unnormalised scores of ``sample`` for class 0 and 1.

        Requires ``run_model()`` to have been called. The likelihoods selected
        for the sample are also stored in ``dist_X0y0``, ``dist_X1y0``,
        ``dist_X0y1`` and ``dist_X1y1``.
        """
        # The same rule applies to both classes: a feature value of 0 selects
        # P(feature == 0 | class), anything else selects P(feature == 1 | class).
        self.dist_X0y0 = self.X00_0_train if sample[0] == 0 else self.X01_0_train
        self.dist_X1y0 = self.X10_0_train if sample[1] == 0 else self.X11_0_train
        self.dist_X0y1 = self.X00_1_train if sample[0] == 0 else self.X01_1_train
        self.dist_X1y1 = self.X10_1_train if sample[1] == 0 else self.X11_1_train

        py0 = self.probability(sample, self.prior_y0, self.dist_X0y0, self.dist_X1y0)
        py1 = self.probability(sample, self.prior_y1, self.dist_X0y1, self.dist_X1y1)
        return py0, py1

    def _predicted_classes(self):
        """Return an integer array with the predicted class of every test sample."""
        return np.array(
            [int(np.argmax(self._class_scores(sample))) for sample in self.X_test],
            dtype=int,
        )

    def predict(self):
        """Print the class scores, predicted class and actual class for each test sample.

        The printed values are unnormalised (prior times likelihoods); the
        predicted class is the one with the larger score.
        """
        for sample, target in zip(self.X_test, self.y_test):
            py0, py1 = self._class_scores(sample)

            print('P(y=0| %s) = % 3f' % (sample, py0))
            print('P(y=1| %s) = % 3f' % (sample, py1))

            print(' \n Model predicted class {}       Actual class was {} \n'. format(np.argmax([py0, py1]), target))

            print('------------------------------------------------------------------------------------------')

    def misclassified(self):
        """Print how many test samples are assigned a class different from their label."""
        predictions = self._predicted_classes()
        # Compare every prediction with its own label, element by element.
        n_wrong = int(np.count_nonzero(predictions != np.asarray(self.y_test)))

        print (" Out of the {} samples , {} were misclassified ".format(len(self.y_test), n_wrong))
        
    







In [None]:
# Build the classifier (this splits the notebook-level X, y into train/test sets),
# then estimate the class priors and Laplace-smoothed likelihoods from the training split.
nb = NaiveBayes()
nb.run_model()

In [None]:
# Print the misclassification count for the held-out test split.
nb.misclassified()


 Out of the 300 samples , 224 were misclassified 


In [None]:
# Print the per-class scores, the predicted class and the actual class for every test sample.
nb.predict()

P(y=0| [1 0]) =  0.124302
P(y=1| [1 0]) =  0.121851
 
 Model predicted class 0       Actual class was 1 

------------------------------------------------------------------------------------------
P(y=0| [1 1]) =  0.124302
P(y=1| [1 1]) =  0.135225
 
 Model predicted class 1       Actual class was 1 

------------------------------------------------------------------------------------------
P(y=0| [1 0]) =  0.124302
P(y=1| [1 0]) =  0.121851
 
 Model predicted class 0       Actual class was 1 

------------------------------------------------------------------------------------------
P(y=0| [0 0]) =  0.129984
P(y=1| [0 0]) =  0.111080
 
 Model predicted class 0       Actual class was 0 

------------------------------------------------------------------------------------------
P(y=0| [0 0]) =  0.129984
P(y=1| [0 0]) =  0.111080
 
 Model predicted class 0       Actual class was 0 

------------------------------------------------------------------------------------------
P(y=0| [0 0]) =