# Multinomial Naive Bayes

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,accuracy_score


In [2]:
class NaiveBayes:    
    def __init__(self):
        self.x=None
        self.y=None 
        self.bias=None
        self.weights=None
        self.values= None
        self.accuracy=None
    
    def NaiveBayes_Train(self,x,y):        
        self.x = x
        self.y = y
        r,c = x.shape       
        self.Class= list(np.unique(y)) 
        print(" ")
        print(" ")
        print(".......Total Number Of Classes present in Data....") 
        print(self.Class)        
        
             #Class Probability: P(y=k)        
        total_num =len(y)
        class_prob = {}
        log_class_prob={}
        for k in self.Class:
           # using multi laplace smoothing
            class_prob[k] =(np.sum(np.equal(y,k))/(total_num))            
            log_class_prob[k]=np.log(class_prob[k])
        print(" ")
        print("class_prob ")
        print(class_prob)
        self.class_prob=log_class_prob
        print(" ")
        print("log_class_prob ")
        print(self.class_prob)
        #Conditional Probability: P( xj | y=k )        
        conditional_prob = {}
        conditional_prob1 = {}
        attri_prob = {}
        log_attri_prob={}
        for k in self.Class:
            conditional_prob[k] = {}
            conditional_prob1[k] = {}
            a= x[np.equal(y,k)]
            total_sum =np.array(a).sum()                
            r1,c1=a.shape
            for i in a:                    
                    column_sum = np.array(a[i]).sum()
                    
                    attri_prob[i] = ((( column_sum + 1 ) / (total_sum + c)))
                    conditional_prob1[k][i] =attri_prob[i]
                    log_attri_prob[i]=np.log(attri_prob[i])
                    conditional_prob[k][i] =log_attri_prob[i]
        
        self.conditional_prob=conditional_prob
        for i in conditional_prob1:
            print(" ")
            print("conditional_prob for class--->{} ".format(i))
            print(conditional_prob1[i])
        
        for i in conditional_prob:
            print(" ")
            print("log _conditional_prob for class--->{} ".format(i))
            print(conditional_prob [i])

        
    def NaiveBayes_Test(self,x,y):        
        y_pred = [ ]
        x=np.array(x)
        for r in range(len(x)):
            y_pred.append(self.predict_sample(x[r],y,self.conditional_prob,self.class_prob))  
        
        cm1=self.confusion_mat(y,y_pred)
        self.performance(cm1)
        print(" ")
        print("confusion_matrix --------- ")
        print(cm1)
        return (self.accuracy)
   
    
    def predict_sample(self,x_test,y,conditional_prob,class_prob):
        #calculate its posterior probability  p(y=k) * p(x/y=k)      
        Y_pred = []
        posterior_prob={}
        prob=[ ]
        
        for k in self.Class:
            product=class_prob[k]
            for m, i in zip(range(len(conditional_prob[k].keys())), range(len(x_test))):
                product+=((x_test[i])*(conditional_prob[k][m]))
                posterior_prob[k]=product                
            prob.append(posterior_prob[k])
        
        #whereever max probability return that index for class prediction        
        return(self.Class[np.argmax(prob)])   
    
    
    def confusion_mat(self,y_test,y_pred):        
        cm = confusion_matrix(y_test,y_pred)
        return cm
        
    def performance(self,cm):
        r,c=cm.shape
        if(r==2):
            tp = cm[0][0]
            tn = cm[1][1]
            fp = cm[0][1]
            fn = cm[1][0]            
            
        elif(r==3):
            tp = cm[0][0]
            tn = cm[1][1]+cm[1][2]+cm[2][1]+cm[2][2]
            fp = cm[0][1]+cm[0][2]
            fn = cm[1][0]+cm[2][0]
        else:
            print("Error")
            
        self.accuracy=(tp+tn)/(tp+tn+fp+fn)
        sn = tp/(tp+fn)
        sp = tn/(tn+fp)
        pr = tp/(tp+fp)
        fm = 2*(pr*sn)/(pr+sn)
        print(" ")
        print("Performance of model")
        print(f"Accuracy is {self.accuracy}")
        print(f"Sensitivity is {sn}")
        print(f"Specificity is {sp}")
        print(f"Precission is {pr}")
        print(f"F_measur is {fm}")
        
     
    

In [3]:
def holdout_cross_validation(data):
    r,c=data.shape 
    x = data.iloc[:,range(c-1)]
    y = data.iloc[:,c-1]
    a1=[ ]
    for i in range(2):
        X_train,X_test,Y_train,Y_test = train_test_split(x,y,test_size=0.3) #creating train & test  using random  permutation        
       
        model = NaiveBayes()
        model.NaiveBayes_Train(X_train,Y_train)
        accuracy = model.NaiveBayes_Test(X_test,Y_test)                                 #passing training data & test data to LDA func
        a1.append(accuracy)
    
    Avg_Accuracy=np.mean(a1)
    print(" ")
    print(" ")
    print("Avg_Accuracy")
    print(Avg_Accuracy)
    
    

In [4]:
d8 = pd.read_csv("data8.csv",header  =None)
d8

Unnamed: 0,0,1,2,3
0,12,10,28,0
1,13,10,27,0
2,14,8,28,0
3,14,12,24,0
4,14,13,23,0
...,...,...,...,...
203,20,19,11,1
204,20,21,9,1
205,21,19,10,1
206,21,20,9,1


In [5]:
holdout_cross_validation(d8)

 
 
.......Total Number Of Classes present in Data....
[0, 1]
 
class_prob 
{0: 0.4, 1: 0.6}
 
log_class_prob 
{0: -0.916290731874155, 1: -0.5108256237659907}
 
conditional_prob for class--->0 
{0: 0.3978642783327592, 1: 0.25111953151911814, 2: 0.35101619014812263}
 
conditional_prob for class--->1 
{0: 0.2724557776246267, 1: 0.39168389616356536, 2: 0.33586032621180795}
 
log _conditional_prob for class--->0 
{0: -0.9216443410687359, 1: -1.3818262320159718, 2: -1.046922930801905}
 
log _conditional_prob for class--->1 
{0: -1.3002789619946267, 1: -0.9373001518346804, 2: -1.0910599012425737}
 
Performance of model
Accuracy is 0.9841269841269841
Sensitivity is 1.0
Specificity is 0.9705882352941176
Precission is 0.9666666666666667
F_measur is 0.983050847457627
 
confusion_matrix --------- 
[[29  1]
 [ 0 33]]
 
 
.......Total Number Of Classes present in Data....
[0, 1]
 
class_prob 
{0: 0.4206896551724138, 1: 0.5793103448275863}
 
log_class_prob 
{0: -0.8658598782472632, 1: -0.54591694357