#  Gaussian Naive Bayes classification

In [9]:
#importing the libraries
import numpy as np
import scipy as sp
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [10]:
#loading the dataset
X,y = load_iris(return_X_y=True)

n,d = X.shape
print()
print ("We have {} samples".format(n))
print ("With {} features each".format(d))

#dividing into test and train dataset
X_train = X[range(0,150,2),:]
y_train = y[range(0,150,2)]

X_test = X[range(1,150,2),:]
y_test = y[range(1,150,2)]

n_train = len(X_train)
n_test = len(X_test)
print()
print ("{} samples in the training set".format(n_train))
print ("{} samples in the test set".format(n_test))

iris_dataset = load_iris()
Cn = len(iris_dataset.target_names)
print("{} class labels".format(Cn))

#mean matrix
mu_mat = np.zeros((Cn,d), dtype=float)

#stddev matrix
sig_mat = np.zeros((Cn,d), dtype=float)


for i in range(0,Cn):
    mu_mat[i] = X_train[y_train==i].mean(axis=0)
    sig_mat[i] = X_train[y_train==i].std(axis=0)

print()
print(mu_mat)
print()
print(sig_mat)


We have 150 samples
With 4 features each

75 samples in the training set
75 samples in the test set
3 class labels

[[5.024 3.48  1.456 0.228]
 [5.992 2.776 4.308 1.352]
 [6.504 2.936 5.564 2.076]]

[[0.38291513 0.31874755 0.20214846 0.07756288]
 [0.5447348  0.32897416 0.4698255  0.18998947]
 [0.59091793 0.28125433 0.53432574 0.27317394]]


In [13]:
epsilon = 0.01

# class_likelihood (Prior)
class_likelihood = np.zeros(Cn, dtype=float)
for i in range(0,Cn):
    class_likelihood[i] = (y_train==i).sum()/n_train

print("Likelihood:",class_likelihood)
print()

# Class_given_data (Posterior)
class_given_data_mat = np.zeros((n_test,Cn), dtype=float)
for i in range(0,n_test):
    for k in range(0,Cn):
        class_given_data = np.log(class_likelihood[k])
        for j in range(0,d):
            class_given_data = class_given_data + np.log(sp.stats.norm.pdf(X_test[i,j],mu_mat[k,j],sig_mat[k,j])*2*epsilon)
        class_given_data_mat[i,k] = class_given_data 

print ("Probability that the Model predicts the correct class label:")
print ((class_given_data_mat.argmax(axis=1)==y_test).sum()/n_test)

Likelihood: [0.33333333 0.33333333 0.33333333]

Probability that the Model predicts the correct class label:
0.96


# Multinomial Naive Bayes classification

In [17]:
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

MNBclf = MultinomialNB()
MNBclf.fit(X, y)
prediction = MNBclf.predict(X_test)
print("Prediction :- \n",prediction)

print("\nAccuracy score :-\n",metrics.accuracy_score(y_test, prediction, normalize= True))

print("\nClassification report :- \n",metrics.classification_report(y_test, prediction))

print("\nConfusion Matrix :-\n",metrics.confusion_matrix(y_test,prediction))

Prediction :- 
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 2 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 2 2 2 2
 2]

Accuracy score :-
 0.9466666666666667

Classification report :- 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        25
           1       0.89      0.96      0.92        25
           2       0.96      0.88      0.92        25

    accuracy                           0.95        75
   macro avg       0.95      0.95      0.95        75
weighted avg       0.95      0.95      0.95        75


Confusion Matrix :-
 [[25  0  0]
 [ 0 24  1]
 [ 0  3 22]]


# Bernoulli Naive Bayes classification

In [18]:
from sklearn.naive_bayes import BernoulliNB

Bnclf = BernoulliNB()
Bnclf.fit(X, y)
prediction = Bnclf.predict(X_test)
print("Prediction :- \n",prediction)

print("\nAccuracy score :-\n",metrics.accuracy_score(y_test, prediction, normalize= True))

print("\nClassification report :- \n",metrics.classification_report(y_test, prediction))

print("\nConfusion Matrix :-\n",metrics.confusion_matrix(y_test,prediction))

Prediction :- 
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0]

Accuracy score :-
 0.3333333333333333

Classification report :- 
               precision    recall  f1-score   support

           0       0.33      1.00      0.50        25
           1       0.00      0.00      0.00        25
           2       0.00      0.00      0.00        25

    accuracy                           0.33        75
   macro avg       0.11      0.33      0.17        75
weighted avg       0.11      0.33      0.17        75


Confusion Matrix :-
 [[25  0  0]
 [25  0  0]
 [25  0  0]]


  'precision', 'predicted', average, warn_for)
