In [1]:
import numpy as np

In [2]:
def fit(X_train, Y_train):
    """Build the count table used by the categorical naive Bayes classifier.

    Args:
        X_train: 2-D NumPy array of discrete feature values, one row per sample.
        Y_train: 1-D NumPy array of class labels aligned with the rows of X_train.

    Returns:
        dict with the layout
            model['total_data']          -> number of training samples
            model[label]['total_count']  -> number of samples carrying that label
            model[label][j][value]       -> number of samples with that label
                                            whose feature j equals value
        Every value seen in column j of the whole training set gets an entry
        for every label, with a count of 0 where the pair never occurs.
    """
    model = {'total_data': len(Y_train)}
    for label in set(Y_train):
        in_class = (Y_train == label)
        X_class = X_train[in_class]
        summary = {'total_count': len(Y_train[in_class])}
        for col in range(X_train.shape[1]):
            class_column = X_class[:, col]
            # Iterate over the values of the full column, not just this class,
            # so that unseen (label, value) pairs are stored with count 0.
            summary[col] = {
                value: (class_column == value).sum()
                for value in set(X_train[:, col])
            }
        model[label] = summary
    return model

In [3]:
# Note: the probabilities are handled in log space. Multiplying many small
# probabilities together can shrink the result until it underflows to 0,
# so logarithms are used to avoid that.

def probability(dictionary , x , current_class):
    """Return the naive Bayes log-score of ``current_class`` for the sample ``x``.

    The score is ``log P(class) + sum_j log P(x[j] | class)``, where each
    conditional probability uses add-one (Laplace) smoothing:
    ``(count(class, x[j]) + 1) / (count(class) + number of known values of feature j)``.

    Args:
        dictionary: Count table with the entries read below:
            ``dictionary['total_data']``, ``dictionary[current_class]['total_count']``
            and ``dictionary[current_class][j][value]``.
        x: 1-D NumPy array of feature values for one sample.
        current_class: Key of the class to score.

    Returns:
        The log-score as a float (always <= 0). Higher means more likely.
    """
    class_counts = dictionary[current_class]
    # log(a / b) == log(a) - log(b). Dividing one log by the other does not
    # yield a log-probability and distorts the ranking between classes.
    output = np.log(class_counts['total_count']) - np.log(dictionary['total_data'])
    num_features = x.shape[-1]
    for j in range(num_features):
        value_counts = class_counts[j]
        # A value that never appeared in the training data has no entry;
        # treat it as a zero count so smoothing handles it instead of KeyError.
        count_current_class_with_value_xj = value_counts.get(x[j], 0) + 1
        count_current_class = class_counts['total_count'] + len(value_counts)
        output = output + (np.log(count_current_class_with_value_xj) - np.log(count_current_class))
    return output

In [4]:
def predictSinglePoint(dictionary , x):
    """Return the class whose score from ``probability`` is highest for ``x``.

    Every key of ``dictionary`` except the bookkeeping entry 'total_data' is
    treated as a class. On ties the class encountered first wins. If there is
    no class at all, -1 is returned.
    """
    winner = -1
    top_score = None  # None until the first class has been scored
    for label in dictionary:
        if label == 'total_data':
            continue
        score = probability(dictionary, x, label)
        # The first scored class is always accepted; later ones must be strictly better.
        if top_score is None or score > top_score:
            top_score, winner = score, label
    return winner

In [5]:
def predict(dictionary , X_test):
    """Return a list with the predicted class for each row of ``X_test``.

    Each row is classified independently by ``predictSinglePoint`` using the
    count table ``dictionary``; the output order follows the row order.
    """
    return [predictSinglePoint(dictionary, row) for row in X_test]

In [6]:
def makeLabelled(column):
    """Discretise a numeric column in place into four ordinal labels around its mean.

    With ``m = column.mean()`` the thresholds are ``0.5 * m``, ``m`` and
    ``1.5 * m``; they are computed once, before any value is overwritten.
    Conditions are tested in order and the first match wins:

        value < 0.5 * m  -> 0
        value < m        -> 1
        value < 1.5 * m  -> 2
        anything else    -> 3   (including NaN)

    The bins are half-open, so a value that is exactly equal to a threshold
    belongs to the bin that starts at that threshold.

    Args:
        column: 1-D NumPy array (or array view) of numeric values. It is
            overwritten with the labels.

    Returns:
        The same ``column`` object, now holding the labels.
    """
    second_limit = column.mean()
    first_limit = second_limit * 0.5
    third_limit = second_limit * 1.5
    # np.select picks the first condition that holds, like an if/elif chain.
    # Using plain "<" at each step keeps values that sit exactly on a
    # threshold out of the catch-all label 3.
    labels = np.select(
        [column < first_limit, column < second_limit, column < third_limit],
        [0, 1, 2],
        default=3,
    )
    # Write back through the original buffer so callers holding a view see the labels.
    column[:] = labels
    return column

In [7]:
# Load the iris dataset bundled with scikit-learn.
# X is the feature matrix, Y the class label of each row.
from sklearn import datasets
iris = datasets.load_iris()
X = iris.data
Y = iris.target 

In [8]:
# Replace every feature column of X with the 0-3 labels produced by makeLabelled.
# X is overwritten, so re-running this cell on its own would bin the already-binned
# labels again; reload the data first.
for i in range(0 , X.shape[1]):
    X[: , i] = makeLabelled(X[: , i])

In [9]:
# Hold out 25% of the rows as a test set; the fixed random_state keeps the split repeatable.
from sklearn.model_selection import train_test_split
X_train , X_test , Y_train , Y_test = train_test_split(X , Y , random_state = 0 , test_size = 0.25)

In [10]:
# Build the per-class count table of the hand-written classifier from the training split.
dictionary = fit(X_train , Y_train)

In [11]:
# Classify every test row with the hand-written classifier.
Y_pred = predict(dictionary , X_test)

In [12]:
from sklearn.metrics import classification_report , confusion_matrix

In [13]:
# Confusion matrix of the hand-written classifier on the test split.
print(confusion_matrix(Y_test , Y_pred))

[[13  0  0]
 [ 0 16  0]
 [ 0  1  8]]


In [14]:
# Per-class precision, recall and F1 of the hand-written classifier on the test split.
print(classification_report(Y_test , Y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.94      1.00      0.97        16
           2       1.00      0.89      0.94         9

    accuracy                           0.97        38
   macro avg       0.98      0.96      0.97        38
weighted avg       0.98      0.97      0.97        38



In [15]:
# Baseline: scikit-learn's GaussianNB trained and evaluated on the same binned split.
# Y_pred is overwritten from here on, so the hand-written classifier's predictions
# are no longer available after this cell.
from sklearn import naive_bayes
alg1 = naive_bayes.GaussianNB()
alg1.fit(X_train , Y_train)
Y_pred = alg1.predict(X_test)
print(confusion_matrix(Y_test , Y_pred))
print(classification_report(Y_test , Y_pred))

[[11  2  0]
 [ 0 16  0]
 [ 0  3  6]]
              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.76      1.00      0.86        16
           2       1.00      0.67      0.80         9

    accuracy                           0.87        38
   macro avg       0.92      0.84      0.86        38
weighted avg       0.90      0.87      0.87        38



In [16]:
# Baseline: scikit-learn's BernoulliNB on the same binned split.
# NOTE(review): BernoulliNB models binary features, so it presumably collapses the
# 0-3 bin labels to two values before fitting. Confirm against the sklearn docs;
# CategoricalNB may be the fairer comparison for these features.
from sklearn import naive_bayes
alg2 = naive_bayes.BernoulliNB()
alg2.fit(X_train , Y_train)
Y_pred = alg2.predict(X_test)
print(confusion_matrix(Y_test , Y_pred))
print(classification_report(Y_test , Y_pred))

[[11  0  2]
 [ 0  0 16]
 [ 0  0  9]]
              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.00      0.00      0.00        16
           2       0.33      1.00      0.50         9

    accuracy                           0.53        38
   macro avg       0.44      0.62      0.47        38
weighted avg       0.42      0.53      0.43        38



  _warn_prf(average, modifier, msg_start, len(result))


In [17]:
# Baseline: scikit-learn's MultinomialNB on the same binned split.
# NOTE(review): MultinomialNB presumably interprets each feature value as a count,
# whereas here the values are ordinal bin labels (0-3). Confirm against the sklearn
# docs; CategoricalNB may be the fairer comparison for these features.
from sklearn import naive_bayes
alg3 = naive_bayes.MultinomialNB()
alg3.fit(X_train , Y_train)
Y_pred = alg3.predict(X_test)
print(confusion_matrix(Y_test , Y_pred))
print(classification_report(Y_test , Y_pred))

[[11  2  0]
 [ 0  0 16]
 [ 0  0  9]]
              precision    recall  f1-score   support

           0       1.00      0.85      0.92        13
           1       0.00      0.00      0.00        16
           2       0.36      1.00      0.53         9

    accuracy                           0.53        38
   macro avg       0.45      0.62      0.48        38
weighted avg       0.43      0.53      0.44        38

