In [11]:
import numpy as np

In [12]:
def fit(x_train, y_train):
    """Collect the frequency tables used by the categorical Naive Bayes model.

    Parameters
    ----------
    x_train : 2-D NumPy array
        Discrete feature values, indexed as ``x_train[rows, column]``.
    y_train : 1-D NumPy array
        Class label of every row of ``x_train``.

    Returns
    -------
    dict
        ``result["total_data"]`` is ``len(y_train)``. For each class label
        ``c`` found in ``y_train``:

        * ``result[c]["total_count"]`` is the number of rows labelled ``c``;
        * ``result[c][j][v]`` is how many of those rows have value ``v`` in
          feature ``j``. Features are numbered from 1, and ``v`` ranges over
          every value the feature takes anywhere in ``x_train``, so values a
          class never shows are stored with a count of 0.
    """
    # Stored up front so the key is present even when there are no classes.
    result = {"total_data": len(y_train)}

    class_values = set(y_train)
    if not class_values:
        return result

    # Neither of these depends on the class, so compute them once.
    num_features = x_train.shape[1]
    feature_values = [set(x_train[:, j]) for j in range(num_features)]

    for current_class in class_values:
        x_train_current = x_train[y_train == current_class]

        # "total_count" goes in first; feature tables follow under keys 1..n.
        class_counts = {"total_count": len(x_train_current)}
        for j, all_possible_values in enumerate(feature_values, start=1):
            column = x_train_current[:, j - 1]
            class_counts[j] = {
                current_value: (column == current_value).sum()
                for current_value in all_possible_values
            }

        result[current_class] = class_counts

    return result
        

In [13]:
def probability(dictionary, x, current_class):
    """Return the Laplace-smoothed log-score of ``current_class`` for sample ``x``.

    Despite the name, the result is a sum of logarithms (log prior plus one
    log-likelihood term per feature), not a probability.

    Parameters
    ----------
    dictionary : dict
        Frequency tables with the layout produced by ``fit``:
        ``dictionary["total_data"]``, ``dictionary[c]["total_count"]`` and
        ``dictionary[c][j][value]`` for 1-based feature index ``j``.
    x : sequence
        Feature values of one sample; ``x[j - 1]`` is the value of feature ``j``.
    current_class : hashable
        Class label to score; must be a key of ``dictionary``.

    Returns
    -------
    float
        ``log(prior) + sum_j log((count_j + 1) / (class_total + n_values_j))``.
    """
    class_counts = dictionary[current_class]
    class_total = class_counts["total_count"]

    # Log prior: share of the training rows that belong to this class.
    output = np.log(class_total) - np.log(dictionary["total_data"])

    # Every key except "total_count" is a feature index.
    num_features = len(class_counts) - 1

    for j in range(1, num_features + 1):
        value_counts = class_counts[j]

        # A value never seen in training has an observed count of 0; looking
        # it up with a default avoids a KeyError and lets the +1 smoothing
        # term handle it.
        count_current_class_with_value_xj = value_counts.get(x[j - 1], 0) + 1
        count_current_class = class_total + len(value_counts)

        output = output + (
            np.log(count_current_class_with_value_xj) - np.log(count_current_class)
        )

    return output

In [14]:
def predict_single_point(dictionary, x) :
    """Pick the class whose log-score for sample ``x`` is highest.

    Every key of ``dictionary`` other than "total_data" is treated as a class
    label and scored with ``probability``. On equal scores the label met first
    while iterating the dictionary is kept. When the dictionary holds no class
    labels the result is -1.
    """
    labels = (label for label in dictionary.keys() if label != "total_data")
    return max(
        labels,
        key=lambda label: probability(dictionary, x, label),
        default=-1,
    )


In [15]:
def predict(dictionary, x_test) :
    """Classify every sample of ``x_test`` and return the labels as a list.

    Each element yielded by iterating ``x_test`` is passed, together with
    ``dictionary``, to ``predict_single_point``; the results are collected in
    the same order.
    """
    return [predict_single_point(dictionary, sample) for sample in x_test]

In [16]:
def makeLabelled(column):
    """Discretise a numeric column into the four labels 0, 1, 2 and 3, in place.

    The cut points are derived from the column itself:

    * label 0: value <  (min + mean) / 2
    * label 1: value <  mean
    * label 2: value <  (max + mean) / 2
    * label 3: everything else

    Parameters
    ----------
    column : NumPy array
        Values to discretise. The array is overwritten with the labels, so
        passing a view such as ``x[:, i]`` also modifies the parent array.

    Returns
    -------
    The same ``column`` object, now holding the labels.
    """
    mean = column.mean()
    minimum = column.min()
    maximum = column.max()

    first_limit = (minimum + mean) / 2
    second_limit = mean
    third_limit = (maximum + mean) / 2

    # np.select takes the first condition that holds for each element, which
    # mirrors an if/elif chain. The labels are fully computed from the
    # original values before anything is written back.
    labels = np.select(
        [column < first_limit, column < second_limit, column < third_limit],
        [0, 1, 2],
        default=3,
    )

    column[:] = labels
    return column

In [17]:
from sklearn import datasets

# Load the iris dataset; `x` holds its `data` attribute (features) and `y`
# its `target` attribute (class labels).
iris = datasets.load_iris()
x = iris.data
y = iris.target

In [18]:
# makeLabelled overwrites the array it is given, so work on a copy of the raw
# measurements: `iris.data` stays untouched and re-running this cell always
# produces the same labels instead of re-binning already-binned values.
x = iris.data.copy()
for i in range(x.shape[-1]):
    x[:, i] = makeLabelled(x[:, i])

In [19]:
from sklearn import model_selection
# Hold out 25% of the rows for testing; random_state=0 fixes the shuffle so the split is repeatable.
x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size=0.25, random_state=0)

In [20]:
# Build the per-class frequency tables from the training split.
dictionary = fit(x_train, y_train)

In [21]:
# Classify every row of the test split with the hand-written model.
y_pred = predict(dictionary, x_test)

In [22]:
from sklearn.metrics import classification_report, confusion_matrix
# Compare the hand-written model's predictions with the true test labels.
print(classification_report(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.89      1.00      0.94        16
           2       1.00      0.78      0.88         9

    accuracy                           0.95        38
   macro avg       0.96      0.93      0.94        38
weighted avg       0.95      0.95      0.95        38

[[13  0  0]
 [ 0 16  0]
 [ 0  2  7]]


In [28]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Baseline for comparison: scikit-learn's GaussianNB trained on a freshly
# loaded copy of the iris data. Note this rebinds x, y, the split variables
# and y_pred used by the earlier cells.
iris = datasets.load_iris()
x = iris.data
y = iris.target

# Same split parameters as the hand-written model (test_size=0.25, random_state=0).
x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size=0.25, random_state=0)

clf = GaussianNB()
clf.fit(x_train, y_train)

y_pred = clf.predict(x_test)

print("Classification Report")
print(classification_report(y_test,y_pred))
print("Confusion Matrix")
print(confusion_matrix(y_test,y_pred))
print()
print("Accuracy Score")
# accuracy_score is multiplied by 100 and printed with a trailing "%" (sep="" removes the space).
print(accuracy_score(y_test,y_pred) * 100, "%", sep="")


Classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      1.00      1.00        16
           2       1.00      1.00      1.00         9

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38

Confusion Matrix
[[13  0  0]
 [ 0 16  0]
 [ 0  0  9]]

Accuracy Score
100.0%
