In [133]:
import random
from sklearn import datasets
import numpy as np
import pandas as pd

IRIS = datasets.load_iris()
x = IRIS.data
y = IRIS.target

train_index = random.sample(range(0, 150), 120)
X_train = x[train_index]
Y_train = y[train_index]

test_index = []
for i in range(0, 150):
    if i not in train_index:
        test_index.append(i)
X_test = x[test_index]
Y_test = y[test_index]

X_train_rint = np.rint(X_train)
X_test_rint = np.rint(X_test)

class naive_bayes:
    def fit(self, X, Y):
        n_samples, n_features = X.shape
        self._classes = np.unique(Y)
        n_classes = len(self._classes)
        
        self._mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self._var = np.zeros((n_classes, n_features), dtype=np.float64)
        self._priors = np.zeros(n_classes, dtype=np.float64)
        
        for idx, c in enumerate(self._classes):
            X_c = X[c == Y]
            self._mean[idx, :] = X_c.mean(axis=0)
            self._var[idx, :] = X_c.var(axis=0)
            self._priors[idx] = X_c.shape[0] / float(n_samples)
            
    def predict(self, X):
        y_pred = [self._predict(x) for x in X]
        return y_pred
    
    def _predict(self, x):
        posteriors = []
        
        for idx, c in enumerate(self._classes):
            prior = np.log(self._priors[idx])
            class_conditional = np.sum(np.log(self._pdf(idx, x)))
            posterior = prior + class_conditional
            posteriors.append(posterior)
            
        return self._classes[np.argmax(posteriors)]
    
    def _pdf(self, class_idx, x):
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        numerator = np.exp(-(x-mean)**2 / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator

model = naive_bayes()

#With discretization
model.fit(X_train_rint, Y_train)
preds = model.predict(X_test_rint)
print(preds)

def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

print("Naive Bayes classifier accuracy with discretization", accuracy(Y_test, preds))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
Naive Bayes classifier accuracy with discretization 0.9666666666666667


In [134]:
#Without discretization
model.fit(X_train, Y_train)
preds = model.predict(X_test)
print(preds)

print("Naive Bayes classifier accuracy without discretization", accuracy(Y_test, preds))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2]
Naive Bayes classifier accuracy without discretization 0.9666666666666667


In [135]:
#Using OCR Dataset
train_data = pd.read_csv("pp_tra.dat", sep=" ", header=None)
test_data = pd.read_csv("pp_tes.dat", sep=" ", header=None)

X_train = train_data.iloc[:,:-1].to_numpy()
Y_train = train_data.iloc[:,-1].to_numpy()

X_test = test_data.iloc[:,:-1].to_numpy()
Y_test = test_data.iloc[:,-1].to_numpy()

model.fit(X_train, Y_train)
preds = model.predict(X_test)

print("Naive Bayes classifier accuracy on OCR dataset", accuracy(Y_test, preds))

  numerator = np.exp(-(x-mean)**2 / (2 * var))
  numerator = np.exp(-(x-mean)**2 / (2 * var))
  return numerator / denominator
  class_conditional = np.sum(np.log(self._pdf(idx, x)))


Naive Bayes classifier accuracy on OCR dataset 0.1002100210021002
