In [1]:
import math
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [2]:
# Load the iris dataset once and reuse the same object for features and labels
# (the previous form called load_iris() twice to build a single call's arguments).
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris['data'], iris['target'], random_state=1234)

In [3]:
# Show the feature-matrix and label-vector shapes for the train and test splits.
for features, labels in ((X_train, y_train), (X_test, y_test)):
    print(features.shape, labels.shape)

(112, 4) (112,)
(38, 4) (38,)


### Naive Bayes Trials

### Functions

In [10]:
class GaussianNaiveBayes:
    """Gaussian Naive Bayes classifier that is fitted when it is constructed.

    For every class the per-feature mean and variance of the training rows are
    estimated, together with the class prior (relative class frequency).
    Prediction picks the class with the largest log-posterior::

        log P(c) + sum_j log N(x_j | mean[c, j], var[c, j])

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training features.
    y : array-like of shape (n_samples,)
        Training labels. Any values accepted by ``np.unique`` work; they do
        not have to be the integers ``0..K-1``.
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of ``X`` that is added to
        every class variance so that a feature which is constant within a
        class does not cause a division by zero.
    """

    def __init__(self, X, y, var_smoothing=1e-9):
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        # np.unique returns the labels sorted; row i of _mean/_var/_prior
        # always refers to self._classes[i].
        self._classes, self._class_counts = np.unique(self.y, return_counts=True)
        self._num_samples, self._num_features = self.X.shape
        num_classes = len(self._classes)
        self._mean = np.zeros(shape=(num_classes, self._num_features))
        self._var = np.zeros(shape=(num_classes, self._num_features))

        # Index the parameter rows by position, not by the label value, so
        # labels such as [5, 10] or ["a", "b"] are handled correctly.
        for row, label in enumerate(self._classes):
            X_class = self.X[self.y == label]
            self._mean[row, :] = np.mean(X_class, axis=0)
            self._var[row, :] = np.var(X_class, axis=0)

        # Variance floor. It is 0 when every feature of X is constant, in
        # which case zero variances are left untouched.
        self._epsilon = var_smoothing * np.var(self.X, axis=0).max() if self.X.size else 0.0
        self._var += self._epsilon

        self._prior = self._class_counts / float(np.sum(self._class_counts))
        self._log_prior = np.log(self._prior)

    def predict(self, X):
        """Return an array with the predicted class label for every row of ``X``."""
        return np.array([self._predict(x) for x in np.asarray(X)])

    def _predict(self, x):
        """Return the label with the highest log-posterior for one sample ``x``."""
        log_likelihood = np.sum(self._log_gaussian_density(x), axis=1)
        # Likelihood and prior must both be in log space before they are added.
        log_posteriors = log_likelihood + self._log_prior
        return self._classes[np.argmax(log_posteriors)]

    def _log_gaussian_density(self, x):
        """Log Gaussian density of ``x`` for every class, shape (n_classes, n_features).

        Computed directly in log space: exponentiating first underflows to 0
        for samples far from a class mean, and log(0) = -inf would make all
        classes indistinguishable.
        """
        squared_z = (x - self._mean) ** 2 / self._var
        return -0.5 * (np.log(2.0 * math.pi * self._var) + squared_z)

    def GaussianDensityFucntion(self, x, idx):
        """Per-feature Gaussian density of ``x`` under the class stored in row ``idx``.

        ``idx`` is the row of the fitted parameters, i.e. the position of the
        class in the sorted unique labels (equal to the label itself when the
        labels are ``0..K-1``). The misspelled name is kept so existing calls
        keep working.
        """
        constant = np.sqrt(2 * math.pi * self._var[idx])
        exponent = -((x - self._mean[idx]) ** 2) / (2 * self._var[idx])
        return np.exp(exponent) / constant

In [11]:
# Build the from-scratch classifier on the training split (fitting happens in __init__).
gnb = GaussianNaiveBayes(X=X_train, y=y_train)

In [12]:
# Accuracy: fraction of test samples whose predicted label equals the true label.
np.mean(gnb.predict(X_test) == y_test)

0.9473684210526315

In [13]:
from sklearn.naive_bayes import GaussianNB

In [14]:
# Fit scikit-learn's GaussianNB on the same training split for comparison.
gbb = GaussianNB().fit(X=X_train, y=y_train)

In [15]:
# Accuracy of the scikit-learn model on the test split.
np.mean(gbb.predict(X_test) == y_test)

0.9473684210526315

In [16]:
from sklearn.datasets import make_classification

In [17]:
# Seed the generator so the synthetic dataset (and every accuracy computed from it)
# is reproducible on Restart & Run All; without random_state each run draws new data.
X, y = make_classification(n_samples=600, n_classes=2, random_state=1234)

In [18]:
# Split the synthetic data. This rebinds X_train / X_test / y_train / y_test,
# replacing the iris split created earlier in the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1234,
    shuffle=True,
)

In [19]:
# Build the from-scratch classifier on the synthetic training split.
gb = GaussianNaiveBayes(X=X_train, y=y_train)

In [20]:
# Accuracy of the from-scratch classifier on the synthetic test split.
np.mean(gb.predict(X_test) == y_test)

0.7933333333333333

In [21]:
# Refit scikit-learn's GaussianNB on the synthetic split (rebinds gbb) and report
# its test accuracy for comparison with the from-scratch classifier.
gbb = GaussianNB().fit(X=X_train, y=y_train)
np.mean(gbb.predict(X_test) == y_test)

0.7933333333333333