In [5]:
import numpy as np

In [24]:
class NaiveBayes:

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self._classes = np.unique(y)
        n_classes = len(self._classes)

        self._mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self._var = np.zeros((n_classes, n_features), dtype=np.float64)
        self._priors = np.zeros(n_classes, dtype=np.float64)
        
        for idx, c in enumerate(self._classes):
            X_c = X[c==y]
            print(X_c.shape)
            self._mean[c, :] = X_c.mean(axis=0)
            self._var[c, :] = X_c.var(axis=0)
            self._priors[idx] = X_c.shape[0] / np.float64(n_samples)

    def predict(self, X):
        y_pred = [self._predict(x) for x in X]
        return y_pred
    
    def _predict(self, x):
        posteriors = []

        for idx, cl in enumerate(self._classes):
            prior = np.log(self._priors[idx])
            class_conditional = np.sum(np.log(self._probability_density_function(idx, x)))
            posterior = prior + class_conditional
            posteriors.append(posterior)
        
        return self._classes[np.argmax(posteriors)]

    def _probability_density_function(self, class_idx, x):
        mean = self._mean[class_idx]
        var = self._var[class_idx]

        nominator = np.exp(-((x - mean)**2 / (2 * var)))
        denominator = np.sqrt(2 * np.pi * var)
        return nominator / denominator


$$
P(x) = \frac{e^{-\frac{(x-\mu)^2}{2 \sigma^2}}}{{\sigma \sqrt{2\pi}}}
$$

In [30]:
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

X, y = datasets.make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

nb = NaiveBayes()
nb.fit(X_train, y_train)
predictions = nb.predict(X_test)

print("Naive Bayes classification accuracy:", accuracy(y_test, predictions))


(402, 10)
(398, 10)
Naive Bayes classification accuracy: 0.82


In [21]:
X_train.shape

(800, 10)

In [28]:
import pandas as pd

df = pd.DataFrame(X)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-2.101187,-0.247208,-1.989335,-1.484556,1.128668,-0.925564,1.210290,2.070526,0.091527,-2.376400
1,0.323703,-0.126894,-1.309524,-0.022738,-0.398725,0.280419,0.034598,1.184820,-0.186802,1.969326
2,2.232038,0.334123,-0.852703,3.289016,0.331944,0.044767,-2.233844,-0.066448,-0.313881,1.499174
3,-0.428456,1.627132,0.484649,0.938373,1.339947,-0.869036,0.943132,0.713558,-0.468041,0.075341
4,0.386012,0.338141,-0.186530,2.369137,1.667284,-0.979107,0.951526,0.398790,0.729005,0.372213
...,...,...,...,...,...,...,...,...,...,...
995,-1.148812,-0.048464,0.555170,-2.167311,-0.595136,0.237045,-1.241394,-0.144619,-0.136638,-1.363868
996,1.585814,1.269868,1.193456,2.321360,0.222056,0.040255,2.260748,-0.182916,-0.959332,0.263967
997,-1.169663,0.230641,-2.710394,-3.217224,-1.509607,0.795293,-0.854581,0.021500,-1.066801,0.528868
998,-2.646182,-0.117844,0.149377,-0.377960,2.755272,-1.983281,-1.865016,0.340473,-0.276940,-0.871014
