<a href="https://www.kaggle.com/code/mh0386/naive-bayes-with-normal-distribution?scriptVersionId=221958330" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
    ConfusionMatrixDisplay,
    f1_score,
)
from sklearn.datasets import make_classification
import matplotlib.pyplot as plt
import numpy as np

In [None]:
x, y = make_classification(
    n_features=4, n_classes=2, n_samples=2000, n_informative=4, n_redundant=0
)

In [None]:
plt.scatter(x[:, 0], x[:, 1], c=y, marker="*")

In [None]:
x.shape, y.shape

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

In [None]:
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

# Naive Bayes with scikit-learn's built-in GaussianNB


In [None]:
classifier = GaussianNB()

In [None]:
classifier.fit(x_train, y_train)

In [None]:
y_pred = classifier.predict(x_test)

In [None]:
accuracy_score(y_test, y_pred)

In [None]:
f1_score(y_pred, y_test, average="weighted")

In [None]:
ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix(y_test, y_pred, labels=[0, 1]),
    display_labels=[0, 1],
).plot()

In [None]:
print(classification_report(y_test, y_pred, target_names=["class 0", "class 1"]))

# Naive Bayes from Scratch


In [None]:
class naive_bayes:
    """Gaussian Naive Bayes classifier implemented with NumPy.

    Every feature is modelled, per class, as an independent normal
    distribution whose mean and standard deviation are estimated from the
    training data. Prediction picks the class with the largest posterior,
    evaluated in log space so that multiplying many small densities cannot
    underflow to zero.

    Attributes set by ``fit``:
        x, y: the training data as NumPy arrays.
        classes: sorted list of the distinct labels found in ``y``.
        priors: relative frequency of each class, aligned with ``classes``.
        parameters: ``parameters[i][j]`` is a ``{"mean", "std"}`` dict for
            feature ``j`` of class ``classes[i]``.

    Attributes set by ``predict``:
        prediction: list of predicted labels from the most recent call.
    """

    # Floor applied to fitted standard deviations. A feature that is constant
    # within a class has std == 0, which would otherwise cause a division by
    # zero and NaN scores during prediction.
    _MIN_STD = 1e-9

    def __init__(self):
        pass

    def fit(self, x, y):
        """Estimate class priors and per-class, per-feature mean and std.

        Args:
            x: array-like of shape (n_samples, n_features).
            y: array-like of shape (n_samples,) holding the class labels.
        """
        x = np.asarray(x, dtype=float)
        y = np.asarray(y)
        self.x, self.y = x, y
        # np.unique yields a sorted, deterministic class order (set() does
        # not), which also makes tie-breaking in predict reproducible.
        self.classes = list(np.unique(y))
        self.priors = []
        self.parameters = []
        for c in self.classes:
            x_where_c = x[y == c]
            self.priors.append(len(x_where_c) / len(y))
            self.parameters.append(
                [
                    {"mean": col.mean(), "std": max(col.std(), self._MIN_STD)}
                    for col in x_where_c.T
                ]
            )

    def predict(self, x):
        """Return the most probable class label for every row of ``x``.

        Args:
            x: array-like of shape (n_samples, n_features).

        Returns:
            A list with one label (taken from ``self.classes``) per row. The
            same list is stored in ``self.prediction``.
        """
        x = np.asarray(x, dtype=float)
        if x.size == 0:
            self.prediction = []
            return self.prediction

        half_log_two_pi = 0.5 * np.log(2 * np.pi)
        scores = np.empty((len(x), len(self.classes)))
        for i, (prior, params) in enumerate(zip(self.priors, self.parameters)):
            mean = np.array([p["mean"] for p in params])
            std = np.array([p["std"] for p in params])
            z = (x - mean) / std
            # Log of the normal density, summed over the (assumed
            # independent) features.
            log_likelihood = -(0.5 * z**2 + np.log(std) + half_log_two_pi)
            scores[:, i] = np.log(prior) + log_likelihood.sum(axis=1)

        self.prediction = [self.classes[k] for k in scores.argmax(axis=1)]
        return self.prediction

    def gaussian_prob(self, x, mean, std):
        """Return the normal probability density N(x; mean, std**2)."""
        return (1 / (std * np.sqrt(2 * np.pi))) * np.exp(
            -((x - mean) ** 2) / (2 * std**2)
        )

In [None]:
model = naive_bayes()

In [None]:
model.fit(x_train, y_train)

In [None]:
y_pred = model.predict(x_test)

In [None]:
accuracy_score(y_test, y_pred)

In [None]:
f1_score(y_pred, y_test, average="weighted")

In [None]:
ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix(y_test, y_pred, labels=[0, 1]),
    display_labels=[0, 1],
).plot()

In [None]:
print(classification_report(y_test, y_pred))