In [11]:
# Gaussian Naive Bayes
import numpy as np

class GaussianNB:
    """Gaussian Naive Bayes classifier built on NumPy.

    Each feature is modelled, per class, as an independent normal
    distribution. Prediction picks the class with the highest joint
    log-probability ``log P(c) + sum_j log N(x_j | mean_cj, var_cj)``.

    Parameters
    ----------
    var_smoothing : float, default=1e-9
        Fraction of the largest per-feature variance of the training data
        that is added to every class-wise variance. This keeps the variances
        strictly positive so a feature that is constant within a class does
        not produce ``log(0)`` or ``0 / 0``. Pass ``0`` to disable smoothing.

    Attributes (set by ``fit``)
    ---------------------------
    classes : sorted unique labels, shape (n_classes,)
    means : per-class feature means, shape (n_classes, n_features)
    vars : per-class (smoothed) feature variances, same shape as ``means``
    class_log_prior : log of the empirical class frequencies, shape (n_classes,)
    """

    def __init__(self, var_smoothing=1e-9):
        self.var_smoothing = var_smoothing

    def fit(self, X, y):
        """Estimate per-class means, variances and log-priors.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
        """
        # Accept plain lists as well as arrays; boolean masking needs ndarrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        self.classes = np.unique(y)  # Sorted unique labels
        n_features = X.shape[1]
        n_classes = len(self.classes)

        self.means = np.zeros((n_classes, n_features))  # Feature mean, class-wise
        self.vars = np.zeros((n_classes, n_features))  # Feature variance, class-wise
        self.class_log_prior = np.zeros(n_classes)  # log P(class)

        # Variance floor, scaled by the data so it is negligible for
        # well-conditioned features but still rescues zero-variance ones.
        epsilon = self.var_smoothing * np.var(X, axis=0).max()
        if epsilon == 0:
            # Every feature is constant over the whole data set; fall back to
            # the raw smoothing value so the variances are still positive.
            epsilon = self.var_smoothing

        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.means[idx, :] = np.mean(X_c, axis=0)
            self.vars[idx, :] = np.var(X_c, axis=0) + epsilon
            self.class_log_prior[idx] = np.log(X_c.shape[0] / X.shape[0])

    def _log_likelihood(self, X):
        """Return the joint log-probability of every sample under every class.

        Despite the name, the class log-prior is included, so the result is
        ``log P(c) + log P(x | c)`` (unnormalised log-posterior).

        Returns
        -------
        ndarray of shape (n_samples, n_classes); column ``i`` corresponds to
        ``self.classes[i]``.
        """
        X = np.asarray(X, dtype=float)
        n_samples = X.shape[0]
        n_classes = len(self.classes)
        log_probs = np.zeros((n_samples, n_classes))

        for idx in range(n_classes):
            mean = self.means[idx]
            var = self.vars[idx]
            # Normalisation term: sum_j log(1 / sqrt(2*pi*var_j))
            log_prob = -0.5 * np.sum(np.log(2 * np.pi * var))
            # Exponent term: -sum_j (x_j - mean_j)^2 / (2 * var_j), per sample
            log_prob = log_prob - 0.5 * np.sum(((X - mean) ** 2) / var, axis=1)
            log_probs[:, idx] = self.class_log_prior[idx] + log_prob

        return log_probs

    def predict(self, X):
        """Return, for each row of ``X``, the label with the highest joint log-probability."""
        log_probs = self._log_likelihood(X)
        return self.classes[np.argmax(log_probs, axis=1)]

 




In [13]:
from sklearn.datasets import make_classification
import sklearn.naive_bayes

# Build a small seeded toy problem: 200 samples, 2 informative features, 2 classes.
X, y = make_classification(
    n_samples=200,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)

# Fit the hand-written model ...
gnb = GaussianNB()
gnb.fit(X, y)

# ... and the scikit-learn reference implementation on the same data.
sk_gnb = sklearn.naive_bayes.GaussianNB()
sk_gnb.fit(X, y)

# Predict on the training inputs with both models.
y_pred = gnb.predict(X)
y_pred_sk = sk_gnb.predict(X)

# Training-set accuracy = fraction of predictions equal to the true label.
# First line printed: hand-written model; second line: scikit-learn.
accuracy = (y_pred == y).mean()
print(f"Accuracy: {accuracy:.2f}")

accuracy_sk = (y_pred_sk == y).mean()
print(f"Accuracy: {accuracy_sk:.2f}")


Accuracy: 0.86
Accuracy: 0.86
