In [1]:
import numpy as np
import math

class NaiveBayes:
    """Gaussian Naive Bayes classifier for continuous features.

    ``fit`` estimates a prior for every class and a (mean, variance) pair for
    every (class, feature) combination. ``predict`` scores each sample against
    every class and returns the label with the highest posterior score.

    Scoring is done in log space: multiplying many densities that are each
    below 1 underflows to 0.0 once there are enough features, which would
    make every class tie and the first class always win.
    """

    # Smoothing term added to the variance-dependent denominators so that a
    # zero-variance feature does not cause a division by zero.
    _EPS = 1e-4

    def __init__(self):
        self.classes = None              # sorted unique labels seen by fit()
        self.priors = None               # {label: P(Y = label)}
        self.feature_likelihoods = None  # {label: {feature index: (mean, var)}}

    def _calculate_likelihood(self, mean, var, x):
        """Return the smoothed Gaussian density of ``x`` for the given mean/variance."""
        eps = self._EPS
        coeff = 1.0 / math.sqrt(2.0 * math.pi * var + eps)
        exponent = math.exp(-(math.pow(x - mean, 2) / (2 * var + eps)))
        return coeff * exponent

    def _calculate_log_likelihood(self, mean, var, x):
        """Return the natural log of ``_calculate_likelihood(mean, var, x)``.

        The logarithm is computed analytically instead of taking ``log`` of
        the density, so it stays finite even when the density itself would
        underflow to 0.0 for a sample far away from the class mean.
        """
        eps = self._EPS
        diff = x - mean
        log_coeff = -0.5 * math.log(2.0 * math.pi * var + eps)
        return log_coeff - (diff * diff) / (2 * var + eps)

    def fit(self, X, y):
        """Estimate class priors and per-feature Gaussian parameters.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of class labels, one per row of ``X``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples, n_features = X.shape

        self.classes = np.unique(y)
        self.priors = {}
        self.feature_likelihoods = {}

        for c in self.classes:
            class_mask = y == c
            # Select the rows of this class once instead of once per feature.
            class_rows = X[class_mask]

            # Class prior P(Y = c): fraction of training samples with label c.
            self.priors[c] = class_mask.sum() / n_samples

            # Per-feature (mean, variance) used for P(X_i | Y = c).
            self.feature_likelihoods[c] = {
                i: (class_rows[:, i].mean(), class_rows[:, i].var())
                for i in range(n_features)
            }

    def predict(self, X):
        """Return a list with the predicted class label for every row of ``X``.

        Args:
            X: iterable of samples, each an iterable of feature values in the
                same order as the columns passed to ``fit``.

        Returns:
            list: one label (taken from ``self.classes``) per sample.
        """
        y_pred = []
        for x in X:
            log_posteriors = []
            for c in self.classes:
                # log P(Y = c) + sum_i log P(x_i | Y = c)
                score = math.log(self.priors[c])
                for i, feature in enumerate(x):
                    mean, var = self.feature_likelihoods[c][i]
                    score += self._calculate_log_likelihood(mean, var, feature)
                log_posteriors.append(score)
            # The log is monotonic, so the arg-max is the same class that the
            # (non-underflowing) product of probabilities would select.
            y_pred.append(self.classes[np.argmax(log_posteriors)])
        return y_pred


In [2]:
from sklearn import datasets

# Load the Iris bunch and pull out its feature matrix and target vector.
iris = datasets.load_iris()
X, y = iris.data, iris.target


In [3]:
from sklearn.preprocessing import LabelEncoder

# Example if you have a categorical feature:
# Label-encoding is only meaningful for non-numeric (e.g. string) columns.
# The Iris matrix loaded above is a purely numeric array of continuous
# measurements, so encoding column 2 would overwrite the real values with
# rank codes (and mutate the loaded array in place). Only encode when the
# array actually holds non-numeric data.
categorical_feature_index = 2  # Index of the categorical column
encoder = LabelEncoder()
if X.dtype.kind not in "biufc":
    X[:, categorical_feature_index] = encoder.fit_transform(X[:, categorical_feature_index])


In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the seed is fixed via random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [5]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Fit the from-scratch classifier on the training split and label the test split.
model = NaiveBayes()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Score the predictions; precision, recall and F1 are macro-averaged.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    metric(y_test, y_pred, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

# Report each metric on its own line.
for metric_name, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-Score:", f1),
):
    print(metric_name, metric_value)


Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-Score: 1.0


In [6]:
import numpy as np
from collections import defaultdict
from typing import List, Tuple, Dict, Any

# Data Preprocessing
def load_dataset() -> Tuple[np.ndarray, np.ndarray]:
    """Load the Iris dataset and return it as an ``(X, y)`` pair.

    Returns:
        The ``'data'`` and ``'target'`` entries of the object returned by
        ``sklearn.datasets.load_iris``, in that order.
    """
    from sklearn.datasets import load_iris

    iris_bunch = load_iris()
    features = iris_bunch['data']
    labels = iris_bunch['target']
    return features, labels

def train_test_split(X: np.ndarray, y: np.ndarray, test_size: float = 0.2, random_state: int = 42) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Shuffle the samples and split them into train and test subsets.

    Args:
        X: Feature array with one row per sample.
        y: Label array with one entry per row of ``X``.
        test_size: Fraction of the samples placed in the test subset; the
            test subset holds ``int(len(X) * test_size)`` samples.
        random_state: Seed for the shuffle. The default of 42 reproduces the
            split produced by the previously hard-coded seed. The keyword
            also makes this function call-compatible with
            ``train_test_split(X, y, test_size=..., random_state=...)``.

    Returns:
        ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of samples.
    """
    if len(X) != len(y):
        raise ValueError(
            f"X and y must contain the same number of samples (got {len(X)} and {len(y)})."
        )

    # Use a private generator rather than np.random.seed(): seeding the global
    # generator would silently reset the random state of all other code.
    rng = np.random.RandomState(random_state)
    indices = rng.permutation(len(X))

    test_set_size = int(len(X) * test_size)
    test_indices = indices[:test_set_size]
    train_indices = indices[test_set_size:]
    return X[train_indices], X[test_indices], y[train_indices], y[test_indices]

# Naive Bayes Implementation
class NaiveBayesClassifier:
    """Gaussian Naive Bayes classifier that scores classes in log space.

    ``fit`` stores a prior per class and a list of (mean, variance) pairs,
    one per feature, per class. ``predict`` returns, for every sample, the
    class with the largest log-posterior score.
    """

    def __init__(self):
        self.classes = None               # unique labels seen by fit()
        self.class_prior = {}             # {label: P(Y = label)}
        self.mean_variance_by_class = {}  # {label: [(mean, var), ...] per feature}

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Estimate class priors and per-feature Gaussian parameters.

        Args:
            X: 2-D array of shape (n_samples, n_features).
            y: 1-D array of class labels, one per row of ``X``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        self.classes = np.unique(y)
        # Start from empty tables so that a refit does not keep statistics of
        # classes that are absent from the new training data.
        self.class_prior = {}
        self.mean_variance_by_class = {}
        for cls in self.classes:
            X_cls = X[y == cls]
            self.class_prior[cls] = len(X_cls) / len(X)
            # zip(*X_cls) iterates over the columns (features) of the class rows.
            self.mean_variance_by_class[cls] = [(np.mean(feature), np.var(feature)) for feature in zip(*X_cls)]

    def _calculate_likelihood(self, mean: float, var: float, x: float) -> float:
        """Return the smoothed Gaussian density of ``x`` for the given mean/variance."""
        epsilon = 1e-4  # To prevent division by zero
        coeff = 1.0 / np.sqrt(2.0 * np.pi * var + epsilon)
        exponent = np.exp(-(np.square(x - mean) / (2 * var + epsilon)))
        return coeff * exponent

    def _calculate_log_likelihood(self, mean: float, var: float, x: float) -> float:
        """Return the natural log of ``_calculate_likelihood(mean, var, x)``.

        Computed analytically: taking ``np.log`` of the density returns
        ``-inf`` once the exponential underflows to 0, which makes all
        classes tie for samples far from every class mean.
        """
        epsilon = 1e-4  # Same smoothing term as _calculate_likelihood
        log_coeff = -0.5 * np.log(2.0 * np.pi * var + epsilon)
        return log_coeff - np.square(x - mean) / (2 * var + epsilon)

    def _calculate_posterior(self, X: np.ndarray) -> Dict:
        """Return ``{label: log-posterior score}`` for a single sample ``X``."""
        posteriors = {}
        for cls in self.classes:
            log_prior = np.log(self.class_prior[cls])
            log_conditional = np.sum([
                self._calculate_log_likelihood(mean, var, x)
                for (mean, var), x in zip(self.mean_variance_by_class[cls], X)
            ])
            posteriors[cls] = log_prior + log_conditional
        return posteriors

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return an array with the highest-scoring class for every row of ``X``."""
        predictions = []
        for sample in X:
            scores = self._calculate_posterior(sample)
            # On ties the first class in iteration order wins.
            predictions.append(max(scores, key=scores.get))
        return np.array(predictions)

# Evaluation Metrics
def accuracy_score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Return the fraction of predictions that equal the true label.

    Args:
        y_true: Ground-truth labels (array or any array-like such as a list).
        y_pred: Predicted labels with the same shape as ``y_true``.

    Returns:
        Number of matching positions divided by the number of samples.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` have different shapes.
    """
    # Coerce to arrays first: comparing two plain lists with == yields a
    # single bool rather than an element-wise comparison.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape (got {y_true.shape} and {y_pred.shape})."
        )
    return np.sum(y_true == y_pred) / len(y_true)

def precision_recall_fscore(y_true: np.ndarray, y_pred: np.ndarray, average: str = 'macro') -> Tuple[float, float, float]:
    """Compute averaged precision, recall and F1 over all classes.

    The classes are the unique labels that occur in ``y_true`` or ``y_pred``.
    A ratio whose denominator is zero is counted as 0.

    Args:
        y_true: Ground-truth labels (array or array-like).
        y_pred: Predicted labels with the same shape as ``y_true``.
        average: How the per-class scores are combined:
            ``'macro'``    - unweighted mean of the per-class scores;
            ``'weighted'`` - mean weighted by each class's number of true samples;
            ``'micro'``    - scores computed from the counts summed over classes.

    Returns:
        ``(precision, recall, fscore)`` as floats. ``(0.0, 0.0, 0.0)`` when
        both inputs are empty.

    Raises:
        ValueError: If ``average`` is not one of the supported modes or the
            inputs have different shapes.
    """
    if average not in ('macro', 'micro', 'weighted'):
        raise ValueError(
            f"Unsupported average {average!r}; expected 'macro', 'micro' or 'weighted'."
        )

    # Coerce to arrays so that `y == cls` is element-wise even for plain lists.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape (got {y_true.shape} and {y_pred.shape})."
        )

    unique_classes = np.unique(np.concatenate([y_true, y_pred]))
    n_classes = len(unique_classes)
    if n_classes == 0:
        return 0.0, 0.0, 0.0

    def _ratio(numerator, denominator):
        # Division that maps an undefined ratio (zero denominator) to 0.
        return numerator / denominator if denominator > 0 else 0.0

    # Per-class confusion counts, aligned with unique_classes.
    tp = np.array([np.sum((y_pred == cls) & (y_true == cls)) for cls in unique_classes])
    fp = np.array([np.sum((y_pred == cls) & (y_true != cls)) for cls in unique_classes])
    fn = np.array([np.sum((y_pred != cls) & (y_true == cls)) for cls in unique_classes])

    if average == 'micro':
        precision = _ratio(tp.sum(), tp.sum() + fp.sum())
        recall = _ratio(tp.sum(), tp.sum() + fn.sum())
        fscore = _ratio(2 * precision * recall, precision + recall)
        return float(precision), float(recall), float(fscore)

    precisions = [_ratio(t, t + f) for t, f in zip(tp, fp)]
    recalls = [_ratio(t, t + f) for t, f in zip(tp, fn)]
    fscores = [_ratio(2 * p * r, p + r) for p, r in zip(precisions, recalls)]

    if average == 'weighted':
        # Support of a class = number of true samples with that label.
        support = tp + fn
        weights = support / support.sum()
        return (
            float(np.dot(weights, precisions)),
            float(np.dot(weights, recalls)),
            float(np.dot(weights, fscores)),
        )

    # 'macro': every class contributes equally.
    return (
        float(sum(precisions) / n_classes),
        float(sum(recalls) / n_classes),
        float(sum(fscores) / n_classes),
    )

# Main function to run the classifier
def main():
    """Run the demo end to end: load, split, train, predict and print metrics."""
    features, labels = load_dataset()
    X_train, X_test, y_train, y_test = train_test_split(features, labels)

    # Train on the training subset, then label the held-out subset.
    classifier = NaiveBayesClassifier()
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)

    # Evaluate against the held-out labels (default averaging of the helper).
    acc = accuracy_score(y_test, predictions)
    precision, recall, fscore = precision_recall_fscore(y_test, predictions)

    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {fscore:.4f}")

# Entry point: run the demo only when this code executes as the main module
# (not when it is imported from elsewhere).
if __name__ == "__main__":
    main()


Accuracy: 1.0000
Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000
