In [3]:
import numpy as np

class NaiveBayesClassifier:
    def fit(self, X, y):
        # Calculate class priors
        self.class_priors = {c: np.mean(y == c) for c in np.unique(y)}

        # Calculate mean and variance vectors for each class
        self.mean_vectors = {}
        self.var_vectors = {}
        for c in np.unique(y):
            X_c = X[y == c]
            self.mean_vectors[c] = np.mean(X_c, axis=0)
            self.var_vectors[c] = np.var(X_c, axis=0, ddof=1)

        # Convert the dictionaries to arrays
        self.classes = np.array(list(self.class_priors.keys()))
        n_classes = len(self.classes)
        self.mean = np.array([self.mean_vectors[c] for c in self.classes])
        self.var = np.array([self.var_vectors[c] for c in self.classes])
        self.priors = np.array([self.class_priors[c] for c in self.classes])

    def predict(self, X):
        predictions = []
        for x in X:
            class_probs = []
            for idx, c in enumerate(self.classes):
                mean_vector = self.mean[idx, :]
                var_vector = self.var[idx, :]

                # Standardize the test instance using the mean and variance vectors
                x_standardized = (x - mean_vector) / np.sqrt(var_vector)

                log_likelihood = np.sum(
                    -0.5 * np.log(2 * np.pi * var_vector)
                    - 0.5 * ((x_standardized - mean_vector) ** 2) / var_vector
                )
#                 Na tsekarw an einai o swstos ypologismos toy log_likelihood
                class_probs.append(np.log(self.priors[idx]) + log_likelihood)

            predictions.append(self.classes[np.argmax(class_probs)])

        return np.array(predictions)


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB

# Fetch the Iris dataset bundle and pull out the feature matrix and labels.
# NOTE: the bundle is bound to the name `wine` although it holds Iris data;
# the name is kept as-is so any other cell that reads it keeps working.
wine = load_iris()
X = wine.data
y = wine.target

# Hold out 20% of the samples for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train the hand-written Naive Bayes model on the scaled training data.
nb_classifier = NaiveBayesClassifier()
nb_classifier.fit(X_train, y_train)

# Classify the held-out samples.
y_pred = nb_classifier.predict(X_test)

# Overall accuracy on the held-out split.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1 breakdown.
report = classification_report(y_true=y_test, y_pred=y_pred)
print("Classification Report:\n", report)


Accuracy: 0.93
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.80      0.89        10
           1       0.82      1.00      0.90         9
           2       1.00      1.00      1.00        11

    accuracy                           0.93        30
   macro avg       0.94      0.93      0.93        30
weighted avg       0.95      0.93      0.93        30



In [11]:
# Reference baseline: scikit-learn's GaussianNB, trained and evaluated on the
# X_train / X_test split prepared in the previous cell. The code is live
# (not commented out) so that re-running the notebook regenerates the
# recorded comparison output.
nb_classifier_sklearn = GaussianNB()
nb_classifier_sklearn.fit(X_train, y_train)

# Make predictions using the scikit-learn Naive Bayes classifier on the test set
y_pred_sklearn = nb_classifier_sklearn.predict(X_test)

# Calculate and print accuracy for the scikit-learn classifier
accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)
print(f"scikit-learn Naive Bayes Accuracy: {accuracy_sklearn:.2f}")

# Print classification report for the scikit-learn classifier
report_sklearn = classification_report(y_test, y_pred_sklearn)
print("scikit-learn Naive Bayes Classification Report:\n", report_sklearn)

scikit-learn Naive Bayes Accuracy: 1.00
scikit-learn Naive Bayes Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

