In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris, load_wine, load_breast_cancer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.naive_bayes import GaussianNB
from supervised_learning.classification.NaiveBayes import NaiveBayes

def evaluate_naive_bayes_on_dataset(
    X, y, dataset_name, test_size=0.2, random_state=42
):
    """Compare the project's NaiveBayes with sklearn's GaussianNB on one dataset.

    The data is split once into train and test parts, both classifiers are
    fitted on the same training part, and accuracy, precision, recall and F1
    are computed on the same test part. Precision, recall and F1 use
    ``average='weighted'``. Every score is printed with four decimals under a
    ``"<dataset_name> Dataset"`` heading.

    Args:
        X: Feature matrix, forwarded unchanged to ``train_test_split``.
        y: Target labels, forwarded unchanged to ``train_test_split``.
        dataset_name: Label used in the printed heading.
        test_size: Forwarded to ``train_test_split``; defaults to 0.2.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible; defaults to 42.

    Returns:
        A dict mapping the model label (``"Our Model"``, ``"Sklearn Model"``)
        to a dict of ``{metric name: score}`` with the keys ``"Accuracy"``,
        ``"Precision"``, ``"Recall"`` and ``"F1 Score"``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Both classifiers are trained and queried on the identical split so the
    # scores are directly comparable.
    models = (
        ("Our Model", NaiveBayes()),
        ("Sklearn Model", GaussianNB()),
    )
    predictions = {}
    for label, model in models:
        model.fit(X_train, y_train)
        predictions[label] = model.predict(X_test)

    # (printed name, scoring function, extra keyword arguments)
    metrics = (
        ("Accuracy", accuracy_score, {}),
        ("Precision", precision_score, {"average": "weighted"}),
        ("Recall", recall_score, {"average": "weighted"}),
        ("F1 Score", f1_score, {"average": "weighted"}),
    )

    # Compute every score before printing anything, so a failing metric does
    # not leave a half-printed report behind.
    results = {label: {} for label, _ in models}
    for metric_name, score_fn, kwargs in metrics:
        for label, _ in models:
            results[label][metric_name] = score_fn(
                y_test, predictions[label], **kwargs
            )

    print(f"\n{dataset_name} Dataset")
    for metric_name, _, _ in metrics:
        for label, _ in models:
            print(f"{label} {metric_name}: {results[label][metric_name]:.4f}")

    return results

# Benchmark datasets bundled with scikit-learn, keyed by the name shown in
# the printed report. All of them are loaded up front.
datasets = dict(
    [
        ("Iris", load_iris()),
        ("Wine", load_wine()),
        ("Breast Cancer", load_breast_cancer()),
    ]
)

# Run the side-by-side comparison once per dataset, in insertion order.
for name in datasets:
    data = datasets[name]
    evaluate_naive_bayes_on_dataset(data.data, data.target, name)



Iris Dataset
Our Model Accuracy: 1.0000
Sklearn Model Accuracy: 1.0000
Our Model Precision: 1.0000
Sklearn Model Precision: 1.0000
Our Model Recall: 1.0000
Sklearn Model Recall: 1.0000
Our Model F1 Score: 1.0000
Sklearn Model F1 Score: 1.0000

Wine Dataset
Our Model Accuracy: 1.0000
Sklearn Model Accuracy: 1.0000
Our Model Precision: 1.0000
Sklearn Model Precision: 1.0000
Our Model Recall: 1.0000
Sklearn Model Recall: 1.0000
Our Model F1 Score: 1.0000
Sklearn Model F1 Score: 1.0000

Breast Cancer Dataset
Our Model Accuracy: 0.9649
Sklearn Model Accuracy: 0.9737
Our Model Precision: 0.9652
Sklearn Model Precision: 0.9748
Our Model Recall: 0.9649
Sklearn Model Recall: 0.9737
Our Model F1 Score: 0.9647
Sklearn Model F1 Score: 0.9735
