# In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import BernoulliNB, MultinomialNB, GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import fetch_openml

# Q1. Probability that an employee is a smoker given that he/she uses the health insurance plan
print("Q1. Probability that an employee is a smoker given that he/she uses the health insurance plan")
# Givens: P(H) = 0.70 (employee uses the plan) and P(S | H) = 0.40 (smoker among plan users).
P_health_insurance = 0.70
P_smoker_given_health_insurance = 0.40
# Product rule: P(S and H) = P(S | H) * P(H).
# This is the JOINT probability of "smoker and uses the plan", not the marginal P(S).
P_smoker_and_health_insurance = P_smoker_given_health_insurance * P_health_insurance
# Definition of conditional probability: P(S | H) = P(S and H) / P(H); this recovers the given 0.40.
P_smoker_given_health_insurance_plan = P_smoker_and_health_insurance / P_health_insurance
print(f"The probability that an employee is a smoker given that he/she uses the health insurance plan is: {P_smoker_given_health_insurance_plan:.2f}")

# Q2. Bernoulli vs. Multinomial Naive Bayes: print the feature type each variant assumes
_q2_lines = (
    "\nQ2. Difference between Bernoulli Naive Bayes and Multinomial Naive Bayes",
    "Bernoulli Naive Bayes:",
    "- Assumes binary/boolean features (0 or 1).",
    "- Models the presence or absence of features.",
    "Multinomial Naive Bayes:",
    "- Assumes features represent counts or frequencies.",
    "- Models the distribution of counts or frequencies.",
)
# One line of output per entry, in order
for _q2_line in _q2_lines:
    print(_q2_line)

# Q3. Missing-value handling in Bernoulli Naive Bayes (question line, then the answer line)
print(
    "\nQ3. How does Bernoulli Naive Bayes handle missing values?",
    "Bernoulli Naive Bayes does not handle missing values directly. Missing values should be imputed or handled before applying the model.",
    sep="\n",
)

# Q4. Gaussian Naive Bayes and multi-class problems (question line, then the answer line)
print(
    "\nQ4. Can Gaussian Naive Bayes be used for multi-class classification?",
    "Yes, Gaussian Naive Bayes can be used for multi-class classification. It works well with continuous features that are assumed to follow a normal distribution.",
    sep="\n",
)

# Q5. Assignment
print("\nQ5. Assignment")

# Data preparation: fetch version 1 of the 'spambase' dataset from OpenML
print("Loading the Spambase dataset...")
dataset = fetch_openml(version=1, name='spambase')
# Target labels as delivered by OpenML
y = dataset.target
# Feature matrix as a DataFrame whose columns carry the dataset's feature names
X = pd.DataFrame(dataset.data, columns=dataset.feature_names)

# Hold out 30% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

# One default-parameter estimator per Naive Bayes variant under comparison
bernoulli_nb, multinomial_nb, gaussian_nb = BernoulliNB(), MultinomialNB(), GaussianNB()

# Train and evaluate the three Naive Bayes variants on the same train/test split.

# The OpenML Spambase target is encoded 0 (not spam) / 1 (spam) -- as str or int
# depending on the fetch_openml parser -- so the literal 'spam' matches no label and
# precision_score/recall_score/f1_score raise ValueError for it. Pick the label whose
# text is "1"; fall back to 'spam' only if the target really uses that literal.
spam_label = next(
    (label for label in pd.unique(y_train) if str(label) == "1"),
    "spam",
)


def _score_predictions(y_true, y_pred, pos_label):
    """Return (accuracy, precision, recall, f1) for one set of predictions.

    Precision, recall and F1 are computed for the class given by *pos_label*;
    accuracy does not depend on it.
    """
    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, pos_label=pos_label),
        recall_score(y_true, y_pred, pos_label=pos_label),
        f1_score(y_true, y_pred, pos_label=pos_label),
    )


# Train and evaluate Bernoulli Naive Bayes
print("\nTraining and evaluating Bernoulli Naive Bayes...")
bernoulli_nb.fit(X_train, y_train)
y_pred_bernoulli = bernoulli_nb.predict(X_test)
(accuracy_bernoulli, precision_bernoulli,
 recall_bernoulli, f1_bernoulli) = _score_predictions(y_test, y_pred_bernoulli, spam_label)

# Train and evaluate Multinomial Naive Bayes
print("\nTraining and evaluating Multinomial Naive Bayes...")
multinomial_nb.fit(X_train, y_train)
y_pred_multinomial = multinomial_nb.predict(X_test)
(accuracy_multinomial, precision_multinomial,
 recall_multinomial, f1_multinomial) = _score_predictions(y_test, y_pred_multinomial, spam_label)

# Train and evaluate Gaussian Naive Bayes
print("\nTraining and evaluating Gaussian Naive Bayes...")
gaussian_nb.fit(X_train, y_train)
y_pred_gaussian = gaussian_nb.predict(X_test)
(accuracy_gaussian, precision_gaussian,
 recall_gaussian, f1_gaussian) = _score_predictions(y_test, y_pred_gaussian, spam_label)

# Report each classifier's four scores on one line, rounded to two decimals
print("\nPerformance Metrics:")
_metric_rows = (
    ("Bernoulli", accuracy_bernoulli, precision_bernoulli, recall_bernoulli, f1_bernoulli),
    ("Multinomial", accuracy_multinomial, precision_multinomial, recall_multinomial, f1_multinomial),
    ("Gaussian", accuracy_gaussian, precision_gaussian, recall_gaussian, f1_gaussian),
)
for _variant, _acc, _prec, _rec, _f1 in _metric_rows:
    print(f"{_variant} Naive Bayes - Accuracy: {_acc:.2f}, Precision: {_prec:.2f}, Recall: {_rec:.2f}, F1 Score: {_f1:.2f}")

# Discussion: heading followed by the two prompts for interpreting the results
print(
    "\nDiscussion:",
    "Compare the performance metrics to determine which Naive Bayes variant performed best.",
    "Discuss any limitations of Naive Bayes, such as its assumption of feature independence and sensitivity to feature scaling.",
    sep="\n",
)

# Conclusion: heading followed by the closing prompt
print(
    "\nConclusion:",
    "Summarize your findings and provide suggestions for future work based on the performance of the classifiers.",
    sep="\n",
)
