In [None]:
# Apply the Naïve Bayes algorithm to a real-world classification problem such as email spam detection, sentiment analysis, or disease diagnosis.
# Train and test the model, then evaluate its performance using a Confusion Matrix and related metrics such as accuracy, precision, recall, and F1-score.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Load dataset
df = pd.read_csv('emails.csv')

# Parse the target column; anything missing or non-numeric becomes NaN here.
labels = pd.to_numeric(df["Prediction"], errors="coerce")

# Rows without a usable label are dropped instead of being relabelled as class 0:
# inventing a label would silently put wrong examples into both the training
# and the test split and distort every metric computed afterwards.
has_label = labels.notna()
if not has_label.all():
    print(f"Dropping {int((~has_label).sum())} rows with a missing or non-numeric 'Prediction' value")

# Prepare features and labels (filtered with the same mask so they stay aligned).
# "Email No." is dropped only if present (errors="ignore"); missing feature
# values are filled with 0.
X = df.loc[has_label].drop(["Prediction", "Email No."], axis=1, errors="ignore").fillna(0)

y = labels[has_label].astype(int)

In [None]:
# Preview the first five rows of the raw frame as loaded from disk
df.head(n=5)

In [None]:
# Hold out 25% of the rows for testing. The split is stratified on y and uses a
# fixed random_state so that re-running the cell reproduces the same partition.
split_options = dict(test_size=0.25, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Fit Gaussian Naive Bayes on the training rows, then predict the held-out rows
model = GaussianNB().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
# Confusion matrix of true vs. predicted labels (plotted in a later cell)
cm = confusion_matrix(y_test, y_pred)

# Compute each score once, then report all of them to four decimal places
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1-score : {f1:.4f}")

In [None]:
# seaborn (sns) and matplotlib.pyplot (plt) are imported once in the import cell
# at the top of the notebook, so this cell only draws.

# Draw on an explicit Axes so the title and labels are attached to the heatmap
# itself rather than to whatever pyplot currently treats as the current axes.
fig, ax = plt.subplots()

# 'cm' = confusion matrix computed in the evaluation cell; scikit-learn puts the
# true labels on the rows and the predicted labels on the columns.
# fmt='d' renders the cell annotations as integer counts.
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)

ax.set_title("Confusion Matrix - Naïve Bayes (Email Spam)")
ax.set_xlabel("Predicted label")
ax.set_ylabel("True label")
plt.show()
