In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Example dataset (replace with actual email data)
data = {
    'Email_Content': [
        "Congratulations! You've won a free gift card.",
        "Important meeting scheduled for tomorrow.",
        "Get cheap loans now!", 
        "Lunch with the team today?",
        "Exclusive deal on luxury watches!",
        "Can you review the project proposal?",
        "Earn money quickly working from home!",
        "Your order has been shipped."
    ],
    'Label': ["spam", "not spam", "spam", "not spam", "spam", "not spam", "spam", "not spam"]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Display the dataset
print("Dataset:")
print(df)

# Define features (X) and target (y)
X = df['Email_Content']
y = df['Label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert text data into numerical data using CountVectorizer
vectorizer = CountVectorizer()
X_train_transformed = vectorizer.fit_transform(X_train)
X_test_transformed = vectorizer.transform(X_test)

# Initialize and train the Naive Bayes model
model = MultinomialNB()
model.fit(X_train_transformed, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test_transformed)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("\nModel Evaluation:")
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)
print("Confusion Matrix:")
print(conf_matrix)

# Example: Predict if a new email is spam or not
new_emails = ["Win a brand new car by clicking this link!", "Team meeting at 3 PM."]
new_emails_transformed = vectorizer.transform(new_emails)
new_predictions = model.predict(new_emails_transformed)

print("\nPredicted Labels for New Emails:")
for email, label in zip(new_emails, new_predictions):
    print(f"Email: {email} => Label: {label}")

Dataset:
                                   Email_Content     Label
0  Congratulations! You've won a free gift card.      spam
1      Important meeting scheduled for tomorrow.  not spam
2                           Get cheap loans now!      spam
3                     Lunch with the team today?  not spam
4              Exclusive deal on luxury watches!      spam
5           Can you review the project proposal?  not spam
6          Earn money quickly working from home!      spam
7                   Your order has been shipped.  not spam

Model Evaluation:
Accuracy: 0.0
Classification Report:
              precision    recall  f1-score   support

    not spam       0.00      0.00      0.00       2.0
        spam       0.00      0.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0

Confusion Matrix:
[[0 2]
 [0 0]]

Predicted Labels for New Emails:
Email: Win

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
