In [7]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Toy corpus: every entry pairs an email body with its label
# ("spam" or "not spam"), so text and label cannot drift out of alignment.
_labeled_emails = [
    ("Free money offer now", "spam"),
    ("Limited time offer, get it now", "spam"),
    ("Hi, how are you?", "not spam"),
    ("See you at the party this weekend", "not spam"),
    ("Get free access to the premium course", "spam"),
    ("Hey, just checking in on you", "not spam"),
    ("Earn free cash now, click here", "spam"),
    ("Looking forward to seeing you soon", "not spam"),
    ("Don't miss this free chance", "spam"),
    ("Let's meet up for a coffee tomorrow", "not spam"),
]

# Parallel lists (same order as above) consumed by the rest of the script.
emails = [text for text, _ in _labeled_emails]
labels = [label for _, label in _labeled_emails]

# Step 1: Split the raw emails into training and testing sets.
# The split happens BEFORE vectorizing: fitting CountVectorizer on the whole
# corpus would let test-set words into the vocabulary, and MultinomialNB's
# smoothing depends on the vocabulary size, so the evaluation would not be
# a true held-out estimate.
y = labels  # Labels
emails_train, emails_test, y_train, y_test = train_test_split(
    emails, y, test_size=0.3, random_state=42
)

# Step 2: Convert emails to feature vectors (using CountVectorizer)
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(emails_train)  # vocabulary learned from training emails only
X_test = vectorizer.transform(emails_test)  # words not seen in training are ignored
# Whole corpus encoded with the training vocabulary; retained only so the
# name X stays defined for any code that reads it after this point.
X = vectorizer.transform(emails)

# Step 3: Initialize the Naive Bayes model with Laplace smoothing (alpha=1)
nb_model = MultinomialNB(alpha=1.0)

# Step 4: Train the model
nb_model.fit(X_train, y_train)

# Step 5: Make predictions on the test set
y_pred = nb_model.predict(X_test)

# Step 6: Evaluate the model
# NOTE: the test set holds only 3 of the 10 emails, so these metrics are
# illustrative rather than statistically meaningful.
print("Predictions:", y_pred)
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Step 7: Classify a message the model has never seen.
# The already-fitted vectorizer is reused (transform, not fit_transform) so
# the new text is encoded in the same feature space the model was trained on.
new_email = ["Free Free Free"]
new_email_vectorized = vectorizer.transform(new_email)
new_prediction = nb_model.predict(new_email_vectorized)

# Only one message was submitted, so report the first (and only) prediction.
print(
    f"\nPrediction for new email '{new_email[0]}':",
    new_prediction[0],
)

Predictions: ['spam' 'spam' 'not spam']

Accuracy Score: 1.0

Classification Report:
               precision    recall  f1-score   support

    not spam       1.00      1.00      1.00         1
        spam       1.00      1.00      1.00         2

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3


Prediction for new email 'Free Free Free': spam
