In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Step 1: Load the SMS dataset and normalise it to two columns: 'label' and 'text'
df = (
    pd.read_csv('spam.csv', encoding='latin-1')
    # The three 'Unnamed' columns are discarded; only v1/v2 are used below.
    .drop(columns=['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'])
    .rename(columns={'v1': 'label', 'v2': 'text'})
    # Encode the target numerically: ham -> 0, spam -> 1.
    .assign(label=lambda frame: frame['label'].map({'ham': 0, 'spam': 1}))
)

# Step 2: Hold out 20% of the messages for testing (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# Step 3: Bag of Words features -- the vocabulary is learned from the training
# split only, then reused unchanged to encode the test split
vectorizer = CountVectorizer()
X_train_counts = vectorizer.fit_transform(X_train)
X_test_counts = vectorizer.transform(X_test)

# Step 4: Fit the Naive Bayes classifier on the training counts
clf = MultinomialNB().fit(X_train_counts, y_train)

# Step 5: Predict labels for the held-out messages
y_pred = clf.predict(X_test_counts)

# Step 6: Report performance on the held-out test split
print("Accuracy:", accuracy_score(y_test, y_pred))
# Each header and its value are printed on separate lines via sep="\n".
# Confusion matrix layout (scikit-learn convention): rows = true label, columns = predicted label.
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

# Step 7: Function to classify new SMS messages
def classify_message(message, *, model=None, featurizer=None):
    """Classify a single SMS message as spam or not spam.

    Parameters
    ----------
    message : str
        Raw text of the message to classify.
    model : object, optional
        Fitted classifier exposing ``predict``. Defaults to the notebook-level
        ``clf`` when omitted.
    featurizer : object, optional
        Fitted vectorizer exposing ``transform``. Defaults to the notebook-level
        ``vectorizer`` when omitted.

    Returns
    -------
    str
        ``"Spam"`` if the predicted label equals 1, otherwise ``"Not Spam"``.
    """
    # Fall back to the notebook globals only when no explicit pipeline is
    # supplied, so existing calls of the form classify_message(text) still work.
    if featurizer is None:
        featurizer = vectorizer
    if model is None:
        model = clf

    # The message is wrapped in a one-element list before vectorizing.
    message_counts = featurizer.transform([message])
    prediction = model.predict(message_counts)
    return "Spam" if prediction[0] == 1 else "Not Spam"

# Test the classifier with some example messages
# Two messages phrased like typical spam and two phrased like everyday texts
examples = [
    "Congratulations! You've won a free iPhone. Click here to claim your prize!",
    "Hi Mom, can you pick me up after school today?",
    "URGENT: Your bank account has been suspended. Log in now to reactivate.",
    "Meeting rescheduled to 3 PM. See you then.",
]

# Echo each message first, then classify it and print the verdict
for example in examples:
    print(f"Message: {example}")
    verdict = classify_message(example)
    print(f"Classification: {verdict}\n")

Accuracy: 0.9838565022421525

Confusion Matrix:
[[963   2]
 [ 16 134]]

Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       965
           1       0.99      0.89      0.94       150

    accuracy                           0.98      1115
   macro avg       0.98      0.95      0.96      1115
weighted avg       0.98      0.98      0.98      1115

Message: Congratulations! You've won a free iPhone. Click here to claim your prize!
Classification: Spam

Message: Hi Mom, can you pick me up after school today?
Classification: Not Spam

Message: URGENT: Your bank account has been suspended. Log in now to reactivate.
Classification: Spam

Message: Meeting rescheduled to 3 PM. See you then.
Classification: Not Spam

