In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
df = pd.read_csv(url, sep='\t', names=["label", "message"])
df.drop_duplicates(inplace=True)
df.dropna(inplace=True)
df['label_num'] = df.label.map({'ham': 0, 'spam': 1})
X = df['message']
y = df['label_num']
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_vec = vectorizer.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_vec, y, test_size=0.25, random_state=42)
nb_model = MultinomialNB()
nb_model.fit(X_train, y_train)
nb_pred = nb_model.predict(X_test)
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)
def evaluate_model(name, y_true, y_pred):
    print(f"\n----- {name} -----")
    print("Accuracy:", round(accuracy_score(y_true, y_pred), 3))
    print("Precision:", round(precision_score(y_true, y_pred), 3))
    print("Recall:", round(recall_score(y_true, y_pred), 3))
    print("F1 Score:", round(f1_score(y_true, y_pred), 3))
    print("\nClassification Report:\n", classification_report(y_true, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
evaluate_model("Naive Bayes", y_test, nb_pred)
evaluate_model("Support Vector Machine", y_test, svm_pred)
test_messages = [
    "Win a $1000 Walmart gift card now by clicking here!",
    "Hey, are we still meeting for lunch today?",
    "URGENT! Your account has been compromised, click here to secure it!"
]
test_vec = vectorizer.transform(test_messages)

print("\n--- New Message Predictions ---")
print("Naive Bayes Predictions:", nb_model.predict(test_vec))
print("SVM Predictions:", svm_model.predict(test_vec))



----- Naive Bayes -----
Accuracy: 0.968
Precision: 1.0
Recall: 0.757
F1 Score: 0.862

Classification Report:
               precision    recall  f1-score   support

           0       0.96      1.00      0.98      1124
           1       1.00      0.76      0.86       169

    accuracy                           0.97      1293
   macro avg       0.98      0.88      0.92      1293
weighted avg       0.97      0.97      0.97      1293

Confusion Matrix:
 [[1124    0]
 [  41  128]]

----- Support Vector Machine -----
Accuracy: 0.972
Precision: 0.959
Recall: 0.822
F1 Score: 0.885

Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.99      0.98      1124
           1       0.96      0.82      0.89       169

    accuracy                           0.97      1293
   macro avg       0.97      0.91      0.93      1293
weighted avg       0.97      0.97      0.97      1293

Confusion Matrix:
 [[1118    6]
 [  30  139]]

--- New Message Pr