In [2]:
import pandas as pd
import numpy as np
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Numeric codes for the two label values this notebook handles; any other
# label value is not covered by the mapping.
LABEL_CODES = {"ham": 0, "spam": 1}

# Read the CSV, give the "v1"/"v2" columns descriptive names, keep only those
# two columns, and replace the text labels with their numeric codes.
df = (
    pd.read_csv("spam.csv", encoding='ISO-8859-1')
    .rename(columns={"v1": "label", "v2": "message"})
    .loc[:, ["label", "message"]]
    .assign(label=lambda frame: frame["label"].map(LABEL_CODES))
)

# Split settings: fraction of rows held out for evaluation, and the seed that
# makes the split repeatable between runs.
TEST_SIZE = 0.2
SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    df["message"],
    df["label"],
    test_size=TEST_SIZE,
    random_state=SEED,
)

# The vectorizer is fitted on the training messages only; the test messages
# are transformed with the already-fitted vectorizer.
tfidf = TfidfVectorizer()
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# fit() returns the estimator itself, so construction and training can be chained.
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Score the classifier on the held-out messages: overall accuracy first, then
# the per-class precision/recall/F1 table.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Model Accuracy: {accuracy * 100:.2f}%")
print(report)

# Persist the fitted vectorizer and classifier so they can be reloaded later.
# The `with` blocks guarantee each file is flushed and closed even if
# pickling raises; the bare open() calls previously leaked the handles.
# NOTE: only unpickle these files from a trusted location — pickle.load can
# execute arbitrary code.
with open("vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(tfidf, vectorizer_file)

with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

def predict_spam(message, vectorizer=None, classifier=None):
    """Classify a single message as "Spam" or "Not Spam".

    Args:
        message: The message text to classify. It is wrapped in a
            one-element list before being handed to the vectorizer.
        vectorizer: Optional fitted object exposing ``transform``. Defaults
            to the module-level ``tfidf``. Pass the object reloaded from
            ``vectorizer.pkl`` to predict without retraining.
        classifier: Optional fitted object exposing ``predict``. Defaults to
            the module-level ``model``. Pass the object reloaded from
            ``model.pkl`` to predict without retraining.

    Returns:
        "Spam" if the predicted label equals 1, otherwise "Not Spam".
    """
    # Fall back to the notebook's globals only when no override is given, so
    # the function also works in a process where they were never defined.
    vectorizer = tfidf if vectorizer is None else vectorizer
    classifier = model if classifier is None else classifier

    features = vectorizer.transform([message])
    predicted_label = classifier.predict(features)[0]
    return "Spam" if predicted_label == 1 else "Not Spam"

# Smoke-test the classifier on one hand-written message and show the verdict.
example_sms = "Congratulations! You've won a free lottery. Call now!"
example_prediction = predict_spam(example_sms)
print(f"Message: {example_sms}\nPrediction: {example_prediction}")


Model Accuracy: 96.23%
              precision    recall  f1-score   support

           0       0.96      1.00      0.98       965
           1       1.00      0.72      0.84       150

    accuracy                           0.96      1115
   macro avg       0.98      0.86      0.91      1115
weighted avg       0.96      0.96      0.96      1115

Message: Congratulations! You've won a free lottery. Call now!
Prediction: Not Spam
