In [None]:
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Reached only if every import in this cell succeeded; acts as a quick
# visual confirmation that the environment has the required packages.
print("Libraries imported successfully ‚úÖ")

In [None]:
# Load the raw dataset from the working directory. The file is decoded as
# latin-1 explicitly instead of pandas' default UTF-8.
df = pd.read_csv(filepath_or_buffer="spam.csv", encoding="latin-1")

# Preview the first five rows.
df.head(n=5)

In [None]:
# Keep only the label and text columns and give them descriptive names.
#
# rename() ignores keys that are not present (its default errors='ignore'),
# so re-running this cell after 'v1'/'v2' were already renamed is a no-op
# rather than a KeyError. The trailing copy() makes the result an independent
# frame, so later column assignments do not act on a slice of the raw data.
df = (
    df.rename(columns={'v1': 'label', 'v2': 'message'})[['label', 'message']]
    .copy()
)
df.head()

In [None]:
# Encode the string labels as integers for the classifier: ham -> 0, spam -> 1.
label_codes = df['label'].map({'ham': 0, 'spam': 1})

# Series.map yields NaN for any value that is not a key of the dict. Stop here
# and report the offending values instead of letting NaN targets flow into the
# train/test split and the model.
unmapped_labels = df.loc[label_codes.isna(), 'label'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Unexpected values in 'label' column: {list(unmapped_labels)}"
    )

df['label_num'] = label_codes.astype('int64')
df.head()

In [None]:
# Features are the raw message texts; targets are the integer-encoded labels.
X = df['message']
y = df['label_num']

# Hold out 20% of the rows for evaluation with a fixed seed for reproducibility.
# stratify=y keeps the class proportions the same in both portions; without it
# a purely random split can leave a rare class over- or under-represented in
# the test set, which distorts the per-class metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Show the resulting sizes as (train, test).
len(X_train), len(X_test)

In [None]:
# TF-IDF text features with English stop words removed and the vocabulary
# capped at 5000 terms.
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')

# The vocabulary and IDF weights are learned from the training messages only;
# the test messages are projected with that already-fitted vectorizer.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Display the (rows, columns) shape of each matrix.
tuple(matrix.shape for matrix in (X_train_tfidf, X_test_tfidf))


In [None]:
# Fit a multinomial Naive Bayes classifier on the TF-IDF training matrix.
# fit() returns the estimator itself, so construction and training chain.
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Predict labels for the held-out messages.
y_pred = model.predict(X_test_tfidf)

# Fraction of test messages classified correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)


In [None]:
# Text summary of per-class metrics for the test-set predictions.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

In [None]:
# Confusion matrix of the test-set predictions. The label order is pinned to
# [0, 1] so the rows/columns always line up with the tick labels below
# (0 = ham, 1 = spam in this notebook's encoding).
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
class_names = ["ham", "spam"]

# savefig() does not create missing parent directories, so make sure the
# output folder exists; on a fresh checkout the cell would otherwise fail
# with FileNotFoundError.
results_dir = Path("results")
results_dir.mkdir(parents=True, exist_ok=True)

plt.figure(figsize=(5,4))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
# Save before show(): showing the figure may release it, leaving nothing to write.
plt.savefig(results_dir / "confusion_matrix.png")
plt.show()

In [None]:
# Persist the fitted classifier and vectorizer so they can be reloaded
# together later; both are needed to score new messages.
#
# open(..., "wb") does not create missing parent directories, so make sure the
# output folder exists; otherwise this cell fails with FileNotFoundError.
models_dir = Path("models")
models_dir.mkdir(parents=True, exist_ok=True)

# NOTE: pickle files can execute arbitrary code when loaded. Only unpickle
# these artifacts from a location you trust.
artifacts = {
    "spam_classifier.pkl": model,
    "vectorizer.pkl": vectorizer,
}
for artifact_name, artifact in artifacts.items():
    with open(models_dir / artifact_name, "wb") as f:
        pickle.dump(artifact, f)

print("Model and vectorizer saved ‚úÖ")

In [None]:
def predict_message(msg):
    """Classify one message as spam or not spam.

    Uses the notebook-level ``vectorizer`` and ``model`` objects, which must
    already be fitted when this function is called.

    Args:
        msg: The message text to classify.

    Returns:
        The spam display string when the model predicts label 1, otherwise
        the not-spam display string.
    """
    # The vectorizer works on a collection of documents, so wrap the single
    # message in a list and take the only prediction back out.
    features = vectorizer.transform([msg])
    predicted_label = model.predict(features)[0]

    if predicted_label == 1:
        return "üö® SPAM"
    return "‚úîÔ∏è Not Spam"

# Try the helper on one promotional-sounding and one conversational message.
for sample_text in (
    "Congratulations! You won a free prize!!!",
    "Hey, are you coming today?",
):
    print(predict_message(sample_text))
