In [1]:
# Spam Classifier Training Notebook

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pickle

# 1. Load data
df = pd.read_csv('../data/emails.csv')
print(df.head())

# 2. Split data
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.2, random_state=42)

# 3. Vectorize text
vectorizer = TfidfVectorizer(stop_words='english')
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# 4. Train Naive Bayes model
clf = MultinomialNB()
clf.fit(X_train_vec, y_train)

# 5. Evaluate
y_pred = clf.predict(X_test_vec)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

# 6. Save model + vectorizer
model_dict = {"vectorizer": vectorizer, "classifier": clf}
with open('../model/spam_model.pkl', 'wb') as f:
    pickle.dump(model_dict, f)

print("✅ Model saved to ../model/spam_model.pkl")


                                                text label
0  Congratulations! You've won a $1,000 Walmart g...  spam
1  Hey John, can we reschedule the meeting to tom...   ham
2  URGENT! Your account will be suspended unless ...  spam
3                        Hi Mom, just landed safely.   ham
Accuracy: 0.0
[[0 1]
 [0 0]]
              precision    recall  f1-score   support

         ham       0.00      0.00      0.00       1.0
        spam       0.00      0.00      0.00       0.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0

✅ Model saved to ../model/spam_model.pkl


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
