In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import pickle

# Sample data (replace this with your dataset)
data = {
    'text': [
        'Congratulations! You won a free ticket to Bahamas. Call now!',
        'Hi John, could you please send me the report by tomorrow?',
        'Get 50% off on your next purchase. Limited time offer!',
        'Dear customer, your bank account has been compromised. Click here to secure it.',
        'Just a reminder for our meeting at 3 PM tomorrow.',
    ],
    'label': ['spam', 'ham', 'spam', 'spam', 'ham']
}

# Create a DataFrame
df = pd.DataFrame(data)

# Map labels to binary values: spam -> 1, ham -> 0
df['label'] = df['label'].map({'spam': 1, 'ham': 0})

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.3, random_state=42)

# Vectorize the text data using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Initialize the Naive Bayes classifier
model = MultinomialNB()

# Train the model
model.fit(X_train_tfidf, y_train)

# Predict on the test set
y_pred = model.predict(X_test_tfidf)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f'Accuracy: {accuracy:.2f}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')
print('Confusion Matrix:')
print(conf_matrix)

# Save the model
with open('spam_classifier.pkl', 'wb') as file:
    pickle.dump(model, file)

# Save the vectorizer
with open('tfidf_vectorizer.pkl', 'wb') as file:
    pickle.dump(vectorizer, file)

# Load the model and vectorizer
with open('spam_classifier.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

with open('tfidf_vectorizer.pkl', 'rb') as file:
    loaded_vectorizer = pickle.load(file)

# Example new email
new_email = ["Win a brand new car by clicking this link!"]

# Preprocess and predict
new_email_tfidf = loaded_vectorizer.transform(new_email)
prediction = loaded_model.predict(new_email_tfidf)

if prediction[0] == 1:
    print("Spam")
else:
    print("Not Spam")


Accuracy: 0.00
Precision: 0.00
Recall: 0.00
F1 Score: 0.00
Confusion Matrix:
[[0 2]
 [0 0]]
Spam


  _warn_prf(average, modifier, msg_start, len(result))
