In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score
import pickle


In [2]:
# Load dataset
df = pd.read_csv("spam_dataset.csv")
df.head()


Unnamed: 0,text,label
0,Win a free iPhone now,1
1,Your account has been compromised,1
2,Meeting at 3 PM,0
3,"Congratulations, you've won a lottery",1
4,Let's catch up tomorrow,0


In [3]:
# Vectorize the text data
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['text'])
y = df['label']


In [4]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
# Train a Naive Bayes classifier
model = MultinomialNB()
model.fit(X_train, y_train)


In [6]:
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



In [7]:
# Save the trained model and vectorizer
with open("spam_classifier_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("spam_vectorizer.pkl", "wb") as f:
    pickle.dump(vectorizer, f)
