In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [4]:

# Load the raw dataset; the file is decoded as latin-1.
data = pd.read_csv('/content/spam.csv', encoding='latin-1')

# Rename columns
data = data.rename(columns={"v1": "Target", "v2": "Email Text"})

# Encode the labels as integers (ham -> 0, spam -> 1).
# Series.map() turns every value that is missing from the mapping into NaN,
# so check for that here and fail with a clear message instead of letting
# NaN labels leak into the train/test split and the classifier.
label_to_id = {'ham': 0, 'spam': 1}
encoded_target = data['Target'].map(label_to_id)
unmapped = encoded_target.isna()
if unmapped.any():
    unexpected = data.loc[unmapped, 'Target'].unique().tolist()
    raise ValueError(f"Unexpected values in 'Target' column: {unexpected}")
data['Target'] = encoded_target

# Hold out 20% of the rows for evaluation; random_state is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    data['Email Text'], data['Target'], test_size=0.2, random_state=42
)

# Bag-of-words features: the vocabulary is fitted on the training texts only
# and then reused, unchanged, to transform the test texts.
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Train a Naive Bayes classifier
classifier = MultinomialNB()
classifier.fit(X_train_vectorized, y_train)

# Predict on the held-out texts and report accuracy plus per-class metrics.
y_pred = classifier.predict(X_test_vectorized)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("\nClassification Report:")

print(classification_report(y_test, y_pred))

Accuracy: 0.9838565022421525

Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       965
           1       0.99      0.89      0.94       150

    accuracy                           0.98      1115
   macro avg       0.98      0.95      0.96      1115
weighted avg       0.98      0.98      0.98      1115



In [7]:
def email_classifier(email):
    """Print whether a single email text is predicted to be spam.

    The text is turned into features with the module-level ``vectorizer``
    and scored with the module-level ``classifier``. The verdict is printed
    as "Prediction: Spam" when the predicted label equals 1 and as
    "Prediction: Not Spam" otherwise. Nothing is returned.

    Args:
        email: The email text to classify.
    """
    # transform() is given a one-element list holding the email text,
    # so the first (and only) prediction is the one for this email.
    features = vectorizer.transform([email])
    predicted_label = classifier.predict(features)[0]

    if predicted_label == 1:
        verdict = "Spam"
    else:
        verdict = "Not Spam"
    print("Prediction:", verdict)


# Try the trained model on a hand-written example message;
# email_classifier prints the predicted label rather than returning it.
email = "Hi. Pay 100$ to 012345678 to win cash prize of 100M$!"
email_classifier(email)

Prediction: Spam
