In [7]:
# Using Naive Bayes classifier

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset, keeping only the label (v1) and message text (v2) columns.
# rename() returns a new frame, so the steps below never write into a slice
# of the frame returned by read_csv.
data = (
    pd.read_csv("spam.csv", encoding="ISO-8859-1")[["v1", "v2"]]
    .rename(columns={"v1": "label", "v2": "text"})
)

# Convert labels to binary (spam -> 1, ham -> 0).
# Series.map leaves NaN for any value missing from the mapping, so fail loudly
# here rather than handing NaN targets to the classifier.
label_ids = data["label"].map({"spam": 1, "ham": 0})
if label_ids.isna().any():
    unexpected = data.loc[label_ids.isna(), "label"].unique().tolist()
    raise ValueError(f"Unexpected label values in spam.csv: {unexpected}")
data = data.assign(label=label_ids.astype("int64"))

# Split the data into training and testing sets (80/20, fixed seed so the
# split is the same on every run).
X_train, X_test, y_train, y_test = train_test_split(
    data["text"], data["label"], test_size=0.2, random_state=42
)

# Vectorize the text data. The vocabulary is learned from the training split
# only; the test split is transformed with that same vocabulary.
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train the Naive Bayes classifier on the token-count features
model = MultinomialNB()
model.fit(X_train_vec, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test_vec)

# Calculate accuracy and print classification report.
# target_names is ordered to match the encoded labels: 0 -> ham, 1 -> spam.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=["ham", "spam"])

print("Accuracy:", accuracy)
print("Classification Report:")
print(report)


Accuracy: 0.9838565022421525
Classification Report:
              precision    recall  f1-score   support

         ham       0.98      1.00      0.99       965
        spam       0.99      0.89      0.94       150

    accuracy                           0.98      1115
   macro avg       0.98      0.95      0.96      1115
weighted avg       0.98      0.98      0.98      1115



In [12]:
# Test on own input


def classify_texts(texts, fitted_vectorizer, fitted_model):
    """Predict a label for each raw message in ``texts``.

    Parameters
    ----------
    texts : list of str
        Raw, unprocessed messages.
    fitted_vectorizer
        Vectorizer already fitted on the training text; only its
        ``transform`` method is called.
    fitted_model
        Classifier already fitted on the vectorized training text; only its
        ``predict`` method is called.

    Returns
    -------
    Whatever ``fitted_model.predict`` returns for the vectorized messages
    (one predicted label per message).
    """
    # No manual preprocessing here: the messages go through exactly the same
    # vectorizer that was fitted on the training data, so inference cannot
    # drift from training. (The notebook's CountVectorizer() is created with
    # default settings, which already lowercase the text.)
    return fitted_model.predict(fitted_vectorizer.transform(texts))


input_text = "Congratulations, you've won a free vacation to Bali! Click here to claim your ticket discount codes!"

# Make a prediction (the result holds one label for the single message)
prediction = classify_texts([input_text], vectorizer, model)

# Interpret the prediction: 1 is the encoded "spam" label, 0 is "ham"
if prediction[0] == 1:
    print("Input text is classified as Spam")
else:
    print("Input text is classified as Not Spam")


Input text is classified as Spam


In [13]:
# Try the trained classifier on a hand-written, everyday message

# Lowercase the message, then turn it into token counts with the vectorizer
# fitted during training
input_text = "ive made chicken curry for dinner".lower()
input_vec = vectorizer.transform([input_text])

# Ask the model for a label; the result holds one entry for the single message
prediction = model.predict(input_vec)

# Report the verdict: label 1 means spam, anything else means not spam
print(
    "Input text is classified as Spam"
    if prediction[0] == 1
    else "Input text is classified as Not Spam"
)


Input text is classified as Not Spam
