In [3]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Read the labelled messages from the CSV export into a DataFrame.
dataset_path = '/content/Spam Email Detection - spam.csv'
data = pd.read_csv(dataset_path)

# Preprocessing the text
# Matches every character that is neither an ASCII letter nor whitespace.
# Compiled once so the pattern is not looked up again for every row.
_NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')


def preprocess_text(text):
    """Normalize a message for bag-of-words vectorization.

    The text is lowercased and every character that is not an ASCII letter
    or whitespace (digits, punctuation, symbols) is removed. Whitespace is
    left untouched, so runs of spaces are not collapsed.

    Args:
        text: The raw message. Normally a ``str``; ``None`` and float NaN
            (how pandas represents a missing CSV cell) are treated as an
            empty message, and any other non-string value is converted
            with ``str()`` first.

    Returns:
        The cleaned, lowercased string.
    """
    # NaN is the only float that is not equal to itself; without this guard
    # a missing cell would crash on ``.lower()``.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)
    return _NON_LETTER_RE.sub('', text.lower())

# Clean the message column in place before any splitting happens.
data['v2'] = data['v2'].apply(preprocess_text)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
messages = data['v2']
labels = data['v1']
X_train, X_test, y_train, y_test = train_test_split(
    messages,
    labels,
    test_size=0.2,
    random_state=42,
)

# Bag-of-words counts: the vocabulary is learned from the training messages
# only, and the test messages are projected onto that same vocabulary.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train)
X_test = vectorizer.transform(X_test)

# Fit a multinomial Naive Bayes model on the training counts
# (fit() returns the estimator itself, so it can be chained).
classifier = MultinomialNB().fit(X_train, y_train)

# Label the held-out messages.
y_pred = classifier.predict(X_test)

# Report accuracy, confusion matrix and per-class metrics.
accuracy_pct = accuracy_score(y_test, y_pred) * 100
print("Accuracy: {:.2f}%".format(accuracy_pct))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Hand-written messages used as a quick sanity check of the trained model.
examples = [
    "Get a free cruise ticket now!",
    "Hi, how are you doing?",
    "You've won $1000000, claim your prize now!",
    "Reminder: Meeting tomorrow at 10 AM",
    "Congratulations! You've been selected for a special offer."
]

# Apply the same cleaning that the training messages went through.
preprocessed_examples = list(map(preprocess_text, examples))

# Project the cleaned messages onto the fitted vocabulary and classify them.
example_vectors = vectorizer.transform(preprocessed_examples)
example_predictions = classifier.predict(example_vectors)

# Show each original (uncleaned) message next to its predicted label.
for position, example in enumerate(examples):
    print("\nExample:", example)
    print("Prediction:", example_predictions[position])


Accuracy: 97.94%

Confusion Matrix:
 [[960   5]
 [ 18 132]]

Classification Report:
               precision    recall  f1-score   support

         ham       0.98      0.99      0.99       965
        spam       0.96      0.88      0.92       150

    accuracy                           0.98      1115
   macro avg       0.97      0.94      0.95      1115
weighted avg       0.98      0.98      0.98      1115


Example: Get a free cruise ticket now!
Prediction: spam

Example: Hi, how are you doing?
Prediction: ham

Example: You've won $1000000, claim your prize now!
Prediction: spam

Example: Reminder: Meeting tomorrow at 10 AM
Prediction: ham

Example: Congratulations! You've been selected for a special offer.
Prediction: spam
