In [None]:
# Import the required libraries
import pandas as pd
import numpy as np
import re
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report

In [None]:
# Fetch the spam corpus and give its columns the names the rest of the
# notebook relies on ('label' and 'message').
dataset_url = "https://raw.githubusercontent.com/HarshiniBhat/DataMining/refs/heads/main/spam.csv"
df = pd.read_csv(dataset_url).set_axis(['label', 'message'], axis=1)


In [None]:
# Preview only the first rows instead of rendering the whole frame.
df.head()

In [None]:
# Encode the text labels as integers (ham -> 0, spam -> 1).
# The dtype guard makes re-running this cell a no-op: mapping an already
# encoded column a second time would turn every label into NaN.
label_codes = {'ham': 0, 'spam': 1}
if not pd.api.types.is_numeric_dtype(df['label']):
    encoded_labels = df['label'].map(label_codes)
    # Fail loudly on labels outside the mapping rather than carrying NaN
    # targets into model training.
    if encoded_labels.isna().any():
        unexpected = df.loc[encoded_labels.isna(), 'label'].unique().tolist()
        raise ValueError(f"Unexpected label values: {unexpected}")
    df['label'] = encoded_labels.astype('int64')

In [None]:
# Preview the first rows to confirm the labels are now numeric.
df.head()

In [None]:
# Text preprocessing
# Translation table that deletes every character in string.punctuation.
# A table is used instead of the regex f"[{string.punctuation}]" because the
# unescaped "\]" inside that character class is read as an escaped "]", so
# backslashes were never removed.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def preprocess_text(text):
    """Lower-case ``text`` and remove all ASCII punctuation characters.

    Parameters
    ----------
    text : str
        Raw message text.

    Returns
    -------
    str
        The lower-cased text with every character from
        ``string.punctuation`` (including the backslash) removed.
    """
    return text.lower().translate(_PUNCTUATION_TABLE)



In [None]:
# Normalise every message (lower-case, punctuation stripped) element by element.
df['message'] = df['message'].map(preprocess_text)

In [None]:
# Preview the first cleaned messages rather than rendering the whole column.
df['message'].head()

In [None]:
# Split the dataset, then convert text to numerical features.
# The split happens on the raw messages first so that the TF-IDF vocabulary
# and IDF weights are learned from the training messages only; fitting the
# vectorizer on the full corpus would leak information from the held-out
# messages into the features the model is trained on.
messages_train, messages_test, y_train, y_test = train_test_split(
    df['message'], df['label'], test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer(stop_words='english')
X_train = vectorizer.fit_transform(messages_train)  # fit on training text only
X_test = vectorizer.transform(messages_test)        # reuse the fitted vocabulary

# Keep the original names X and y defined for the full dataset; X is now
# produced by the train-fitted vectorizer.
X = vectorizer.transform(df['message'])
y = df['label']

In [None]:
# Fit a multinomial Naive Bayes classifier on the training features.
# fit() returns the estimator itself, so construction and training are chained.
model = MultinomialNB().fit(X_train, y_train)
model

In [None]:
# Predict labels for the held-out test features.
y_pred = model.predict(X=X_test)

In [None]:
# Evaluation: compare the predictions with the true test labels.
# The three scalar scores share one call signature, so compute them together.
accuracy, precision, recall = (
    score_fn(y_test, y_pred)
    for score_fn in (accuracy_score, precision_score, recall_score)
)

# Tabular summaries for the same predictions.
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

In [None]:
# Report the scalar scores (two decimals), one per line.
print(
    f'Accuracy: {accuracy:.2f}',
    f'Precision: {precision:.2f}',
    f'Recall: {recall:.2f}',
    sep='\n',
)

# Then the confusion matrix and the per-class report.
print('Confusion Matrix:\n', conf_matrix)
print('Classification Report:\n', class_report)

In [None]:

# Test with a new message.
sample_message = ["Congratulations! You've won a free iPhone. Click here to claim."]

# Apply the same preprocessing the training messages went through; without it
# the sample would be tokenized differently from the training text (e.g.
# "You've" vs. the training-time "youve").
sample_features = vectorizer.transform(
    [preprocess_text(message) for message in sample_message]
)

prediction = model.predict(sample_features)
print("Spam" if prediction[0] else "Not Spam")
