# In [None]:
import csv
import os
import urllib.request
import zipfile

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Location of the SMS Spam Collection archive, plus the local names used for
# the downloaded zip and for the tab-separated data file it contains.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip"
filename = "smsspamcollection.zip"
data_file = "SMSSpamCollection"

# Only hit the network when the extracted data file is missing, so re-running
# the script does not download and unpack the archive every time.
if not os.path.exists(data_file):
    urllib.request.urlretrieve(url, filename)

    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall()

# Each line is "<label>\t<message>" with no header row. Quote handling is
# switched off because the messages are free text that may contain literal
# double quotes; with the default quoting a message that starts with '"' is
# parsed as a quoted field and can absorb the following lines, merging rows.
df = pd.read_csv(
    data_file,
    sep="\t",
    header=None,
    names=["label", "message"],
    quoting=csv.QUOTE_NONE,
)

# Convert labels to 0 (ham) and 1 (spam). Series.map() yields NaN for any
# value missing from the mapping, so fail here with a clear message instead of
# letting NaN labels reach the classifier.
label_codes = df['label'].map({'ham': 0, 'spam': 1})
unknown_mask = label_codes.isna()
if unknown_mask.any():
    unexpected = sorted(df.loc[unknown_mask, 'label'].astype(str).unique())
    raise ValueError(f"Unexpected labels in {data_file}: {unexpected}")
df['label'] = label_codes.astype("int64")

y = df['label']

# Split the raw text BEFORE vectorizing. Fitting the vectorizer on the full
# corpus would build the vocabulary from test messages as well, leaking
# information from the evaluation set into the features. The split is made
# over the same number of samples with the same random_state, so the same
# rows end up in the train and test partitions as when splitting the
# vectorized matrix.
messages_train, messages_test, y_train, y_test = train_test_split(
    df['message'], y, test_size=0.2, random_state=42
)

# Learn the vocabulary from the training messages only, then reuse it to
# encode the test messages (tokens unseen in training are ignored).
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(messages_train)
X_test = vectorizer.transform(messages_test)

# Full-corpus feature matrix, kept so the name X stays defined as before; it
# is encoded with the training vocabulary and is not used for fitting.
X = vectorizer.transform(df['message'])

# Multinomial Naive Bayes on the token-count features.
model = MultinomialNB()
model.fit(X_train, y_train)

# Score the test split: predict once, compute each metric, then report them.
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", report)

# Classify one hand-written message with the already-fitted vectorizer and model.
test_message = ["Congratulations! You've won a free ticket to the Bahamas. Call now!"]
test_vector = vectorizer.transform(test_message)
prediction = model.predict(test_vector)

# A predicted class of 1 is reported as spam; anything else as ham.
if prediction[0] == 1:
    verdict = "Spam"
else:
    verdict = "Ham"
print("\nCustom Message Prediction:", verdict)
