###### pip install nltk scikit-learn pandas requests

In [2]:
# model_development.py
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import nltk
import requests
import zipfile
import io
from joblib import dump

# Fetch the NLTK stop-word corpus; it is used later to build the
# vectorizer's stop-word list.
nltk.download('stopwords')

# URL of the dataset (SMS Spam Collection ZIP hosted on the UCI archive)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip"

# Download the archive. The timeout keeps the script from hanging forever on
# a stalled connection, and raise_for_status() surfaces HTTP errors (4xx/5xx)
# here instead of as a confusing BadZipFile when an error page is unzipped.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Open the archive in memory and load the tab-separated data file into a
# two-column DataFrame: the class label and the message text.
with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    with z.open('SMSSpamCollection') as f:
        df = pd.read_csv(f, sep='\t', names=["label", "message"])

# Encode the string labels as integers: ham -> 0, spam -> 1
label_codes = {'ham': 0, 'spam': 1}
df['label'] = df['label'].map(label_codes)

# Hold out 20% of the messages for evaluation (fixed seed for repeatable splits)
X_train, X_test, y_train, y_test = train_test_split(
    df['message'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# Bag-of-words vectorizer that drops NLTK's English stop words
english_stop_words = nltk.corpus.stopwords.words('english')
vectorizer = CountVectorizer(stop_words=english_stop_words)

# Learn the vocabulary on the training split only, then reuse it for the test split
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Fit a Multinomial Naive Bayes classifier on the training counts
clf = MultinomialNB()
clf.fit(X_train_vec, y_train)

# Persist the fitted model and vectorizer so they can be loaded for prediction
dump(clf, 'spam_classifier.joblib')
dump(vectorizer, 'vectorizer.joblib')

# Score the held-out split and print the metrics
y_pred = clf.predict(X_test_vec)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", report)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Accuracy: 0.9847533632286996
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99       966
           1       0.95      0.94      0.94       149

    accuracy                           0.98      1115
   macro avg       0.97      0.97      0.97      1115
weighted avg       0.98      0.98      0.98      1115



In [3]:
# prediction.py
from joblib import load

# Restore the classifier and its vectorizer from their joblib files on disk
MODEL_PATH = 'spam_classifier.joblib'
VECTORIZER_PATH = 'vectorizer.joblib'
clf = load(MODEL_PATH)
vectorizer = load(VECTORIZER_PATH)

# Classify a single message with the loaded model
def is_spam(message):
    """Return the model's predicted label for one message.

    The text is vectorized with the module-level ``vectorizer`` and passed to
    the module-level ``clf``; the first (and only) prediction is returned.
    Callers in this file treat a result equal to 1 as spam.
    """
    features = vectorizer.transform([message])
    predictions = clf.predict(features)
    return predictions[0]

# Ask the user for a message on stdin
user_message = input("Enter a message to check if it's spam: ")

# Classify it and print the verdict (a label equal to 1 means spam)
result = is_spam(user_message)
verdict = "The message is spam." if result == 1 else "The message is not spam."
print(verdict)


Enter a message to check if it's spam:  hii


The message is not spam.


In [8]:
import tkinter as tk
from tkinter import messagebox
from joblib import load

# Restore the classifier and its vectorizer from their joblib files on disk
MODEL_PATH = 'spam_classifier.joblib'
VECTORIZER_PATH = 'vectorizer.joblib'
clf = load(MODEL_PATH)
vectorizer = load(VECTORIZER_PATH)

# Button callback: classify the text currently in the entry box
def check_spam():
    """Classify the entry widget's text and show the verdict in a dialog.

    Reads the module-level ``entry`` widget, vectorizes its content with the
    module-level ``vectorizer`` and predicts with ``clf``. A blank or
    whitespace-only entry is not classified; a warning dialog is shown instead.
    """
    message = entry.get()
    # Guard against empty input: without this, blank text would still be run
    # through the model and reported as if it were a real verdict.
    if not message.strip():
        messagebox.showwarning("Result", "Please enter a message to check.")
        return
    result = clf.predict(vectorizer.transform([message]))[0]
    messagebox.showinfo("Result", "The message is spam." if result == 1 else "The message is not spam.")

# Shared look-and-feel values for the window and its widgets
BG_COLOR = '#f0f0f0'
FONT = ('Arial', 12)

# Create the main window
root = tk.Tk()
root.title("Spam Classifier")
root.geometry("400x200")
root.configure(bg=BG_COLOR)

# Prompt label above the input box
prompt_label = tk.Label(root, text="Enter a message to check if it's spam:", bg=BG_COLOR, fg='#333', font=FONT)
prompt_label.pack(pady=10)

# Text box the check_spam callback reads from
entry = tk.Entry(root, width=50, font=FONT)
entry.pack(pady=10)

# Button that triggers classification
check_button = tk.Button(root, text="Check", command=check_spam, bg='#4CAF50', fg='white', font=FONT)
check_button.pack(pady=10)

# Hand control to Tk's event loop
root.mainloop()
