In [7]:
import numpy as np
import pandas as pd
import nltk
import string
import os
import joblib
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from nltk.stem.porter import PorterStemmer

# Download required NLTK resources
nltk.download('punkt')
nltk.download('stopwords')

# Load dataset
df = pd.read_csv(r'C:\Users\shaks\Desktop\SMS Spam Detection\spam.csv', encoding='latin-1')

# Rename and clean columns if needed
if 'v1' in df.columns and 'v2' in df.columns:
    df = df.rename(columns={'v1': 'Label', 'v2': 'EmailText'})

# Encode labels
encoder = LabelEncoder()
df['Label'] = encoder.fit_transform(df['Label'])
df = df.drop_duplicates(keep='first')

# Preprocessing functions
ps = PorterStemmer()
stopwords_set = set(nltk.corpus.stopwords.words('english'))

def get_importantFeatures(sent):
    sent = sent.lower()
    return [i for i in nltk.word_tokenize(sent) if i.isalnum()]

def removing_stopWords(sent):
    return [i for i in sent if i not in stopwords_set and i not in string.punctuation]

def potter_stem(sent):
    return " ".join([ps.stem(i) for i in sent])

# Apply preprocessing
df['imp_feature'] = df['EmailText'].apply(lambda x: potter_stem(removing_stopWords(get_importantFeatures(x))))

# Split data
X = df['imp_feature']
y = df['Label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# TF-IDF
tfidf = TfidfVectorizer()
X_train_tfidf = tfidf.fit_transform(X_train)

# Save vectorizer
joblib.dump(tfidf, "tfidf_vectorizer.pkl")

# Train model and save it
model_path = "spam_detector.pkl"
if not os.path.exists(model_path):
    model = GridSearchCV(SVC(), {'kernel': ['linear'], 'C': [1, 10]})
    model.fit(X_train_tfidf, y_train)
    joblib.dump(model.best_estimator_, model_path)

print("Model and vectorizer saved successfully.")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\shaks\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\shaks\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Model and vectorizer saved successfully.


In [2]:
import tkinter as tk
from tkinter import StringVar
import joblib
import nltk
import string
from nltk.stem.porter import PorterStemmer

# Load model and vectorizer
spam_model = joblib.load("spam_detector.pkl")
tfidf = joblib.load("tfidf_vectorizer.pkl")

# Download required NLTK resources
nltk.download('punkt')
nltk.download('stopwords')

# NLP tools
ps = PorterStemmer()
stopwords_set = set(nltk.corpus.stopwords.words('english'))

def get_importantFeatures(sent):
    sent = sent.lower()
    return [i for i in nltk.word_tokenize(sent) if i.isalnum()]

def removing_stopWords(sent):
    return [i for i in sent if i not in stopwords_set and i not in string.punctuation]

def potter_stem(sent):
    return " ".join([ps.stem(i) for i in sent])

def check_spam():
    text = entry_text.get()
    tokens = removing_stopWords(get_importantFeatures(text))
    processed = potter_stem(tokens)
    vectorized = tfidf.transform([processed])
    prediction = spam_model.predict(vectorized)
    if prediction == 1:
        result_var.set("🔴 SPAM detected")
        result_label.config(fg="red")
    else:
        result_var.set("🟢 Not Spam")
        result_label.config(fg="green")

# GUI
root = tk.Tk()
root.title("SMS Spam Detector")
root.geometry("500x400")
root.configure(bg="#f0f4f7")
root.resizable(False, False)

title = tk.Label(root, text="SMS Spam Detector", font=("Helvetica", 20, "bold"), bg="#283149", fg="white", pady=15)
title.pack(fill="x")

main_frame = tk.Frame(root, bg="#f0f4f7", padx=20, pady=30)
main_frame.pack(fill="both", expand=True)

label_prompt = tk.Label(main_frame, text="Enter your message below:", font=("Arial", 12), bg="#f0f4f7")
label_prompt.pack(anchor="w")

entry_text = tk.Entry(main_frame, font=("Arial", 12), width=50, bd=2, relief="groove")
entry_text.pack(pady=10)

def on_enter(e):
    btn_submit.config(bg="#e63946", fg="white")
def on_leave(e):
    btn_submit.config(bg="#f76c6c", fg="black")

btn_submit = tk.Button(main_frame, text="Check Spam", font=("Arial", 12, "bold"), bg="#f76c6c", fg="black",
                       relief="raised", bd=3, command=check_spam, cursor="hand2", width=15)
btn_submit.pack(pady=10)
btn_submit.bind("<Enter>", on_enter)
btn_submit.bind("<Leave>", on_leave)

result_var = StringVar()
result_var.set("Result will be shown here...")
result_label = tk.Label(main_frame, textvariable=result_var, font=("Arial", 14, "italic"), bg="#f0f4f7", fg="blue")
result_label.pack(pady=20)

root.mainloop()


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\shaks\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\shaks\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
