# In [None]:
import chardet
from tkinter import *
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import LancasterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split as ttsplit
from sklearn import svm
import pandas as pd
import pickle
import numpy as np
import imaplib
import email

# Guess the dataset's character encoding by sampling the start of the file
file = "spam.csv"
with open(file, mode='rb') as rawdata:
    result = chardet.detect(rawdata.read(100000))  # sample at most 100000 bytes
encoding = result['encoding']

# Load the dataset with the detected encoding and pick the columns by position
df = pd.read_csv(file, encoding=encoding)
labels_Y, message_X = df.iloc[:, 0], df.iloc[:, 1]  # column 0: Label, column 1: EmailText

# Single Lancaster stemmer instance shared by the preprocessing code below
lstem = LancasterStemmer()

def preprocess(messages):
    """Normalise raw messages into space-separated, stemmed tokens.

    For every message: keep only alphabetic characters, turn every
    whitespace character into a single space, tokenize the result with
    ``word_tokenize`` and stem each token with the module-level Lancaster
    stemmer ``lstem``.

    Args:
        messages: Iterable of strings (one raw message per item).

    Returns:
        A list with one processed string per input message, in input order.
    """
    processed_messages = []
    for msg in messages:
        # Drop digits/punctuation, but map *any* whitespace (newline, tab, ...)
        # to a space: deleting line breaks would glue the last word of one
        # line onto the first word of the next.
        letters_only = ''.join(
            ch if ch.isalpha() else ' '
            for ch in msg
            if ch.isalpha() or ch.isspace()
        )
        # Tokenize the message
        words = word_tokenize(letters_only)
        # Stem the words and re-join them into one string per message
        processed_messages.append(' '.join(lstem.stem(word) for word in words))
    return processed_messages

# Run every dataset message through the stemming pipeline
message_x = preprocess(message_X)

# Vectorization process: learn the TF-IDF vocabulary (English stop words
# removed) and build the dense feature matrix
tfvec = TfidfVectorizer(stop_words='english')
x_new = tfvec.fit_transform(message_x).toarray()

# Encode labels: ham -> 0, spam -> 1.
# An explicit mapping avoids relying on Series.replace() silently downcasting
# an object column to integers. Any label other than 'ham'/'spam' maps to NaN,
# so the integer cast below fails loudly instead of handing mixed labels to SVC.
y_new = labels_Y.map({'ham': 0, 'spam': 1}).astype(np.int64).to_numpy()

# Split dataset into training and testing parts (80% / 20%, shuffled)
x_train, x_test, y_train, y_test = ttsplit(x_new, y_new, test_size=0.2, shuffle=True)

# Train the SVM classifier
classifier = svm.SVC()
classifier.fit(x_train, y_train)

# Store the classifier, the processed messages and the fitted vectorizer for
# prediction. The context manager guarantees the file is flushed and closed
# before the GUI reads it back.
with open("training_data.pkl", "wb") as model_file:
    pickle.dump({'classifier': classifier, 'message_x': message_x, 'tfvec': tfvec},
                model_file)

# GUI Code: shared appearance constants for the tkinter widgets
BG_COLOR = "#89CFF0"  # background colour passed as bg= to the window and its labels
FONT_BOLD = "Helvetica %d bold"  # font template; "%d" is filled with the point size via the % operator

class SpamHam:
    """Tkinter GUI that lists inbox e-mails and labels a selected one spam or ham.

    Constructing an instance builds the window, loads the pickled classifier
    and TF-IDF vectorizer from ``training_data.pkl`` and logs in to the IMAP
    server. Call :meth:`fetch_emails` to fill the list and :meth:`run` to
    start the event loop.

    Attributes set by the methods below:
        window: the root ``Tk`` window.
        email_list: ``Listbox`` showing one row per fetched e-mail.
        email_bodies: list of body texts (``None`` when no text could be
            extracted), index-aligned with the rows of ``email_list``.
        email_texts: dict mapping subject -> body, kept for callers that
            look bodies up by subject (duplicate subjects overwrite).
        selected_email_index / selected_email_subject: current selection.
        classifier, tfvec, message_x: objects loaded from the pickle file.
        mail: the logged-in ``imaplib.IMAP4_SSL`` connection.
    """

    # Text shown for messages that carry no Subject header
    NO_SUBJECT = "(no subject)"

    def __init__(self):
        """Build the window, load the model and connect to the mail server."""
        # Initialize tkinter window
        self.window = Tk()
        self.window.geometry("800x600")  # Set window size
        self.main_window()
        self.load_datafile()
        self.connect_email()

    def load_datafile(self):
        """Load the classifier, processed messages and vectorizer from disk."""
        # NOTE: pickle.load can execute arbitrary code; only load a file that
        # was written by this script.
        with open("training_data.pkl", "rb") as model_file:
            datafile = pickle.load(model_file)
        self.message_x = datafile["message_x"]
        self.classifier = datafile["classifier"]
        self.tfvec = datafile["tfvec"]

    def connect_email(self, username=None, password=None):
        """Log in to the Gmail IMAP server and select the inbox.

        Credentials are never stored in the source file. They are taken from
        the arguments or, when omitted, from the environment variables
        ``SPAM_DETECTOR_EMAIL`` and ``SPAM_DETECTOR_PASSWORD``.

        Args:
            username: e-mail address to log in with (optional).
            password: password / app password for that account (optional).

        Raises:
            ValueError: if no username or password is available.
            imaplib.IMAP4.error: if the server rejects the login.
        """
        import os

        username = username or os.environ.get("SPAM_DETECTOR_EMAIL")
        password = password or os.environ.get("SPAM_DETECTOR_PASSWORD")
        if not username or not password:
            raise ValueError(
                "Missing e-mail credentials: pass username/password or set "
                "SPAM_DETECTOR_EMAIL and SPAM_DETECTOR_PASSWORD."
            )

        # Connect to the IMAP server
        self.mail = imaplib.IMAP4_SSL("imap.gmail.com")
        self.mail.login(username, password)
        self.mail.select("inbox")  # Select inbox or another folder

    @staticmethod
    def _decode_subject(raw_subject):
        """Return a displayable subject line for a raw ``Subject`` header value."""
        from email.errors import MessageError
        from email.header import decode_header, make_header

        if raw_subject is None:
            return SpamHam.NO_SUBJECT
        try:
            # Resolve RFC 2047 encoded words such as "=?utf-8?q?...?="
            text = str(make_header(decode_header(raw_subject)))
        except (LookupError, ValueError, MessageError):
            # Unknown charset or malformed encoding: show the raw value
            text = str(raw_subject)
        # Collapse folded-header line breaks into single spaces
        return " ".join(text.split())

    @staticmethod
    def _extract_body(msg):
        """Concatenate the decoded text/plain, non-attachment parts of *msg*."""
        chunks = []
        for part in msg.walk():
            if part.get_content_type() != "text/plain":
                continue
            if "attachment" in str(part.get("Content-Disposition")):
                continue
            payload = part.get_payload(decode=True)
            if isinstance(payload, bytes):
                # Honour the charset declared by the part; default to UTF-8
                charset = part.get_content_charset() or "utf-8"
                try:
                    chunks.append(payload.decode(charset, "ignore"))
                except (LookupError, ValueError):
                    # Unknown/unsupported charset name: best-effort UTF-8
                    chunks.append(payload.decode("utf-8", "ignore"))
            elif isinstance(payload, str):
                chunks.append(payload)
        return "".join(chunks)

    def fetch_emails(self):
        """Fetch every message in the selected mailbox and list it in the GUI.

        Each message adds one row to ``email_list`` and one entry to
        ``email_bodies`` at the same index. IMAP and unexpected errors are
        printed rather than raised, so rows fetched before the failure stay
        usable.
        """
        try:
            # Clear listbox and cached bodies before adding new items
            self.email_list.delete(0, END)
            self.email_bodies = []
            self.email_texts.clear()
            self.selected_email_index = None

            # Fetch emails from inbox
            status, search_data = self.mail.search(None, "ALL")  # Fetch all emails
            if status != "OK" or not search_data or search_data[0] is None:
                print(f"IMAP error occurred: search returned {status}")
                return

            for num in search_data[0].split():
                status, fetched = self.mail.fetch(num, "(RFC822)")
                # A successful fetch yields a (header, raw_bytes) tuple first
                if status != "OK" or not fetched or not isinstance(fetched[0], tuple):
                    continue
                msg = email.message_from_bytes(fetched[0][1])

                # Extract email content (subject and body)
                subject = self._decode_subject(msg["subject"])
                body = self._extract_body(msg)

                if body:
                    self.email_list.insert(END, subject)
                else:
                    self.email_list.insert(END, f"Subject: {subject} - Could not extract body.")
                    body = None
                # Keep bodies index-aligned with listbox rows so messages that
                # share a subject do not overwrite each other.
                self.email_bodies.append(body)
                self.email_texts[subject] = body

        except imaplib.IMAP4.error as e:
            print(f"IMAP error occurred: {e}")
        except Exception as e:
            # GUI boundary: report and keep the window alive
            print(f"An unexpected error occurred: {e}")

    def preprocess_message(self, message):
        """Return *message* normalised exactly like the training data.

        Delegates to the module-level ``preprocess`` so training and
        prediction always share one implementation.
        """
        return preprocess([message])[0]

    def main_window(self):
        """Create and lay out all widgets and initialise the selection state."""
        # Configure the main window
        self.window.title("Spam Detector")
        self.window.configure(bg=BG_COLOR)

        # Header label
        head_label = Label(self.window, bg="#FFA500", fg="#000", text="Spam Detector", font=FONT_BOLD % 18, pady=15)
        head_label.pack(fill=X)

        # Email list label
        list_label = Label(self.window, text="Emails:", font=FONT_BOLD % 14, bg=BG_COLOR, pady=10)
        list_label.pack(pady=10)

        # Frame for listbox and scrollbar
        frame = Frame(self.window)
        frame.pack(padx=10, pady=10, fill=BOTH, expand=True)

        # Listbox for displaying emails
        self.email_list = Listbox(frame, bg="#FFF", fg="#000", font=FONT_BOLD % 12, width=50, height=20)
        self.email_list.pack(side=LEFT, fill=BOTH, expand=True)
        self.email_list.bind("<<ListboxSelect>>", self.on_select_email)

        # Scrollbar for listbox
        scrollbar = Scrollbar(frame)
        scrollbar.pack(side=RIGHT, fill=Y)
        self.email_list.config(yscrollcommand=scrollbar.set)
        scrollbar.config(command=self.email_list.yview)

        # Classify button
        classify_button = Button(self.window, text="Classify", font=FONT_BOLD % 14, width=15, bg="#000", fg="#FFF", command=self.classify_email)
        classify_button.pack(pady=15)

        # Result label
        self.result_label = Label(self.window, bg=BG_COLOR, fg="#000", text="", font=FONT_BOLD % 16, pady=15, wraplength=600)
        self.result_label.pack()

        # Store email text data: by subject (legacy lookup) and by listbox row
        self.email_texts = {}
        self.email_bodies = []
        self.selected_email_index = None

    def on_select_email(self, event):
        """Remember the row and text of the e-mail selected in the listbox.

        Args:
            event: the ``<<ListboxSelect>>`` event (unused).
        """
        selection = self.email_list.curselection()
        # An empty selection keeps the previous choice
        if selection:
            index = selection[0]
            self.selected_email_index = index
            self.selected_email_subject = self.email_list.get(index)

    def classify_email(self):
        """Classify the selected e-mail and show the verdict in the result label."""
        index = getattr(self, "selected_email_index", None)
        if index is None:
            self.result_label.config(fg="#ff0000", text="Please select an email.")
            return

        body = self.email_bodies[index] if 0 <= index < len(self.email_bodies) else None
        if not body:
            self.result_label.config(fg="#ff0000", text="Could not extract body.")
            return

        processed_msg = self.preprocess_message(body)
        vectorized_msg = self.tfvec.transform([processed_msg]).toarray()

        # Predict the label (1 = spam, anything else = ham)
        prediction = self.classifier.predict(vectorized_msg)[0]
        result = "spam" if prediction == 1 else "ham"
        self.result_label.config(fg="#ff0000" if result == "spam" else "#00ff00", text=f"Email '{self.selected_email_subject}' is: {result}")

    def run(self):
        """Start the tkinter event loop; returns when the window is closed."""
        self.window.mainloop()

if __name__ == "__main__":
    app = SpamHam()
    try:
        app.fetch_emails()  # Fetch and list emails
        app.run()  # Blocks until the window is closed
    finally:
        # Always end the IMAP session that SpamHam() opened, even if the GUI
        # exits with an error; a failed logout must not mask that error.
        try:
            app.mail.logout()
        except (imaplib.IMAP4.error, OSError) as e:
            print(f"Could not log out of the mail server cleanly: {e}")

    # Evaluate accuracy on the held-out test split (runs after the GUI closes)
    accuracy = classifier.score(x_test, y_test)
    print(f"Accuracy of the model: {accuracy:.2%}")
