In [2]:
import tkinter as tk
from tkinter import filedialog, messagebox
import email
from bs4 import BeautifulSoup
from preprocessing import replace_email, replace_url, tokenize, remove_stopwords, lemmatize
from machine_learning import fit_model, train_logistic_regression, train_decision_tree, train_random_forest, train_gradient_boost, train_naive_bayes
from sklearn.feature_extraction.text import CountVectorizer

# Funcție pentru a încărca fișierul .eml
def load_email_file():
    file_path = filedialog.askopenfilename(filetypes=[("Email files", "*.eml")])
    if file_path:
        with open(file_path, 'r', encoding='utf-8') as f:
            msg = email.message_from_file(f)
            email_body = ""
            if msg.is_multipart():
                for part in msg.walk():
                    if part.get_content_type() == "text/plain":
                        email_body = part.get_payload(decode=True).decode()
                        break
            else:
                email_body = msg.get_payload(decode=True).decode()
            email_entry.delete("1.0", tk.END)
            email_entry.insert(tk.END, email_body)

# Funcție pentru a preprocesa textul email-ului
def preprocess_email_text(email_text):
    email_text = replace_email(email_text)
    email_text = replace_url(email_text)
    tokens = tokenize(email_text)
    tokens = remove_stopwords(tokens)
    tokens = lemmatize(tokens)
    return " ".join(tokens)

# Funcție pentru a clasifica email-ul
def classify_email():
    email_text = email_entry.get("1.0", tk.END).strip()
    if not email_text:
        messagebox.showwarning("Atenție", "Introduceți textul email-ului!")
        return

    # Preprocesare text email
    processed_text = preprocess_email_text(email_text)

    # Vectorizare text
    vectorizer = CountVectorizer()
    X_train_vec = vectorizer.fit_transform([processed_text])

    # Antrenare modele
    models = {
        'Logistic Regression': train_logistic_regression(X_train_vec, [1]),
        'Decision Tree': train_decision_tree(X_train_vec, [1]),
        'Random Forest': train_random_forest(X_train_vec, [1]),
        'Gradient Boosting': train_gradient_boost(X_train_vec, [1]),
        'Naive Bayes': train_naive_bayes(X_train_vec, [1]),
    }

    # Clasificare email
    results = {}
    for model_name, model in models.items():
        model = model['model']
        prediction = model.predict(X_train_vec)[0]
        proba = model.predict_proba(X_train_vec)[0]
        spam_proba = proba[1] * 100
        ham_proba = proba[0] * 100
        results[model_name] = (spam_proba, ham_proba)

    # Afisare rezultate
    result_text.delete("1.0", tk.END)
    for model_name, (spam_proba, ham_proba) in results.items():
        result_text.insert(tk.END, f"{model_name}: ")
        result_text.insert(tk.END, f"Mesaj autentic ({ham_proba:.2f}%)", "green")
        result_text.insert(tk.END, f" vs Spam ({spam_proba:.2f}%)\n", "red")

# Interfață grafică
mail_analyser_window = tk.Tk()
mail_analyser_window.title("Mail Analyser")

title_label = tk.Label(mail_analyser_window, text="Analiză Email Spam/Ham", font=("Arial", 16, "bold"), bg="#f0f4f7", fg="#333")
title_label.pack(pady=10)

email_label = tk.Label(mail_analyser_window, text="Introduceți textul email-ului:", font=("Arial", 12), bg="#f0f4f7")
email_label.pack()
email_entry = tk.Text(mail_analyser_window, height=10, width=50, font=("Arial", 10))
email_entry.pack(pady=5)

load_button = tk.Button(mail_analyser_window, text="Încarcă .eml", command=load_email_file, bg="#2196F3", fg="white", font=("Arial", 12, "bold"), width=15)
load_button.pack(pady=5)

classify_button = tk.Button(mail_analyser_window, text="Clasifică", command=classify_email, bg="#4CAF50", fg="white", font=("Arial", 12, "bold"), width=15)
classify_button.pack(pady=10)

result_text = tk.Text(mail_analyser_window, height=10, width=80, font=("Arial", 10), bg="#f0f4f7", wrap=tk.WORD)
result_text.tag_configure("green", foreground="green")
result_text.tag_configure("red", foreground="red")
result_text.pack(pady=10, padx=20, fill="both")

mail_analyser_window.mainloop()


Exception in Tkinter callback
Traceback (most recent call last):
  File "c:\Users\Here\anaconda3\envs\gputf3.10\lib\tkinter\__init__.py", line 1921, in __call__
    return self.func(*args)
  File "C:\Users\Here\AppData\Local\Temp\ipykernel_10096\1428199716.py", line 43, in classify_email
    processed_text = preprocess_email_text(email_text)
  File "C:\Users\Here\AppData\Local\Temp\ipykernel_10096\1428199716.py", line 30, in preprocess_email_text
    tokens = tokenize(email_text)
  File "c:\Users\Here\Desktop\Disertatie-Final\4. Cod\thesis-phishing-email-detection-main\preprocessing.py", line 152, in tokenize
    token_list = nltk.word_tokenize(lowercase)
  File "c:\Users\Here\anaconda3\envs\gputf3.10\lib\site-packages\nltk\tokenize\__init__.py", line 142, in word_tokenize
    sentences = [text] if preserve_line else sent_tokenize(text, language)
  File "c:\Users\Here\anaconda3\envs\gputf3.10\lib\site-packages\nltk\tokenize\__init__.py", line 119, in sent_tokenize
    tokenizer = _get_