In [None]:
import email
import os
import re
import threading
import tkinter as tk
import warnings
from tkinter import filedialog, messagebox

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# Install a global "ignore" filter with no category, message or module
# restriction, i.e. every warning issued from here on is suppressed.
warnings.filterwarnings('ignore')

def _preprocess_text(text):
    """Normalise email text identically for training and for prediction.

    Removes URLs and punctuation, lower-cases the text and collapses runs of
    whitespace.  Non-string input (e.g. the NaN pandas produces for an empty
    CSV cell) yields an empty string instead of raising ``TypeError``.
    """
    if not isinstance(text, str):
        return ""
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'[^\w\s]', '', text)
    text = text.lower()
    return re.sub(r'\s+', ' ', text).strip()


def _find_spam_label(class_names):
    """Return the encoded label of the spam/phishing class.

    ``LabelEncoder`` numbers classes in sorted order, so the spam class is
    not necessarily label 1.  The first class whose name starts with
    "phish" or "spam" (case-insensitive) is used.

    NOTE(review): the actual label names live in the CSV, which is not
    visible here; when no name matches, label 1 is returned, which is what
    the previous hard-coded implementation always assumed.
    """
    for index, name in enumerate(class_names):
        if str(name).strip().lower().startswith(("phish", "spam")):
            return index
    return 1


def _decode_text_part(part):
    """Decode one message part to ``str``; return None if it has no payload."""
    payload = part.get_payload(decode=True)
    if payload is None:
        return None
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="ignore")
    except LookupError:
        # The message declares a charset Python does not know.
        return payload.decode("utf-8", errors="ignore")


def _extract_email_body(msg):
    """Return the first ``text/plain`` body of a parsed message, or ""."""
    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() != "text/plain":
                continue
            body = _decode_text_part(part)
            if body is not None:
                return body
        return ""
    body = _decode_text_part(msg)
    if body is not None:
        return body
    raw_payload = msg.get_payload()
    return raw_payload if isinstance(raw_payload, str) else ""


def open_mail_analyser():
    """Open the "Mail Analyser" window for spam/ham email classification.

    Loads the phishing-email CSV, then either restores the vectorizer and all
    models from ``saved_models`` or trains and saves them.  The window lets
    the user type or load (``.eml``) an email, shows each model's spam/ham
    percentages and lists accuracy, precision and recall on the held-out 20 %
    of the data (precision/recall are computed for the spam class).

    Returns None and blocks in the Tk main loop until the window is closed.
    Any exception raised while setting up is shown in an error dialog
    instead of propagating.
    """
    try:
        # Load the data set; os.path.join keeps the path portable.
        data_path = os.path.join("..", "Data", "Phishing_Email.csv")
        df = pd.read_csv(data_path, encoding='latin-1')
        # Rows missing the text or the label cannot be used for training.
        df = df.dropna(subset=["Email Text", "Email Type"])
        le = LabelEncoder()
        df["Email Type"] = le.fit_transform(df["Email Type"])
        df["Email Text"] = df["Email Text"].apply(_preprocess_text)

        # Encoded labels of the two classes (binary problem: the other class
        # is whatever the spam class is not).
        spam_label = _find_spam_label(le.classes_)
        ham_label = 1 - spam_label

        model_dir = "saved_models"
        os.makedirs(model_dir, exist_ok=True)

        models = {
            'Naive Bayes': MultinomialNB(),
            'Logistic Regression': LogisticRegression(),
            'SGD Classifier': SGDClassifier(),
            'XGBoost': XGBClassifier(),
            'Decision Tree': DecisionTreeClassifier(),
            'Random Forest': RandomForestClassifier(),
            'MLP Classifier': MLPClassifier()
        }
        # e.g. 'Naive Bayes' -> saved_models/model_Naive_Bayes.pkl
        model_files = {
            name: os.path.join(model_dir, f"model_{name.replace(' ', '_')}.pkl")
            for name in models
        }
        vectorizer_file = os.path.join(model_dir, "vectorizer.pkl")

        # Only reuse the cache when the vectorizer and every model are present.
        cache_complete = os.path.exists(vectorizer_file) and all(
            os.path.exists(path) for path in model_files.values()
        )

        # SECURITY: joblib.load unpickles arbitrary objects; only load files
        # from a saved_models directory you trust.
        if cache_complete:
            vectorizer = joblib.load(vectorizer_file)
            feature_x = vectorizer.transform(df["Email Text"]).toarray()
        else:
            vectorizer = TfidfVectorizer(stop_words="english", max_features=10000)
            feature_x = vectorizer.fit_transform(df["Email Text"]).toarray()

        y_tf = np.array(df['Email Type'])
        # Fixed random_state keeps the test split identical between a
        # training run and later runs that load the cached models.
        X_train, X_test, y_train, y_test = train_test_split(
            feature_x, y_tf, train_size=0.8, random_state=0
        )

        if cache_complete:
            for model_name in models:
                models[model_name] = joblib.load(model_files[model_name])
        else:
            for model in models.values():
                model.fit(X_train, y_train)
            for model_name, model in models.items():
                joblib.dump(model, model_files[model_name])
            joblib.dump(vectorizer, vectorizer_file)

        def classify_email(email_text):
            """Return {model name: (spam %, ham %)} for one email text."""
            # Apply the same preprocessing that the training data received.
            email_vec = vectorizer.transform([_preprocess_text(email_text)]).toarray()
            results = {}
            for model_name, model in models.items():
                if hasattr(model, "predict_proba"):
                    proba = model.predict_proba(email_vec)[0]
                    spam_proba = proba[spam_label] * 100
                    ham_proba = proba[ham_label] * 100
                else:
                    # No probability estimates available for this model:
                    # report its hard decision as 100 % / 0 %.
                    is_spam = model.predict(email_vec)[0] == spam_label
                    spam_proba = 100.0 if is_spam else 0.0
                    ham_proba = 100.0 - spam_proba
                results[model_name] = (spam_proba, ham_proba)
            return results

        def evaluate_models():
            """Return {model name: (accuracy, precision, recall)} on the test split."""
            results = {}
            for model_name, model in models.items():
                y_pred = model.predict(X_test)
                results[model_name] = (
                    accuracy_score(y_test, y_pred),
                    precision_score(y_test, y_pred, pos_label=spam_label),
                    recall_score(y_test, y_pred, pos_label=spam_label),
                )
            return results

        mail_analyser_window = tk.Tk()
        mail_analyser_window.title("Mail Analyser")

        def on_classify():
            """Classify the text in the entry box and display the results."""
            email_text = email_entry.get("1.0", tk.END).strip()
            if not email_text:
                messagebox.showwarning("Atenție", "Introduceți textul email-ului!")
                return
            results = classify_email(email_text)
            result_text.delete("1.0", tk.END)
            for model_name, (spam_proba, ham_proba) in results.items():
                result_text.insert(tk.END, f"{model_name}: ")
                result_text.insert(tk.END, f"Mesaj autentic ({ham_proba:.2f}%)", "green")
                result_text.insert(tk.END, f" vs Spam ({spam_proba:.2f}%)\n", "red")

        def load_email_file():
            """Let the user pick an .eml file and put its body in the entry box."""
            file_path = filedialog.askopenfilename(filetypes=[("Email files", "*.eml")])
            if not file_path:
                return
            try:
                # Parse from bytes so each part is decoded with its own
                # declared charset rather than being forced through UTF-8.
                with open(file_path, 'rb') as f:
                    msg = email.message_from_binary_file(f)
            except OSError as err:
                messagebox.showerror("Eroare", f"A apărut o eroare: {err}")
                return
            email_entry.delete("1.0", tk.END)
            email_entry.insert(tk.END, _extract_email_body(msg))

        title_label = tk.Label(mail_analyser_window, text="Analiză Email Spam/Ham", font=("Arial", 16, "bold"), bg="#f0f4f7", fg="#333")
        title_label.pack(pady=10)

        email_label = tk.Label(mail_analyser_window, text="Introduceți textul email-ului:", font=("Arial", 12), bg="#f0f4f7")
        email_label.pack()
        email_entry = tk.Text(mail_analyser_window, height=10, width=50, font=("Arial", 10))
        email_entry.pack(pady=5)

        load_button = tk.Button(mail_analyser_window, text="Încarcă .eml", command=load_email_file, bg="#2196F3", fg="white", font=("Arial", 12, "bold"), width=15)
        load_button.pack(pady=5)

        classify_button = tk.Button(mail_analyser_window, text="Clasifică", command=on_classify, bg="#4CAF50", fg="white", font=("Arial", 12, "bold"), width=15)
        classify_button.pack(pady=10)

        result_text = tk.Text(mail_analyser_window, height=10, width=80, font=("Arial", 10), bg="#f0f4f7", wrap=tk.WORD)
        result_text.tag_configure("green", foreground="green")
        result_text.tag_configure("red", foreground="red")
        result_text.pack(pady=10, padx=20, fill="both")

        evaluation_results = evaluate_models()
        evaluation_text = tk.Text(mail_analyser_window, height=10, width=80, font=("Arial", 10), bg="#f0f4f7", wrap=tk.WORD)
        evaluation_text.pack(pady=10, padx=20, fill="both")
        evaluation_text.insert(tk.END, "Evaluarea Modelelor:\n")
        for model_name, (accuracy, precision, recall) in evaluation_results.items():
            evaluation_text.insert(tk.END, f"{model_name} - Acuratețe: {accuracy:.2f}, Precizie: {precision:.2f}, Recall: {recall:.2f}\n")

        mail_analyser_window.mainloop()

    except Exception as e:
        # Top-level boundary: surface any setup failure to the user.
        messagebox.showerror("Eroare", f"A apărut o eroare: {str(e)}")

# Run the analyser on a separate thread so the caller's interface is not blocked.
# NOTE(review): the Tk window is created and driven from this worker thread;
# confirm that this is supported on the target platform.
_analyser_thread = threading.Thread(target=open_mail_analyser)
_analyser_thread.start()
