In [1]:
import tkinter as tk
from tkinter import ttk, messagebox
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from collections import Counter

# Registry of selectable classifiers: display name -> local checkpoint directory.
# The final entry has no checkpoint of its own (None); the rest of the notebook
# treats that name as "run every other model and take a majority vote".
# Insertion order is preserved and is the order shown in the model dropdown.
models = dict(
    [
        ("MARBERT", "./fine-tuned-marbert_v4"),
        ("BERT", "./fine-tuned-arabert_v4"),
        ("T5", "./fine-tuned-arat5_v3"),
        ("GPT-2", "./fine-tuned-araGPT_last"),
        ("Best of All Models", None),
    ]
)



In [2]:
# Already-loaded (tokenizer, model) pairs keyed by checkpoint path. Loading a
# checkpoint from disk is slow, and the ensemble mode cycles through every
# model on each classification, so each checkpoint is loaded at most once per
# kernel session. Trade-off: every model that has been used stays in memory.
_loaded_checkpoints = {}


def load_model(model_name):
    """Make ``model_name`` the active classifier.

    Sets the module-level globals ``tokenizer`` and ``model`` that
    ``get_model_prediction`` reads.

    Args:
        model_name: A key of the ``models`` registry. The special name
            "Best of All Models" has no checkpoint; selecting it clears the
            active tokenizer/model (both become ``None``).

    Raises:
        KeyError: If ``model_name`` is not a key of ``models``.
    """
    global tokenizer, model
    if model_name == "Best of All Models":
        tokenizer = None
        model = None
        return

    checkpoint = models[model_name]
    if checkpoint not in _loaded_checkpoints:
        # First use of this checkpoint: read it from disk and remember it.
        _loaded_checkpoints[checkpoint] = (
            AutoTokenizer.from_pretrained(checkpoint),
            AutoModelForSequenceClassification.from_pretrained(checkpoint),
        )
    tokenizer, model = _loaded_checkpoints[checkpoint]

# Preload the default model up front; "MARBERT" is also the initial selection
# of the model combobox, so the first "Classify" click needs no extra load.
load_model("MARBERT")

def classify_text():
    """Classify the text in the input box and show the result in the GUI.

    Reads the text from ``text_entry`` and the model choice from
    ``model_combobox``. For a single model, the currently loaded model is
    queried via ``get_model_prediction``. For "Best of All Models", every
    concrete model in ``models`` is loaded in turn and the most common
    predicted class wins.

    Problems are reported through message boxes instead of being raised:
    empty input, a model name that is not in ``models``, and any exception
    raised while loading a model or running inference.

    Returns:
        None. The outcome is written to ``result_label``.
    """
    input_text = text_entry.get("1.0", tk.END).strip()
    if not input_text:
        messagebox.showwarning("Input Error", "Please enter some text to classify.")
        return

    selected_model = model_combobox.get()
    if selected_model not in models:
        # The combobox may hold free-typed text; refuse it rather than
        # silently classifying with whichever model happens to be loaded.
        messagebox.showwarning(
            "Model Error",
            f"Unknown model {selected_model!r}. Please choose one from the list.",
        )
        return

    try:
        if selected_model == "Best of All Models":
            votes = []
            for model_name in models:
                if model_name == "Best of All Models":
                    continue
                load_model(model_name)
                votes.append(get_model_prediction(input_text))
            # Counter.most_common orders equal counts by first occurrence, so
            # a tie goes to the class voted for by the earliest-listed model.
            best_prediction = Counter(votes).most_common(1)[0][0]
        else:
            best_prediction = get_model_prediction(input_text)
    except Exception as exc:
        # Exceptions inside a Tk callback never reach the user on their own;
        # surface load/inference failures explicitly.
        messagebox.showerror("Classification Error", f"Could not classify the text: {exc}")
        return

    # Map the predicted class index to its human-readable label.
    labels = {
        0: "Inoffensive and Not Hate Speech",
        1: "Offensive and Not Hate Speech",
        2: "Offensive and Hate Speech",
    }
    label = labels.get(best_prediction, "Unknown classification")

    result_label.config(text=f"Prediction: {label}", foreground="blue")

def get_model_prediction(input_text):
    """Return the class index predicted by the currently loaded model.

    Uses the module-level ``tokenizer`` and ``model`` globals.

    Args:
        input_text: The text to classify.

    Returns:
        int: Index of the highest-scoring logit for the single input.

    Raises:
        RuntimeError: If no tokenizer/model is currently loaded.
    """
    if tokenizer is None or model is None:
        raise RuntimeError(
            "No model is loaded; call load_model() with a concrete model name first."
        )

    # truncation=True caps the input at the tokenizer's configured maximum
    # length, so long texts no longer overflow the model's position limit.
    encoded = tokenizer(input_text, return_tensors='pt', truncation=True)
    with torch.no_grad():
        output = model(
            input_ids=encoded["input_ids"],
            attention_mask=encoded.get("attention_mask"),
        )
    return torch.argmax(output.logits, dim=1).item()

def show_context_menu(event):
    """Pop up the context menu at the pointer position of ``event``.

    Args:
        event: Tk event providing screen coordinates ``x_root``/``y_root``.
    """
    try:
        context_menu.tk_popup(event.x_root, event.y_root)
    finally:
        # Always release the menu's grab, even if the popup raised; otherwise
        # the menu can keep capturing pointer input on some platforms.
        context_menu.grab_release()

def paste_text():
    """Trigger a paste in the text input by sending it the paste virtual event."""
    paste_event = "<<Paste>>"
    text_entry.event_generate(paste_event)

def on_model_change(event):
    """React to a new choice in the model combobox.

    A concrete model is loaded immediately; the "Best of All Models" entry
    loads nothing here. In both cases the displayed prediction is cleared.

    Args:
        event: The Tk event that triggered the callback (unused).
    """
    choice = model_combobox.get()
    wants_ensemble = choice == "Best of All Models"
    if not wants_ensemble:
        load_model(choice)
    # Reset the result line so a prediction from the previous model is not shown.
    result_label.config(text="Prediction: ")


In [3]:

# --- Main window ------------------------------------------------------------
root = tk.Tk()
root.title("Hate Speech Classifier")
root.geometry("500x500")
root.configure(bg="#f0f4f8")
root.resizable(False, False)

# --- Shared ttk styling -----------------------------------------------------
style = ttk.Style()
style.theme_use("clam")
style.configure("TButton", font=("Arial", 12), padding=10, background="#4CAF50", foreground="white")
style.configure("TLabel", font=("Arial", 12), background="#f0f4f8")
style.configure("TCombobox", font=("Arial", 12))

title_label = ttk.Label(root, text="Hate Speech Classifier", font=("Arial", 18, "bold"), background="#f0f4f8", foreground="#333")
title_label.pack(pady=15)

# --- Model selection --------------------------------------------------------
model_label = ttk.Label(root, text="Select Model:", background="#f0f4f8", foreground="#333")
model_label.pack(pady=5)

# state="readonly" restricts the value to the listed models. With an editable
# combobox, typing a name changes the displayed model without firing
# <<ComboboxSelected>>, so the loaded model and the shown name could disagree.
model_combobox = ttk.Combobox(root, values=list(models.keys()), font=("Arial", 12), state="readonly")
model_combobox.set("MARBERT")
model_combobox.pack(pady=5)
model_combobox.bind("<<ComboboxSelected>>", on_model_change)

# --- Text input -------------------------------------------------------------
text_label = ttk.Label(root, text="Enter Text:", background="#f0f4f8", foreground="#333")
text_label.pack(pady=5)

text_entry = tk.Text(root, height=6, width=50, font=("Arial", 12), background="#ffffff", foreground="#333", borderwidth=2, relief="solid")
text_entry.pack(pady=5)

# Context menu with a single "Paste" entry, opened by mouse button 3 on the text box.
context_menu = tk.Menu(root, tearoff=0)
context_menu.add_command(label="Paste", command=paste_text)

text_entry.bind("<Button-3>", show_context_menu)

# --- Action and result ------------------------------------------------------
classify_button = ttk.Button(root, text="Classify", command=classify_text)
classify_button.pack(pady=15)

result_label = ttk.Label(root, text="Prediction: ", font=("Arial", 14), background="#f0f4f8", foreground="#333")
result_label.pack(pady=20)

# Blocks this cell until the window is closed.
root.mainloop()


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
