# In [3]:  (Jupyter notebook cell marker)
import tkinter as tk
from tkinter import messagebox
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import re
import nltk
from nltk.util import pr
stemmer = nltk.SnowballStemmer('english')
from nltk.corpus import stopwords

# Read the labelled tweets from disk (change the filename to match your dataset)
df = pd.read_csv('twitter_data.csv')

# English stopwords as a set, consulted when filtering tokens
stopword = set(stopwords.words('english'))

# Numeric class id -> human-readable label
mapping = {
    0: "Hate Speech Detected",
    1: "Offensive Language Detected",
    2: "No Hate Speech and No Offensive Language Detected",
}

# Translate each 'class' id through the mapping and keep the result in a
# 'labels' column
df = df.assign(labels=df['class'].map(mapping))

# Function to clean text
def clean(text):
    """Normalise raw text into a space-separated string of lowercase tokens.

    The input is coerced to ``str`` and lowercased. Then, in order:
    bracketed spans, URLs, HTML-like tags and punctuation are removed,
    newlines are replaced with spaces, and digit-bearing words are removed.
    Finally every token found in the module-level ``stopword`` collection
    is dropped and the remaining tokens are joined with single spaces.
    """
    # (pattern, replacement) pairs; they are applied strictly in this order
    substitutions = (
        (r'\[.*?\]', ''),                # square brackets and their contents
        (r'https?://\S+|www\.\S+', ''),  # URLs
        (r'<.*?>+', ''),                 # HTML tags
        (r'[^\w\s]', ''),                # punctuation
        (r'\n', ' '),                    # newline characters
        (r'\w*\d\w*', ''),               # words containing digits
    )

    cleaned = str(text).lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # Drop stopwords and collapse whitespace to single spaces
    return " ".join(token for token in cleaned.split() if token not in stopword)

# Normalise every tweet before building features
df["tweet"] = df["tweet"].apply(clean)

# Features are the cleaned tweets; targets are the human-readable labels
x = np.array(df["tweet"])
y = np.array(df["labels"])

# Bag-of-words counts. `cv` stays at module level so that new text can later
# be transformed with the same fitted vocabulary.
cv = CountVectorizer()
x = cv.fit_transform(x)

# Hold out 33% of the rows; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

# Train the classifier. It is seeded like the split above: the tree permutes
# candidate features at each split, so without a fixed random_state the
# fitted model (and its predictions) can differ from run to run.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Function to classify text
def classify_text():
    """Classify the text currently in the input box and show the result.

    Reads the contents of ``text_entry``, cleans them with ``clean``,
    vectorizes them with the fitted ``cv`` and shows the label predicted by
    ``clf`` in an info dialog. If the box is empty or contains only
    whitespace, an error dialog is shown instead and nothing is classified.
    """
    # "end-1c" leaves out the trailing newline the Text widget always appends
    test_data = text_entry.get("1.0", "end-1c")

    # Whitespace-only input (e.g. a lone Enter press) counts as empty too
    if not test_data.strip():
        messagebox.showerror("Error", "Please enter some text.")
        return

    test_data_cleaned = clean(test_data)
    test_data_vectorized = cv.transform([test_data_cleaned]).toarray()
    prediction = clf.predict(test_data_vectorized)
    messagebox.showinfo("Classification Result", f"Classification: {prediction[0]}")

# Build the top-level application window
root = tk.Tk()
root.title("Text Classification")

# Multi-line input area for the text to classify
text_entry = tk.Text(master=root, width=50, height=10)
text_entry.pack()

# Button that runs classify_text when clicked
classify_button = tk.Button(
    master=root,
    text="Classify Text",
    command=classify_text,
)
classify_button.pack()

# Start the Tk event loop
root.mainloop()
