In [3]:
!pip install gradio
import pandas as pd
import numpy as np
import nltk
import re
import string
from nltk.stem import PorterStemmer

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

from nltk.corpus import stopwords
import gradio as gr  # Import Gradio

# NLTK resources: fetch the stopword corpus, build the lookup set used by
# clean(), and create the English Snowball stemmer.
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
stemmer = nltk.SnowballStemmer("english")

# Read the tweets and attach a human-readable label for each numeric class
data = pd.read_csv("/content/Twitter_Hate_Speech.csv")
data["labels"] = data["class"].map(
    {
        0: "Hate Speech",
        1: "Offensive Language",
        2: "Normal",
    }
)

# Clean text
def clean(text, stopword_set=None):
    """Normalize a tweet into a space-separated string of lowercase words.

    Steps, in order: lowercase; remove ``[...]`` spans, URLs and ``<...>``
    tags; strip ASCII punctuation; turn newlines into spaces; remove every
    token that contains a digit; drop stopwords.

    Args:
        text: Value to clean. It is converted with ``str()`` first.
        stopword_set: Optional collection of words to drop. When ``None``
            (the default) the module-level ``stop_words`` set is used.

    Returns:
        The surviving words joined by single spaces; may be ``""``.
    """
    if stopword_set is None:
        stopword_set = stop_words

    text = str(text).lower()
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)
    # Replace newlines with a space rather than deleting them; deleting would
    # fuse the last word of one line with the first word of the next.
    text = re.sub(r'\n', ' ', text)
    text = re.sub(r'\w*\d\w*', '', text)
    # split() with no argument collapses any run of whitespace left behind by
    # the removals above.
    return " ".join(word for word in text.split() if word not in stopword_set)

# Normalize every tweet in place before building features
data["tweet"] = data["tweet"].apply(clean)

# Prepare data for model training: bag-of-words counts as features,
# the human-readable label strings as targets.
x = np.array(data["tweet"])
y = np.array(data["labels"])
cv = CountVectorizer()
X = cv.fit_transform(x)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Train the Decision Tree Classifier.
# The split above is seeded, but the tree itself also draws on randomness
# when fitting, so seed it too to get the same model on every run.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Define a prediction function for Gradio
def classify_tweet(tweet):
    """Classify one tweet and return the message shown in the Gradio UI.

    The text is normalized with ``clean``, vectorized with the fitted
    ``cv`` and passed to the trained ``clf``.

    Args:
        tweet: Raw text from the input box. ``None`` and blank strings are
            accepted and reported as unclassifiable.

    Returns:
        ``"Predicted Label: <label>"`` for classifiable input, otherwise a
        message saying the input contains no meaningful words.
    """
    no_content_message = "Unable to classify: the input contains no meaningful words."

    # Guard first: str(None) would otherwise be cleaned to the word "none"
    # and classified as if the user had typed it.
    if tweet is None or not str(tweet).strip():
        return no_content_message

    cleaned_tweet = clean(tweet)
    if not cleaned_tweet:
        # Nothing survived cleaning, so the feature vector would be all zeros
        # and the predicted label would say nothing about the input.
        return no_content_message

    vectorized_tweet = cv.transform([cleaned_tweet])
    prediction = clf.predict(vectorized_tweet)[0]
    return f"Predicted Label: {prediction}"

# Gradio front end: one text box in, the classifier's message out
interface = gr.Interface(
    title="Twit Safe: Hate Speech Classifier",
    description="Enter a tweet to classify it as Hate Speech, Offensive Language, or Normal.",
    fn=classify_tweet,  # callback invoked with the submitted text
    inputs="text",      # text box input component
    outputs="text",     # text output component
)

# Start serving the UI
interface.launch()




[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://30235392f2eb6e863c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


