**Hate Speech Detection**
This project focuses on building a model combining LSTM and Hugging Face transformer models to detect hate speech from a curated text dataset. It not only classifies whether a statement is hateful but also predicts the intensity level of the hate speech.
https://www.kaggle.com/datasets/waalbannyantudre/hate-speech-detection-curated-dataset/data

##**LSTM Model**

In [1]:
pip install --upgrade kagglehub




In [6]:
import kagglehub
waalbannyantudre_hate_speech_detection_curated_dataset_path = kagglehub.dataset_download('waalbannyantudre/hate-speech-detection-curated-dataset')

print('Data source import complete.')

import pandas as pd
import numpy as np
import tensorflow as tf
import random
from tensorflow.keras.layers import  Input,Dense,Embedding, LSTM,Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from nltk.corpus import stopwords
import re
import nltk
nltk.download('stopwords')

Data source import complete.


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# Enable dynamic memory growth for GPUs

In [3]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu_device in gpus:
            tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as e:
        print("GPU Memory Growth Error:", e)

# Load dataset

In [7]:
dataset_path = "/kaggle/input/hate-speech-detection-curated-dataset/HateSpeechDatasetBalanced.csv"
df = pd.read_csv(dataset_path)
stop_words = set(stopwords.words('english'))

# Clean text

In [8]:
def clean_text(text):
    text = re.sub(r'[^a-zA-Z]', ' ', text)
    text = text.lower()
    words = text.split()
    filtered_words = [word for word in words if word not in stop_words]
    return " ".join(filtered_words)

df['CleanedContent'] = df['Content'].apply(clean_text)


# Tokenization and padding

In [9]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['CleanedContent'])
sequences = tokenizer.texts_to_sequences(df['CleanedContent'])
max_sequence_length = max(len(seq) for seq in sequences)

X = pad_sequences(sequences, maxlen=max_sequence_length)
y = df['Label'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
original_test_texts = df['Content'].iloc[X_test.shape[0] * -1:].tolist()

# Set seeds
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# Model definition

In [10]:
input_layer = Input(shape=(X_train.shape[1],))
embedding_layer = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=20)(input_layer)
lstm_layer1 = LSTM(10, return_sequences=True)(embedding_layer)
lstm_layer2 = LSTM(10, return_sequences=True)(lstm_layer1)
flatten_layer = Flatten()(lstm_layer2)
output_layer = Dense(1, activation='sigmoid')(flatten_layer)
model = Model(inputs=input_layer, outputs=output_layer)

# Compile and train

In [11]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test))

Epoch 1/5
[1m9077/9077[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2550s[0m 280ms/step - accuracy: 0.8118 - loss: 0.4025 - val_accuracy: 0.8568 - val_loss: 0.3199
Epoch 2/5
[1m9077/9077[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2569s[0m 277ms/step - accuracy: 0.8775 - loss: 0.2808 - val_accuracy: 0.8650 - val_loss: 0.3169
Epoch 3/5
[1m9077/9077[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2511s[0m 273ms/step - accuracy: 0.8953 - loss: 0.2418 - val_accuracy: 0.8641 - val_loss: 0.3281
Epoch 4/5
[1m9077/9077[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2466s[0m 272ms/step - accuracy: 0.9039 - loss: 0.2230 - val_accuracy: 0.8649 - val_loss: 0.3352
Epoch 5/5
[1m9077/9077[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2513s[0m 277ms/step - accuracy: 0.9091 - loss: 0.2122 - val_accuracy: 0.8645 - val_loss: 0.3395


# Evaluate on test data

In [12]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"\nTest Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

# Predictions on test data
y_pred_probs = model.predict(X_test).squeeze()
y_pred_labels = (y_pred_probs > 0.5).astype(int)
print(f"\nAccuracy Score (sklearn): {accuracy_score(y_test, y_pred_labels)}")

# Evaluate on 20 random test sentences
print("\n--- Evaluation on 20 Test Sentences ---")
indices = np.random.choice(len(X_test), 20, replace=False)
for idx in indices:
    sentence = original_test_texts[idx]
    true_label = "HATE" if y_test[idx] == 1 else "NON-HATE"
    prob = y_pred_probs[idx]
    pred_label = "HATE" if prob > 0.5 else "NON-HATE"
    intensity = round(prob * 100, 2) if pred_label == "HATE" else round((1 - prob) * 100, 2)

    print(f"\nSentence: {sentence}")
    print(f"True Label: {true_label}")
    print(f"Predicted Label: {pred_label}")
    print(f"{'Hate' if pred_label == 'HATE' else 'Non-Hate'} Intensity: {intensity}%")

[1m4539/4539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m223s[0m 49ms/step - accuracy: 0.8639 - loss: 0.3405

Test Loss: 0.33951452374458313
Test Accuracy: 0.8644921183586121
[1m4539/4539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m215s[0m 47ms/step

Accuracy Score (sklearn): 0.8644920949705283

--- Evaluation on 20 Test Sentences ---

Sentence: stop vandalizing please stop your disruptive editing of removing rights to the unreliable and biased sources when you continue to vandalize wikipedia as you did at list of countries by population united nations and list and countries by population i will have to ask for an arbitration for examining of your access rights as well as to ask for protection of freedom and truth by blocking submissions from further editing preceding unsigned comment added by the bats are out today this must be the record for shortest time between entries by mooney editors please undo the damage you are doing to wikipedia stop doing more damage also i will h

# predict label and hate percentage for custom sentences

In [13]:
# Function to predict label and hate percentage for custom sentences
def predict_custom_sentences(sentences):
    cleaned = [clean_text(sentence) for sentence in sentences]
    seqs = tokenizer.texts_to_sequences(cleaned)
    padded = pad_sequences(seqs, maxlen=max_sequence_length)
    probs = model.predict(padded).squeeze()

    for i, sentence in enumerate(sentences):
        prob = probs[i]
        label = "HATE" if prob > 0.5 else "NON-HATE"
        intensity = round(prob * 100, 2) if label == "HATE" else round((1 - prob) * 100, 2)
        print(f"\nSentence: {sentence}")
        print(f"Predicted Label: {label}")
        print(f"{'Hate' if label == 'HATE' else 'Non-Hate'} Intensity: {intensity}%")

# Example predictions on 4 custom sentences
custom_sentences = [
    "I absolutely despise those people!",
    "I hope you have a wonderful day!",
    "You're so stupid and annoying.",
    "This is a peaceful and loving community."
]

predict_custom_sentences(custom_sentences)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step

Sentence: I absolutely despise those people!
Predicted Label: HATE
Hate Intensity: 59.52000045776367%

Sentence: I hope you have a wonderful day!
Predicted Label: NON-HATE
Non-Hate Intensity: 95.81999969482422%

Sentence: You're so stupid and annoying.
Predicted Label: HATE
Hate Intensity: 88.68000030517578%

Sentence: This is a peaceful and loving community.
Predicted Label: NON-HATE
Non-Hate Intensity: 64.98999786376953%
