<a href="https://colab.research.google.com/github/Amankp1/Psychological-Profiling-and-Context-Aware-Labeling-of-Hate-Speech/blob/main/Psychological_labeling_hateSpeech_file_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import re
import nltk
import string

In [2]:
nltk.download('stopwords')
nltk.download('wordnet')

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
df = pd.read_csv("hate_speech_dataset.csv")

In [4]:
def clean_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = text.translate(str.maketrans("", "", string.punctuation))  # Remove punctuation
    words = text.split()
    words = [word for word in words if word not in stopwords.words('english')]  # Remove stopwords
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word) for word in words]  # Lemmatization
    return " ".join(words)

df["clean_text"] = df["text"].apply(clean_text)

In [5]:
label_mapping = {"Depression": 0, "Stress": 1, "Loneliness": 2}
df["label"] = df["label"].map(label_mapping)

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df["clean_text"], df["label"], test_size=0.2, random_state=42, stratify=df["label"])

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(max_features=5000, ngram_range=(1,2))
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

In [8]:
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

svm_model = SVC(kernel="linear", C=1, probability=True)
nb_model = MultinomialNB()
rf_model = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)

ensemble = VotingClassifier(estimators=[
    ('svm', svm_model),
    ('nb', nb_model),
    ('rf', rf_model)
], voting='hard')



In [9]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

ensemble.fit(X_train_tfidf, y_train)
y_pred_ensemble = ensemble.predict(X_test_tfidf)

print("\nEnsemble Model Classification Report:\n", classification_report(y_test, y_pred_ensemble))
print("Ensemble Accuracy:", accuracy_score(y_test, y_pred_ensemble))


Ensemble Model Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.70      0.70        61
           1       0.88      0.85      0.86        59
           2       0.80      0.81      0.81        59

    accuracy                           0.79       179
   macro avg       0.79      0.79      0.79       179
weighted avg       0.79      0.79      0.79       179

Ensemble Accuracy: 0.7877094972067039


In [10]:
test_texts = [
    # Depression-related hate speech
    "I hate how I wake up every day knowing nothing will change.",
    "No one gives a damn about me, and I’m tired of pretending otherwise.",
    "Every single person in this college is fake, just like my own smile.",
    "No matter what I do, I’m always the forgotten one in the background.",
    "This place is just a constant reminder that I’ll never be good enough.",
    "I’m done trying to fit in when all I get is rejection.",
    "If I vanished today, this college would just keep moving like I never existed.",
    "Everyone pretends to be so caring, but in the end, they leave just like the rest.",
    "The only thing college has taught me is how to hate myself more efficiently.",
    "I despise how people act like they’re my friend, only to push me away when I need them most.",

    # Stress-related hate speech
    "This college drains every ounce of energy and leaves nothing but anxiety.",
    "I hate how I work my ass off, and all I get is stress and sleepless nights.",
    "Professors act like we’re machines who can function without a break.",
    "Everything about this place is designed to break students mentally.",
    "Group projects are just a nightmare that make me hate people even more.",
    "The pressure here is so suffocating, it feels like I’m drowning.",
    "No matter how hard I work, there’s always another impossible deadline.",
    "I hate how this college turns students into stressed-out zombies.",
    "People here act like stress is a competition, and I’m sick of it.",
    "I swear, if one more professor dumps another assignment on me, I’m going to lose it.",

    # Loneliness-related hate speech
    "I hate walking into a crowded room and realizing I have no one to sit with.",
    "This college is just a giant popularity contest, and I was never even considered.",
    "People only notice me when they need something; otherwise, I’m invisible.",
    "No one even bothers to check in on me, and I hate them for it.",
    "I sit alone every single day, watching everyone else form friendships so easily.",
    "I hate how everyone has their group, and I’m just an outsider looking in.",
    "The lonelier I feel, the more I resent every fake smile I see.",
    "No one ever remembers my name, and honestly, I’ve stopped caring.",
    "I could disappear right now, and not a single person here would notice.",
    "This college makes it so easy to be surrounded by people and still feel completely alone."
]

test_labels = [
    "Depression", "Depression", "Depression", "Depression", "Depression",
    "Depression", "Depression", "Depression", "Depression", "Depression",

    "Stress", "Stress", "Stress", "Stress", "Stress",
    "Stress", "Stress", "Stress", "Stress", "Stress",

    "Loneliness", "Loneliness", "Loneliness", "Loneliness", "Loneliness",
    "Loneliness", "Loneliness", "Loneliness", "Loneliness", "Loneliness"
]


In [11]:
label_mapping = {0: "Depression", 1: "Stress", 2: "Loneliness"}

X_test_transformed = tfidf.transform(test_texts).toarray()

y_pred_test = ensemble.predict(X_test_transformed)
y_pred_test_labels = [label_mapping[label] for label in y_pred_test]

print("Confusion Matrix:\n", confusion_matrix(test_labels, y_pred_test_labels))
print("\nClassification Report:\n", classification_report(test_labels, y_pred_test_labels))
print("\nAccuracy:", accuracy_score(test_labels, y_pred_test_labels))

for i in range(len(test_texts)):
    print(f"\nText: {test_texts[i]}")
    print(f"Actual Label: {test_labels[i]}")
    print(f"Predicted Label: {y_pred_test_labels[i]}")
    if test_labels[i] == y_pred_test_labels[i]:
        print("✅ Correct Prediction")
    else:
        print("❌ Incorrect Prediction")


Confusion Matrix:
 [[ 6  4  0]
 [ 0 10  0]
 [ 0  1  9]]

Classification Report:
               precision    recall  f1-score   support

  Depression       1.00      0.60      0.75        10
  Loneliness       0.67      1.00      0.80        10
      Stress       1.00      0.90      0.95        10

    accuracy                           0.83        30
   macro avg       0.89      0.83      0.83        30
weighted avg       0.89      0.83      0.83        30


Accuracy: 0.8333333333333334

Text: I hate how I wake up every day knowing nothing will change.
Actual Label: Depression
Predicted Label: Depression
✅ Correct Prediction

Text: No one gives a damn about me, and I’m tired of pretending otherwise.
Actual Label: Depression
Predicted Label: Depression
✅ Correct Prediction

Text: Every single person in this college is fake, just like my own smile.
Actual Label: Depression
Predicted Label: Loneliness
❌ Incorrect Prediction

Text: No matter what I do, I’m always the forgotten one in the b