In [1]:
import pandas as pd
import re
import string
from collections import Counter
from nltk.corpus import stopwords
import nltk

nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Dario\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Load Corpus

In [2]:
df = pd.read_csv("data.csv")

# parse date column
df["year"] = pd.to_datetime(df["created_date"], errors="coerce").dt.to_period("Y")
df["class"] = df["toxicity"].apply(lambda t: "Hate" if t > 0.5 else "Non-Hate")



### Case 1: Chi-Squared
For this case we will do some preprocessing for better results

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import chi2
import numpy as np
import spacy
import re

nlp = spacy.load("en_core_web_sm")


def preprocess(text):
    text = re.sub(r'[^a-z\s]', '', text.lower())
    doc = nlp(text)
    return " ".join([token.lemma_ for token in doc if 
                     (token.pos_ == "NOUN" or token.pos_ == "ADJ") and 
                     not token.is_stop and 
                     len(token) > 2 and
                     not re.search(r"\d", token.text)]) # The token does not contain digits


chi2_results = {}

for year, df_y in df.groupby("year"):
    texts = df_y["comment_text"].astype(str).tolist()
    labels = (df_y["class"] == "Hate").astype(int).values  # 1 = Hate, 0 = Non-Hate

    # Skip if only one class is present
    if len(set(labels)) < 2:
        print(f"Skipping year {year}: only one class present")
        continue

    # Vectorize with adaptive min_df
    min_df_val = min(5, max(1, len(df_y)//10))
    count_vec = CountVectorizer(min_df=min_df_val, ngram_range=(1,1))
    X_count = count_vec.fit_transform(texts)

    if X_count.shape[1] == 0:
        print(f"Skipping year {year}: no features after vectorization")
        continue

    vocab = np.array(count_vec.get_feature_names_out())

    # Chi² test
    chi2_scores, pvals = chi2(X_count, labels)

    # Compute per-class counts
    counts_hate = np.asarray(X_count[labels == 1].sum(axis=0)).ravel()
    counts_nonhate = np.asarray(X_count[labels == 0].sum(axis=0)).ravel()
    preferred_class = np.where(counts_hate > counts_nonhate, "Hate", "Non-Hate")

    # Build dataframe
    chi2_df = pd.DataFrame({
        "word": vocab,
        "chi2": chi2_scores,
        "pval": pvals,
        "counts_hate": counts_hate,
        "counts_nonhate": counts_nonhate,
        "preferred_class": preferred_class
    }).sort_values("chi2", ascending=False)

    chi2_results[year] = chi2_df

Results

In [9]:
for year in chi2_results.keys():
    print(f"Top Hate-indicative words for {year}:")
    top_hate_words = chi2_results[year][
        chi2_results[year]["preferred_class"] == "Hate"
    ].head(20)
    print(top_hate_words[["word","chi2","pval","counts_hate","counts_nonhate"]])
    print("\n")

Top Hate-indicative words for 2015:
              word        chi2          pval  counts_hate  counts_nonhate
232           crap  354.255319  5.017638e-79           18               0
811      profanity  236.170213  2.690281e-53           12               0
1008       testing  149.548430  2.176021e-34           12               6
819       purposes  149.548430  2.176021e-34           12               6
160          bunch  137.765957  8.199255e-32            7               0
169          cares  118.085106  1.661057e-27            6               0
223   contribution  118.085106  1.661057e-27            6               0
564           jerk  118.085106  1.661057e-27            6               0
424           fuck  118.085106  1.661057e-27            6               0
1049         total  118.085106  1.661057e-27            6               0
159       bullshit  118.085106  1.661057e-27            6               0
443         giving  118.085106  1.661057e-27            6               0
15

### Case 2: TF-IDF

In [18]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

df = df[df["comment_text"].apply(lambda x: isinstance(x, str))].copy()  # There is one bad row to be removed


vectorizer = TfidfVectorizer(
    lowercase=True,
    stop_words='english',      # remove common words (optional)
    ngram_range=(1, 2),        # unigrams + bigrams
    #min_df=5                   # ignore very rare terms
)

results = []

for year, df_year in df.groupby("year"):

    # Computing TF-IDF Matrix
    X = vectorizer.fit_transform(df_year["comment_text"])
    feature_names = np.array(vectorizer.get_feature_names_out())

    hate_idx = df_year["class"] == "Hate"
    nonhate_idx = df_year["class"] == "Non-Hate"

    # Computing mean TF-IDF for each class
    mean_hate = X[hate_idx].mean(axis=0).A1
    mean_nonhate = X[nonhate_idx].mean(axis=0).A1

    diff = mean_hate - mean_nonhate

    # Top words
    top_n = 60
    top_idx = np.argsort(diff)[-top_n:]

    top_words = pd.DataFrame({
        "year": year,
        "word": feature_names[top_idx],
        "tfidf_diff": diff[top_idx],
        "mean_hate": mean_hate[top_idx],
        "mean_nonhate": mean_nonhate[top_idx]
    }).sort_values("tfidf_diff", ascending=False)

    results.append((top_words, year))

In [14]:
top_words

Unnamed: 0,word,tfidf_diff,mean_hate,mean_nonhate
59,stupid,0.012633,0.012793,0.00016
58,idiot,0.007198,0.007232,3.4e-05
57,trump,0.005064,0.011308,0.006244
56,ignorant,0.004245,0.004444,0.000199
55,pathetic,0.004121,0.004237,0.000116
54,idiots,0.003913,0.003942,2.9e-05
53,dumb,0.003825,0.003953,0.000128
52,white,0.003711,0.005218,0.001507
51,racist,0.003448,0.004157,0.000708
50,stupidity,0.003312,0.003353,4.1e-05
