In [3]:
import pandas as pd
from transformers import pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [4]:
df = pd.read_csv('jokes_divorce_classified.csv')
df.head()

Unnamed: 0,category,content_clean,label
0,divorce,divorce documentary seeks christians divorce d...,nothate
1,divorce,new child support question got my filing stb...,nothate
2,divorce,the judge will have a lot to consider gl,nothate
3,divorce,that s pretty much noise he will claim x y a...,nothate
4,divorce,support judges normally don t care about the s...,nothate


In [5]:
classifier = pipeline(
    "text-classification",
    model="Hate-speech-CNERG/dehatebert-mono-english",
    truncation=True,
    max_length=512
)

Device set to use mps:0


In [6]:
results = classifier(df['content_clean'].tolist(), batch_size=16)

In [7]:
df['pred_label_raw']   = [r['label'] for r in results]  # 'HATE' or 'NON_HATE'
df['pred_score_raw']   = [r['score'] for r in results]

In [8]:
label_map = {'HATE':'hate', 'NON_HATE':'nothate'}
df['pred_label'] = df['pred_label_raw'].map(label_map)

In [9]:
df.head()

Unnamed: 0,category,content_clean,label,pred_label_raw,pred_score_raw,pred_label
0,divorce,divorce documentary seeks christians divorce d...,nothate,NON_HATE,0.951871,nothate
1,divorce,new child support question got my filing stb...,nothate,NON_HATE,0.945256,nothate
2,divorce,the judge will have a lot to consider gl,nothate,NON_HATE,0.9716,nothate
3,divorce,that s pretty much noise he will claim x y a...,nothate,NON_HATE,0.952445,nothate
4,divorce,support judges normally don t care about the s...,nothate,NON_HATE,0.952543,nothate


In [10]:
y_true = df['label']
y_pred = df['pred_label']

In [11]:
print("Accuracy:", accuracy_score(y_true, y_pred))
print("\nClassification Report:\n", classification_report(y_true, y_pred))

Accuracy: 0.875961045617632

Classification Report:
               precision    recall  f1-score   support

        hate       0.59      0.23      0.33       261
     nothate       0.89      0.98      0.93      1690

    accuracy                           0.88      1951
   macro avg       0.74      0.60      0.63      1951
weighted avg       0.85      0.88      0.85      1951



In [12]:
# 7) (Optional) Confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=['hate','nothate'])
print("\nConfusion Matrix:\n", pd.DataFrame(
    cm, index=['true_hate','true_nothate'], columns=['pred_hate','pred_nothate']
))


Confusion Matrix:
               pred_hate  pred_nothate
true_hate            60           201
true_nothate         41          1649


# Model 2

In [13]:
df2 = pd.read_csv('jokes_divorce_classified.csv')

# 2) Standardize your ground truth
df2['ground_truth'] = df2['label'].astype(str).str.lower().str.strip()

# 3) Initialize the model pipeline
classifier = pipeline(
    "text-classification",
    model="IMSyPP/hate_speech_nl",
    truncation=True,
    max_length=512
)

# 4) Run the classifier in batches
results = classifier(df2['content_clean'].tolist(), batch_size=16)


Device set to use mps:0


In [14]:
results

[{'label': 'LABEL_0', 'score': 0.9980371594429016},
 {'label': 'LABEL_0', 'score': 0.5473940968513489},
 {'label': 'LABEL_0', 'score': 0.782377302646637},
 {'label': 'LABEL_2', 'score': 0.8170800805091858},
 {'label': 'LABEL_2', 'score': 0.5716385841369629},
 {'label': 'LABEL_0', 'score': 0.9957605004310608},
 {'label': 'LABEL_0', 'score': 0.67975914478302},
 {'label': 'LABEL_0', 'score': 0.5280640721321106},
 {'label': 'LABEL_0', 'score': 0.9830785393714905},
 {'label': 'LABEL_0', 'score': 0.9959790706634521},
 {'label': 'LABEL_0', 'score': 0.9943101406097412},
 {'label': 'LABEL_2', 'score': 0.9714437127113342},
 {'label': 'LABEL_0', 'score': 0.6853883266448975},
 {'label': 'LABEL_2', 'score': 0.9142704606056213},
 {'label': 'LABEL_1', 'score': 0.5022073984146118},
 {'label': 'LABEL_0', 'score': 0.998019814491272},
 {'label': 'LABEL_0', 'score': 0.9745175242424011},
 {'label': 'LABEL_0', 'score': 0.6584671139717102},
 {'label': 'LABEL_2', 'score': 0.998073935508728},
 {'label': 'LABEL

In [15]:
# 4) Pull out raw LABEL_n and confidence
df2['raw_label']  = [r['label'] for r in results]  # e.g. "LABEL_0" … "LABEL_3"
df2['pred_score'] = [r['score'] for r in results]

# 5) Map each LABEL_n to its human‐readable name
id2label = classifier.model.config.id2label
df2['class_name'] = df2['raw_label'].apply(
    lambda L: id2label[int(L.split('_')[-1])]
)

# 6) Collapse to binary 'hate' vs. 'nothate'
#    (adjust mapping if your model’s id2label differs)
binary_map = {
    'LABEL_0':    'nothate',
    'LABEL_1': 'nothate',
    'LABEL_2':     'hate',
    'LABEL_3':       'hate'
}
df2['predicted_label'] = df2['class_name'].map(binary_map)

# 7) Filter to only rows where both are in {hate, nothate}
valid = ['hate','nothate']
mask = df2['ground_truth'].isin(valid) & df2['predicted_label'].isin(valid)
df_eval = df2.loc[mask, :]

# 8) Compute metrics
y_true = df_eval['ground_truth']
y_pred = df_eval['predicted_label']

print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}\n")
print("Classification Report:")
print(classification_report(y_true, y_pred, labels=valid))
cm = confusion_matrix(y_true, y_pred, labels=valid)
cm_df = pd.DataFrame(
    cm,
    index=[f"true_{l}" for l in valid],
    columns=[f"pred_{l}" for l in valid]
)
print("\nConfusion Matrix:")
print(cm_df)

Accuracy: 0.5366

Classification Report:
              precision    recall  f1-score   support

        hate       0.19      0.75      0.30       261
     nothate       0.93      0.50      0.65      1690

    accuracy                           0.54      1951
   macro avg       0.56      0.63      0.48      1951
weighted avg       0.83      0.54      0.61      1951


Confusion Matrix:
              pred_hate  pred_nothate
true_hate           197            64
true_nothate        840           850


# Model 3

In [16]:
df3 = pd.read_csv('jokes_divorce_classified.csv')

# 2) Standardize your ground truth
df3['ground_truth'] = df3['label'].astype(str).str.lower().str.strip()

# 3) Initialize the model pipeline
classifier = pipeline(
    "text-classification",
    model="ctoraman/hate-speech-bert",
    truncation=True,
    max_length=512
)

# 4) Run the classifier in batches
result3 = classifier(df3['content_clean'].tolist(), batch_size=16)


Device set to use mps:0


In [17]:
result3

[{'label': 'LABEL_0', 'score': 0.99671471118927},
 {'label': 'LABEL_0', 'score': 0.9726043939590454},
 {'label': 'LABEL_0', 'score': 0.9930316209793091},
 {'label': 'LABEL_0', 'score': 0.9732677936553955},
 {'label': 'LABEL_0', 'score': 0.9949017763137817},
 {'label': 'LABEL_0', 'score': 0.9951677322387695},
 {'label': 'LABEL_0', 'score': 0.9790101051330566},
 {'label': 'LABEL_0', 'score': 0.9015123844146729},
 {'label': 'LABEL_0', 'score': 0.9912323355674744},
 {'label': 'LABEL_0', 'score': 0.9961910247802734},
 {'label': 'LABEL_0', 'score': 0.9830766916275024},
 {'label': 'LABEL_0', 'score': 0.9962015748023987},
 {'label': 'LABEL_1', 'score': 0.9089598655700684},
 {'label': 'LABEL_1', 'score': 0.9547712802886963},
 {'label': 'LABEL_0', 'score': 0.9877610206604004},
 {'label': 'LABEL_0', 'score': 0.9919814467430115},
 {'label': 'LABEL_0', 'score': 0.994510293006897},
 {'label': 'LABEL_0', 'score': 0.985873818397522},
 {'label': 'LABEL_0', 'score': 0.9870566129684448},
 {'label': 'LABE

In [19]:
df3['raw_label']  = [r['label'] for r in result3]  
df3['pred_score'] = [r['score'] for r in result3]

# 6) Map each LABEL_n to its human‐readable name
id2label = classifier.model.config.id2label
df3['class_name'] = df3['raw_label'].apply(
    lambda L: id2label[int(L.split('_')[-1])]
)

# 6) Collapse to binary 'hate' vs. 'nothate'
#    (adjust mapping if your model’s id2label differs)
binary_map = {
    'LABEL_0':    'nothate',
    'LABEL_1': 'hate',
    'LABEL_2':     'hate',
}
df3['predicted_label'] = df3['class_name'].map(binary_map)

# 7) Filter to only rows where both are in {hate, nothate}
valid = ['hate','nothate']
mask = df3['ground_truth'].isin(valid) & df3['predicted_label'].isin(valid)
df_eval = df3.loc[mask, :]

# 8) Compute metrics
y_true = df_eval['ground_truth']
y_pred = df_eval['predicted_label']

print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}\n")
print("Classification Report:")
print(classification_report(y_true, y_pred, labels=valid))
cm = confusion_matrix(y_true, y_pred, labels=valid)
cm_df = pd.DataFrame(
    cm,
    index=[f"true_{l}" for l in valid],
    columns=[f"pred_{l}" for l in valid]
)
print("\nConfusion Matrix:")
print(cm_df)

Accuracy: 0.8765

Classification Report:
              precision    recall  f1-score   support

        hate       0.53      0.77      0.63       261
     nothate       0.96      0.89      0.93      1690

    accuracy                           0.88      1951
   macro avg       0.74      0.83      0.78      1951
weighted avg       0.90      0.88      0.89      1951


Confusion Matrix:
              pred_hate  pred_nothate
true_hate           201            60
true_nothate        181          1509
