In [4]:
import pandas as pd
import json
import os
import ast
import re
import numpy as np
import matplotlib.pyplot as plt
import torch
from time import time
from sklearn.metrics import roc_curve, roc_auc_score, accuracy_score, f1_score,  recall_score, confusion_matrix, precision_score
from transformers import (
    set_seed,
)
set_seed(42)

from unsloth import FastLanguageModel
max_seq_length = 500
from tqdm import tqdm

import pickle

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [5]:
random_state_list = [42, 57, 120, 98, 65, 74]

In [6]:
if os.getcwd() == '/root':
    new_path = "/root/0_Thesis/0_final/"
    os.chdir(new_path)
else:
    os.chdir("..") 
print(os.getcwd())

/root/0_Thesis/0_final


# Section 1: Load Human Label Data

In [8]:
df_evaluation = pd.read_csv("data/human/1_combine_hate_ds.csv")
df_evaluation['len_text'] = df_evaluation['text'].str.len()
df_evaluation = df_evaluation[df_evaluation['len_text'] <= 300]
print(df_evaluation.shape)

(84083, 7)


In [9]:
df_evaluation.dataset.value_counts()

dataset
ViHSD           30571
HateSpeechX     20022
Sexism          13631
GermEval2019    12131
GermEval2021     3457
Covid            2164
US_election      2107
Name: count, dtype: int64

In [10]:
df_evaluation.hate_label_id.value_counts()

hate_label_id
1    58915
0    25168
Name: count, dtype: int64

In [11]:
df_evaluation.language.value_counts()

language
eng    37924
vie    30571
deu    15588
Name: count, dtype: int64

In [12]:
df_evaluation = df_evaluation[df_evaluation['dataset'] == 'HateSpeechX']
df_evaluation['multi_label'] = 3
#df_hate_Discrimination,offensive,hate
df_evaluation.loc[df_evaluation['multi_label_id'] == '1', 'multi_label'] = 1
df_evaluation.loc[df_evaluation['multi_label_id'] == '2', 'multi_label'] = 2
df_evaluation.loc[df_evaluation['multi_label_id'] == '3', 'multi_label'] = 3


df_evaluation = df_evaluation.sample(frac=0.2, random_state=42)
df_evaluation['multi_label'].value_counts()

multi_label
3    1553
1    1367
2    1084
Name: count, dtype: int64

# Section 2: Load Model

In [None]:
model_lists = ["multi.lgb.Llama1B", 'dangdangde/m2.multi.v3.mean.Llama1B', "multi.lgb.Qwen14B", "m2.multi.v2.mean.Qwen14B", ""]
base_model = []

In [45]:
model_id = "dangdangde/" + model_lists[1]

In [46]:
dtype = getattr(torch, "bfloat16")
load_in_4bit = True
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_id,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

FastLanguageModel.for_inference(model) 
tokenizer.padding_side = "left"

==((====))==  Unsloth 2025.2.5: Fast Qwen2 patching. Transformers: 4.48.3.
   \\   /|    GPU: NVIDIA RTX A6000. Max memory: 47.529 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29. FA2 = True]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/138M [00:00<?, ?B/s]

Unsloth 2025.2.5 patched 48 layers with 48 QKV layers, 48 O layers and 48 MLP layers.


In [51]:
# Zero Shot
user_message_template = '''You are tasked with annotating speech. Your response must be a single valid number:
1 for Hate Speech.
2 for Offensive Speech.
3 for Normal.

Provide only the number corresponding to the category. Do not include any explanation or additional text or your thinking process.
Do you think the following comment is hate speech or offensive speech?
\n"{comment}"\n
Your Answer:
'''

# Few Shot
user_message_template = '''You are tasked with annotating speech. Your response must be a single valid number:
    1 for Hate Speech.
    2 for Offensive Speech.
    3 for Normal Speech.

    Think step by step:
    - Identify whether the comment contains explicit hate towards a group or individual based on identity characteristics (Hate Speech).
    - If the comment is aggressive, rude, or contains insults but lacks clear hateful intent, classify it as Offensive Speech.
    - If the comment is neutral or non-offensive, classify it as Normal Speech.

    Examples:
    Comment: "People of [group] are a disease and should be removed."
    Your Answer: 1

    Comment: "You're so stupid, just shut up!"
    Your Answer: 2

    Comment: "I hope you have a great day!"
    Your Answer: 3

    Provide only the number corresponding to the category. Do not include any explanation or additional text or your thinking process.
    Do you think the following comment is hate speech or offensive speech?
    \n"{comment}"\n
    Your Answer:
    '''
def preprocess(text):
    user_message_content = user_message_template.format(comment=text)
    user_message = {
        "role": "user",
        "content": user_message_content
    }
    if "Qwen" in model_id:
        system_message =  {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant"}
    else:
        system_message =  {"role": "system", "content": "You are a helpful assistant"}

    if "gemma" in model_id or "gemma" in model_id:
        messages = [user_message]
    else:
        messages = [system_message, user_message]


    messages = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    messages = messages
    if "mistral" in model_id:
        messages += " "

    return messages

df_evaluation["prompt"] = df_evaluation['text'].apply(preprocess)

In [52]:
print(df_evaluation['prompt'].tolist()[0])

<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant<|im_end|>
<|im_start|>user
You are tasked with annotating speech. Your response must be a single valid number:
    1 for Hate Speech.
    2 for Offensive Speech.
    3 for Normal Speech.

    Think step by step:
    - Identify whether the comment contains explicit hate towards a group or individual based on identity characteristics (Hate Speech).
    - If the comment is aggressive, rude, or contains insults but lacks clear hateful intent, classify it as Offensive Speech.
    - If the comment is neutral or non-offensive, classify it as Normal Speech.

    Examples:
    Comment: "People of [group] are a disease and should be removed."
    Your Answer: 1

    Comment: "You're so stupid, just shut up!"
    Your Answer: 2

    Comment: "I hope you have a great day!"
    Your Answer: 3

    Provide only the number corresponding to the category. Do not include any explanation or additional text or your thin

In [49]:
if "Qwen" in model_id:
    stop_token_id = tokenizer(["123"])['input_ids'][0]
elif "Llama" in model_id or "llama1B" in model_id:
    stop_token_id = [16, 17,18]
elif "mistral" in model_id:
    stop_token_id = [29508, 29518, 29538]
    #29549
else:
    stop_token_id = tokenizer(["123"])['input_ids'][0][1:]
assert len(stop_token_id) == 3
    
def process_task(texts):
    encoding = tokenizer(texts, padding=True, return_tensors='pt').to('cuda')
    with torch.no_grad():
        outputs = model(**encoding)
        logits = outputs.logits  
    last_token_logits = logits[:, -1, :] 
    probabilities = torch.softmax(last_token_logits, dim=-1)
    indices = torch.tensor(stop_token_id)
    probs = []
    for i in indices:
        probs.append( probabilities[:, i].float().cpu().numpy())
    return probs

# Section 3: Evaluation

In [53]:
batch_size = 200

multi.lgb.Llama1B

In [None]:
def run_test(df_for_evaluation):
    # batch_size = 50
    counter = 1
    texts = []
    device = 'cuda'

    probs = []
    for i in range(len(stop_token_id)):
        probs.append([])
    futures = []
    for index, value in tqdm(enumerate(df_for_evaluation['prompt'].tolist())):
        texts.append(value)

        if len(texts) % batch_size == 0:
        
            prob_return = process_task(texts)   
            for i2, p in enumerate(probs):
                probs[i2] += prob_return[i2].tolist()
            texts = []

            torch.cuda.empty_cache()
            torch.cuda.synchronize()  

    torch.cuda.empty_cache()
    torch.cuda.synchronize()     
    if len(texts) != 0:
        prob_return = process_task(texts)   
        for i2, p in enumerate(probs):
            probs[i2] += prob_return[i2].tolist()
            
        
    y_true = df_for_evaluation['multi_label']
    data = {}
    for i2, p in enumerate(probs):
        data[f"prob_value_{i2}"] = p

    probabilities = np.array([data["prob_value_0"], data["prob_value_1"], data["prob_value_2"]]).T  
    y_pred = np.argmax(probabilities, axis=1)


    print("ACC: ",accuracy_score(y_true - 1, y_pred))
    print("Precision: ",precision_score(y_true - 1, y_pred, average="macro"))
    print("Recall: ",recall_score(y_true - 1, y_pred, average="macro"))
    print("F1: ",f1_score(y_true - 1, y_pred, average="macro"))

run_test(df_evaluation)


4004it [00:28, 138.89it/s]

ACC:  0.3596403596403596
Precision:  0.305385959761406
Recall:  0.3937718449946895
F1:  0.2901941298580412



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


multi.lgb.Qwen14B

In [None]:
run_test(df_evaluation)

4004it [06:55,  9.64it/s]


ACC:  0.5591908091908092
Precision:  0.610482753544524
Recall:  0.5703250726332509
F1:  0.5634568361373148
