In [9]:
import transformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

import numpy as np
import pandas as pd
import sklearn.metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from glob import glob

import torch
from huggingface_hub import login
import os
import random
from tqdm import tqdm
# Use the GPU when torch can see one; otherwise fall back to the CPU so the
# notebook does not crash on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Never commit an access token: read it from the HF_TOKEN environment variable.
# When the variable is unset, token=None lets login() ask for the token instead.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
# Load the tokenizer and the causal LM from the same checkpoint id, then move
# the model onto `device`.
_CHECKPOINT_ID = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT_ID)
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT_ID)
model = model.to(device)

In [None]:
# Annotation table used by the rest of the notebook (one parquet file).
_VOICED_PARQUET = "/content/drive/MyDrive/Sujan_Dutta/LLM Noise Audit/EMNLP 2024/Share/voiced_complete.parquet"
D_voiced = pd.read_parquet(_VOICED_PARQUET)
# Collapse the five-level offensiveness scale to a binary label:
# the three strongest levels map to 1, the two weakest map to 0.
_LEVELS_MAPPED_TO_1 = ("Extremely offensive", "Very offensive", "Moderately offensive")
_LEVELS_MAPPED_TO_0 = ("Slightly offensive", "Not at all offensive")
cat2bin = {
    **dict.fromkeys(_LEVELS_MAPPED_TO_1, 1),
    **dict.fromkeys(_LEVELS_MAPPED_TO_0, 0),
}

def map_label(x):
    """Convert one raw offensiveness rating to a binary label.

    Args:
        x: A raw rating. Either a category string that is a key of ``cat2bin``
            (surrounding whitespace is ignored), the sentinel ``-1``, or a
            missing value (``None``, ``NaN``, ``pd.NA``).

    Returns:
        ``cat2bin[x.strip()]`` for a category string, or ``-1`` when the rating
        is missing or already the ``-1`` sentinel.

    Raises:
        KeyError: If ``x`` is a string that is not a known category.
    """
    # The missing-value check must come first: `pd.NA == -1` evaluates to
    # pd.NA, and using that in a boolean context raises TypeError.
    if pd.isna(x) or x == -1:
        return -1
    return cat2bin[x.strip()]

# Derive the binary label column from the raw rating column, row by row.
_person_toxic_binary = D_voiced["PERSON_TOXIC_raw"].apply(map_label)
D_voiced["PERSON_TOXIC"] = _person_toxic_binary

In [None]:
def prepare_annotator_group_data(data, pol_grp, n_shot=10, seed=None):
    """Build a per-annotator few-shot train/test split for one political group.

    Keeps the rows of ``data`` whose ``annotator_political`` equals ``pol_grp``,
    drops rows with missing values and rows whose ``PERSON_TOXIC`` is ``-1``,
    then, for every annotator, samples ``n_shot`` rows as few-shot examples
    (``is_train`` = 1). The annotator's rows whose ``comment_id`` was not
    sampled become test rows (``is_train`` = 0). The result is written to
    ``{pol_grp}_annotator_data.csv`` in the current working directory.

    Args:
        data: DataFrame with the columns ``comment_id``, ``comment_text``,
            ``annotator_id``, ``annotator_political`` and ``PERSON_TOXIC``.
        pol_grp: Value of ``annotator_political`` to keep; also the prefix of
            the output file name.
        n_shot: Number of few-shot rows sampled per annotator.
        seed: Optional integer seed. When given, the sampling is reproducible;
            when ``None`` pandas' default random state is used.

    Returns:
        None. The split is written to disk.
    """
    import warnings

    columns = ["comment_id", "comment_text", "annotator_id", "annotator_political", "PERSON_TOXIC"]
    # .copy() gives an independent frame so the column assignment below never
    # writes through to (or warns about) a view of `data`.
    pol_comm = data.loc[data["annotator_political"] == pol_grp, columns].dropna().copy()
    pol_comm["PERSON_TOXIC"] = pol_comm["PERSON_TOXIC"].astype("int")
    pol_comm = pol_comm[pol_comm["PERSON_TOXIC"] != -1]

    # One shared random state so successive annotators get different draws
    # while the whole run stays reproducible for a given seed.
    rng = None if seed is None else np.random.RandomState(seed)

    splits = []
    skipped = []
    # sort=False keeps annotators in order of first appearance.
    for ann, ann_rows in pol_comm.groupby("annotator_id", sort=False):
        if len(ann_rows) < n_shot:
            # Not enough rows to draw n_shot examples; sampling would raise.
            skipped.append(ann)
            continue
        train = ann_rows.sample(n=n_shot, random_state=rng)
        test = ann_rows[~ann_rows["comment_id"].isin(train["comment_id"])]
        splits.append(train.assign(is_train=1))
        splits.append(test.assign(is_train=0))

    if skipped:
        warnings.warn(
            f"Skipped {len(skipped)} annotator(s) with fewer than {n_shot} usable rows."
        )

    if splits:
        # Concatenate once instead of growing a frame inside the loop.
        re_n_shot_data = pd.concat(splits)
    else:
        # Keep the header so the file can still be read back with read_csv.
        re_n_shot_data = pd.DataFrame(columns=columns + ["is_train"])

    re_n_shot_data.to_csv(f"{pol_grp}_annotator_data.csv", index=False)

In [8]:
# Text shown in the prompt for each binary label value.
bin2label = dict([(1, "offensive"), (0, "non-offensive")])

def predict_offense(df, text):
    """Ask the language model for one annotator's likely response to a comment.

    Builds a single-turn prompt from fixed instructions, one
    ``Comment: ... / Response: ...`` pair per row of ``df``, and the target
    comment wrapped in ``<<< >>>``; then greedily generates up to 10 new tokens
    with the module-level ``model`` and ``tokenizer``.

    Args:
        df: Few-shot examples with the columns ``comment_text`` and
            ``PERSON_TOXIC`` (values must be keys of ``bin2label``).
        text: The comment to classify.

    Returns:
        The generated continuation as a stripped string, with special tokens
        removed.
    """
    prompt = f"""
You are an expert in guessing my response against a social media comment. Your task is to analyze and predict \
my response against the comment after <<<>>> into one of the following predefined categories:

offensive
non-offensive

You will only respond with the predefined category. Do not provide explanations or notes.

####
Here are some examples:

"""

    prompt += "".join(
        f"Comment: {comment}\nResponse: {bin2label[label]}\n"
        for comment, label in zip(df["comment_text"], df["PERSON_TOXIC"])
    )
    prompt += f"""###

<<<
Comment: {text}
>>>
"""

    messages = [{"role": "user", "content": prompt}]
    # Put the inputs on whatever device the model lives on instead of assuming CUDA.
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        encodeds,
        max_new_tokens=10,
        do_sample=False,
        eos_token_id=tokenizer.eos_token_id,
        # Explicit pad id; equals the EOS id that is already passed above.
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the tokens after the prompt. Splitting the full decoded text
    # on "[/INST]" picks the wrong segment when a comment contains that marker,
    # and rstrip("</s>") removes any trailing '<', '/', 's' or '>' characters
    # rather than just the end-of-sequence token.
    new_tokens = generated_ids[0, encodeds.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

In [None]:
def model_annotators(pol_grp, max_annotators=20, max_test=20, seed=None):
    """Predict held-out labels for the annotators of one political group.

    Reads ``{pol_grp}_annotator_data.csv``, and for each of the first
    ``max_annotators`` annotators shuffles that annotator's ``is_train`` == 1
    rows, samples up to ``max_test`` of the ``is_train`` == 0 rows, calls
    ``predict_offense`` once per sampled comment, and writes the sampled rows
    plus a ``prediction`` column to ``{pol_grp}_pred_{annotator_id}.csv``.

    Args:
        pol_grp: Group name used as the prefix of the input and output files.
        max_annotators: Number of annotators to process, in order of first
            appearance in the input file. ``None`` processes all of them.
        max_test: Maximum number of test comments per annotator. ``None`` uses
            every test comment.
        seed: Optional integer seed making the shuffling and test sampling
            reproducible. ``None`` uses pandas' default random state.

    Returns:
        None. One CSV per processed annotator is written to disk.
    """
    n_shot_data = pd.read_csv(f"{pol_grp}_annotator_data.csv")
    predictors = n_shot_data["annotator_id"].unique()
    # Shared random state: reproducible overall, different draws per annotator.
    rng = None if seed is None else np.random.RandomState(seed)

    for ann in tqdm(predictors[:max_annotators]):
        ann_data = n_shot_data[n_shot_data["annotator_id"] == ann]
        # frac=1 shuffles the order in which the examples appear in the prompt.
        train_data = ann_data[ann_data["is_train"] == 1].sample(frac=1, random_state=rng)
        test_pool = ann_data[ann_data["is_train"] == 0]
        n_test = len(test_pool) if max_test is None else min(max_test, len(test_pool))
        # .copy() so the new column is added to an independent frame.
        test_data = test_pool.sample(n=n_test, random_state=rng).copy()

        test_data["prediction"] = [
            predict_offense(train_data, comment) for comment in test_data["comment_text"]
        ]
        test_data.to_csv(f"{pol_grp}_pred_{ann}.csv", index=False)

In [None]:
def label2bin(text):
    """Parse a model response into a binary label.

    Args:
        text: The model's response. Non-string values (for example the NaN
            that ``pd.read_csv`` produces for an empty cell) are treated as
            unparseable.

    Returns:
        1 if the response starts with "off", 0 if it starts with "non"
        (case-insensitive, ignoring surrounding whitespace), otherwise -1.
    """
    if not isinstance(text, str):
        # Calling .lower() on a float NaN or None would raise AttributeError.
        return -1
    normalized = text.strip().lower()
    if normalized.startswith("off"):
        return 1
    if normalized.startswith("non"):
        return 0
    return -1

def compute_score(pol_grp):
    """Score the saved predictions of one political group, per annotator.

    Reads every ``{pol_grp}_pred_*.csv`` file in the current working
    directory, converts the ``prediction`` column to binary labels with
    ``label2bin``, and for each annotator computes the macro-averaged F1
    between ``PERSON_TOXIC`` and the predictions, ignoring rows whose
    prediction could not be parsed (-1). The scores are written to
    ``{pol_grp}_annotator_score.csv``.

    Args:
        pol_grp: Group name used as the prefix of the input and output files.

    Returns:
        None. The per-annotator scores are written to disk.

    Raises:
        FileNotFoundError: If no prediction file matches the group.
    """
    # Sorted so the row order of the output does not depend on directory order.
    pred_files = sorted(glob(f"{pol_grp}_pred_*.csv"))
    if not pred_files:
        raise FileNotFoundError(
            f"No prediction files matching '{pol_grp}_pred_*.csv' were found."
        )

    # Read everything first and concatenate once.
    all_df = pd.concat([pd.read_csv(path) for path in pred_files], ignore_index=True)

    # An empty model response is read back from CSV as NaN; turn it into an
    # empty string so it is parsed as "unparseable" instead of crashing.
    all_df["prediction"] = all_df["prediction"].fillna("").astype(str).apply(label2bin)

    result_arr = []
    for ann in all_df["annotator_id"].unique():
        ann_data = all_df[(all_df["annotator_id"] == ann) & (all_df["prediction"] != -1)]
        if ann_data.empty:
            # No parseable prediction for this annotator: F1 is undefined.
            f1 = float("nan")
        else:
            f1 = f1_score(ann_data["PERSON_TOXIC"], ann_data["prediction"], average="macro")
        result_arr.append([ann, f1])

    result_df = pd.DataFrame(result_arr, columns=["annotator_id", "f1"])
    result_df.to_csv(f"{pol_grp}_annotator_score.csv", index=False)

In [None]:
# Run the whole pipeline (split -> predict -> score) for a single group.
# Other group names noted for this cell: "Republican", "Independent".
group = "Democrat"

prepare_annotator_group_data(data=D_voiced, pol_grp=group)
model_annotators(pol_grp=group)
compute_score(pol_grp=group)