In [22]:
import pandas as pd
from pathlib import Path
from tqdm import tqdm

In [23]:
# Pick the dataset and load its test split from ../data/<dataset>/test.csv.
dataset = "docanno"
dirpath = Path("../data")
filepath = dirpath.joinpath(dataset, "test.csv")
df = pd.read_csv(filepath)
# Display (rows, columns) as the cell output.
df.shape

(16306, 27)

In [24]:
def get_columns(dataset):
    """Look up the column layout of a supported dataset.

    Args:
        dataset: Dataset name; the known names are "go_emo", "unhealthy",
            "docanno" and "aggression".

    Returns:
        A tuple ``(label_columns, annotator_column, text_column)`` where
        ``label_columns`` is a freshly built, alphabetically sorted list of
        label column names. ``None`` is returned for any other ``dataset``.
    """
    # Rows are (dataset name, label columns, annotator id column, text column).
    layouts = (
        (
            "go_emo",
            [
                'admiration', 'amusement', 'anger', 'annoyance', 'approval',
                'caring', 'confusion', 'curiosity', 'desire', 'disappointment',
                'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear',
                'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism',
                'pride', 'realization', 'relief', 'remorse', 'sadness',
                'surprise', 'neutral',
            ],
            "rater_id",
            "text",
        ),
        (
            "unhealthy",
            [
                "antagonize", "condescending", "dismissive", "generalisation",
                "generalisation_unfair", "healthy", "hostile", "sarcastic",
            ],
            "_worker_id",
            "comment",
        ),
        (
            "docanno",
            [
                'inspiring', 'interesting', 'offensive_to_someone', 'negative',
                'offensive_to_me', 'political', 'positive', 'sadness', 'calm',
                'fear', 'compassion', 'disgust', 'vulgar', 'surprise',
                'embarrasing', 'anger', 'understandable', 'ironic',
                'need_more_information', 'happiness', 'delight',
                'funny_to_someone', 'funny_to_me',
            ],
            "user_id",
            "text",
        ),
        (
            "aggression",
            ["aggression"],
            "worker_id",
            "comment",
        ),
    )
    for name, labels, annotator_col, text_col in layouts:
        if dataset == name:
            return sorted(labels), annotator_col, text_col
    # Unknown dataset name: callers receive None instead of a tuple.
    return None

In [25]:
# Number of few-shot examples attached to every row.
num_shots = 2
# Resolve the label, annotator-id and text column names for the chosen dataset.
label_columns, annotator_column, text_column = get_columns(dataset)

In [26]:
def parse_annotation_to_text(sample):
    """Render one annotated row as a text response.

    Reads the notebook-level globals ``dataset`` and ``label_columns``.

    Args:
        sample: Mapping-like row indexable by label column name.

    Returns:
        For the "aggression" dataset, "true" when the first label column
        equals 1 and "false" otherwise. For every other dataset, the names of
        all label columns whose value equals 1, joined with ", " in
        ``label_columns`` order (an empty string when none match).
    """
    if dataset != "aggression":
        active_labels = [name for name in label_columns if sample[name] == 1]
        return ", ".join(active_labels)
    # Single binary label: spell the flag out as a word.
    return "true" if sample[label_columns[0]] == 1 else "false"

In [27]:
def get_examples(df, sample, num_shots=2, random_state=None):
    """Draw few-shot examples written by the same annotator as ``sample``.

    Candidate rows are the rows of ``df`` whose ``annotator_column`` value
    equals the sample's; the sample's own row is removed from the candidates
    before drawing. Reads the notebook-level globals ``annotator_column`` and
    ``text_column`` and calls ``parse_annotation_to_text`` on each drawn row.

    Args:
        df: Frame to draw examples from.
        sample: Row to find examples for. It must carry an ``"index"`` entry
            holding the row's index label in ``df`` (for instance a row of
            ``df.reset_index()``); that label is dropped from the candidates.
        num_shots: Number of examples to draw. When fewer candidates exist,
            all of them are returned in random order.
        random_state: Forwarded unchanged to ``DataFrame.sample``. The default
            ``None`` keeps the previous unseeded behaviour. Note that an int
            seeds a new generator on every call; pass a shared
            ``np.random.RandomState`` / ``np.random.Generator`` to get
            reproducible draws that still vary from call to call.

    Returns:
        A flat list of length ``2 * num_shots`` laid out as
        ``[text_1, response_1, text_2, response_2, ...]``, padded with
        ``None`` when fewer than ``num_shots`` examples are available.
    """
    same_annotator = df[annotator_column] == sample[annotator_column]
    # Exclude the row itself so it is never used as its own example.
    candidates = df[same_annotator].drop(sample["index"])
    # Drawing min(num_shots, available) rows covers both the "enough rows"
    # case and the "take everything, shuffled" case with a single call.
    n_draw = min(num_shots, len(candidates))
    examples = candidates.sample(n=n_draw, random_state=random_state)

    flat = []
    for _, row in examples.iterrows():
        flat.append(row[text_column])
        flat.append(parse_annotation_to_text(row))
    # Pad so every call yields the same number of values (one per new column).
    flat.extend([None] * (2 * num_shots - len(flat)))
    return flat

In [28]:
# Register `progress_apply` on pandas objects so the row-wise pass below
# reports progress.
tqdm.pandas()
# Column names in the order get_examples returns its values:
# example1, example1_response, example2, example2_response, ...
new_columns = [
    name
    for i in range(1, num_shots + 1)
    for name in (f"example{i}", f"example{i}_response")
]
# reset_index() exposes each row's index label as an "index" field, which
# get_examples uses to exclude the row from its own candidates.
# num_shots is passed through (instead of a literal 2) so the number of
# returned values always matches len(new_columns).
df[new_columns] = df.reset_index().progress_apply(
    lambda sample: get_examples(df, sample, num_shots),
    axis=1,
    result_type="expand",
)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16306/16306 [00:09<00:00, 1694.28it/s]


In [29]:
# Preview the first five rows, including the newly added example columns.
df.head(n=5)

Unnamed: 0,text_id,user_id,inspiring,interesting,offensive_to_someone,negative,offensive_to_me,political,positive,sadness,...,happiness,delight,funny_to_someone,funny_to_me,fold,text,example1,example1_response,example2,example2_response
0,30514,38,1,1,1,1,1,1,1,1,...,1,1,0,0,7,Dlaczego cały czas wypowiadają się ci sami cel...,Na ciężki przebieg są najbardziej narażone oso...,"anger, calm, compassion, delight, disgust, emb...",Jak trudno jest obecnemu kierownictwu najwieks...,"anger, calm, compassion, delight, disgust, emb..."
1,30514,45,1,1,1,1,0,1,1,1,...,1,1,0,0,7,Dlaczego cały czas wypowiadają się ci sami cel...,KOD dał 'wyraz' demokracji . Ludzie a wy ich p...,"anger, calm, compassion, delight, disgust, emb...",Jeżeli szczepionka nie chroni przed zachorowan...,"calm, compassion, delight, embarrasing, happin..."
2,30515,38,1,1,1,1,1,1,1,1,...,1,1,0,0,6,"Byla na prawde ladna kobieta,ale po tych wszys...","To nie hejt co napisały te panie, ludzie! Nie ...","anger, calm, compassion, delight, disgust, emb...",Emocje wokół stanowiska selekcjonera kadry cor...,"anger, calm, compassion, disgust, fear, funny_..."
3,30515,45,1,1,1,1,0,1,1,1,...,1,1,0,0,6,"Byla na prawde ladna kobieta,ale po tych wszys...",I tak urzędnicza PiSlandia się zabawia kosztem...,"anger, calm, compassion, delight, disgust, fea...","No cóż, promowana na siłe Anna Karwan promuje ...","anger, calm, compassion, delight, disgust, fea..."
4,30516,16,1,1,1,1,1,1,1,1,...,1,1,1,1,4,A teczka Kaczyńskich w cudowny sposób gdzieś z...,Ci ludzie naprawde odnalezli milosc a smaszczo...,"anger, calm, compassion, delight, disgust, emb...","Koniecznie napiszcie o Agencie Tomku, zwierzęc...","anger, calm, compassion, delight, disgust, emb..."


In [30]:
# Write the augmented frame back to the same path it was read from
# (`filepath`), replacing the file's previous contents; index=False leaves
# the row index out of the CSV.
df.to_csv(path_or_buf=filepath, index=False)