# Config

In [10]:
# CSV file the main loop loads the records from.
import_location = 'datasets/emoevent/raw/full_subset/emoevent_crowdsourced_400.csv'

# Directory and file name the labeled frame is written to.
export_location = 'datasets/emoevent/raw/full_subset/ai_labeled/'
export_name = 'emoevent_ai_labeled_wo_others.csv'

# Keys into `dataset_config`; one labeling pass is made per entry.
datasets = ['EmoEvent']

# Keys into `gen_ai_config.model`; each model gets its own label column.
ai_models = [
    'Llama3.1 8B instruct-q8',
    'DeepSeek-R1 14B',
    'GPT 4o-mini',
    'o3-mini',
]

# When True, an already existing label column for a model is cleared before labeling.
overwrite_previous_labels = False

# NOTE(review): `deterministic` and `random_seed` are not referenced by any cell visible
# in this part of the notebook - confirm whether they are still needed.
deterministic = False

random_seed = 418   #   I'm a teacup

from config_files.dataset_config import dataset as dataset_config
from config_files import gen_ai_config

In [11]:
import pandas as pd
import os
import ollama
from ollama import ResponseError
from openai import OpenAI

# Label Generation Functions

### Build Label Prompt

In [12]:
def build_label_prompt(dataset_id, text, event = None):
    """Build the labeling prompt for a single record.

    Takes the dataset's ``'labels'`` template from ``config_files.prompts`` and
    substitutes the ``<text>`` placeholder first, then the ``<context>`` placeholder.

    Args:
        dataset_id: Key into ``prompts.prompt`` selecting the dataset's templates.
        text: Record text substituted for ``<text>``.
        event: Key into the dataset's ``'context'`` mapping; the mapped value is
            substituted for ``<context>``. With ``None`` (the default) and no
            ``None`` entry in the mapping, ``<context>`` is replaced by an empty string.

    Returns:
        The prompt string with both placeholders substituted.

    Raises:
        KeyError: If ``dataset_id`` is unknown or a non-``None`` ``event`` has
            no entry in the context mapping.
    """
    from config_files import prompts

    dataset_prompts = prompts.prompt[dataset_id]
    contexts = dataset_prompts.get('context', {})

    # Records without an event used to fail on `contexts[None]`; they now get an
    # empty context unless the configuration explicitly defines a `None` entry.
    if event is None and None not in contexts:
        context_text = ''
    else:
        context_text = contexts[event]

    prompt = dataset_prompts['labels'].replace('<text>', text)
    return prompt.replace('<context>', context_text)

### Generate AI Label

In [13]:
def generate_synthetic_label(genAI_details, label_prompt, num_labelers = 1):
    """Send a label prompt to the configured generative model and return its raw reply.

    Args:
        genAI_details: Model configuration. Reads ``"platform"`` (``"Ollama"`` or
            ``"OpenAI"``), ``"id"`` (model identifier passed to the API) and, for
            OpenAI models, ``"type"``.
        label_prompt: Prompt text, sent as a single user message.
        num_labelers: Number of completions requested for OpenAI models whose
            ``"type"`` is ``'CoT'``; ignored on every other path.

    Returns:
        A ``str`` with the reply text for Ollama models and non-CoT OpenAI models,
        or a ``list`` of reply strings (one per completion) for OpenAI ``'CoT'`` models.

    Raises:
        ValueError: If ``genAI_details["platform"]`` is not a supported platform
            (previously this case silently returned ``None``).
    """
    platform = genAI_details["platform"]
    messages = [{
        "role": "user",
        "content": label_prompt,
    }]

    if platform == "Ollama":
        response = ollama.chat(
            model=genAI_details["id"],
            messages=messages,
            options={"top_k":1, "temperature":0.0}   # Greedy
        )
        return response["message"]["content"]

    if platform == "OpenAI":
        # The API key is read from the environment, never hard-coded.
        client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

        if genAI_details['type'] == 'CoT':
            # No temperature is passed on this path; the API default applies.
            response = client.chat.completions.create(
                model=genAI_details["id"],
                messages=messages,
                n=num_labelers      # One completion per requested labeler
            )
            return [choice.message.content for choice in response.choices]

        response = client.chat.completions.create(
            model=genAI_details["id"],
            messages=messages,
            temperature=0.0 # Greedy
        )
        return response.choices[0].message.content

    raise ValueError(f"Unsupported generative AI platform: {platform!r}")

### Parse Label Response

In [14]:
def parse_label_response(response, dataset_details, cot_terminator = None):
    """Extract the first label mentioned in a model reply.

    Args:
        response: Raw reply text from the model.
        dataset_details: Dataset configuration; ``"all_labels"`` lists the candidate labels.
        cot_terminator: Marker that ends the reasoning part of a Chain-of-Thought
            reply. When given and present, everything up to and including its first
            occurrence is discarded. When it is absent from the reply, the whole
            reply is searched (previously the start of the reply was cut off).

    Returns:
        The entry of ``dataset_details["all_labels"]`` whose case-insensitive
        occurrence comes first in the reply. If two labels start at the same
        position, the one listed first wins.

    Raises:
        ResponseError: If none of the labels occurs in the reply.
    """
    if cot_terminator:      #   Remove the reasoning layer text for Chain-of-Thought models
        _reasoning, found_terminator, answer = response.partition(cot_terminator)
        if found_terminator:
            response = answer

    # Matching is by substring, so a label can also match inside a longer word.
    searchable = response.lower()

    # Find the label that occurs first.
    min_index = float('inf')
    first_found_label = None
    for label in dataset_details["all_labels"]:
        index = searchable.find(label.lower())
        if index != -1 and index < min_index:
            min_index = index
            first_found_label = label

    if first_found_label is None:
        # The reply is embedded in the message; ResponseError's second positional
        # argument is a status code, not a format argument.
        raise ResponseError(f"Label not found:\n{response}")

    return first_found_label


### Label Record

In [15]:
def get_label(dataset_details, gen_ai_details, text, event=None):
    """Label one record with one model and return the label plus the raw material.

    Builds the prompt, queries the model, and parses the reply. When the model
    returns several replies, each is parsed and a consensus label is chosen.

    Args:
        dataset_details: Dataset configuration. Reads ``'id'``, ``'min_labelers'``,
            ``'all_labels'``, ``'num_consensus'`` and ``'unlabeled_label'``.
        gen_ai_details: Model configuration; ``'terminator'`` is passed to the parser
            as the Chain-of-Thought terminator.
        text: Record text to label.
        event: Optional event/context key forwarded to the prompt builder.

    Returns:
        A tuple ``(label, labels, responses)``: the final label, the list of
        individual labels, and the list of raw replies they were parsed from.
        For multiple replies the final label is the last entry of ``'all_labels'``
        that reaches ``'num_consensus'`` votes, or ``'unlabeled_label'`` if none does.

    Raises:
        ResponseError: Propagated from the parser when a reply contains no label.
        TypeError: If the model reply is neither a string nor a list.
    """
    label_prompt = build_label_prompt(dataset_details['id'], text, event)

    response = generate_synthetic_label(gen_ai_details, label_prompt, dataset_details['min_labelers'])
    print(response)

    if isinstance(response, str):
        label = parse_label_response(response, dataset_details, gen_ai_details['terminator'])
        print(f"--------------------------------------------- Label: {label} ---------------------------------------------\n ")
        return label, [label], [response]

    if isinstance(response, list):
        response_list = response
        labels = [
            parse_label_response(single_response, dataset_details, gen_ai_details['terminator'])
            for single_response in response_list
        ]

        consensus_label = dataset_details['unlabeled_label']
        for potential_label in dataset_details["all_labels"]:
            if labels.count(potential_label) >= dataset_details["num_consensus"]:
                consensus_label = potential_label

        print(f"CONSENSUS LABEL: {consensus_label}")
        # Return every reply; the loop variable used to shadow the list, so only
        # the last reply string was returned.
        return consensus_label, labels, response_list

    raise TypeError(f"Unexpected response type from model: {type(response).__name__}")

# Main Loop

In [16]:
from time import sleep

MAX_LABEL_ATTEMPTS = 5      #   Attempts per record before falling back to the dataset's unlabeled label
RETRY_DELAY_SECONDS = 5     #   Pause between failed attempts

# For every dataset and every configured model: label each record, store the label in a
# column named after the model id, and export the frame after each model has finished.
for dataset in datasets:
    dataset_details = dataset_config[dataset]        #   Get dataset info

    try:
        df_dataset = pd.read_csv(import_location)    #  Load dataset
    except FileNotFoundError:
        print(f"WARNING: '{import_location}' not found; there are no records to label for {dataset}")
        df_dataset = pd.DataFrame()

    # Create the export directory up front so the export cannot fail after a full labeling pass.
    os.makedirs(export_location, exist_ok=True)

    has_context = 'context' in df_dataset.columns
    keep_existing_labels = overwrite_previous_labels is not True

    for ai_model in ai_models:
        ai_details = gen_ai_config.model[ai_model]   #   Get model info
        label_column = ai_details['id']

        if label_column not in df_dataset.columns:
            df_dataset[label_column] = ''            #   Create a new column for the AI generated labels
        elif overwrite_previous_labels is True:
            df_dataset[label_column] = ''            #   Clear old data

        save_responses = ai_model == 'DeepSeek-R1 14B'
        if save_responses:
            os.makedirs(os.path.join(export_location, 'ai_responses', ai_model), exist_ok=True)

        for df_index, row in df_dataset.iterrows():
            existing_label = df_dataset.at[df_index, label_column]
            if keep_existing_labels and pd.notna(existing_label) and existing_label != '':
                continue    #   Honour overwrite_previous_labels: keep labels that are already present

            print(f'{ai_model} -> {df_index}')

            for attempt in range(1, MAX_LABEL_ATTEMPTS + 1):
                try:
                    if has_context:
                        label, labels, responses = get_label(dataset_details, ai_details, row['text'], row['context'])
                    else:
                        label, labels, responses = get_label(dataset_details, ai_details, row['text'])

                except ResponseError as error:  #   e.g. a CUDA illegal memory access error, or a reply without a label
                    print(f"Attempt {attempt}/{MAX_LABEL_ATTEMPTS} failed: {error}")
                    if attempt < MAX_LABEL_ATTEMPTS:
                        sleep(RETRY_DELAY_SECONDS)

                else:
                    break

            else:
                # Every attempt failed. A greedy model returns the same unparsable reply each
                # time, so retrying without a bound would never terminate.
                label = dataset_details['unlabeled_label']
                responses = []
                print(f"WARNING: no label after {MAX_LABEL_ATTEMPTS} attempts; using '{label}' for record {df_index}")

            df_dataset.at[df_index, label_column] = label

            if save_responses:
                with open(os.path.join(export_location,'ai_responses', ai_model, f"{dataset_details['id']}_{df_index}.txt"), 'w', encoding="utf-8") as file:
                    file.write(f"Record: \"{row['text']}\"\n{ai_model}\n{'-'*120}\n\n")
                    for response_index, line in enumerate(responses):
                        file.write(f"Response {response_index+1}:\n{line}\n{'-'*120}\n\n")

        display(df_dataset)
        df_dataset.to_csv(os.path.join(export_location, export_name), index_label=False)


Llama3.1 8B instruct-q8 -> 0
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
Llama3.1 8B instruct-q8 -> 1
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
Llama3.1 8B instruct-q8 -> 2
sadness
--------------------------------------------- Label: sadness ---------------------------------------------
 
Llama3.1 8B instruct-q8 -> 3
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
Llama3.1 8B instruct-q8 -> 4
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
Llama3.1 8B instruct-q8 -> 5
fear
--------------------------------------------- Label: fear ---------------------------------------------
 
Llama3.1 8B instruct-q8 -> 6
surprise
--------------------------------------------- Label: surprise ---------------------------------------------
 
Llama3.1 8

Unnamed: 0,original index,context,text,labels,llama3.1:8b-instruct-q8_0
0,4183,ChampionsLeague,Love that Jurgen just told the boys to smile a...,joy,joy
1,2733,WorldBookDay,#WorldBookDay To some amazing books that have ...,joy,joy
2,197,NotreDame,I know it’s been over 24 hours and for some re...,sadness,sadness
3,3519,GretaThunberg,Educating for Extinction. (climate change as a...,joy,anger
4,912,ChampionsLeague,I hope Salah scores a hat trick in the second ...,joy,joy
...,...,...,...,...,...
395,3956,GameOfThrones,"I’ve watched episode 3, the battle for winterf...",joy,joy
396,5607,WorldBookDay,"""I do believe something very magical can happe...",joy,joy
397,1635,NotreDame,What we know so far about the #NotreDameCathed...,fear,anger
398,335,ChampionsLeague,Year 2030 Me: Anyone remember 2019 when Liverp...,disgust,anger


DeepSeek-R1 14B -> 0
<think>
Alright, let's tackle this tweet analysis step by step. The user wants me to determine the primary emotion conveyed in the tweet using one of the specified labels: anger, disgust, fear, joy, sadness, or surprise.

First, I'll read the tweet carefully: "Love that Jurgen just told the boys to smile and play. Amazing managing in a tough situation. #BAR #Championsleague #Liverpool." 

The key words here are "love," "amazing," and the overall tone seems positive. The author is expressing admiration for Jurgen's actions, which suggests joy or possibly surprise.

Looking at the context provided, it's about the UEFA Champions League, a prestigious football competition. Jurgen likely refers to Jurgen Klopp, the manager of Liverpool FC. The tweet praises his leadership during a tough match, using words like "love" and "amazing," which are strong positive indicators.

Now, mapping this to the guidelines: joy is associated with serenity or ecstasy, while surprise relat

Unnamed: 0,original index,context,text,labels,llama3.1:8b-instruct-q8_0,deepseek-r1:14b
0,4183,ChampionsLeague,Love that Jurgen just told the boys to smile a...,joy,joy,joy
1,2733,WorldBookDay,#WorldBookDay To some amazing books that have ...,joy,joy,joy
2,197,NotreDame,I know it’s been over 24 hours and for some re...,sadness,sadness,sadness
3,3519,GretaThunberg,Educating for Extinction. (climate change as a...,joy,anger,sadness
4,912,ChampionsLeague,I hope Salah scores a hat trick in the second ...,joy,joy,joy
...,...,...,...,...,...,...
395,3956,GameOfThrones,"I’ve watched episode 3, the battle for winterf...",joy,joy,joy
396,5607,WorldBookDay,"""I do believe something very magical can happe...",joy,joy,joy
397,1635,NotreDame,What we know so far about the #NotreDameCathed...,fear,anger,anger
398,335,ChampionsLeague,Year 2030 Me: Anyone remember 2019 when Liverp...,disgust,anger,joy


GPT 4o-mini -> 0
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
GPT 4o-mini -> 1
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
GPT 4o-mini -> 2
sadness
--------------------------------------------- Label: sadness ---------------------------------------------
 
GPT 4o-mini -> 3
disgust
--------------------------------------------- Label: disgust ---------------------------------------------
 
GPT 4o-mini -> 4
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
GPT 4o-mini -> 5
fear
--------------------------------------------- Label: fear ---------------------------------------------
 
GPT 4o-mini -> 6
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
GPT 4o-mini -> 7
joy
--------------------------------------------- Label: joy ----------------------

Unnamed: 0,original index,context,text,labels,llama3.1:8b-instruct-q8_0,deepseek-r1:14b,gpt-4o-mini
0,4183,ChampionsLeague,Love that Jurgen just told the boys to smile a...,joy,joy,joy,joy
1,2733,WorldBookDay,#WorldBookDay To some amazing books that have ...,joy,joy,joy,joy
2,197,NotreDame,I know it’s been over 24 hours and for some re...,sadness,sadness,sadness,sadness
3,3519,GretaThunberg,Educating for Extinction. (climate change as a...,joy,anger,sadness,disgust
4,912,ChampionsLeague,I hope Salah scores a hat trick in the second ...,joy,joy,joy,joy
...,...,...,...,...,...,...,...
395,3956,GameOfThrones,"I’ve watched episode 3, the battle for winterf...",joy,joy,joy,joy
396,5607,WorldBookDay,"""I do believe something very magical can happe...",joy,joy,joy,joy
397,1635,NotreDame,What we know so far about the #NotreDameCathed...,fear,anger,anger,anger
398,335,ChampionsLeague,Year 2030 Me: Anyone remember 2019 when Liverp...,disgust,anger,joy,sadness


o3-mini -> 0
['joy', 'joy', 'joy']
CONSENSUS LABEL: joy
o3-mini -> 1
['joy', 'joy', 'joy']
CONSENSUS LABEL: joy
o3-mini -> 2
['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness
o3-mini -> 3
['anger', 'anger', 'anger']
CONSENSUS LABEL: anger
o3-mini -> 4
['joy', 'joy', 'joy']
CONSENSUS LABEL: joy
o3-mini -> 5
['fear', 'fear', 'fear']
CONSENSUS LABEL: fear
o3-mini -> 6
['joy', 'joy', 'joy']
CONSENSUS LABEL: joy
o3-mini -> 7
['joy', 'joy', 'joy']
CONSENSUS LABEL: joy
o3-mini -> 8
['joy', 'joy', 'joy']
CONSENSUS LABEL: joy
o3-mini -> 9
['joy', 'joy', 'joy']
CONSENSUS LABEL: joy
o3-mini -> 10
['anger', 'anger', 'anger']
CONSENSUS LABEL: anger
o3-mini -> 11
['joy', 'joy', 'joy']
CONSENSUS LABEL: joy
o3-mini -> 12
['disgust', 'disgust', 'disgust']
CONSENSUS LABEL: disgust
o3-mini -> 13
['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness
o3-mini -> 14
['anger', 'anger', 'anger']
CONSENSUS LABEL: anger
o3-mini -> 15
['anger', 'anger', 'anger']
CONSENSUS LABEL: anger
o3-mini -> 16


Unnamed: 0,original index,context,text,labels,llama3.1:8b-instruct-q8_0,deepseek-r1:14b,gpt-4o-mini,o3-mini
0,4183,ChampionsLeague,Love that Jurgen just told the boys to smile a...,joy,joy,joy,joy,joy
1,2733,WorldBookDay,#WorldBookDay To some amazing books that have ...,joy,joy,joy,joy,joy
2,197,NotreDame,I know it’s been over 24 hours and for some re...,sadness,sadness,sadness,sadness,sadness
3,3519,GretaThunberg,Educating for Extinction. (climate change as a...,joy,anger,sadness,disgust,anger
4,912,ChampionsLeague,I hope Salah scores a hat trick in the second ...,joy,joy,joy,joy,joy
...,...,...,...,...,...,...,...,...
395,3956,GameOfThrones,"I’ve watched episode 3, the battle for winterf...",joy,joy,joy,joy,joy
396,5607,WorldBookDay,"""I do believe something very magical can happe...",joy,joy,joy,joy,joy
397,1635,NotreDame,What we know so far about the #NotreDameCathed...,fear,anger,anger,anger,anger
398,335,ChampionsLeague,Year 2030 Me: Anyone remember 2019 when Liverp...,disgust,anger,joy,sadness,joy


# Testing

# Scratch check: print the individual labels DeepSeek-R1 produces for every record of each
# dataset. The records are loaded into their own frame (`df_test`) so that running this cell
# cannot replace the labeled `df_dataset` that the export cell writes to disk.
for dataset in datasets:
    dataset_details = dataset_config[dataset]               #   Get dataset info
    df_test = pd.read_csv(dataset_details['relpath'])       #  Load dataset

    for ai_model in ['DeepSeek-R1 14B']:
        ai_details = gen_ai_config.model[ai_model]          #   Get model info
        for row in df_test.itertuples():
            event = getattr(row, 'event', None)             #   Frames without an 'event' column are labeled without context
            print(get_label(dataset_details, ai_details, row.text, event)[1])



In [17]:
# Write the labeled frame to the configured export file, then show it as the cell output.
df_dataset.to_csv(
    path_or_buf=os.path.join(export_location, export_name),
    index_label=False,
)

df_dataset

Unnamed: 0,original index,context,text,labels,llama3.1:8b-instruct-q8_0,deepseek-r1:14b,gpt-4o-mini,o3-mini
0,4183,ChampionsLeague,Love that Jurgen just told the boys to smile a...,joy,joy,joy,joy,joy
1,2733,WorldBookDay,#WorldBookDay To some amazing books that have ...,joy,joy,joy,joy,joy
2,197,NotreDame,I know it’s been over 24 hours and for some re...,sadness,sadness,sadness,sadness,sadness
3,3519,GretaThunberg,Educating for Extinction. (climate change as a...,joy,anger,sadness,disgust,anger
4,912,ChampionsLeague,I hope Salah scores a hat trick in the second ...,joy,joy,joy,joy,joy
...,...,...,...,...,...,...,...,...
395,3956,GameOfThrones,"I’ve watched episode 3, the battle for winterf...",joy,joy,joy,joy,joy
396,5607,WorldBookDay,"""I do believe something very magical can happe...",joy,joy,joy,joy,joy
397,1635,NotreDame,What we know so far about the #NotreDameCathed...,fear,anger,anger,anger,anger
398,335,ChampionsLeague,Year 2030 Me: Anyone remember 2019 when Liverp...,disgust,anger,joy,sadness,joy


In [18]:
# Manual override: set the 'deepseek-r1:14b' label of the record at index 89 to 'other'.
# NOTE(review): this cell runs after the export cell above, so the override only reaches
# the CSV if the export is executed again afterwards - confirm that a later step re-exports.
df_dataset.at[89, 'deepseek-r1:14b'] = 'other'