# Config

In [1]:
# Input CSV to label and the folder/file name the labelled copy is written to.
import_location = 'datasets/emoevent/raw/full_subset/emoevent_crowdsourced_400.csv'
export_location = 'datasets/emoevent/new/full_subset/ai_labeled/'
export_name = 'emoevent_ai_labeled_400.csv'

# Keys looked up in `dataset_config` and `gen_ai_config.model` by main_async().
datasets = ['EmoEvent']
ai_models = ['Llama3.1', 'DeepSeek', 'GPT 4o-mini', 'o3-mini'] # 'Llama3.1 8B instruct-q8', 'DeepSeek-R1 14B', 'GPT 4o-mini', 'o3-mini'
# When a model's label column already exists, main_async() blanks it only if this is True.
overwrite_previous_labels = True
# NOTE(review): `deterministic` and `random_seed` are not referenced by any code
# visible in this notebook -- confirm whether they are still needed.
deterministic = False

random_seed = 418   #   I'm a teacup

# Number of concurrent API calls (size of the asyncio.Semaphore created in main_async)
MAX_CONCURRENT_CALLS = 50
# Number of rows handed to process_batch() at a time by process_dataset_async()
BATCH_SIZE = 50
# Semaphore to limit concurrent API calls
semaphore = None  # Will be initialized in main

from config_files.dataset_config import dataset as dataset_config
from config_files import gen_ai_config

In [2]:
import pandas as pd
import os
import ollama
from ollama import ResponseError
from openai import AsyncOpenAI, APIConnectionError
import asyncio
import tqdm
import nest_asyncio
nest_asyncio.apply()  # This allows asyncio to work in Jupyter

# Label Generation Functions

### Build Label Prompt

In [3]:
def build_label_prompt(dataset_id, text, event=None):
    """Fill the dataset's label-prompt template with a record's text and context.

    The template is read from ``config_files.prompts.prompt[dataset_id]['labels']``;
    its ``<text>`` placeholder is replaced with ``text`` and its ``<context>``
    placeholder with the context string registered for ``event``.

    Args:
        dataset_id: Key into ``prompts.prompt``.
        text: The record text to be labelled.
        event: Key into the dataset's ``'context'`` mapping. ``None`` (the
            default) means "no context": unless the mapping has an explicit
            ``None`` entry, ``<context>`` is replaced with an empty string
            instead of failing with ``KeyError``.

    Returns:
        The fully substituted prompt string.

    Raises:
        KeyError: If ``dataset_id`` is unknown, or a non-``None`` ``event`` has
            no registered context.
    """
    from config_files import prompts

    dataset_prompts = prompts.prompt[dataset_id]
    prompt = dataset_prompts['labels'].replace('<text>', text)

    try:
        context_text = dataset_prompts['context'][event]
    except KeyError:
        # Callers legitimately omit `event` for datasets without a context
        # column; only an explicitly requested but unknown event is an error.
        if event is not None:
            raise
        context_text = ''

    return prompt.replace('<context>', context_text)

### Generate AI Label

In [4]:
def generate_synthetic_label(genAI_details, label_prompt, num_labelers=1):
    """Ask an Ollama-hosted model for a label (blocking call).

    Args:
        genAI_details: Model config; ``"platform"`` must be ``"Ollama"`` and
            ``"id"`` names the model passed to ``ollama.chat``.
        label_prompt: Prompt sent as a single user message.
        num_labelers: Accepted for signature parity with the async variant;
            not used here.

    Returns:
        The ``content`` of the reply message.

    Raises:
        ValueError: For any platform other than Ollama.
    """
    if genAI_details["platform"] != "Ollama":
        # OpenAI calls should use the async version instead
        raise ValueError("For OpenAI, use the async version of this function")

    user_turn = {"role": "user", "content": label_prompt}
    greedy_decoding = {"top_k": 1, "temperature": 0.0}   # Greedy

    reply = ollama.chat(
        model=genAI_details["id"],
        messages=[user_turn],
        options=greedy_decoding,
    )
    return reply["message"]["content"]


async def _request_openai_labels(client, genAI_details, label_prompt, num_labelers):
    """Send one chat-completion request and return the raw label text(s)."""
    messages = [{
        "role": "user",
        "content": label_prompt,
    }]

    if genAI_details['type'] == 'CoT':
        # CoT models are sampled `num_labelers` times in a single request;
        # no temperature is passed for them.
        response = await client.chat.completions.create(
            model=genAI_details["id"],
            messages=messages,
            n=num_labelers
        )
        return [choice.message.content for choice in response.choices]

    response = await client.chat.completions.create(
        model=genAI_details["id"],
        messages=messages,
        temperature=0.0  # Greedy
    )
    return response.choices[0].message.content


async def generate_synthetic_label_async(genAI_details, label_prompt, num_labelers=1):
    """Asynchronous version for OpenAI.

    Args:
        genAI_details: Model config. ``"platform"`` selects the backend,
            ``"id"`` is the model name and ``"type"`` (``'CoT'`` or anything
            else) selects multi-sample vs. greedy single-sample requests.
        label_prompt: Prompt sent as a single user message.
        num_labelers: Number of completions requested for ``'CoT'`` models.

    Returns:
        For OpenAI ``'CoT'`` models a list of response strings (one per
        choice); for other OpenAI models a single response string. For
        non-OpenAI platforms, whatever ``generate_synthetic_label`` returns.

    Raises:
        APIConnectionError: If the initial attempt and all 5 retries fail.

    Notes:
        Requires the module-level ``semaphore`` to have been initialised
        (``main_async`` does this). The semaphore slot is held for the whole
        call, including back-off sleeps, exactly as before.
    """
    if genAI_details["platform"] != "OpenAI":
        # Use the synchronous version for non-OpenAI platforms
        return generate_synthetic_label(genAI_details, label_prompt, num_labelers)

    max_retries = 5

    async with semaphore:  # Limit concurrent calls
        client = AsyncOpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
        try:
            # Attempt 0 is the initial request; attempts 1..5 are retries with
            # exponential backoff: 1, 2, 4, 8, 16 seconds.
            for attempt in range(max_retries + 1):
                if attempt:
                    wait_time = 2 ** (attempt - 1)
                    print(f"API Connection Error. Retrying in {wait_time} seconds... (Attempt {attempt}/5)")
                    await asyncio.sleep(wait_time)

                try:
                    return await _request_openai_labels(client, genAI_details, label_prompt, num_labelers)
                except APIConnectionError:
                    if attempt == max_retries:  # Last attempt
                        raise  # Re-raise the exception if all retries failed
        finally:
            # A client is created per call; close it so its HTTP connections
            # are released instead of lingering until garbage collection.
            await client.close()

### Parse Label Response

In [5]:
def parse_label_response(response, dataset_details, cot_terminator=None):
    """Extract the first dataset label mentioned in a model response.

    Args:
        response: Raw response text from the model.
        dataset_details: Dataset config; ``"all_labels"`` lists the candidate
            labels. Matching is a substring search on the lower-cased
            response, so labels are presumably stored in lower case, and a
            label also matches inside a longer word (e.g. ``"other"`` in
            ``"others"``).
        cot_terminator: Optional marker that ends a Chain-of-Thought model's
            reasoning section. When it occurs in ``response``, everything up
            to and including its first occurrence is discarded. When it does
            not occur, the whole response is searched.

    Returns:
        The label whose first occurrence is earliest in the (trimmed)
        response; ties go to the label listed first in ``"all_labels"``.

    Raises:
        ResponseError: If no label occurs in the response.
    """
    if cot_terminator:      #   Remove the reasoning layer text for Chain-of-Thought models
        terminator_index = response.find(cot_terminator)
        # find() returns -1 when the marker is missing; slicing from
        # -1 + len(marker) would silently chop unrelated leading characters.
        if terminator_index != -1:
            response = response[terminator_index + len(cot_terminator):]

    # Find the label that occurs first.
    searchable = response.lower()
    hits = []
    for label in dataset_details["all_labels"]:
        position = searchable.find(label)
        if position != -1:
            hits.append((position, label))

    if not hits:
        raise ResponseError(f"Label not found:\n{response}")

    # min() keeps the first of equally-placed hits, preserving list order on ties.
    return min(hits, key=lambda hit: hit[0])[1]

### Label Record

In [6]:
async def get_label_async(dataset_details, gen_ai_details, text, event=None):
    """Label one record with one model and return the label plus raw material.

    Builds the prompt, queries the model (awaiting the OpenAI coroutine, or
    calling the blocking Ollama function directly), then parses the reply.

    Returns:
        ``(label, labels, responses)``:
        * string reply -> ``(label, [label], [reply])``
        * list reply   -> ``(consensus, per_reply_labels, reply_list)`` where
          ``consensus`` is a label reaching ``dataset_details["num_consensus"]``
          votes (the last such label in ``"all_labels"`` order), falling back
          to ``dataset_details['unlabeled_label']``.
        Any other reply type yields ``None``.
    """
    prompt = build_label_prompt(dataset_details['id'], text, event)

    if gen_ai_details["platform"] == "OpenAI":
        reply = await generate_synthetic_label_async(gen_ai_details, prompt, dataset_details['min_labelers'])
    else:
        # For non-async platforms like Ollama
        reply = generate_synthetic_label(gen_ai_details, prompt, dataset_details['min_labelers'])

    print(reply)

    # Single-answer models hand back one string.
    if isinstance(reply, str):
        single = parse_label_response(reply, dataset_details, gen_ai_details['terminator'])
        print(f"--------------------------------------------- Label: {single} ---------------------------------------------\n ")
        return single, [single], [reply]

    if not isinstance(reply, list):
        return None

    # Multi-sample models hand back one string per labeler.
    votes = [
        parse_label_response(item, dataset_details, gen_ai_details['terminator'])
        for item in reply
    ]

    fallback = dataset_details['unlabeled_label']
    qualifying = [
        candidate
        for candidate in dataset_details["all_labels"]
        if votes.count(candidate) >= dataset_details["num_consensus"]
    ]
    agreed = qualifying[-1] if qualifying else fallback

    print(f"CONSENSUS LABEL: {agreed}\n")
    return agreed, votes, reply

In [7]:
async def process_batch(df_batch, dataset_details, ai_details, context_column=None):
    """Label every row of ``df_batch`` concurrently.

    One ``get_label_async`` coroutine is created per row (passing the row's
    ``context_column`` value when that column is named and present) and all
    of them are awaited together.

    Returns:
        A list aligned with the rows of ``df_batch``. Each entry is the
        ``get_label_async`` result for that row, or ``(None, None, None)``
        if labelling that row raised; the error is printed, not propagated.
    """
    use_context = bool(context_column) and context_column in df_batch.columns

    pending = [
        get_label_async(dataset_details, ai_details, row.text, row[context_column])
        if use_context
        else get_label_async(dataset_details, ai_details, row.text)
        for _, row in df_batch.iterrows()
    ]

    outcomes = await asyncio.gather(*pending, return_exceptions=True)

    # Swap exceptions for a placeholder so positions stay aligned with rows
    processed = []
    for position, outcome in enumerate(outcomes):
        if not isinstance(outcome, Exception):
            processed.append(outcome)
            continue
        print(f"Error processing row {df_batch.index[position]}: {str(outcome)}")
        processed.append((None, None, None))  # Placeholder for error

    return processed




In [8]:
async def process_dataset_async(df_dataset, dataset_details, ai_details, batch_size):
    """Process the entire dataset in batches.

    Rows are sliced positionally into consecutive chunks of ``batch_size``
    and each chunk is labelled concurrently via ``process_batch``; chunks
    themselves run one after another. Progress is shown with a tqdm bar.

    Args:
        df_dataset: DataFrame to label; a ``'context'`` column, if present,
            is forwarded to ``process_batch`` as the context column.
        dataset_details: Dataset config passed through to ``process_batch``.
        ai_details: Model config; ``'id'`` is used in the progress-bar text.
        batch_size: Number of rows per chunk; must be at least 1.

    Returns:
        Dict mapping each successfully labelled row's index label to its
        ``(label, labels, responses)`` tuple. Rows whose label is ``None``
        (the error placeholder) are omitted.

    Raises:
        ValueError: If ``batch_size`` is less than 1. Without this check a
            negative size would silently process nothing.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    results = {}
    total_rows = len(df_dataset)
    context_column = 'context' if 'context' in df_dataset.columns else None

    with tqdm.tqdm(total=total_rows, desc=f"Processing with {ai_details['id']}") as pbar:
        for start in range(0, total_rows, batch_size):
            # iloc clamps the upper bound, so the last chunk may be shorter.
            df_batch = df_dataset.iloc[start:start + batch_size]

            batch_results = await process_batch(
                df_batch,
                dataset_details,
                ai_details,
                context_column
            )

            # Store results under the row's index label
            for row_label, (label, labels, responses) in zip(df_batch.index, batch_results):
                if label is not None:  # Skip errors
                    results[row_label] = (label, labels, responses)

            pbar.update(len(df_batch))

    return results

# Main Loop

In [9]:
def _write_response_log(ai_model, dataset_details, row_label, record_text, responses):
    """Write one record's raw model responses to a text file under export_location."""
    log_path = os.path.join(export_location, 'ai_responses', ai_model,
                            f"{dataset_details['id']}_{row_label}.txt")
    with open(log_path, 'w', encoding="utf-8") as file:
        file.write(f"Record: \"{record_text}\"\n{ai_model}\n{'-'*120}\n\n")
        for response_index, line in enumerate(responses):
            file.write(f"Response {response_index+1}:\n{line}\n{'-'*120}\n\n")


async def main_async():
    """Label the configured dataset with every configured model and export it.

    For each entry in ``datasets`` the CSV at ``import_location`` is loaded,
    and for each entry in ``ai_models`` a column named after the model's
    ``id`` is filled with labels: OpenAI-platform models are processed in
    concurrent batches, other platforms row by row. The frame is written to
    ``export_location/export_name`` after every model and once more at the end.

    Returns:
        The labelled DataFrame of the last dataset that could be loaded, or
        ``None`` if no dataset was loaded.
    """
    global semaphore
    semaphore = asyncio.Semaphore(MAX_CONCURRENT_CALLS)

    # Models whose raw responses are saved to disk. The config cell lists the
    # model as 'DeepSeek' (formerly 'DeepSeek-R1 14B'); accept both so the
    # response logs are actually written.
    response_log_models = ('DeepSeek', 'DeepSeek-R1 14B')
    export_path = os.path.join(export_location, export_name)
    df_dataset = None

    for dataset in datasets:
        dataset_details = dataset_config[dataset]  # Get dataset info

        try:
            df_loaded = pd.read_csv(import_location)  # Load dataset
        except FileNotFoundError:
            # Skip to next dataset; do not clobber the export with an empty frame.
            print(f"Input file not found, skipping {dataset}: {import_location}")
            continue
        df_dataset = df_loaded
        has_context = 'context' in df_dataset.columns

        for ai_model in ai_models:
            ai_details = gen_ai_config.model[ai_model]  # Get model info
            label_column = ai_details['id']
            save_responses = ai_model in response_log_models

            # Create the label column, or blank an existing one when overwriting.
            # NOTE(review): with overwrite_previous_labels False an existing column
            # is kept, but rows are still relabelled below -- confirm intent.
            if label_column not in df_dataset.columns or overwrite_previous_labels is True:
                df_dataset[label_column] = ''

            os.makedirs(export_location, exist_ok=True)
            if save_responses:
                os.makedirs(os.path.join(export_location, 'ai_responses', ai_model), exist_ok=True)

            # Process using async if OpenAI, otherwise use synchronous approach
            if ai_details["platform"] == "OpenAI":
                print(f"Processing {ai_model} asynchronously...")

                results = await process_dataset_async(df_dataset, dataset_details, ai_details, BATCH_SIZE)

                # Update the dataframe with results
                for idx, (label, labels, responses) in results.items():
                    df_dataset.at[idx, label_column] = label

                    if save_responses:
                        # idx is an index label, so look the text up by label.
                        _write_response_log(ai_model, dataset_details, idx,
                                            df_dataset.at[idx, 'text'], responses)

            else:
                # Original synchronous approach for non-OpenAI models
                print(f"Processing {ai_model} synchronously...")
                for df_index, row in df_dataset.iterrows():
                    print(f'{ai_model} -> {df_index}')
                    successful = False

                    # NOTE(review): retries are unbounded; a response that can never
                    # be parsed would loop forever -- consider a retry cap.
                    while not successful:
                        try:
                            if has_context:
                                label, labels, responses = await get_label_async(dataset_details, ai_details, row.text, row.context)
                            else:
                                label, labels, responses = await get_label_async(dataset_details, ai_details, row.text)

                        except ResponseError:  # Recover from a CUDA illegal memory access error
                            print("Recovering from CUDA illegal memory access error...")
                            await asyncio.sleep(5)

                        except APIConnectionError:
                            # Count down 10..1 (the old range stopped at 2).
                            for seconds_left in range(10, 0, -1):
                                print(f"Lost connection to API. Retrying in {seconds_left} seconds...", end='\r', flush=True)
                                await asyncio.sleep(1)

                        else:
                            df_dataset.at[df_index, label_column] = label

                            if save_responses:
                                _write_response_log(ai_model, dataset_details, df_index,
                                                    df_dataset.at[df_index, 'text'], responses)

                            successful = True

            # Checkpoint after every model so finished work survives a later failure.
            df_dataset.to_csv(export_path, index=False)

    print("Finished processing all datasets and models.")
    if df_dataset is not None:
        os.makedirs(export_location, exist_ok=True)
        # index=False (not index_label=False): the latter still writes the index,
        # which reappears as an 'Unnamed: 0' column when the file is re-read.
        df_dataset.to_csv(export_path, index=False)

    return df_dataset


# Testing

# Scratch loop for spot-checking labels from a single model.
# NOTE(review): this looks stale -- it calls `get_label`, which is not defined in
# the visible notebook (only the coroutine `get_label_async` is), and it looks up
# the model key 'DeepSeek-R1 14B' while the config cell lists 'DeepSeek'.
# It also reads `row.event` and dataset_details['relpath']; confirm both exist
# before running.
for dataset in datasets:
    dataset_details = dataset_config[dataset]               #   Get dataset info
    df_dataset = pd.read_csv(dataset_details['relpath'])    #  Load dataset

    for ai_model in ['DeepSeek-R1 14B']:
        ai_details = gen_ai_config.model[ai_model]                          #   Get model info
        df_dataset.insert(len(df_dataset.columns), ai_details['id'], '')    #   Create a new column or clear old data for the AI generated labels
        # Prints element [1] of each result; the labels are not stored in the new column.
        for row in df_dataset.itertuples():
            print(get_label(dataset_details, ai_details, row.text, row.event)[1])
        #if 'event' in df_dataset.columns:
        #    df_dataset[ai_details['id']] = df_dataset.apply(lambda row: get_label(dataset_details, ai_details, row.text, row.event)[1], axis=1)
        #else:
        #    df_dataset[ai_details['id']] = df_dataset.apply(lambda row: get_label(dataset_details, ai_details, row['text'])[1])
#



In [10]:
# For running an asyncio event loop within Jupyter Notebook's event loop
# (the imports cell already imports nest_asyncio and calls apply(); this repeats it)
import nest_asyncio
nest_asyncio.apply()

# Run the full labelling pipeline: every model in `ai_models` over the configured dataset.
# Top-level `await` works here because the cell runs inside IPython's event loop.
await main_async()

Processing Llama3.1 synchronously...
Llama3.1 -> 0
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
Llama3.1 -> 1
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
Llama3.1 -> 2
sadness
--------------------------------------------- Label: sadness ---------------------------------------------
 
Llama3.1 -> 3
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
Llama3.1 -> 4
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
Llama3.1 -> 5
fear
--------------------------------------------- Label: fear ---------------------------------------------
 
Llama3.1 -> 6
others
--------------------------------------------- Label: other ---------------------------------------------
 
Llama3.1 -> 7
joy
--------------------------------------------- Label: joy --------

Processing with gpt-4o-mini:   0%|          | 0/400 [00:00<?, ?it/s]

others
--------------------------------------------- Label: other ---------------------------------------------
 
sadness
--------------------------------------------- Label: sadness ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
others
--------------------------------------------- Label: other ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------

Processing with gpt-4o-mini:  12%|█▎        | 50/400 [00:19<02:17,  2.55it/s]

joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
sadness
--------------------------------------------- Label: sadness -------------------------------------------

Processing with gpt-4o-mini:  25%|██▌       | 100/400 [00:38<01:53,  2.64it/s]

joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
sadness
--------------------------------------------- Label: sadness ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
-------

Processing with gpt-4o-mini:  38%|███▊      | 150/400 [00:55<01:31,  2.72it/s]

anger
--------------------------------------------- Label: anger ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
fear
--------------------------------------------- Label: fear ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
fear
--------------------------------------------- Label: fear -------------------------------------------

Processing with gpt-4o-mini:  50%|█████     | 200/400 [01:14<01:13,  2.72it/s]

anger
--------------------------------------------- Label: anger ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
disgust
--------------------------------------------- Label: disgust ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 

Processing with gpt-4o-mini:  62%|██████▎   | 250/400 [01:32<00:54,  2.73it/s]

joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
joy
---

Processing with gpt-4o-mini:  75%|███████▌  | 300/400 [01:48<00:35,  2.84it/s]

anger
--------------------------------------------- Label: anger ---------------------------------------------
 
others
--------------------------------------------- Label: other ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
disgust
--------------------------------------------- Label: disgust ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy --------------------------------------

Processing with gpt-4o-mini:  88%|████████▊ | 350/400 [02:04<00:17,  2.92it/s]

anger
--------------------------------------------- Label: anger ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
disgust
--------------------------------------------- Label: disgust ---------------------------------------------
 
joy
--------------------------------------------- Label: joy ---------------------------------------------
 
disgust
--------------------------------------------- Label: disgust ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
others
--------------------------------------------- Label: other --------------------------------------

Processing with gpt-4o-mini: 100%|██████████| 400/400 [02:20<00:00,  2.84it/s]


joy
--------------------------------------------- Label: joy ---------------------------------------------
 
anger
--------------------------------------------- Label: anger ---------------------------------------------
 
Processing o3-mini asynchronously...


Processing with o3-mini:   0%|          | 0/400 [00:00<?, ?it/s]

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['others', 'others', 'others']
CONSENSUS LABEL: other

['fear', 'fear', 

Processing with o3-mini:  12%|█▎        | 50/400 [00:30<03:32,  1.65it/s]

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: j

Processing with o3-mini:  25%|██▌       | 100/400 [00:58<02:55,  1.71it/s]

['anger', 'anger', 'sadness']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['others', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'surprise']
CONSENSUS LABEL: joy

['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness

['anger', 'anger',

Processing with o3-mini:  38%|███▊      | 150/400 [01:30<02:31,  1.65it/s]

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['surprise', 'surprise', 'surprise']
CONSENSUS LABEL: surprise

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['surprise', 'surprise', 'surprise']
CONSENSUS LABEL: surprise

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['surprise', 'surprise', 'surprise']
CONSENSUS LABEL: surprise

['anger', 'a

Processing with o3-mini:  50%|█████     | 200/400 [02:04<02:07,  1.57it/s]

['fear', 'fear', 'fear']
CONSENSUS LABEL: fear

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['fear', 'fear', 'fear']
CONSENSUS LABEL: fear

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['surprise', 'surprise', 'surprise']
CONSENSUS LABEL: surprise

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['others', 'others', 'others']
CONSENSUS LABEL: other

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger

Processing with o3-mini:  62%|██████▎   | 250/400 [02:32<01:31,  1.64it/s]

['anger', 'disgust', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['fear', 'fear', 'fear']
CONSENSUS LABEL: fear

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

Processing with o3-mini:  75%|███████▌  | 300/400 [03:01<00:59,  1.67it/s]

['surprise', 'anger', 'surprise']
CONSENSUS LABEL: surprise

['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['fear', 'fear', 'fear']
CONSENSUS LABEL: fear

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['anger', 'anger', 'anger']
CONSENSUS LABEL

Processing with o3-mini:  88%|████████▊ | 350/400 [03:37<00:31,  1.57it/s]

['others', 'others', 'others']
CONSENSUS LABEL: other

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['surprise', 'surprise', 'surprise']
CONSENSUS LABEL: surprise

['sadness', 'sadness', 'sadness']
CONSENSUS LABEL: sadness

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['joy', 'joy', 'joy']
CONSENSUS LABEL: joy

['sadness', 'sadness'

Processing with o3-mini: 100%|██████████| 400/400 [04:05<00:00,  1.63it/s]

['others', 'others', 'others']
CONSENSUS LABEL: other

['anger', 'anger', 'anger']
CONSENSUS LABEL: anger

Finished processing all datasets and models.





In [12]:
# Reload the exported CSV from disk to inspect the labels that were written.
# If an 'Unnamed: 0' column shows up, the file was saved with its index included.
df_dataset = pd.read_csv(os.path.join(export_location, export_name))

df_dataset

Unnamed: 0,original index,context,text,labels,llama3.1:8b-instruct-q8_0,deepseek-r1:14b,gpt-4o-mini,o3-mini
0,4183,ChampionsLeague,Love that Jurgen just told the boys to smile a...,joy,joy,joy,joy,joy
1,2733,WorldBookDay,#WorldBookDay To some amazing books that have ...,joy,joy,joy,joy,joy
2,197,NotreDame,I know it’s been over 24 hours and for some re...,sadness,sadness,sadness,sadness,sadness
3,3519,GretaThunberg,Educating for Extinction. (climate change as a...,joy,anger,fear,other,anger
4,912,ChampionsLeague,I hope Salah scores a hat trick in the second ...,joy,joy,joy,joy,joy
...,...,...,...,...,...,...,...,...
395,3956,GameOfThrones,"I’ve watched episode 3, the battle for winterf...",joy,joy,joy,joy,joy
396,5607,WorldBookDay,"""I do believe something very magical can happe...",joy,joy,joy,joy,joy
397,1635,NotreDame,What we know so far about the #NotreDameCathed...,fear,anger,anger,anger,anger
398,335,ChampionsLeague,Year 2030 Me: Anyone remember 2019 when Liverp...,disgust,anger,joy,sadness,other
