In [2]:
from datasets import load_dataset

import numpy as np
import pandas as pd
import transformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

from huggingface_hub import login
import time
import datetime
import re
from tqdm import tqdm
from collections import defaultdict
from sentence_transformers import SentenceTransformer, util
import random


import json
import ast
import emoji
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns




# for dimensionality reduction and visualizing the embeddings
from sklearn.decomposition import PCA



In [None]:
# Read the tab-separated enISEAR corpus and collect the distinct values of its
# 'Prior_Emotion' column, in order of first appearance.
enisear = pd.read_csv('../enISEAR.tsv', sep='\t')
emotions = enisear['Prior_Emotion'].unique().tolist()
emotions

In [None]:
# Sanity check: label distribution of the 'llama33_label' column in the Qwen3-32B file.
# NOTE(review): assumes this CSV has already been labeled and contains 'llama33_label' — confirm.
test = pd.read_csv('full_emotions_labeled/qwen3_32B_full_emotions.csv')
test['llama33_label'].value_counts()

In [None]:

# Hugging Face Hub identifiers of the instruction-tuned models used as labelers.
model_id_33 = "meta-llama/Llama-3.3-70B-Instruct"
model_id_31 = "meta-llama/Llama-3.1-70B-Instruct"
model_id_qwen3_32B = "Qwen/Qwen3-32B"

def create_pipeline(model_id):
    """Load a 4-bit quantized causal LM and wrap it in a text-generation pipeline.

    Args:
        model_id (str): identifier of the model passed to ``from_pretrained``

    Returns:
        tuple ``(pipeline_llm, model, tokenizer)``: the text-generation
        pipeline together with the model and tokenizer it was built from
    """
    # NF4 4-bit weights with double quantization; bfloat16 is the compute dtype.
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    load_kwargs = {
        "device_map": 0,  # load the whole model onto device index 0
        "quantization_config": quantization,
        "torch_dtype": torch.bfloat16,
    }
    model = AutoModelForCausalLM.from_pretrained(model_id, **load_kwargs)
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    pipeline_llm = pipeline("text-generation", model=model, tokenizer=tokenizer)
    return pipeline_llm, model, tokenizer

# Release cached GPU memory held by PyTorch before loading a model.
torch.cuda.empty_cache()

# Alternative labelers, currently disabled.
# NOTE(review): the Llama-3.1 labeling loop further down uses `pipeline_llama_31`,
# which only exists when the next line is enabled.
#pipeline_llama_31, _, _ = create_pipeline(model_id_31)

#torch.cuda.empty_cache()

#pipeline_llama_33, _, _  = create_pipeline(model_id_33)

#torch.cuda.empty_cache()

# Active labeler: Qwen3-32B. The pipeline object is discarded; only the raw model
# and tokenizer are kept (they are what run_qwen takes as arguments).
_, pipeline_qwen3_32B_model, pipeline_qwen3_32B_tokenizer = create_pipeline(model_id_qwen3_32B)
# NOTE(review): `model_id_qwen3_30BA3B` is not defined in the visible cells.
#_, pipeline_qwen3_30BA3B_model, pipeline_qwen3_30BA3B_tokenizer = create_pipeline(model_id_qwen3_30BA3B)




In [5]:


# Files to (re-)label. Each entry is read from `data_to_label` and the labeled
# result is written to `saving_file_path`. Both point at the same file, so a
# labeling run overwrites its input in place.
# Every file is listed exactly once so that a labeling loop never processes
# (and overwrites) the same file twice.
configs = [
    {
        "data_to_label": f"full_emotions_labeled/{file_name}",
        "saving_file_path": f"full_emotions_labeled/{file_name}",
    }
    for file_name in (
        "gpt4o_full_emotions.csv",
        "gpt4o_mini_full_emotions.csv",
        "llama31_full_emotions.csv",
        "llama33_full_emotions.csv",
        "enisear_preprocessed.csv",
        "qwen3_32B_full_emotions.csv",
    )
]









In [7]:
import os

from huggingface_hub import login

# Never store the token in the notebook: read it from the HF_TOKEN environment
# variable. When the variable is unset (or empty) the token is None, which makes
# `login` ask for it interactively instead of validating an empty string.
API_TOKEN = os.environ.get("HF_TOKEN") or None

login(token=API_TOKEN)

In [9]:


def create_prompt(text, examples=None, random_state=None):
    """Create the 1-shot chat prompt used for all LLMs.

    Args:
        text: text to label
        examples (dataframe, optional): pool the 1-shot example is drawn from;
            needs the columns ``text`` and ``sentiment``. Defaults to the
            module-level ``enisear`` dataframe.
        random_state (optional): forwarded to ``DataFrame.sample`` so the
            example selection can be made reproducible. With the default
            ``None`` a fresh random example is drawn on every call.

    Returns:
        list of chat messages: the system instruction, one randomly selected
        labeled example and the labeling task for ``text``
    """
    label_prompt = """ Given a piece of text, you have to label to which of the following emotions
    it corresponds. The options are: Anger, Fear, Guilt, Shame, Joy, Sadness, Disgust. Do not
    choose any other emotion. Please return only one of the previous options as
    a single word. Do not provide an explanation.
            """
    task = f"""What is the label of this text: {text} """

    # Fall back to the notebook-level corpus only when no pool is supplied, so
    # the function can also be used (and tested) without that global.
    pool = enisear if examples is None else examples
    # NOTE(review): nothing prevents the sampled example from being `text`
    # itself when the pool is also the data being labeled — confirm this is acceptable.
    example_row = pool.sample(1, random_state=random_state).iloc[0]
    example = f"text: {example_row['text']}, emotion: {example_row['sentiment']}"

    # The example is sent as its own user turn, followed by the actual task.
    messages = [
        {"role": "system", "content": label_prompt},
        {"role": "user", "content": example},
        {"role": "user", "content": task}
    ]

    return messages



In [8]:

def run_llama(df, pipeline):
    """Label every row of the dataframe three times with a llama pipeline.

    Args:
        df (dataframe): rows to label; the text is read from the ``text`` column
        pipeline : previously defined text-generation pipeline of the model

    Returns:
        dataframe with the original columns plus ``pred_1``, ``pred_2`` and
        ``pred_3`` holding the raw model answers
    """
    def ask_once(text):
        # The prompt is rebuilt for every run, so each run gets its own
        # randomly drawn 1-shot example.
        generation = pipeline(
            create_prompt(text),
            max_new_tokens=50,
            temperature=0.4,
            pad_token_id=pipeline.tokenizer.eos_token_id,  # set pad_token_id explicitly
        )
        # Take the content of the last message of the returned conversation.
        return generation[0]['generated_text'][-1]['content']

    records = []
    for _, row in tqdm(df.iterrows(), desc="Processing labeling", total=df.shape[0]):
        answers = [ask_once(row['text']) for _ in range(3)]
        records.append({
            **row.to_dict(),
            **{f'pred_{run}': answer for run, answer in enumerate(answers, start=1)},
        })

    return pd.DataFrame(records)

#output_llama31 = run_llama(enisear[:2], pipeline_llama_31)
#output_llama33 = run_llama(messages[:10], pipeline_llama_33)
#output_llama31

In [10]:
def run_qwen(df, model, tokenizer):
    """Label every row of the dataframe three times with a qwen model.

    Args:
        df (dataframe): rows to label; the text is read from the ``text`` column
        model and tokenizer : previously defined for the model

    Returns:
        dataframe with the original columns plus ``pred_1``, ``pred_2`` and
        ``pred_3`` holding the decoded model answers
    """
    # presumably the id of Qwen3's closing `</think>` token — TODO confirm
    think_end_id = 151668

    def ask_once(text):
        # The prompt is rebuilt for every run, so each run gets its own
        # randomly drawn 1-shot example. Thinking mode is switched off.
        chat_text = tokenizer.apply_chat_template(
            create_prompt(text),
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )
        model_inputs = tokenizer([chat_text], return_tensors="pt").to(model.device)

        generated_ids = model.generate(**model_inputs, max_new_tokens=32768)
        # Drop the prompt tokens; keep only the newly generated ones.
        prompt_length = len(model_inputs.input_ids[0])
        new_ids = generated_ids[0][prompt_length:].tolist()

        # Decode only what follows the last occurrence of think_end_id
        # (the whole output when that token is absent).
        if think_end_id in new_ids:
            start = len(new_ids) - new_ids[::-1].index(think_end_id)
        else:
            start = 0
        return tokenizer.decode(new_ids[start:], skip_special_tokens=True).strip("\n")

    records = []
    for _, row in tqdm(df.iterrows(), desc="Processing labels", total=df.shape[0]):
        answers = [ask_once(row['text']) for _ in range(3)]
        records.append({
            **row.to_dict(),
            **{f'pred_{run}': answer for run, answer in enumerate(answers, start=1)},
        })

    return pd.DataFrame(records)


#output_qwen3_32B = run_qwen(enisear[:2], pipeline_qwen3_32B_model, pipeline_qwen3_32B_tokenizer)


In [10]:

def run_llama_with_logits(df, class_labels=["Anger", "Joy", "Sadness", "Fear"], model=None, tokenizer=None):
    """Score each text by the next-token logits of the candidate emotion labels.

    For every row the 1-shot chat prompt is rendered with the tokenizer's chat
    template, the model is run once, and the logits at the last position are
    read for the first token of every label in ``class_labels``. A softmax over
    just those logits gives the per-label probabilities.

    Args:
        df (dataframe): needs the columns ``text``, ``Prior_Emotion``,
            ``Gender`` and ``Leia_Label``
        class_labels (list, optional): candidate labels. Only the first token
            of each label is scored. The default list is never mutated.
        model (optional): causal LM to score with. Defaults to the
            module-level ``model`` if one exists.
        tokenizer (optional): tokenizer matching ``model``. Defaults to the
            module-level ``tokenizer`` if one exists.

    Returns:
        dataframe with the columns ``text``, ``Prior_Emotion``, ``Gender``,
        ``Leia_Label``, ``Predicted_Emotion``, ``Anger_Prob`` (NaN when
        "Anger" is not among ``class_labels``), ``All_Probs`` and ``All_Logits``

    Raises:
        ValueError: if no model/tokenizer is passed and none is defined at
            module level
    """
    # Keep supporting the old call style that relied on notebook globals, but
    # fail with a clear message instead of a NameError when they are missing.
    if model is None:
        model = globals().get("model")
    if tokenizer is None:
        tokenizer = globals().get("tokenizer")
    if model is None or tokenizer is None:
        raise ValueError(
            "run_llama_with_logits needs a model and a tokenizer: pass them via "
            "the `model=` / `tokenizer=` arguments"
        )

    class_labels = list(class_labels)
    # Loop-invariant: id of the first token of every candidate label.
    label_token_ids = [
        tokenizer(label, add_special_tokens=False)['input_ids'][0]
        for label in class_labels
    ]
    anger_position = class_labels.index("Anger") if "Anger" in class_labels else None

    output_list = []
    for _, row in tqdm(df.iterrows(), desc="Processing with logits", total=df.shape[0]):
        # create_prompt returns chat messages, so they have to go through the
        # chat template; tokenizing in the same call avoids adding special
        # tokens a second time.
        inputs = tokenizer.apply_chat_template(
            create_prompt(row['text']),
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        with torch.no_grad():
            outputs = model(**inputs, output_hidden_states=False, return_dict=True)

        # Logits for the token that would follow the generation prompt.
        last_token_logits = outputs.logits[0, -1]
        label_logits = {
            label: float(last_token_logits[token_id])
            for label, token_id in zip(class_labels, label_token_ids)
        }

        logits_tensor = torch.tensor([label_logits[label] for label in class_labels])
        probs = torch.softmax(logits_tensor, dim=-1)
        predicted_label = class_labels[torch.argmax(probs).item()]
        anger_prob = probs[anger_position].item() if anger_position is not None else float("nan")

        output_list.append([
            row['text'], row['Prior_Emotion'], row['Gender'], row['Leia_Label'],
            predicted_label, anger_prob, probs.tolist(), label_logits
        ])

    columns = ['text', 'Prior_Emotion', 'Gender', 'Leia_Label',
               'Predicted_Emotion', 'Anger_Prob', 'All_Probs', 'All_Logits']
    return pd.DataFrame(output_list, columns=columns)


In [12]:

def majority_calculation(df, model, label_cols=None):
    """From all runs, get the majority answer. Choose one at random if tied.

    Args:
        df (dataframe): dataframe containing the predictions of one model.
            It is modified in place (a column is added) and also returned.
        model (str): name of the model; the result is stored in the column
            ``f'{model}_label'``
        label_cols (list, optional): names of the columns holding the
            individual predictions, e.g. ``['pred_1', 'pred_2', 'pred_3']``.
            Defaults to the module-level ``emotions`` list, looked up when the
            function is called.

    Returns:
        the same dataframe with the majority-vote column added. Rows without
        any non-missing prediction get a missing value.
    """
    if label_cols is None:
        # Resolved at call time so the function does not depend on `emotions`
        # existing (or keeping its value) at definition time.
        label_cols = emotions
    label_cols = list(label_cols)

    def find_max(row):
        counts = row.value_counts()  # missing predictions are not counted
        if counts.empty:
            return None
        max_labels = counts[counts == counts.max()].index.tolist()
        # Choose randomly if several labels share the top count
        return random.choice(max_labels) if len(max_labels) > 1 else max_labels[0]

    # Built as a plain list (one entry per row, in row order) so empty frames work too.
    df[f'{model}_label'] = [find_max(row) for _, row in df[label_cols].iterrows()]

    return df




In [None]:
#anno_maj = pd.read_csv('llama_labeled_exp1.csv')

#synth = pd.read_csv(data_to_label, index_col = 0)
#anno_maj = anno_maj.loc[:, ~anno_maj.columns.str.startswith('Unnamed')]


# Label `all_models` three times with Qwen3-32B, then add the majority vote
# as the 'qwen3_32B_label' column.
# NOTE(review): `all_models` is not defined in any visible cell — confirm where
# it comes from; on a fresh kernel this line would raise NameError.
llama = run_qwen(all_models, pipeline_qwen3_32B_model, pipeline_qwen3_32B_tokenizer)

llama = majority_calculation(llama, 'qwen3_32B',['pred_1', 'pred_2', 'pred_3'])


#llama = run_llama(anno_maj, pipeline_llama_31)
#llama = run_llama(anno_maj, pipeline_llama_33)

#anno_maj_labeled = majority_calculation(llama, 'llama31',['pred_1', 'pred_2', 'pred_3'])
#anno_maj_labeled = majority_calculation(llama, 'llama33',['pred_1', 'pred_2', 'pred_3'])

In [None]:
# Label every configured file with Llama-3.1 and write the result to its saving path.
# NOTE(review): `pipeline_llama_31` is only created by a line that is currently
# commented out in the model-loading cell — enable it before running this loop.
for config in configs:
    data_to_label, saving_file_path = config["data_to_label"], config["saving_file_path"]

    synth = pd.read_csv(data_to_label, index_col=0)
    # Keep only the columns whose name does not start with 'Unnamed'.
    named_columns = ~synth.columns.str.startswith('Unnamed')
    synth = synth.loc[:, named_columns]

    # Three predictions per row, then the majority vote as 'llama31_label'.
    llama = run_llama(synth, pipeline_llama_31)
    llama = majority_calculation(llama, 'llama31', ['pred_1', 'pred_2', 'pred_3'])

    llama.to_csv(saving_file_path)


In [None]:



# Label every configured file with Qwen3-32B and write the result to its saving path.
for config in configs:
    data_to_label, saving_file_path = config["data_to_label"], config["saving_file_path"]

    synth = pd.read_csv(data_to_label, index_col=0)
    # Keep only the columns whose name does not start with 'Unnamed'.
    named_columns = ~synth.columns.str.startswith('Unnamed')
    synth = synth.loc[:, named_columns]

    # Three predictions per row, then the majority vote as 'qwen3_32B_label'.
    llama = run_qwen(synth, pipeline_qwen3_32B_model, pipeline_qwen3_32B_tokenizer)
    llama = majority_calculation(llama, 'qwen3_32B', ['pred_1', 'pred_2', 'pred_3'])

    llama.to_csv(saving_file_path)


In [None]:


# Per dataset: the file holding the unlabeled data and the file the labeled
# version is stored in. One (key, source, destination) triple per dataset.
# NOTE(review): the keys 'gpt40_mini' (digit zero) and 'qwen32B' do not match
# the spelling used in the file names; they are kept as-is because they become
# the keys of `datasets` — confirm before renaming.
data_paths = {
    name: {"data_to_label": source, "saving_file_path": destination}
    for name, source, destination in [
        ('gpt4o',
         "../synth_data_openai/gpt4o_full_emotions.csv",
         "full_emotions_labeled/gpt4o_full_emotions.csv"),
        ('gpt40_mini',
         "../synth_data_openai/gpt4o_mini_full_emotions.csv",
         "full_emotions_labeled/gpt4o_mini_full_emotions.csv"),
        ('llama31',
         "llama31_full_emotions.csv",
         "full_emotions_labeled/llama31_full_emotions.csv"),
        ('llama33',
         "llama33_full_emotions.csv",
         "full_emotions_labeled/llama33_full_emotions.csv"),
        ('qwen32B',
         "synth_data_qwen/qwen3_32B_all_emotions.csv",
         "full_emotions_labeled/qwen3_32B_full_emotions.csv"),
        ('enisear',
         "enisear_preprocessed.csv",
         "full_emotions_labeled/enisear_preprocessed.csv"),
    ]
}


In [14]:
# Load every labeled dataset into memory, keyed by the names used in `data_paths`.
# The loop variable is deliberately not called `dict`: at notebook top level that
# would rebind the builtin and break any later `dict(...)` call in the kernel.
datasets = {}
for data, paths in data_paths.items():
    datasets[data] = pd.read_csv(paths['saving_file_path'], index_col=0)



In [None]:
# Show the names under which the labeled datasets were loaded.
datasets.keys()

In [None]:
# Map the label "Anxiety" to "Fear" in the 'llama31_label' column of every
# loaded dataset, then print the resulting label distribution.
for data, df in datasets.items():
    # A positional boolean mask changes exactly the matching rows. Writing via
    # an index label instead would also overwrite unrelated rows whenever the
    # index (read from the CSV's first column) contains duplicates.
    is_anxiety = (df['llama31_label'] == "Anxiety").to_numpy()
    df.loc[is_anxiety, 'llama31_label'] = "Fear"  # in place: `datasets[data]` is this same frame

    value_cts = df['llama31_label'].value_counts()
    print(f'dataset: {data}, \n value_cts {value_cts}')
