In [1]:
import time
import csv
from collections import defaultdict
from typing import List, Dict
import pandas as pd
from datasets import load_dataset
from tqdm.auto import tqdm
import ollama

# Configuration

In [2]:
# Path of the GI lexicon spreadsheet that load_gi_lexicons_from_xls() reads.
LEXICON_XLS = "inquireraugmented.xls"
# CSV receiving one (prompt, answer) row per test example.
OUTPUT_CSV = "llama_answers_1shot_prompt_answer.csv"
# CSV receiving one (text, true_labels, prompt, answer) row per test example.
EXTENDED_OUTPUT_CSV = "llama_answers_extended.csv"
# Model name handed to OllamaQueryClient and forwarded to ollama.chat().
OLLAMA_MODEL_NAME = "llama3.2:1b"

In [3]:
# Retry / throttling knobs for the LLM requests.
# NOTE(review): MAX_MESSAGES_BEFORE_COOLDOWN and GLOBAL_COOLDOWN_SECONDS are not
# referenced by any cell visible in this notebook, so the periodic cooldown they
# describe appears to be unimplemented — confirm before relying on it.
MAX_MESSAGES_BEFORE_COOLDOWN = 200  # intended: after sending this many prompts, sleep for GLOBAL_COOLDOWN_SECONDS
GLOBAL_COOLDOWN_SECONDS = 60        # intended cooldown duration (seconds)
MAX_RETRIES_PER_PROMPT = 6          # default number of attempts OllamaQueryClient.chat makes per prompt
INITIAL_BACKOFF_SECONDS = 2.0       # first backoff delay (seconds); doubled after each ordinary failure
RATE_LIMIT_SLEEP_ON_EXCEPTION = 30  # flat sleep (seconds) when an exception message looks rate-limit related

In [5]:
# The 28 emotion names offered to the model as valid answers (27 emotions + "neutral").
# NOTE(review): presumably listed in the same order as the integer label ids of the
# go_emotions "simplified" configuration — confirm before indexing by label id.
emotion_labels = [
    "admiration", "amusement", "anger", "annoyance", "approval", "caring", "confusion",
    "curiosity", "desire", "disappointment", "disapproval", "disgust", "embarrassment",
    "excitement", "fear", "gratitude", "grief", "joy", "love", "nervousness", "optimism",
    "pride", "realization", "relief", "remorse", "sadness", "surprise", "neutral",
]

In [6]:
# Maps every emotion label to the GI category names (written with a "_GI" suffix)
# associated with it. build_prompt() prints each list verbatim in the prompt and
# uses the union of all names to decide which lexicon word lists to include.
# NOTE(review): how this mapping was derived is not shown in this notebook —
# presumably it comes from a separate analysis; confirm its provenance.
emotion_to_gi = {
    "admiration": [
        "Active_GI", "Affil_GI", "Dav_GI", "Iav_GI", "Ovrst_GI", "Passive_GI",
        "Positiv_GI", "Pstv_GI", "Socrel_GI", "Strong_GI", "Sv_GI", "Virtue_GI"
    ],
    "amusement": [
        "Active_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ngtv_GI", "Strong_GI"
    ],
    "anger": [
        "Active_GI", "Dav_GI", "Hostile_GI", "Iav_GI", "Negativ_GI", "Ngtv_GI",
        "Passive_GI", "Strong_GI", "Sv_GI"
    ],
    "annoyance": [
        "Active_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ngtv_GI", "Passive_GI",
        "Socrel_GI", "Strong_GI", "Sv_GI"
    ],
    "approval": [
        "Active_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ovrst_GI", "Passive_GI",
        "Positiv_GI", "Pstv_GI", "Quan_GI", "Strong_GI", "Sv_GI", "Virtue_GI"
    ],
    "caring": [
        "Active_GI", "Dav_GI", "Iav_GI", "Passive_GI", "Positiv_GI", "Pstv_GI",
        "Socrel_GI", "Strong_GI", "Sv_GI"
    ],
    "confusion": [
        "Active_GI", "Comform_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ovrst_GI",
        "Passive_GI", "Positiv_GI", "Pstv_GI", "Solve_GI", "Strong_GI", "Sv_GI"
    ],
    "curiosity": [
        "Active_GI", "Comform_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ovrst_GI",
        "Passive_GI", "Positiv_GI", "Pstv_GI", "Solve_GI", "Strong_GI", "Sv_GI"
    ],
    "desire": [
        "Active_GI", "Affil_GI", "Dav_GI", "Iav_GI", "Need_GI", "Passive_GI",
        "Positiv_GI", "Pstv_GI", "Self_GI", "Socrel_GI", "Strong_GI", "Sv_GI", "Weak_GI"
    ],
    "disappointment": [
        "Active_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ngtv_GI", "Passive_GI",
        "Positiv_GI", "Pstv_GI", "Self_GI", "Socrel_GI", "Strong_GI", "Sv_GI"
    ],
    "disapproval": [
        "Active_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ngtv_GI", "Passive_GI",
        "Positiv_GI", "Pstv_GI", "Self_GI", "Socrel_GI", "Strong_GI", "Sv_GI"
    ],
    "disgust": [
        "Active_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ngtv_GI", "Passive_GI",
        "Strong_GI", "Sv_GI"
    ],
    "embarrassment": [
        "Active_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ngtv_GI", "Passive_GI",
        "Strong_GI", "Sv_GI"
    ],
    "excitement": [
        "Active_GI", "Affil_GI", "Dav_GI", "Iav_GI", "Passive_GI", "Positiv_GI",
        "Pstv_GI", "Socrel_GI", "Strong_GI", "Time_GI"
    ],
    "fear": [
        "Active_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ngtv_GI", "Ovrst_GI",
        "Passive_GI", "Strong_GI", "Sv_GI"
    ],
    "gratitude": [
        "Active_GI", "Affil_GI", "Dav_GI", "Iav_GI", "Passive_GI", "Positiv_GI",
        "Pstv_GI", "Strong_GI"
    ],
    "grief": [
        "Active_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ngtv_GI", "Ovrst_GI",
        "Passive_GI", "Positiv_GI", "Pstv_GI", "Strong_GI", "Sv_GI", "Weak_GI"
    ],
    "joy": [
        "Active_GI", "Affil_GI", "Dav_GI", "Iav_GI", "Ovrst_GI", "Passive_GI",
        "Positiv_GI", "Pstv_GI", "Strong_GI", "Sv_GI"
    ],
    "love": [
        "Active_GI", "Affil_GI", "Dav_GI", "Emot_GI", "Iav_GI", "Passive_GI",
        "Positiv_GI", "Pstv_GI", "Self_GI", "Socrel_GI", "Strong_GI", "Sv_GI"
    ],
    "nervousness": [
        "Active_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ngtv_GI", "Ovrst_GI",
        "Passive_GI", "Self_GI", "Socrel_GI", "Strong_GI", "Sv_GI", "Weak_GI"
    ],
    "optimism": [
        "Active_GI", "Dav_GI", "Iav_GI", "Positiv_GI", "Pstv_GI", "Strong_GI"
    ],
    "pride": [
        "Active_GI", "Affil_GI", "Dav_GI", "Iav_GI", "Ovrst_GI", "Passive_GI",
        "Positiv_GI", "Pstv_GI", "Strong_GI"
    ],
    "realization": [
        "Active_GI", "Affil_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ovrst_GI",
        "Passive_GI", "Positiv_GI", "Pstv_GI", "Quan_GI", "Solve_GI", "Strong_GI", "Sv_GI"
    ],
    "relief": [
        "Active_GI", "Affil_GI", "Dav_GI", "Iav_GI", "Ovrst_GI", "Passive_GI",
        "Positiv_GI", "Pstv_GI", "Strong_GI", "Sv_GI"
    ],
    "remorse": [
        "Active_GI", "Iav_GI", "Negativ_GI", "Ngtv_GI", "Passive_GI",
        "Strong_GI", "Sv_GI"
    ],
    "sadness": [
        "Active_GI", "Dav_GI", "Iav_GI", "Negativ_GI", "Ngtv_GI", "Ovrst_GI",
        "Passive_GI", "Self_GI", "Strong_GI", "Sv_GI"
    ],
    "surprise": [
        "Active_GI", "Dav_GI", "Iav_GI", "Ovrst_GI", "Passive_GI", "Positiv_GI",
        "Pstv_GI", "Socrel_GI", "Strong_GI", "Sv_GI"
    ],
    "neutral": [
        "Active_GI", "Dav_GI", "Iav_GI", "Ovrst_GI", "Passive_GI", "Positiv_GI",
        "Pstv_GI", "Quan_GI", "Strong_GI", "Sv_GI"
    ],
}

Helper functions

In [7]:
def normalize_gi_key(name: str) -> str:
    """
    Turn a GI category / column name into its canonical lookup key.

    The value is converted to ``str`` and surrounding whitespace is dropped;
    a single trailing ``_gi`` suffix (matched case-insensitively) is removed;
    the remainder is stripped again and lower-cased. ``None`` maps to ``""``.

    Example: 'Iav_GI' -> 'iav'
    """
    if name is None:
        return ""
    trimmed = str(name).strip()
    has_suffix = trimmed.lower().endswith("_gi")
    base = trimmed[:-3] if has_suffix else trimmed
    return base.strip().lower()

def load_gi_lexicons_from_xls(path: str) -> Dict[str, set]:
    """Read the first sheet of a GI lexicon workbook into ``{category: {words}}``.

    The sheet is expected to hold one word per row in an ``Entry`` column (if no
    column has that name, the first column is used) and one column per category.
    A word belongs to a category when its cell in that column is neither missing
    nor equal to ``0``.

    Args:
        path: Location of the Excel workbook.

    Returns:
        A plain dict mapping the normalized category key (see
        ``normalize_gi_key``) to the set of lower-cased, stripped words tagged
        with it. Categories without any tagged word are omitted, and the
        bookkeeping keys ``source``, ``othrtags`` and ``defined`` are dropped.
    """
    # Context manager so the workbook handle is released even if parsing fails.
    with pd.ExcelFile(path) as xls:
        sheet0 = xls.parse(0)

    if sheet0.shape[1] == 0:
        # Nothing to read; avoids an IndexError on columns[0] below.
        return {}

    if "Entry" not in sheet0.columns:
        # Treat the first column as the word column; rename on a copy, not in place.
        sheet0 = sheet0.rename(columns={sheet0.columns[0]: "Entry"})

    entries = sheet0["Entry"]
    words = entries.map(lambda raw: str(raw).strip().lower())
    # Rows whose entry is missing or blank never contribute a word.
    usable_rows = entries.notna() & (words != "")

    gi_lexicons_norm = defaultdict(set)
    # Every column except the word column is a category, wherever "Entry" sits.
    for col in (c for c in sheet0.columns if c != "Entry"):
        canonical = normalize_gi_key(col)
        if not canonical:
            continue
        cells = sheet0[col]
        tagged = usable_rows & cells.notna() & (cells != 0)
        # Only touch the defaultdict when there is something to add, so
        # categories without words do not appear as empty sets.
        if tagged.any():
            gi_lexicons_norm[canonical].update(words[tagged])

    # NOTE(review): "othrtags" may be a misspelling of the sheet's actual
    # "other tags" column name — confirm against the spreadsheet header.
    for meta in ("source", "othrtags", "defined"):
        gi_lexicons_norm.pop(meta, None)

    return dict(gi_lexicons_norm)

In [8]:
def words_for_gi(gi_lexicons_norm: Dict[str, set], gi_name: str) -> List[str]:
    """Return the sorted lexicon words stored for ``gi_name``.

    ``gi_name`` is normalized with ``normalize_gi_key`` before the lookup; an
    unknown category yields an empty list.
    """
    lookup_key = normalize_gi_key(gi_name)
    matches = gi_lexicons_norm.get(lookup_key, ())
    return sorted(matches)

In [9]:
def build_prompt(text: str, k: int, emotion_labels: List[str], emotion_to_gi: Dict[str, List[str]],
                 gi_lexicons_norm: Dict[str, set], emphasize_mapping: bool = True) -> str:
    """Assemble the prompt asking the LLM for exactly ``k`` emotions.

    The prompt consists of, in order: the task instructions with a one-shot
    example, the list of valid emotions, the emotion -> GI feature mapping,
    optionally (``emphasize_mapping``) the lexicon words of every GI feature
    named in the mapping, and finally the text with the closing question.

    Args:
        text: The text to classify.
        k: Number of emotions the model must return.
        emotion_labels: Valid emotion names, in the order they are listed.
        emotion_to_gi: GI feature names per emotion; emotions missing from the
            dict are printed with an empty list.
        gi_lexicons_norm: Normalized GI key -> words, used for the lexicon section.
        emphasize_mapping: Whether to append the lexicon section.

    Returns:
        The complete prompt string.
    """
    instructions = "".join([
        "You are given a short text. Your task: choose the emotion(s) that this text would most likely ",
        "evoke in most people.\n",
        "INSTRUCTIONS:\n",
        f"1) Choose exactly {k} emotion(s).\n",
        "2) Only choose from the provided list of valid emotions.\n",
        "3) Answer exactly as a Python list of strings (e.g. if asked for 2 emotions: ['fear', 'surprise']).\n",
        "4) Do not provide any explanation — only the Python list.\n",
        # One-shot example; comment the next line out for a 0-shot prompt.
        "\nFor example, If I ask to return 1 emotion for the following text: \"[NAME] - same fucking problem, slightly better command of the English language.\", you should return \"['anger']\", due to uses of words from the Negative_GI category. \n",
    ])

    pieces = [
        instructions,
        f"Valid emotions: [{', '.join(emotion_labels)}]\n",
        "\nMapping (emotion -> GI features):\n",
    ]
    # One bullet per valid emotion, showing its GI feature list verbatim.
    pieces.extend(f"  - {label}: {emotion_to_gi.get(label, [])}\n" for label in emotion_labels)

    if emphasize_mapping:
        pieces.append("Lexicon words for all GI features (examples):\n")
        # Distinct GI features across the whole mapping, in alphabetical order.
        distinct_features = set()
        for features in emotion_to_gi.values():
            distinct_features.update(features)
        for feature in sorted(distinct_features):
            vocab = words_for_gi(gi_lexicons_norm, feature)
            shown = ", ".join(vocab) if vocab else "(no examples found)"
            pieces.append(f"  - {feature}: {shown}\n")
        pieces.append("\n")

    pieces.append(f"\nText: '''{text}'''\n")
    pieces.append(
        f"\nChoose exactly {k} emotion(s) from the valid list above, using the mapping and lexicon hints. "
    )
    pieces.append("Respond ONLY with a single Python list of the chosen labels (no additional text).\n")

    return "".join(pieces)

In [10]:
# Test cell: render the complete prompt for the first test example as a sanity check
print("Loading GI lexicons from:", LEXICON_XLS)
gi_lexicons = load_gi_lexicons_from_xls(LEXICON_XLS)

print("Loading go_emotions simplified dataset...")
ds = load_dataset("go_emotions", "simplified")
test_ds = ds["test"]

for i in range(1):
    item = test_ds[i]
    text, true_labs = item["text"], item["labels"]
    # Ask for as many emotions as the example has gold labels, but at least one.
    k = max(1, len(true_labs))
    prompt = build_prompt(text, k, emotion_labels, emotion_to_gi, gi_lexicons, emphasize_mapping=True)

    # Single print call; sep="\n" reproduces one line per item.
    print(
        "=" * 80,
        f"Example {i+1} | Text: {text}",
        "-" * 80,
        prompt,
        "=" * 80 + "\n",
        sep="\n",
    )


Loading GI lexicons from: inquireraugmented.xls
Loading go_emotions simplified dataset...
Example 1 | Text: I’m really sorry about your situation :( Although I love the names Sapphira, Cirilla, and Scarlett!
--------------------------------------------------------------------------------
You are given a short text. Your task: choose the emotion(s) that this text would most likely evoke in most people.
INSTRUCTIONS:
1) Choose exactly 1 emotion(s).
2) Only choose from the provided list of valid emotions.
3) Answer exactly as a Python list of strings (e.g. if asked for 2 emotions: ['fear', 'surprise']).
4) Do not provide any explanation — only the Python list.

For example, If I ask to return 1 emotion for the following text: "[NAME] - same fucking problem, slightly better command of the English language.", you should return "['anger']", due to uses of words from the Negative_GI category. 
Valid emotions: [admiration, amusement, anger, annoyance, approval, caring, confusion, curiosity, de

# Ollama

In [11]:
class OllamaQueryClient:
    """Small wrapper around ``ollama.chat`` for single-turn prompts.

    Adds a short pause before every request plus retries with backoff, so a
    long batch run survives transient failures.
    """

    # Pause (seconds) before every request so prompts are not sent back-to-back.
    REQUEST_SPACING_SECONDS = 0.6

    # Lower-case fragments that mark an error message as rate-limit related.
    # A bare "rate" is deliberately absent: it also matches words like "generate".
    _RATE_LIMIT_MARKERS = ("rate limit", "rate-limit", "rate_limit", "ratelimit", "429", "too many")

    def __init__(self, model_name: str = "llama3"):
        """Remember the model name that is passed to ``ollama.chat``."""
        self.model = model_name

    @staticmethod
    def _extract_text(resp) -> str:
        """Pull the assistant text out of a response, whatever its shape.

        Tries attribute access (``resp.message.content``), then mapping access
        (``resp["message"]["content"]``), then a ``choices`` list. The first
        shape yielding a non-``None`` value wins (an empty answer stays empty);
        if none applies, the string form of the whole response is returned.
        """
        accessors = (
            lambda r: r.message.content,
            lambda r: r["message"]["content"],
            lambda r: r["choices"][0]["message"]["content"],
        )
        for read in accessors:
            try:
                content = read(resp)
            except Exception:
                # Best effort: this response shape does not apply, try the next one.
                continue
            if content is not None:
                return str(content).strip()
        return str(resp).strip()

    @classmethod
    def _is_rate_limited(cls, error) -> bool:
        """Heuristically decide whether ``error`` signals rate limiting."""
        # NOTE(review): assumes HTTP-style errors expose ``status_code`` — confirm
        # against the installed ollama client.
        if getattr(error, "status_code", None) == 429:
            return True
        message = str(error).lower()
        return any(marker in message for marker in cls._RATE_LIMIT_MARKERS)

    def chat(self, prompt: str, max_retries: int = None) -> str:
        """Send ``prompt`` to the model, retrying on failure.

        Args:
            prompt: User message content.
            max_retries: Maximum number of attempts. ``None`` (the default)
                means ``MAX_RETRIES_PER_PROMPT``, looked up at call time so that
                re-running the configuration cell takes effect immediately.

        Returns:
            The assistant text with surrounding whitespace removed.

        Raises:
            RuntimeError: If every attempt failed; the last underlying
                exception is attached as ``__cause__``.
        """
        if max_retries is None:
            max_retries = MAX_RETRIES_PER_PROMPT

        backoff = INITIAL_BACKOFF_SECONDS
        last_error = None
        attempt = 0
        while attempt < max_retries:
            attempt += 1
            try:
                # small spacing to avoid sending too fast
                time.sleep(self.REQUEST_SPACING_SECONDS)
                resp = ollama.chat(model=self.model, messages=[{"role": "user", "content": prompt}])
                return self._extract_text(resp)
            except Exception as e:
                last_error = e
                print(f"[Warning] attempt {attempt}/{max_retries} failed: {e}")
                if attempt >= max_retries:
                    # No further attempt follows, so waiting would only waste time.
                    break
                if self._is_rate_limited(e):
                    print(f"[Rate limit] sleeping {RATE_LIMIT_SLEEP_ON_EXCEPTION}s before retrying...")
                    time.sleep(RATE_LIMIT_SLEEP_ON_EXCEPTION)
                else:
                    time.sleep(backoff)
                    backoff *= 2
        raise RuntimeError(f"Failed to get response after {max_retries} retries.") from last_error

# Collect LLaMA answers

In [12]:
# Query the model once per test example and stream the results to two CSV files.
print("Loading GI lexicons from:", LEXICON_XLS)
gi_lexicons = load_gi_lexicons_from_xls(LEXICON_XLS)
print(f"Loaded {len(gi_lexicons)} GI categories")

print("Loading go_emotions simplified dataset (test split only)...")
ds = load_dataset("go_emotions", "simplified")
test_ds = ds["test"]
total = len(test_ds)
print(f"Test split size: {total}")

client = OllamaQueryClient(model_name=OLLAMA_MODEL_NAME)

# (example index, error message) for every example whose request ultimately failed.
errors = []

# `with` guarantees both files are flushed and closed even if the long-running
# loop is interrupted or raises part-way through.
with open(OUTPUT_CSV, "w", encoding="utf-8", newline="") as out_f, \
        open(EXTENDED_OUTPUT_CSV, "w", encoding="utf-8", newline="") as ext_f:
    writer = csv.writer(out_f)
    writer.writerow(["prompt", "answer"])

    ext_writer = csv.writer(ext_f)
    ext_writer.writerow(["text", "true_labels", "prompt", "answer"])

    for i in tqdm(range(total), desc="Querying LLaMA (test)", unit="ex"):
        item = test_ds[i]
        text = item.get("text", "")
        true_labs = item.get("labels", [])
        # k is the EXACT number of emotions asked (at least one, even without gold labels)
        k = max(1, len(true_labs))

        prompt = build_prompt(text, k, emotion_labels, emotion_to_gi, gi_lexicons, emphasize_mapping=True)

        try:
            answer = client.chat(prompt)
        except Exception as e:
            # Record the failure and keep going so one bad example does not abort the run.
            print(f"[Error] failed for example {i}: {e}")
            errors.append((i, str(e)))
            answer = f"[ERROR] {e}"

        writer.writerow([prompt, answer])
        ext_writer.writerow([text, ",".join(map(str, true_labs)), prompt, answer])

        # Periodic flush so partial results are on disk during the run.
        if i % 20 == 0:
            out_f.flush()
            ext_f.flush()

print(f"Done. Wrote outputs to {OUTPUT_CSV} and {EXTENDED_OUTPUT_CSV}.")
if errors:
    print(f"Completed with {len(errors)} errors. Sample: {errors[:5]}")

Loading GI lexicons from: inquireraugmented.xls
Loaded 182 GI categories
Loading go_emotions simplified dataset (test split only)...
Test split size: 5427


Querying LLaMA (test): 100%|██████████| 5427/5427 [2:24:38<00:00,  1.60s/ex]  

Done. Wrote outputs to llama_answers_prompt_answer.csv and llama_answers_extended.csv.



