In [2]:
import json
import os
from dotenv import load_dotenv
import google.generativeai as genai

# ----------------------------------------------------
# Environment: read GEMINI_API_KEY from .env and configure the SDK
# ----------------------------------------------------
load_dotenv()
API_KEY = os.environ.get("GEMINI_API_KEY")  # None when the variable is unset

genai.configure(api_key=API_KEY)
# NOTE(review): `model` is not referenced by the cells visible in this part of
# the notebook -- confirm whether anything further down still needs it.
model = genai.GenerativeModel("gemini-1.5-flash")

# ----------------------------------------------------
# Input / output locations
# ----------------------------------------------------
OUT_DIR = "output"
os.makedirs(OUT_DIR, exist_ok=True)  # create the output folder up front

IN_PATH = "data/MAMS-ACSA/raw/data_jsonl/annotation_300.jsonl"
OUT_PATH = os.path.join(OUT_DIR, "annotation_300_with_emotions.jsonl")

# ----------------------------------------------------
# Load data
# ----------------------------------------------------
# The input is JSONL: one JSON object per line. Whitespace-only lines (for
# example an extra empty line at the end of the file) are skipped, because
# json.loads would otherwise raise JSONDecodeError on them.
data = []
with open(IN_PATH, "r", encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue
        data.append(json.loads(line))

# ----------------------------------------------------
# Gemini one-word emotion
# ----------------------------------------------------
import os, json, requests
from dotenv import load_dotenv

load_dotenv()
GEMINI_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_KEY:
    # os.getenv returns None for an unset variable. Stop here with a clear
    # message instead of building a header without a usable key and only
    # finding out when the first HTTP request fails.
    raise RuntimeError(
        "GEMINI_API_KEY is not set; define it in .env or in the environment."
    )

# REST endpoint used by ask_gemini(); the key is sent as a request header.
URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
HEADERS = {"Content-Type": "application/json", "X-goog-api-key": GEMINI_KEY}

def ask_gemini(prompt, timeout=60):
    """Send ``prompt`` to the Gemini REST endpoint and return the reply text.

    Args:
        prompt: Text placed in the single ``parts`` entry of the request body.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without a timeout a single hung connection would block the
            whole annotation loop indefinitely.

    Returns:
        The ``text`` of the first part of the first candidate in the JSON
        response, with surrounding whitespace stripped.

    Raises:
        requests.HTTPError: If the response has an error status
            (via ``raise_for_status``).
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError, IndexError: If the JSON body does not contain the
            ``candidates[0].content.parts[0].text`` path.
    """
    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    r = requests.post(URL, headers=HEADERS, json=payload, timeout=timeout)
    r.raise_for_status()
    return r.json()["candidates"][0]["content"]["parts"][0]["text"].strip()

def get_emotion(review, aspect, polarity):
    """Ask Gemini for a one-word emotion label for ``aspect`` in ``review``.

    Args:
        review: The review text inserted into the prompt.
        aspect: The aspect the emotion should refer to.
        polarity: The polarity label passed along in the prompt.

    Returns:
        The first usable word of the model's reply, lower-cased and with
        surrounding punctuation/quotes removed (so ``"Joy."`` -> ``"joy"``).

    Raises:
        ValueError: If the reply contains no usable word (empty, whitespace
            only, or punctuation only).
    """
    import string  # local import: only needed for punctuation stripping

    prompt = f"""
    Review: "{review}"
    Aspect: "{aspect}"
    Polarity: "{polarity}"

    What emotion is expressed toward this aspect?
    Answer with EXACTLY ONE WORD.
    Do NOT answer with sentiment words like "positive", "negative".
    Use an actual emotion word.
    """

    answer = ask_gemini(prompt)

    # ASCII punctuation plus curly quotes, which may wrap a one-word answer.
    strip_chars = string.punctuation + "\u201c\u201d\u2018\u2019"
    for token in answer.split():
        word = token.strip(strip_chars).lower()
        if word:
            return word

    # Nothing usable came back; say so explicitly instead of the bare
    # IndexError that indexing an empty split() result would raise.
    raise ValueError(f"Gemini returned no usable emotion word: {answer!r}")

# ----------------------------------------------------
# Annotate: one Gemini call per (review, aspect) pair
# ----------------------------------------------------
for record in data:
    review_text = record["input"]
    for annotation in record["output"]:
        # The label is stored on the annotation dict itself, so `data`
        # is updated in place.
        annotation["emotion"] = get_emotion(
            review_text,
            annotation["aspect"],
            annotation["polarity"],
        )

# ----------------------------------------------------
# Persist the annotated records as JSONL
# ----------------------------------------------------
with open(OUT_PATH, "w", encoding="utf-8") as out_file:
    out_file.writelines(
        json.dumps(record, ensure_ascii=False) + "\n" for record in data
    )

print("Done:", OUT_PATH)

Done: output/annotation_300_with_emotions.jsonl


In [3]:
import json

# Read the annotated file back. It was written as UTF-8 with
# ensure_ascii=False, so decode it as UTF-8 explicitly rather than relying
# on the platform's default encoding.
rows = []
with open("output/annotation_300_with_emotions.jsonl", encoding="utf-8") as f:
    for line in f:
        rows.append(json.loads(line))

In [4]:
# Flatten every annotation's emotion label into one list, in file order.
emotions = [
    annotation["emotion"]
    for record in rows
    for annotation in record["output"]
]

In [5]:
# Distinct emotion labels in alphabetical order, plus how many there are.
unique_emotions = list(set(emotions))
unique_emotions.sort()

print("Unique emotions:", unique_emotions)
print("Count:", len(unique_emotions))

Unique emotions: ['acceptance', 'admiration', 'aggression', 'ambivalence', 'amusement', 'anger', 'annoyance', 'anticipation', 'apathetic', 'apathy', 'appreciation', 'avoidance', 'calm', 'comfort', 'concern', 'confusion', 'content', 'contentment', 'curiosity', 'delight', 'desire', 'disappointment', 'disapproval', 'disbelief', 'discomfort', 'disgust', 'disinterest', 'displeasure', 'enjoyment', 'enthusiasm', 'excitement', 'expectation', 'frustration', 'gratification', 'gratitude', 'happiness', 'hope', 'hopeful', 'indifference', 'interest', 'interested', 'intrigue', 'jealousy', 'joy', 'love', 'mixed', 'negative', 'neutral', 'none', 'okay', 'overwhelmed', 'pleasure', 'puzzlement', 'relaxed', 'relief', 'sarcasm', 'satisfaction', 'satisfied', 'skeptical', 'surprise', 'sympathy', 'uncertainty', 'unconcerned', 'uninterested']
Count: 64


In [6]:
# Bucket emotion labels by the polarity of the annotation they belong to.
# Only these three polarity keys exist; any other value raises KeyError.
by_polarity = {label: [] for label in ("positive", "negative", "neutral")}

for record in rows:
    for annotation in record["output"]:
        by_polarity[annotation["polarity"]].append(annotation["emotion"])

In [7]:
from collections import Counter

# For each polarity: the distinct labels, their count, and the 15 most
# frequent labels with occurrence counts.
for pol in ("positive", "negative", "neutral"):
    labels = by_polarity[pol]
    distinct = set(labels)
    print("\nPolarity:", pol.upper())
    print("Unique emotions:", sorted(distinct))
    print("Count:", len(distinct))
    print("Top frequencies:", Counter(labels).most_common(15))


Polarity: POSITIVE
Unique emotions: ['acceptance', 'admiration', 'ambivalence', 'amusement', 'anger', 'annoyance', 'anticipation', 'appreciation', 'comfort', 'content', 'contentment', 'delight', 'desire', 'disappointment', 'disgust', 'enjoyment', 'enthusiasm', 'excitement', 'frustration', 'gratification', 'gratitude', 'happiness', 'hope', 'hopeful', 'interest', 'intrigue', 'joy', 'love', 'mixed', 'negative', 'neutral', 'none', 'pleasure', 'relaxed', 'relief', 'satisfaction', 'satisfied', 'surprise', 'sympathy']
Count: 39
Top frequencies: [('joy', 31), ('gratitude', 15), ('contentment', 14), ('satisfaction', 13), ('delight', 12), ('pleasure', 11), ('enjoyment', 9), ('excitement', 7), ('appreciation', 5), ('neutral', 5), ('disappointment', 5), ('hopeful', 4), ('frustration', 4), ('amusement', 4), ('interest', 4)]

Polarity: NEGATIVE
Unique emotions: ['amusement', 'anger', 'annoyance', 'concern', 'confusion', 'curiosity', 'disappointment', 'disapproval', 'disbelief', 'discomfort', 'disgu