In [3]:
import os
import sys

# Sub-directories that must all be present for a directory to count as the project root.
PROJECT_MARKERS = ("src", "data", "prompts", "results")


def find_project_root(start_path, markers=None):
    """Walk upward from ``start_path`` to the nearest directory holding every marker.

    Args:
        start_path: File or directory path to start from; it is made absolute
            before the search begins.
        markers: Names of sub-directories that must all exist inside the root.
            ``None`` (the default) uses ``PROJECT_MARKERS``. A single string is
            treated as one marker name, not as a sequence of characters.

    Returns:
        Absolute path of ``start_path`` itself or of its closest ancestor that
        contains all marker directories.

    Raises:
        RuntimeError: If the filesystem root is reached without finding a match.
    """
    if markers is None:
        required = PROJECT_MARKERS
    elif isinstance(markers, str):
        required = (markers,)
    else:
        required = tuple(markers)

    current = os.path.abspath(start_path)

    while True:
        if all(os.path.isdir(os.path.join(current, m)) for m in required):
            return current

        parent = os.path.dirname(current)
        # dirname() of the filesystem root is the root itself: nothing left to climb.
        if parent == current:
            raise RuntimeError("Project root not found")

        current = parent


# ---- directory the kernel / interpreter is currently running in ----
cwd = os.getcwd()

# ---- choose where the upward search begins ----
# __file__ is defined when this runs as a script; when it is not defined
# (NameError), fall back to the current working directory.
try:
    _this_file = __file__
except NameError:
    start_path = cwd
else:
    start_path = os.path.dirname(os.path.abspath(_this_file))


# ---- locate the project root ----
project_root = find_project_root(start_path)

# Put the project root at the front of sys.path (once) so that packages living
# directly under it can be imported from this notebook.
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

# ---- canonical sub-trees used by the rest of the notebook ----
_jsonl_dir   = os.path.join(project_root, "data", "MAMS-ACSA", "raw", "data_jsonl")

src_root     = os.path.join(project_root, "src", "daniel", "gemini")
data_root    = os.path.join(_jsonl_dir, "annotated")
schemas_root = os.path.join(_jsonl_dir, "schema")
prompts_root = os.path.join(project_root, "prompts", "daniel", "gemini")
utils_root   = os.path.join(project_root, "utils")
results_root = os.path.join(project_root, "results", "daniel", "gemini")

# ---- one-shot summary of the resolved locations ----
_summary_lines = [
    f"ðŸ“‚ cwd          : {cwd}",
    f"ðŸ“‚ Project root : {project_root}",
    f"ðŸ“‚ Source root  : {src_root}",
    f"ðŸ“‚ Data root    : {data_root}",
    f"ðŸ“‚ Prompts root : {prompts_root}",
    f"ðŸ“‚ Utils root   : {utils_root}",
    f"ðŸ“‚ Results root : {results_root}",
]
print("\n".join(_summary_lines))

ðŸ“‚ cwd          : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/src/daniel/gemini
ðŸ“‚ Project root : /Users/hd/Desktop/RCS-Emotion-Prediction-2025
ðŸ“‚ Source root  : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/src/daniel/gemini
ðŸ“‚ Data root    : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/data/MAMS-ACSA/raw/data_jsonl/annotated
ðŸ“‚ Prompts root : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/prompts/daniel/gemini
ðŸ“‚ Utils root   : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/utils
ðŸ“‚ Results root : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/results/daniel/gemini


In [4]:
# ==========================================
# Emotion prediction script using Gemini API
# (EMOTION ONLY — NO REASONS)
# ==========================================

import json
import os
import requests
from dotenv import load_dotenv

from prompts.daniel.gemini.zero_shot import GUIDELINES


# ==========================================
# API SETUP
# ==========================================
# Load variables from a local .env file (if present) into the environment,
# then read the API key from there so it never appears in the notebook.
load_dotenv()
API_KEY = os.getenv("GEMINI_API_KEY")

# Fail fast: a missing key would otherwise only surface later, as an opaque
# HTTP error on the first request of a long run.
if not API_KEY:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; define it in the environment or in a .env file."
    )

MODEL = "models/gemini-2.5-flash"
URL = f"https://generativelanguage.googleapis.com/v1beta/{MODEL}:generateContent"

# Headers sent with every request; the key travels in a header, not in the URL.
HEADERS = {
    "Content-Type": "application/json",
    "X-goog-api-key": API_KEY
}


# ==========================================
# PATHS
# ==========================================
# Schema and input dataset, resolved against the roots computed earlier.
SCHEMA_JSON = os.path.join(schemas_root, "emotion_schema_v2.json")
IN_PATH = os.path.join(data_root, "02_iteration_cleaned_300.jsonl")

# ---- input file name without directory or extension ----
input_name = os.path.splitext(os.path.split(IN_PATH)[1])[0]

# ---- output location (created on demand) ----
OUT_DIR = os.path.join(results_root, "gemini-flash")
os.makedirs(OUT_DIR, exist_ok=True)

# The output file name embeds the input identifier.
OUT_EMO = os.path.join(OUT_DIR, input_name + "__gemini_emotion_only.jsonl")

print(f"Input file : {IN_PATH}")
print(f"Output file: {OUT_EMO}")


# ==========================================
# LOAD FULL EMOTION TAXONOMY
# ==========================================
# The schema is indexed later as EMOTIONS[aspect][polarity] to obtain the
# allowed emotion labels for one (aspect, polarity) pair.
with open(SCHEMA_JSON, "r", encoding="utf-8") as f:
    EMOTIONS = json.load(f)

POLARITIES = ["positive", "negative", "neutral"]


# ==========================================
# LOAD INPUT DATA (IGNORE GOLD EMOTIONS)
# ==========================================
# One JSON object per line. Blank lines (e.g. a trailing newline at the end of
# the file) are skipped instead of crashing json.loads().
with open(IN_PATH, "r", encoding="utf-8") as f:
    raw_data = [json.loads(line) for line in f if line.strip()]


# ==========================================
# GEMINI REQUEST WRAPPER
# ==========================================
def ask_gemini(
    prompt: str,
    timeout: float = 60.0,
    max_retries: int = 4,
    backoff: float = 2.0,
) -> str:
    """Send ``prompt`` to the Gemini endpoint and return the first candidate's text.

    Transient failures (dropped connections, timeouts, HTTP 429 and 5xx) are
    retried with exponential backoff so that a single network hiccup does not
    abort a long annotation run.

    Args:
        prompt: Text sent as the only part of the request contents.
        timeout: Seconds to wait for the server before giving up on one attempt.
        max_retries: Number of additional attempts after the first one fails
            with a transient error. Negative values are treated as 0.
        backoff: Base delay in seconds; the wait before retry ``k`` (0-based)
            is ``backoff * 2 ** k``.

    Returns:
        The stripped text of ``candidates[0].content.parts[0]``.

    Raises:
        requests.exceptions.RequestException: If the request still fails after
            all retries, or fails with a non-retryable HTTP status.
        KeyError, IndexError: If the response JSON lacks the expected
            candidate structure.
    """
    import time  # local import: `time` is not imported at the top of this cell

    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    retryable_status = {429, 500, 502, 503, 504}
    attempts = max(0, max_retries) + 1

    for attempt in range(attempts):
        is_last = attempt == attempts - 1
        try:
            r = requests.post(URL, headers=HEADERS, json=payload, timeout=timeout)
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            if is_last:
                raise
        else:
            if is_last or r.status_code not in retryable_status:
                # Raises for any remaining 4xx/5xx; otherwise extract the text.
                r.raise_for_status()
                return r.json()["candidates"][0]["content"]["parts"][0]["text"].strip()

        # Transient failure: wait progressively longer before the next attempt.
        time.sleep(backoff * (2 ** attempt))

    # Unreachable: the last loop iteration always returns or raises.
    raise RuntimeError("ask_gemini: retry loop ended without a result")


# ==========================================
# SAFE JSON PARSER
# ==========================================
def safe_json_parse(txt: str):
    """Parse ``txt`` as JSON, tolerating common formatting noise in model output.

    Three attempts are made, each only if the previous one failed:
      1. parse the text unchanged;
      2. strip Markdown code fences (```json ... ```) and surrounding whitespace;
      3. additionally drop trailing commas before a closing ``]`` or ``}``,
         including when whitespace or a newline sits between them.

    Args:
        txt: Raw text to parse.

    Returns:
        The decoded JSON value, or ``None`` if every attempt fails or ``txt``
        is not a string.
    """
    import re  # local import: `re` is not imported at the top of this cell

    # Only parsing failures are swallowed; KeyboardInterrupt etc. propagate.
    parse_errors = (ValueError, TypeError, RecursionError)

    try:
        return json.loads(txt)
    except parse_errors:
        pass

    # The clean-up steps below are string operations; anything else is unparseable.
    if not isinstance(txt, str):
        return None

    cleaned = txt.replace("```json", "").replace("```", "").strip()
    try:
        return json.loads(cleaned)
    except parse_errors:
        pass

    # Remove trailing commas such as `,]`, `, }` or `,\n}`.
    cleaned = re.sub(r",\s*([\]}])", r"\1", cleaned)
    try:
        return json.loads(cleaned)
    except parse_errors:
        return None


# ==========================================
# EMOTION-ONLY PROMPT BUILDER
# ==========================================
def build_emotion_only_prompt(review, aspect, polarity) -> str:
    """Build the prompt asking the model to pick one emotion for a fixed aspect/polarity.

    The prompt embeds the module-level ``GUIDELINES`` text, the review, the given
    aspect and polarity, and the emotion categories allowed for that pair, and
    asks for a JSON object with a single ``"emotion"`` key.

    Args:
        review: Review text, inserted between triple quotes in the prompt.
        aspect: First-level key into ``EMOTIONS``.
        polarity: Second-level key into ``EMOTIONS[aspect]``.

    Returns:
        The complete prompt string.

    Raises:
        KeyError: If ``aspect`` / ``polarity`` is not present in ``EMOTIONS``
            (assuming ``EMOTIONS`` is a nested mapping — TODO confirm against
            the schema file).
    """
    # Emotion categories permitted for this (aspect, polarity) pair; rendered
    # into the prompt with their default string representation.
    allowed = EMOTIONS[aspect][polarity]

    return f"""
You are performing EMOTION-ONLY annotation following strict official guidelines.

Below are the complete annotation guidelines that you MUST follow exactly:
{GUIDELINES}

### TASK:
You MUST NOT modify aspect or polarity.
Your ONLY task is to choose the correct EMOTION.

### Review:
\"""{review}\"""

### Aspect (DO NOT CHANGE):
{aspect}

### Polarity (DO NOT CHANGE):
{polarity}

### Allowed Emotion Categories:
{allowed}

### STRICT JSON OUTPUT:
{{
  "emotion": "..."
}}

### RULES:
- JSON ONLY.
- Choose exactly ONE emotion from allowed list.
- Do NOT invent categories.
- Do NOT output anything except JSON.

Return ONLY JSON.
"""


# ==========================================
# CALL GEMINI FOR ONE EMOTION
# ==========================================
def annotate_emotion_only(review, aspect, polarity, max_attempts=3):
    """Ask Gemini for the emotion of one (review, aspect, polarity) triple.

    The model is queried up to ``max_attempts`` times until its answer parses
    to a JSON object containing an ``"emotion"`` key. The returned label is
    then validated against the emotions allowed for this aspect/polarity.

    Args:
        review: Review text.
        aspect: Aspect key into ``EMOTIONS``; passed through unchanged.
        polarity: Polarity key into ``EMOTIONS[aspect]``; passed through unchanged.
        max_attempts: Maximum number of model calls (at least one is made).

    Returns:
        An emotion label taken from the allowed list. If the model's answer is
        missing, not a string, or not an allowed label, the first allowed label
        is returned. ``None`` is returned when no attempt produced a JSON
        object, or when the allowed list is empty.
    """
    prompt = build_emotion_only_prompt(review, aspect, polarity)
    allowed = EMOTIONS[aspect][polarity]

    parsed = None
    response = None
    for _attempt in range(max(1, max_attempts)):
        response = ask_gemini(prompt)
        parsed = safe_json_parse(response)

        if isinstance(parsed, dict) and "emotion" in parsed:
            break

    if not isinstance(parsed, dict):
        print("JSON ERROR â†’", response)
        return None

    # The model may emit null / a number / a list here; treat that as "no answer".
    raw = parsed.get("emotion", "")
    emo = raw.strip() if isinstance(raw, str) else ""

    if emo:
        # Capitalize the first letter for consistency, then try an exact match.
        emo = emo[0].upper() + emo[1:]
        if emo in allowed:
            return emo

        # Otherwise accept a match that differs only in letter case and return
        # the label exactly as it is spelled in the allowed list.
        wanted = emo.casefold()
        for candidate in allowed:
            if isinstance(candidate, str) and candidate.casefold() == wanted:
                return candidate

    # Validation failed: fall back to the first allowed label (if any).
    return allowed[0] if allowed else None


# ==========================================
# RUN EMOTION-ONLY ANNOTATION + SAVE OUTPUT
# ==========================================
emotion_only_results = []

# Each finished row is written immediately to a side file, so a failure in the
# middle of the run (e.g. a network error) does not discard completed work.
# The final output file is only replaced once every row has been processed.
partial_path = OUT_EMO + ".partial"

with open(partial_path, "w", encoding="utf-8") as f:
    for row in raw_data:
        review = row["input"]
        gold = row["output"]  # use aspect + polarity only

        annotated = []

        for t in gold:
            asp = t["aspect"]
            pol = t["polarity"]

            emo = annotate_emotion_only(review, asp, pol)

            annotated.append({
                "aspect": asp,
                "polarity": pol,
                "emotion": emo
            })

        record = {
            "input": review,
            "output": annotated
        }
        emotion_only_results.append(record)

        # One JSON object per line; flush so the row reaches the file right away.
        f.write(json.dumps(record, ensure_ascii=False) + "\n")
        f.flush()

# All rows done: move the side file into place as the final output.
os.replace(partial_path, OUT_EMO)

print("DONE â†’", OUT_EMO)

Input file : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/data/MAMS-ACSA/raw/data_jsonl/annotated/02_iteration_cleaned_300.jsonl
Output file: /Users/hd/Desktop/RCS-Emotion-Prediction-2025/results/daniel/gemini/gemini-flash/02_iteration_cleaned_300__gemini_emotion_only.jsonl


ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))