In [7]:
import os

PROJECT_MARKERS = ("src", "data", "prompts", "results")


def find_project_root(start_path, markers=PROJECT_MARKERS):
    """Walk upward from ``start_path`` to the nearest directory holding every marker.

    Args:
        start_path: Path to start searching from. It is made absolute with
            ``os.path.abspath`` before the search begins.
        markers: Iterable of sub-directory names that must all exist directly
            inside a directory for it to qualify as the project root.
            Defaults to ``PROJECT_MARKERS``.

    Returns:
        Absolute path of the first directory, starting at ``start_path`` and
        moving towards the filesystem root, that contains every marker.

    Raises:
        RuntimeError: If the top of the filesystem is reached without a match.
    """
    # Materialise once so a one-shot iterable (e.g. a generator) is still
    # usable on every level of the upward walk.
    markers = tuple(markers)
    current = os.path.abspath(start_path)

    while True:
        if all(os.path.isdir(os.path.join(current, m)) for m in markers):
            return current

        parent = os.path.dirname(current)
        # dirname() of the topmost directory is that directory itself,
        # which means there is nothing left to climb.
        if parent == current:
            raise RuntimeError("Project root not found")

        current = parent


# ---- directory the interpreter / kernel is currently running in ----
cwd = os.getcwd()

# ---- where the upward search begins ----
# When __file__ is not defined the lookup raises NameError and the
# working directory is used as the starting point instead.
try:
    start_path = os.path.dirname(os.path.abspath(__file__))
except NameError:
    start_path = cwd


# ---- derive every canonical location from the detected root ----
project_root = find_project_root(start_path)

_owner_subdir = os.path.join("daniel", "gemini")

src_root = os.path.join(project_root, "src", _owner_subdir)
data_root = os.path.join(
    project_root, "data", "MAMS-ACSA", "raw", "data_jsonl", "annotated"
)
prompts_root = os.path.join(project_root, "prompts", _owner_subdir)
utils_root = os.path.join(project_root, "utils")
results_root = os.path.join(project_root, "results", _owner_subdir)


# ---- one aligned report line per resolved location ----
_path_report = (
    ("cwd", cwd),
    ("Project root", project_root),
    ("Source root", src_root),
    ("Data root", data_root),
    ("Prompts root", prompts_root),
    ("Utils root", utils_root),
    ("Results root", results_root),
)

# Labels are left-aligned in a 12-character column so the colons line up.
print("\n".join(f"ðŸ“‚ {label:<12} : {location}" for label, location in _path_report))

ðŸ“‚ cwd          : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/src/daniel/gemini
ðŸ“‚ Project root : /Users/hd/Desktop/RCS-Emotion-Prediction-2025
ðŸ“‚ Source root  : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/src/daniel/gemini
ðŸ“‚ Data root    : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/data/MAMS-ACSA/raw/data_jsonl/annotated
ðŸ“‚ Prompts root : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/prompts/daniel/gemini
ðŸ“‚ Utils root   : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/utils
ðŸ“‚ Results root : /Users/hd/Desktop/RCS-Emotion-Prediction-2025/results/daniel/gemini


In [5]:
# Emotion prediction script using Gemini API
import json
import os
import requests
from dotenv import load_dotenv

# ==========================================
# API SETUP
# ==========================================
load_dotenv()
# NOTE(review): os.getenv returns None when GEMINI_API_KEY is not set; the
# header below would then carry no usable key. Confirm whether the notebook
# should fail fast here.
API_KEY = os.getenv("GEMINI_API_KEY")

MODEL = "models/gemini-2.5-flash"
URL = f"https://generativelanguage.googleapis.com/v1beta/{MODEL}:generateContent"

HEADERS = {
    "Content-Type": "application/json",
    "X-goog-api-key": API_KEY
}

# ==========================================
# PATHS
# ==========================================
IN_PATH = os.path.join(data_root, "cleaned_300.jsonl")
EMOTION_JSON = os.path.join(data_root, "emotion.json")

OUT_DIR = os.path.join(results_root, "gemini-flash")
os.makedirs(OUT_DIR, exist_ok=True)

OUT_EMO = os.path.join(OUT_DIR, "gemini_emotion_only_cleaned_300.jsonl")
OUT_EMO_R = os.path.join(OUT_DIR, "gemini_emotion_only_reasons_cleaned_300.jsonl")

# ==========================================
# LOAD FULL EMOTION TAXONOMY
# ==========================================
# Context manager so the file handle is closed as soon as parsing finishes.
with open(EMOTION_JSON, "r", encoding="utf-8") as taxonomy_file:
    EMOTIONS = json.load(taxonomy_file)
POLARITIES = ["positive", "negative", "neutral"]

# ==========================================
# PARSE GOLD INPUT (BUT IGNORE GOLD EMOTIONS)
# ==========================================
# One JSON document per line. Blank lines (e.g. a trailing newline at the end
# of the file) are skipped instead of crashing json.loads.
with open(IN_PATH, "r", encoding="utf-8") as gold_file:
    raw_data = [json.loads(line) for line in gold_file if line.strip()]

# Each row["output"] has aspect/polarity/emotion
# but EMOTION is IGNORED -> the model predicts new ones


# ==========================================
# GEMINI REQUEST WRAPPER
# ==========================================
def ask_gemini(prompt, timeout=120):
    """POST ``prompt`` to the configured Gemini endpoint and return the reply text.

    Args:
        prompt: Text sent as the single ``parts`` entry of the request body.
        timeout: Seconds handed to ``requests.post``. Without it a stalled
            connection would block the notebook indefinitely.

    Returns:
        The stripped text of the first part of the first candidate. An empty
        string is returned when the response body does not contain that field,
        so callers that retry on unparsable output can treat it like any other
        bad reply instead of the whole run aborting on a ``KeyError``.

    Raises:
        requests.HTTPError: Propagated from ``raise_for_status`` on an error
            status code.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    r = requests.post(URL, headers=HEADERS, json=payload, timeout=timeout)
    r.raise_for_status()

    body = r.json()
    try:
        return body["candidates"][0]["content"]["parts"][0]["text"].strip()
    except (KeyError, IndexError, TypeError, AttributeError):
        # NOTE(review): presumably happens when the API returns no candidate
        # (e.g. a filtered prompt) -- confirm against the API reference.
        return ""


# ==========================================
# SAFE JSON PARSER
# ==========================================
def safe_json_parse(txt):
    """Parse ``txt`` as JSON, applying progressively more lenient repairs.

    Attempts, in order:
      1. Parse the input unchanged.
      2. Remove Markdown code-fence markers (```json / ```) and parse again.
      3. Additionally drop trailing commas that sit directly before a closing
         ``]`` or ``}`` (optionally separated by whitespace) and parse again.

    Args:
        txt: Candidate JSON text, typically a raw model reply.

    Returns:
        The decoded Python object, or ``None`` when every attempt fails or
        ``txt`` is not something that can be parsed.
    """
    import re  # function-scope import: the notebook's import cells do not provide `re`

    # JSONDecodeError is a ValueError subclass; TypeError covers non-text input
    # such as None. Narrow handlers keep KeyboardInterrupt / SystemExit alive.
    try:
        return json.loads(txt)
    except (ValueError, TypeError):
        pass

    # The textual repairs below only make sense for str input.
    if not isinstance(txt, str):
        return None

    cleaned = txt.replace("```json", "").replace("```", "").strip()
    try:
        return json.loads(cleaned)
    except ValueError:
        pass

    # ",]" / ",}" and also ", ]" or ",\n}" -- models often pretty-print.
    cleaned = re.sub(r",\s*([\]}])", r"\1", cleaned)
    try:
        return json.loads(cleaned)
    except ValueError:
        return None


# ==========================================
# EMOTION-ONLY PROMPT BUILDER
# ==========================================
def build_emotion_only_prompt(review, aspect, polarity) -> str:
    """Build the prompt asking the model to pick one emotion for a fixed aspect/polarity.

    Args:
        review: Review text, embedded between triple double-quotes in the prompt.
        aspect: Key into ``EMOTIONS``; echoed in the prompt as fixed.
        polarity: Key into ``EMOTIONS[aspect]``; echoed in the prompt as fixed.

    Returns:
        The full prompt text. It contains the guidelines, the review, the
        aspect, the polarity, the allowed emotion categories and the required
        JSON output shape (``emotion`` and ``reason`` keys).

    Raises:
        KeyError: If ``aspect`` or ``polarity`` is missing from ``EMOTIONS``.

    NOTE(review): ``GUIDELINES`` is read as a module-level name but is not
    defined in the visible part of this notebook -- presumably it comes from
    another cell; confirm it is defined before this function is called.
    """
    # Emotion categories permitted for this (aspect, polarity) pair; they are
    # interpolated into the prompt using their default string form.
    allowed = EMOTIONS[aspect][polarity]

    return f"""
You are performing EMOTION-ONLY annotation following strict official guidelines.

Below are the complete annotation guidelines that you MUST follow exactly:
{GUIDELINES}

### TASK:
You MUST NOT modify aspect or polarity.
Your ONLY task is to choose the correct EMOTION.

### Review:
\"""{review}\"""

### Aspect (DO NOT CHANGE):
{aspect}

### Polarity (DO NOT CHANGE):
{polarity}

### Allowed Emotion Categories:
{allowed}

### STRICT JSON OUTPUT:
{{
  "emotion": "...",
  "reason": "A single sentence of exactly 20 words explaining your reasoning."
}}

### RULES:
- JSON ONLY.
- Choose exactly ONE emotion from allowed list.
- Do NOT invent categories.
- Do NOT output anything except JSON.
- "reason" MUST contain exactly 20 words.

Return ONLY JSON.
"""


# ==========================================
# CALL GEMINI FOR ONE EMOTION
# ==========================================
def annotate_emotion_only(review, aspect, polarity):
    """Ask the model for the emotion of one (aspect, polarity) pair of a review.

    The prompt is sent up to three times; the loop stops at the first reply
    that parses to a dict containing an ``"emotion"`` key.

    Args:
        review: Review text to annotate.
        aspect: Aspect label; must be a key of ``EMOTIONS``.
        polarity: Polarity label; must be a key of ``EMOTIONS[aspect]``.

    Returns:
        ``(emotion, reason)``. ``emotion`` is always one of the allowed labels
        for the pair: an unrecognised answer is replaced by the first allowed
        label and a notice is printed. If no reply parses to a dict the result
        is ``(None, "Reason unavailable")``.

    Raises:
        KeyError: If ``aspect`` / ``polarity`` are not present in ``EMOTIONS``.
    """
    prompt = build_emotion_only_prompt(review, aspect, polarity)

    parsed = None
    response = None
    for _ in range(3):
        response = ask_gemini(prompt)
        parsed = safe_json_parse(response)

        if isinstance(parsed, dict) and "emotion" in parsed:
            break

    if not isinstance(parsed, dict):
        print("JSON ERROR â†’", response)
        return None, "Reason unavailable"

    # The model may return null or a non-string value for either field;
    # treat anything that is not text as missing rather than crashing on .strip().
    raw_emotion = parsed.get("emotion", "")
    raw_reason = parsed.get("reason", "")
    emo = raw_emotion.strip() if isinstance(raw_emotion, str) else ""
    reason = raw_reason.strip() if isinstance(raw_reason, str) else ""

    # Capitalize the first letter for consistency
    if emo:
        emo = emo[0].upper() + emo[1:]

    # assumes EMOTIONS[aspect][polarity] is a non-empty list of strings -- TODO confirm
    allowed = EMOTIONS[aspect][polarity]
    if emo not in allowed:
        # Second chance: case-insensitive match (e.g. "JOY" vs "Joy"), mapped
        # back to the spelling used in the taxonomy.
        folded = {}
        for label in allowed:
            if isinstance(label, str):
                folded.setdefault(label.casefold(), label)
        canonical = folded.get(emo.casefold())

        if canonical is not None:
            emo = canonical
        else:
            # Keep the original fallback, but make it visible: a silently
            # substituted label is indistinguishable from a real prediction.
            print(
                f"INVALID EMOTION -> {raw_emotion!r} not allowed for "
                f"({aspect}, {polarity}); using {allowed[0]!r}"
            )
            emo = allowed[0]

    return emo, reason


# ==========================================
# RUN EMOTION-ONLY ANNOTATION
# ==========================================
emotion_only_results = []
emotion_only_reasons = []

for record in raw_data:
    review_text = record["input"]
    # Only aspect and polarity are read from the gold triples below.
    gold_triples = record["output"]

    predicted = []
    explained = []

    for triple in gold_triples:
        aspect_label, polarity_label = triple["aspect"], triple["polarity"]
        emotion_label, reason_text = annotate_emotion_only(
            review_text, aspect_label, polarity_label
        )

        entry = {
            "aspect": aspect_label,
            "polarity": polarity_label,
            "emotion": emotion_label,
        }
        predicted.append(entry)
        # Same three keys in the same order, followed by the explanation.
        explained.append({**entry, "reason": reason_text})

    emotion_only_results.append({"input": review_text, "output": predicted})
    emotion_only_reasons.append({"input": review_text, "details": explained})


# ==========================================
# SAVE OUTPUT FILES
# ==========================================
# One JSON document per line; the predictions file is written first, then the
# file that also carries the reasons.
for out_path, records in (
    (OUT_EMO, emotion_only_results),
    (OUT_EMO_R, emotion_only_reasons),
):
    with open(out_path, "w", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(item, ensure_ascii=False) + "\n" for item in records
        )

print("DONE â†’", OUT_EMO)
print("REASONS â†’", OUT_EMO_R)

ModuleNotFoundError: No module named 'dotenv'

In [None]:
# # Full absa with aspect, polarity, emotion prediction


# import json
# import os
# import requests
# from dotenv import load_dotenv

# # -----------------------------
# # API setup
# # -----------------------------
# load_dotenv()
# API_KEY = os.getenv("GEMINI_API_KEY")

# MODEL = "models/gemini-2.5-flash"
# URL = f"https://generativelanguage.googleapis.com/v1beta/{MODEL}:generateContent"

# HEADERS = {
#     "Content-Type": "application/json",
#     "X-goog-api-key": API_KEY
# }

# # -----------------------------
# # Paths
# # -----------------------------
# IN_PATH = os.path.join(data_root, "daniel_50.jsonl")
# EMOTION_JSON = os.path.join(data_root, "emotion.json")

# OUT_DIR = os.path.join(results_root, "gemini-flash")
# os.makedirs(OUT_DIR, exist_ok=True)

# OUT_ANNOT_PATH = os.path.join(OUT_DIR, "gemini_annotated_aspect_polarity_daniel_50.jsonl")
# OUT_REASON_PATH = os.path.join(OUT_DIR, "gemini_reasons_daniel_50.jsonl")

# # -----------------------------
# # Load emotion taxonomy
# # -----------------------------
# EMOTIONS = json.load(open(EMOTION_JSON, "r", encoding="utf-8"))

# ASPECTS = list(EMOTIONS.keys())
# POLARITIES = ["positive", "negative", "neutral"]

# allowed_lookup = {
#     (aspect, polarity): EMOTIONS[aspect][polarity]
#     for aspect in EMOTIONS
#     for polarity in EMOTIONS[aspect]
# }

# # ----------------------------------------------------
# # Gemini request
# # ----------------------------------------------------
# def ask_gemini(prompt):
#     payload = {"contents": [{"parts": [{"text": prompt}]}]}
#     r = requests.post(URL, headers=HEADERS, json=payload)
#     r.raise_for_status()
#     return r.json()["candidates"][0]["content"]["parts"][0]["text"].strip()

# # ----------------------------------------------------
# # JSON repair
# # ----------------------------------------------------
# def safe_json_parse(txt):
#     try:
#         return json.loads(txt)
#     except:
#         pass

#     cleaned = txt.replace("```json", "").replace("```", "").strip()
#     try:
#         return json.loads(cleaned)
#     except:
#         pass

#     cleaned = cleaned.replace(",]", "]").replace(",}", "}")
#     try:
#         return json.loads(cleaned)
#     except:
#         return None

# # ----------------------------------------------------
# # Build prompt (uses GUIDELINES from previous cell)
# # ----------------------------------------------------
# def build_prompt(review):
#     return f"""
# You are performing ABSA (Aspect-Based Sentiment & Emotion) annotation.

# Follow these official annotation guidelines:
# {GUIDELINES}

# ### Allowed aspects:
# {ASPECTS}

# ### Allowed polarities:
# {POLARITIES}

# ### Allowed emotions:
# {json.dumps(EMOTIONS, indent=2)}

# ### STRICT OUTPUT FORMAT:
# {{
#   "triples": [
#     {{"aspect": "...", "polarity": "...", "emotion": "..."}}
#   ],
#   "reason": "A single sentence of exactly 20 words explaining your reasoning."
# }}

# ### RULES:
# - Output MUST be ONLY valid JSON.
# - No markdown, no natural-language explanation outside JSON.
# - Emotion must belong to allowed list for the given (aspect, polarity).
# - If no aspects appear â†’ return `"triples": []` plus a 20-word reason.
# - `reason` MUST contain exactly 20 words.

# ### Review:
# \"""{review}\"""

# Return ONLY the JSON.
# """

# # ----------------------------------------------------
# # Annotation logic
# # ----------------------------------------------------
# def annotate_full(review):
#     prompt = build_prompt(review)

#     parsed = None
#     for _ in range(3):
#         response = ask_gemini(prompt)
#         parsed = safe_json_parse(response)
#         if isinstance(parsed, dict) and "triples" in parsed:
#             break

#     if not isinstance(parsed, dict):
#         print("JSON ERROR â†’", response)
#         return [], "Reason unavailable"

#     triples = parsed.get("triples", [])
#     reason = parsed.get("reason", "").strip()

#     # Validate triples
#     final = []
#     for item in triples:
#         asp = item.get("aspect")
#         pol = item.get("polarity")
#         emo = item.get("emotion", "")

#         if (asp, pol) not in allowed_lookup:
#             continue

#         allowed = allowed_lookup[(asp, pol)]

#         # Normalize
#         emo = emo.strip()
#         if emo:
#             emo = emo[0].upper() + emo[1:]

#         if emo not in allowed:
#             emo = allowed[0]

#         final.append({
#             "aspect": asp,
#             "polarity": pol,
#             "emotion": emo
#         })

#     return final, reason

# # ----------------------------------------------------
# # Load input
# # ----------------------------------------------------
# raw_data = [
#     json.loads(line)
#     for line in open(IN_PATH, "r", encoding="utf-8")
# ]

# # ----------------------------------------------------
# # Annotate all
# # ----------------------------------------------------
# results = []
# reasons = []

# for row in raw_data:
#     review = row["input"]
#     triples, reason = annotate_full(review)

#     results.append({
#         "input": review,
#         "output": triples
#     })

#     reasons.append({
#         "input": review,
#         "triples": triples,
#         "reason": reason
#     })

# # ----------------------------------------------------
# # Save Files
# # ----------------------------------------------------
# with open(OUT_ANNOT_PATH, "w", encoding="utf-8") as f:
#     for r in results:
#         f.write(json.dumps(r, ensure_ascii=False) + "\n")

# with open(OUT_REASON_PATH, "w", encoding="utf-8") as f:
#     for r in reasons:
#         f.write(json.dumps(r, ensure_ascii=False) + "\n")

# print("DONE â†’", OUT_ANNOT_PATH)
# print("REASONS â†’", OUT_REASON_PATH)


