In [18]:
import json, re, csv, unicodedata
from pathlib import Path
import pandas as pd

# ---------------------------------------------------------------------------
# File locations
# ---------------------------------------------------------------------------
# Replies to classify.
INPUT_CSV = Path("Replies_Tasks.csv")
# Optional overlay of rewritten replies; it is only read when the file exists.
REWRITES_CSV = Path("replies_fix_v2_51_120.csv")
# Destination for the predicted labels.
CSV_OUT = Path("predictions.csv")

# ---------------------------------------------------------------------------
# Column names expected in the input CSV
# ---------------------------------------------------------------------------
COL_ID = "Reply_Id"
COL_PROMPT = "Prompt_Id"
COL_COND = "condition"
COL_REPLY = "reply_text"

# Reporting order for matched tags: predicted labels are emitted in this
# order, so the earliest matching entry ends up as the top label.
ORDER = [
    "T01_EmpathyGratitude",
    "T02_Apology",
    "T03_PositiveFlex",
    "T04_MitigationHedge",
    "T05_RefusalMinus",
    "T06_GeneralAnswer",
]

def norm_text(s):
    """Return a lower-cased, whitespace-collapsed, punctuation-normalised copy of *s*.

    Non-string input (``None``, NaN, numbers, ...) yields ``""``.

    Steps, in order:
      1. Repair the common mojibake for a right single quote.
      2. Apply Unicode NFKC normalisation (this also expands the ellipsis
         character to ``...`` and turns a no-break space into a plain space).
      3. Map curly single/double quotes to their ASCII forms.
      4. Lower-case, collapse runs of whitespace to one space, and strip.
    """
    if not isinstance(s, str):
        return ""

    # The mojibake repair must run BEFORE NFKC: NFKC rewrites the trade-mark
    # sign (U+2122) as "TM", after which the three-character sequence
    # U+00E2 U+20AC U+2122 no longer exists and could never be matched.
    s = s.replace("\u00e2\u20ac\u2122", "'")

    s = unicodedata.normalize("NFKC", s)

    # NFKC leaves curly quotes untouched, so fold them to ASCII explicitly.
    s = (
        s.replace("\u2019", "'")
        .replace("\u201c", '"')
        .replace("\u201d", '"')
    )

    s = s.lower()
    return re.sub(r"\s+", " ", s).strip()

def negated(pattern, text, window=3):
    """Report whether any match of *pattern* in *text* is preceded by a negator.

    For every match, the word tokens (``\\w+`` runs) to the left of the match
    are collected and the last *window* of them are inspected. The function
    returns ``True`` as soon as one of those tokens is exactly ``not``,
    ``never`` or ``no``; otherwise it returns ``False`` (including when the
    pattern does not match at all).
    """
    negators = {"not", "never", "no"}
    return any(
        not negators.isdisjoint(
            re.findall(r"\b\w+\b", text[:hit.start()])[-window:]
        )
        for hit in re.finditer(pattern, text)
    )

# Regex source text for each label tag, keyed by the tag names listed in ORDER
# (T06_GeneralAnswer has no pattern; it is the fallback when nothing matches).
#
# These strings are compiled further down with re.I | re.X, so the spaces,
# indentation and newlines inside them are layout only and match nothing;
# real whitespace has to be spelled \s.
#
# NOTE: the T01, T03 and T04 entries are reassigned after this literal, so
# only the T02 and T05 entries are used exactly as written here.
RX = {
    # T01: acknowledgement / gratitude phrasing ("we understand",
    # "i hear you", "thanks for your patience", "we appreciate ...").
    # A bare "thanks"/"thank you" also counts unless followed by
    # "for contact(ing)".
    "T01_EmpathyGratitude": r"""
       \b(?:we|i)\s+(?:truly\s+)?(?:understand|recognize|realize)\b
      |\b(?:we|i)\s+know\s+(?:this|it)\s+(?:is|can\s+be)\b
      |\b(?:i\s+(?:get|see|hear)\s+(?:how|that)|i\s+get\s+it|i\s+hear\s+you|i\s+see\s+why)\b
      |we'?re\s+here\s+to\s+(?:help|support|assist)
      |i'?m\s+here\s+to\s+(?:help|support|assist)
      |(?:thank\s+you|thanks)\s+for\s+(?:waiting|your\s+patience|the\s+details|reaching\s+out|bringing\s+this\s+to\s+our\s+attention)
      |(?:thank\s+you|thanks)(?!\s+for\s+contact(?:ing)?\b)
      |we\s+appreciate\s+(?:your\s+)?(?:patience|understanding|feedback|time|details)
      |we\s+value\s+(?:your|the)\s+(?:patience|understanding|feedback)
      |we\s+acknowledge\s+(?:the\s+)?(?:inconvenience|delay|frustration)
    """,

    # T02: apology wording ("sorry", "apologize/apologise" and inflections,
    # "my apologies", "we regret ...").
    "T02_Apology": r"""
       \b(?:sorry|so\s+sorry|truly\s+sorry|really\s+sorry|terribly\s+sorry)\b
      |(?:i|we)\s+(?:sincerely|truly|deeply)\s+apolog(?:ize|ise)\b
      |my\s+apologies\b|i\s+owe\s+you\s+an\s+apology\b
      |apolog(?:y|ies|ize|ise|ized|ised|izing|ising)\b
      |(?:we|i)\s+regret(?:\s+to\s+inform\s+you)?\b
      |we\s+apologize\s+for\s+(?:any\s+)?(?:inconvenience|trouble|delay|confusion)\b
      |(?:sorry|apologies)\s+to\s+hear\b
    """,

    # T03: offers to act ("let me check", "we can arrange ...", "i'll help you
    # return ...", "expedite", "asap").
    # NOTE(review): under re.X the literal space in `(?:'| wi)ll` is ignored,
    # so that alternative matches "iwill"/"wewill" rather than "i will"; the
    # later reassignment of this key uses \s instead.
    "T03_PositiveFlex": r"""
       \b(?:let|allow)\s+me\s+to\s+(?:check|see|look\s+into|fix|arrange|request|escalate|coordinate|set\s+up|process|refund|replace|exchange|cancel|resolve)\b
      |\b(?:i|we)(?:'| wi)ll\s+(?:try|do\s+(?:my|our)\s+best|make\s+sure|take\s+care\s+of(?:\s+(?:this|it))?|get\s+(?:this|it)\s+sorted
         |look\s+into\s+this\s+now|check\s+this\s+now|follow\s+up|update\s+you|reach\s+out|process\s+(?:a\s+)?refund|issue\s+(?:a\s+)?refund|send\s+(?:a\s+)?replacement)\b
      |\b(?:i|we)\s+can\s+(?:help|assist|check|look\s+into|arrange|issue|send|offer|request|escalate|coordinate|review|investigate
         |process|refund|replace|exchange|cancel|resolve|provide\s+(?:a\s+)?label|arrange(?:\s+(?:a\s+)?)?pickup)\b
      |\b(?:i|we)(?:'| wi)ll\s+help(?:\s+you)?\s+(?:return|cancel|exchange|replace|resolve|start\s+a\s+return|request\s+a\s+refund
         |get\s+(?:a\s+)?refund|process\s+(?:a\s+)?refund|print(?:ing)?\s+(?:a\s+)?label|arrange(?:\s+(?:a\s+)?)?pickup|send\s+(?:a\s+)?replacement
         |issue\s+(?:a\s+)?refund|provide\s+(?:a\s+)?label)\b
      |\b(?:walk|guide)\s+you\s+through\b
      |\b(?:expedite|fast-?track|prioriti[sz]e|rush|speed(?:\s+(?:this|it|up))?|fast(?:\s|-)?refund|quick(?:\s|-)?refund|refund\s+asap|asap)\b
    """,

    # T04: hedges and deflections ("maybe", "unfortunately", "at this time",
    # "please check our website", "subject to availability", "may vary").
    "T04_MitigationHedge": r"""
       \b(?:maybe|perhaps|might|may|could|usually|typically|generally|possibly|likely|unlikely|roughly|approximately)\b
      |it\s+(?:seems|looks|appears)\s+like\b
      |\bas\s+far\s+as\s+i\s+can\s+tell\b
      |a\s+(?:bit|little)\b
      |unfortunately\b
      |\bat\s+this\s+time\b|\bright\s+now\b|\bfor\s+now\b
      |\bin\s+most\s+cases\b|\bin\s+general\b|\bshould\s+be\s+able\s+to\b|\bmay\s+be\s+able\s+to\b
      |\bif\b[^.]{0,80}\b(?:available|eligible|in\s+stock)\b[^.]{0,60}\b(?:you(?:'| )?ll|you\s+will)\s+see\b
      |\byou(?:\s+can)?\s+(?:check|see|find|view)\s+(?:it|this|that|details|options|information)\b
      |\bplease\s+(?:check|visit|see|refer\s+to)\b\s+(?:our\s+)?(?:website|help\s+center|faq|support\s+page|policy|terms|your\s+orders|orders)\b
      |\btry\s+again\s+later\b|\bcheck\s+back\s+later\b|\bit\s+should\s+appear\b|\byou\s+may\s+see\b
      |\bsubject\s+to\s+availability\b
      |\bdepending\s+on\b|\bmay\s+vary\b|\bcannot\s+guarantee\b
    """,

    # T05: refusals and policy limits ("can't", "unable to", "we do not
    # offer ...", "not eligible", "outside the return window", "due to policy").
    "T05_RefusalMinus": r"""
       \b(?:can'?t|cannot|unable\s+to|not\s+possible|won'?t\s+be\s+able\s+to|not\s+allowed|no\s+longer\s+able\s+to)\b
      |(?:we|i)\s+(?:do\s+not|don't|no\s+longer)\s+(?:offer|allow|provide|ship|price[-\s]?match|accept|support|issue|change)\b
      |\b(?:cannot|can'?t)\s+(?:accept|process|change|modify|refund|cancel)\b
      |\bnot\s+supported\b|\bnot\s+eligible\b|\bineligible\b
      |\boutside\s+(?:the\s+)?return\s+window\b|\bafter\s+the\s+return\s+window\b
      |\bdue\s+to\s+(?:policy|security|carrier\s+restrictions)\b
      |policy\s+(?:prohibits|does\s+not\s+allow|forbids)\b
    """,
}

# Reassigns the T01 pattern. The alternatives are the same as the T01 entry in
# the RX literal (only the indentation differs, which re.X ignores), so this
# is the version that actually gets compiled.
RX["T01_EmpathyGratitude"] = r"""
   \b(?:we|i)\s+(?:truly\s+)?(?:understand|recognize|realize)\b
  |\b(?:we|i)\s+know\s+(?:this|it)\s+(?:is|can\s+be)\b
  |\b(?:i\s+(?:get|see|hear)\s+(?:how|that)|i\s+get\s+it|i\s+hear\s+you|i\s+see\s+why)\b
  |we'?re\s+here\s+to\s+(?:help|support|assist)
  |i'?m\s+here\s+to\s+(?:help|support|assist)
  |(?:thank\s+you|thanks)\s+for\s+(?:waiting|your\s+patience|the\s+details|reaching\s+out|bringing\s+this\s+to\s+our\s+attention)
  |(?:thank\s+you|thanks)(?!\s+for\s+contact(?:ing)?\b)
  |we\s+appreciate\s+(?:your\s+)?(?:patience|understanding|feedback|time|details)
  |we\s+value\s+(?:your|the)\s+(?:patience|understanding|feedback)
  |we\s+acknowledge\s+(?:the\s+)?(?:inconvenience|delay|frustration)
"""

# Reassigns the T03 pattern, superseding the T03 entry in the RX literal.
# Compared with that entry:
#   * "i will" / "we will" is matched through `\swill` rather than a literal
#     space, which re.X would discard (the `^` alternative sits directly after
#     the pronoun and so cannot match there);
#   * verify / confirm are added to every verb list, and review / investigate
#     are added where they were missing.
RX["T03_PositiveFlex"] = r"""
   \b(?:let|allow)\s+me\s+to\s+(?:check|see|look\s+into|fix|arrange|request|escalate|coordinate|set\s+up|process|refund|replace|exchange|cancel|resolve|verify|confirm|review|investigate)\b
  |\b(?:i|we)(?:'ll|(?:\s|^)will)\s+(?:try|do\s+(?:my|our)\s+best|make\s+sure|take\s+care\s+of(?:\s+(?:this|it))?|get\s+(?:this|it)\s+sorted
     |look\s+into\s+this\s+now|check\s+this\s+now|follow\s+up|update\s+you|reach\s+out|process\s+(?:a\s+)?refund|issue\s+(?:a\s+)?refund|send\s+(?:a\s+)?replacement
     |review|confirm|verify)\b
  |\b(?:i|we)\s+can\s+(?:help|assist|check|look\s+into|arrange|issue|send|offer|request|escalate|coordinate|review|investigate
     |process|refund|replace|exchange|cancel|resolve|provide\s+(?:a\s+)?label|arrange(?:\s+(?:a\s+)?)?pickup|confirm|verify)\b
  |\b(?:i|we)(?:'ll|(?:\s|^)will)\s+help(?:\s+you)?\s+(?:return|cancel|exchange|replace|resolve|start\s+a\s+return|request\s+a\s+refund
     |get\s+(?:a\s+)?refund|process\s+(?:a\s+)?refund|print(?:ing)?\s+(?:a\s+)?label|arrange(?:\s+(?:a\s+)?)?pickup|send\s+(?:a\s+)?replacement
     |issue\s+(?:a\s+)?refund|provide\s+(?:a\s+)?label)\b
  |\b(?:walk|guide)\s+you\s+through\b
  |\b(?:expedite|fast-?track|prioriti[sz]e|rush|speed(?:\s+(?:this|it|up))?|fast(?:\s|-)?refund|quick(?:\s|-)?refund|refund\s+asap|asap)\b
"""

# Fold Amazon-style imperatives + "if available you'll see it" into T04.
#
# Reassigns the T04 pattern, superseding the T04 entry in the RX literal. On
# top of that entry it accepts "tracking page" as a "please check ..." target
# and adds a branch for imperative UI verbs ("go to your orders", ...).
#
# The pattern is compiled with re.X, which discards literal whitespace outside
# character classes, so every intended space must be written as \s. The
# "you ll" variant below therefore uses `(?:'|\s)?` (a literal space in that
# group would silently reduce to an empty alternative and never match
# "you ll see").
RX["T04_MitigationHedge"] = r"""
   \b(?:maybe|perhaps|might|may|could|usually|typically|generally|possibly|likely|unlikely|roughly|approximately)\b
  |it\s+(?:seems|looks|appears)\s+like\b
  |\bas\s+far\s+as\s+i\s+can\s+tell\b
  |a\s+(?:bit|little)\b
  |unfortunately\b
  |\bat\s+this\s+time\b|\bright\s+now\b|\bfor\s+now\b
  |\bin\s+most\s+cases\b|\bin\s+general\b|\bshould\s+be\s+able\s+to\b|\bmay\s+be\s+able\s+to\b
  # deflection/conditional UI guidance
  |\bif\b[^.]{0,80}\b(?:available|eligible|in\s+stock)\b[^.]{0,60}\b(?:you(?:'|\s)?ll|you\s+will)\s+see\b
  |\byou(?:\s+can)?\s+(?:check|see|find|view)\s+(?:it|this|that|details|options|information)\b
  |\bplease\s+(?:check|visit|see|refer\s+to)\b\s+(?:our\s+)?(?:website|help\s+center|faq|support\s+page|policy|terms|your\s+orders|orders|tracking\s+page)\b
  |\btry\s+again\s+later\b|\bcheck\s+back\s+later\b|\bit\s+should\s+appear\b|\byou\s+may\s+see\b
  |\bsubject\s+to\s+availability\b
  # imperative UI verbs → procedural deflection
  |\b(?:open|go\s+to|navigate\s+to|select|choose|click|tap|press)\b\s+(?:your\s+orders|orders|help\s+center|account\s+settings|returns?\s+center|tracking\s+page|payment[s]?|refund\s+center)\b
  |\bdepending\s+on\b|\bmay\s+vary\b|\bcannot\s+guarantee\b
"""


# One compiled matcher per tag. IGNORECASE makes matching case-insensitive and
# VERBOSE lets the pattern sources above use layout whitespace and # comments.
COMPILED = {
    tag: re.compile(source, re.IGNORECASE | re.VERBOSE)
    for tag, source in RX.items()
}

def predict_labels(text: str):
    """Return the list of label tags whose patterns fire on *text*.

    The text is normalised with ``norm_text`` first. Each tag's compiled
    pattern is then searched; the apology tag is additionally dropped when
    ``negated`` finds a negator within three words before an apology word.
    Matching tags are returned in ``ORDER`` order. When nothing matches, the
    result is the single fallback tag ``T06_GeneralAnswer``.
    """
    cleaned = norm_text(text)

    def fires(tag):
        # True when the tag's compiled pattern occurs anywhere in the text.
        return COMPILED[tag].search(cleaned) is not None

    matched = {
        tag
        for tag in (
            "T01_EmpathyGratitude",
            "T03_PositiveFlex",
            "T04_MitigationHedge",
            "T05_RefusalMinus",
        )
        if fires(tag)
    }

    # An apology only counts when it is not negated ("not sorry", ...).
    if fires("T02_Apology") and not negated(
        r"(sorry|apolog\w+|regret|apolog(?:y|ies))", cleaned, 3
    ):
        matched.add("T02_Apology")

    if matched:
        return [tag for tag in ORDER if tag in matched]
    return ["T06_GeneralAnswer"]

# Load the replies to classify.
df = pd.read_csv(INPUT_CSV, encoding="utf-8-sig")

# Optionally overlay rewritten reply text, matched on COL_ID. Rows without a
# rewrite keep their original text.
if REWRITES_CSV.exists():
    rw = pd.read_csv(REWRITES_CSV, encoding="utf-8-sig")
    text_col = "new_reply_text" if "new_reply_text" in rw.columns else "reply_text"
    # If the rewrite column has the same name as COL_REPLY, the merge would
    # suffix it with "_new" and reading df[text_col] afterwards would return
    # the ORIGINAL text, silently discarding every rewrite. Give the incoming
    # column its post-merge name up front so the right column is always read.
    merged_col = f"{COL_REPLY}_new" if text_col == COL_REPLY else text_col
    overlay = rw[[COL_ID, text_col]].rename(columns={text_col: merged_col})
    df = df.merge(overlay, on=COL_ID, how="left", suffixes=("", "_new"))
    df[COL_REPLY] = df[merged_col].fillna(df[COL_REPLY])

# Classify each reply and collect one output record per input row.
rows = []
for _, r in df.iterrows():
    raw = r.get(COL_REPLY, "")
    # A missing reply is NaN in pandas; str(NaN) would emit the text "nan".
    txt = "" if pd.isna(raw) else str(raw).strip()
    labels = predict_labels(txt)
    rows.append({
        "reply_id": r.get(COL_ID),
        "prompt_id": r.get(COL_PROMPT),
        "condition": r.get(COL_COND),
        "top_label": labels[0],          # first tag in ORDER that matched
        "all_labels": "|".join(labels),  # every matched tag, pipe-separated
        "reply_text": txt,
    })

# Write the predictions; the explicit column list fixes the output layout.
pd.DataFrame(
    rows,
    columns=["reply_id", "prompt_id", "condition", "top_label", "all_labels", "reply_text"],
).to_csv(CSV_OUT, index=False, encoding="utf-8-sig")

print(f"wrote {len(rows)} rows → {CSV_OUT}")


wrote 120 rows → predictions.csv
