# Politeness Rule Tagger (Label Studio Predictions)

This notebook loads `tasks_clean.json`, applies **rule-based** tags (no ML), and writes `predictions_fixed.json` for **Settings → Predictions** upload in Label Studio.

**Edit the `CUSTOM_RULES` dict cell** if you want to tweak the regex rules.

In [8]:
import json, re
from pathlib import Path

TASKS_PATH = Path('E:/RUHR Bochum/Research Project 1/Politeness Research 1/data/tasks_clean.json')  # change if needed
OUT_PATH = Path('E:/RUHR Bochum/Research Project 1/Politeness Research 1/data/predictions_fixed.json')
MODEL_VERSION = 'rules-v3-multilabel'

# Load tasks
with open(TASKS_PATH, 'r', encoding='utf-8') as f:
    tasks = json.load(f)
len(tasks)

FileNotFoundError: [Errno 2] No such file or directory: 'E:\\RUHR Bochum\\Research Project 1\\Politeness Research 1\\data\\tasks_clean.json'

In [2]:
# Customize rules here (regex). Keep it simple and transparent; NO ML.
CUSTOM_RULES = {
    'T02_Apology': r"\b(sorry|apolog(?:y|ize|ise|ized|ised|izing|ising)|apologies|regret)\b",
    'T01_EmpathyGratitude': r"\b(thank(?:s| you)|appreciate|i understand|we understand|we'?re here to help|happy to help)\b",
    'T03_PositiveFlex': r"\b(expedite|prioriti[sz]e|urgent|we'?ll (try|do our best)|i'?ll (try|do my best))\b",
    'T04_MitigationHedge': r"\b(might|may|could|usually|typically|generally|possibly|perhaps|unfortunately)\b",
    'T05_GuidancePolicy': r"\b(go to|open|select|click|choose|visit|navigate|tap|your orders|help center|policy|return within|terms|start a return|request a refund)\b",
    'T06_RefusalMinus': r"\b(can'?t|cannot|unable to|not possible|won'?t be able)\b",
}

patterns = {k: re.compile(v, re.I) for k, v in CUSTOM_RULES.items()}

In [3]:
def predict_labels(text: str):
    t = (text or '').lower()
    labels = [tag for tag, pat in patterns.items() if pat.search(t)]
    return labels or ['skip/unclear']

predictions = []
for item in tasks:
    task_id = item.get('id')
    text = item.get('data', {}).get('reply_text', '')
    labels = predict_labels(text)
    predictions.append({
        'task': task_id,
        'model_version': MODEL_VERSION,
        'score': 0.5,
        'result': [{
            'from_name': 'label',
            'to_name': 'reply_text',
            'type': 'choices',
            'value': {'choices': labels}
        }]
    })
len(predictions), predictions[0]['result'][0]['value']

NameError: name 'tasks' is not defined

In [4]:
with open(OUT_PATH, 'w', encoding='utf-8') as f:
    json.dump(predictions, f, ensure_ascii=False, indent=2)
print('Wrote', len(predictions), 'predictions to', OUT_PATH)
predictions[:2]

FileNotFoundError: [Errno 2] No such file or directory: '\\mnt\\data\\predictions_fixed.json'