In [1]:
import os
import json
from tqdm import tqdm

In [5]:
# Input paths. Each directory holds one file per conversation id:
#   rubrics_dir -> rubrics_{id}.json
#   ref_dir     -> ref_rubrics_{id}.json
#   prompt_dir  -> conversation_{id}.txt and (optionally) reference_{id}.txt
rubrics_dir = "outputs/rubrics"
ref_dir = "outputs/ref_rubrics"
prompt_dir = "outputs/prompts"

# Read the list of conversation_id in the training set.
# The encoding is given explicitly so the read does not depend on the
# platform's locale default (JSON files are UTF-8).
with open("outputs/splits/train/train_indices.json", encoding="utf-8") as f:
    train_indices = json.load(f)

# Output file (JSON Lines); create its parent directory up front so the
# writer cell can open it without further checks.
output_path = "outputs/dpo_pairs/rubrics_preference_pairs.jsonl"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

In [6]:
def format_rubrics(rubrics):
    """Render a list of rubric dicts as a plain-text bullet list.

    Each usable rubric becomes a three-line entry::

        - Criterion: <criterion>
          Axis: <axis>
          Point: <point>

    A rubric is usable when it is a dict with a "criterion" key, a score
    under "point" (or "points" when "point" is absent), and an axis taken
    from the first "axis:<name>" entry in "tags" or, failing that, from
    the "axis" key. Unusable entries are silently dropped.

    Args:
        rubrics: Parsed JSON payload; expected to be a list of dicts.

    Returns:
        The entries joined by newlines, or "" when `rubrics` is not a
        list or contains no usable rubric.
    """
    if not isinstance(rubrics, list):
        return ""

    lines = []
    for r in rubrics:
        if not (isinstance(r, dict) and "criterion" in r):
            continue

        criterion = r["criterion"]
        # get points
        point = r.get("point", r.get("points", None))
        # get axis (tags / axis); `or []` tolerates an explicit "tags": null
        tags = r.get("tags") or []
        axis = next(
            (
                # maxsplit=1 keeps everything after the "axis:" prefix,
                # even when the axis name itself contains a colon.
                tag.split(":", 1)[1]
                for tag in tags
                if isinstance(tag, str) and tag.startswith("axis:")
            ),
            r.get("axis", None),
        )

        if point is not None and axis is not None:
            lines.append(f"- Criterion: {criterion}\n  Axis: {axis}\n  Point: {point}")
    return "\n".join(lines)


# Build one DPO preference pair per training conversation:
#   prompt   = conversation text + optional reference info
#   chosen   = formatted reference rubrics
#   rejected = formatted generated rubrics
count = 0
skipped = 0

# Explicit UTF-8: records are dumped with ensure_ascii=False, so writing with
# a non-UTF-8 locale default could raise UnicodeEncodeError.
with open(output_path, "w", encoding="utf-8") as writer:
    for i in tqdm(train_indices, desc="🔧 Building DPO pairs"):
        # Check file existence
        gen_path = os.path.join(rubrics_dir, f"rubrics_{i}.json")
        ref_path = os.path.join(ref_dir, f"ref_rubrics_{i}.json")
        conv_path = os.path.join(prompt_dir, f"conversation_{i}.txt")
        refinfo_path = os.path.join(prompt_dir, f"reference_{i}.txt")

        if not (os.path.exists(gen_path) and os.path.exists(ref_path) and os.path.exists(conv_path)):
            skipped += 1
            continue

        # Load conversation
        with open(conv_path, encoding="utf-8") as f:
            conversation = f.read().strip()

        # Optional: Add reference info to prompt if exists
        reference_info = ""
        if os.path.exists(refinfo_path):
            with open(refinfo_path, encoding="utf-8") as f:
                reference_info = f.read().strip()

        # Prompt = conversation + reference
        prompt = f"Conversation:\n{conversation}\n\nReference Info:\n{reference_info}"

        # Load rubrics
        with open(ref_path, encoding="utf-8") as f:
            reference_rubrics = json.load(f)

        with open(gen_path, encoding="utf-8") as f:
            generated_rubrics = json.load(f)

        # Filter out empty generated rubrics
        if not isinstance(generated_rubrics, list) or len(generated_rubrics) == 0 or all(r == {} for r in generated_rubrics):
            skipped += 1
            continue

        # Format rubrics as text
        ref_text = format_rubrics(reference_rubrics)
        gen_text = format_rubrics(generated_rubrics)

        # A non-empty rubric list can still format to "" when none of its
        # entries has a criterion, a point and an axis. A pair with an empty
        # side carries no usable preference, so count it as skipped.
        if not ref_text or not gen_text:
            skipped += 1
            continue

        # Write DPO pair
        writer.write(json.dumps({
            "prompt": prompt,
            "chosen": ref_text,
            "rejected": gen_text
        }, ensure_ascii=False) + "\n")

        count += 1

print(f"\n✅ Saved {count} DPO preference pairs to {output_path}")
print(f"❌ Skipped {skipped} examples due to missing or empty files.")



🔧 Building DPO pairs:   0%|          | 0/1861 [00:00<?, ?it/s]

🔧 Building DPO pairs: 100%|██████████| 1861/1861 [00:00<00:00, 6060.28it/s]


✅ Saved 1861 DPO preference pairs to outputs/dpo_pairs/rubrics_preference_pairs.jsonl
❌ Skipped 0 examples due to missing or empty files.



